User:Slick/import news.kremlin.ru news gallery.sh

From Wikimedia Commons, the free media repository
Jump to: navigation, search
#!/bin/bash

#
# Simply upload images from http://news.kremlin.ru/news/* news page gallery in a batch. No error handling!
# It may stop working if the page layout changes in the future.
#
# version 0.1 - written by http://commons.wikimedia.org/wiki/User:Slick
#
# Howto use:
#
# 1) change link
# 2) change filename
# 3) change template
# 4) be sure pywikipedia/upload.py exists and is working
#

# ---------------------------------------------------------------------------
# Per-run settings (the "change link / filename / template" steps).
# The page URL is used both for scraping and for the |source= field of the
# upload template, so it is defined once here to keep the two in sync.
# ---------------------------------------------------------------------------
news_url="http://news.kremlin.ru/news/18089"
file_prefix="Moscow_Victory_Day_Parade_2013-05-09"
photo_date="2013-05-09"
category="2013 Moscow Victory Day Parade"

# Fetch the page and flatten it to a single line, then re-split it so that
# every gallery <li> item, and then the <b>...</b> element inside it, ends up
# on a line of its own. Only the lines starting with <b> reach the loop.
#
# NOTE: tr treats the brackets in "[\r\n\t]" literally, so '[' and ']' are
# deleted from the page as well. Kept as-is so descriptions stay unchanged.
#
# read -r keeps backslashes in the scraped text intact; IFS= keeps the line
# verbatim.
lynx -source "${news_url}" \
  | tr -d "[\r\n\t]" \
  | sed 's/<li><a/\n<li><a/g' \
  | sed 's/<\/li>/<\/li>\n/g' \
  | grep '^<li><a rel="group"' \
  | sed 's/<b>/\n<b>/' \
  | sed 's/<\/b>/<\/b>\n/' \
  | grep '^<b>' \
  | while IFS= read -r line; do

    # The <b> element carries '|'-separated fields:
    # field 2 is the description, field 5 is the big image url.
    desc=$(printf '%s\n' "${line}" | cut -d '|' -f 2)
    url=$(printf '%s\n' "${line}" | cut -d '|' -f 5)

    # Without a url there is nothing to download; skip instead of uploading
    # a bogus file.
    if [[ -z "${url}" ]]; then
      printf 'warning: no image url found, skipping entry: %s\n' "${line}" >&2
      continue
    fi

    # Extract the uid of the image, independent of the extension:
    # last path component of the url, cut at its first dot.
    uid=${url##*/}
    uid=${uid%%.*}

    # Debug output - enable before uploading to check the parsed values.
    #printf '%s\n' "${desc}" "${url}" "${uid}"

    # Build the filename; the uid keeps it unique.
    file="${file_prefix}_(${uid}).jpg"

    # Download the image. wget -O creates the output file even when the
    # download fails, so remove the stub and skip the upload in that case.
    if ! wget "${url}" -O "${file}"; then
      printf 'warning: download failed, skipping upload: %s\n' "${url}" >&2
      rm -f -- "${file}"
      continue
    fi

    # File description page, built on the fly for this image.
    wikitext="{{Information
|description={{ru|1=${desc}}}
|date=${photo_date}
|source=${news_url}
|author=Presidential Press and Information Office
|permission={{Kremlin.ru}}
|other_versions=
|other_fields=
}}

[[Category:${category}]]
[[Category:Import by User:Slick-o-bot/${category}]]"

    # Upload the file. "n" is piped to every prompt so that warnings and
    # duplicates are not ignored. This also gives upload.py its own stdin,
    # so it cannot consume the loop's input.
    yes 'n' | python2.7 pywikipedia/upload.py -keep -noverify "${file}" "${wikitext}" \
      || printf 'warning: upload.py exited non-zero for: %s\n' "${file}" >&2

    # Remove the local copy once the upload attempt is done.
    rm -- "${file}"

  done