# User:VICbot/source

# From Wikimedia Commons, the free media repository
# Jump to: navigation, search
#!/usr/bin/python
 
# NOTE: this script is Python 2 code (print statements, unichr, htmlentitydefs).
import sys, os
# Echo the home directory that the import path below is derived from.
print os.environ['HOME']
# Make the pywikipedia directory under $HOME importable before `import wikipedia`.
sys.path.append(os.environ['HOME'] + '/dschwen_bot/pywikipedia')
 
import wikipedia
import MySQLdb
import re
import math
import unicodedata
import htmlentitydefs 
 
from urllib import FancyURLopener
from PHPUnserialize import *
 
 
def unescape_charref(ref):
	"""Decode one numeric character reference such as "&#65;" or "&#x41;".

	ref is the complete reference text, including the leading "&#" and the
	trailing ";".  The hexadecimal marker is accepted in either case
	("x" or "X").

	Returns the one-character unicode string for the code point.  If the
	digits cannot be parsed, or the code point is outside the range unichr
	supports, the reference is returned unchanged so that stray text which
	merely looks like a reference does not abort the whole run.
	"""
	name = ref[2:-1]
	base = 10
	if name[:1] in ("x", "X"):
		name = name[1:]
		base = 16
	try:
		return unichr(int(name, base))
	except (ValueError, OverflowError):
		# not a decodable reference (e.g. "&#abc;" or "&#x110000;"): leave as is
		return ref
 
def replace_entities(match):
	"""Regex substitution callback: return the replacement for one matched entity.

	match is a match object whose whole match is an entity such as "&amp;" or
	"&#65;".  Numeric references (second character "#") are handed to
	unescape_charref; named entities are looked up in
	htmlentitydefs.name2codepoint.  An unknown name is returned unchanged.
	"""
	entity = match.group()

	# numeric form: "&#...;"
	if entity[1] == "#":
		return unescape_charref(entity)

	# named form: strip the leading "&" and trailing ";" for the lookup
	codepoint = htmlentitydefs.name2codepoint.get(entity[1:-1])
	if codepoint is None:
		return entity
	return unichr(codepoint)
 
def unescape(data):
	"""Return data with every "&name;" / "&#digits;" style entity passed through replace_entities."""
	entityPattern = re.compile(r"&#?[A-Za-z0-9]+?;")
	return entityPattern.sub(replace_entities, data)
 
 
# Timestamp of the form "12:34, 5 March 2008 (UTC)"; groups are hour, minute,
# day, month name, year and zone.  Not referenced in the visible part of this script.
timeRE = re.compile(r'(\d\d):(\d\d), (\d\d?) (January|February|March|April|May|June|July|August|September|October|November|December) (\d\d\d\d) \((UTC|GMT)\)')
# Wiki link to a user page, e.g. "[[User:Name|label]]"; group 1 is the user name.
userRE = re.compile(r'\[\[[Uu]ser:([^\|\]]+)[^\]]*\]\]')
# One bullet line of the scope list, "*[[:Image:Name|scope]]";
# group 1 is the image name, group 2 the scope text.
scopelistRE = re.compile(r'\*\s*\[\[:[Ii]mage:([^\|\]]+).*\|(.+)\]\]\s*$')

# Work queues filled while scanning the candidates and flushed afterwards.
userNote = {}           # nominator -> accumulated talk page notice text
removeCandidates = []   # subpage names to strip from the candidate list
tagImages = []          # [ image, tag text ] pairs for promoted images
scopeList = []          # [ image, scope ] pairs for the by-scope list
numChanges = 0          # number of candidates handled in this run
 
site = wikipedia.getSite()
pageName = 'Commons:Valued_image_candidates'
 
 
 
try:
	connection = MySQLdb.connect(host="commonswiki-p.db.ts.wikimedia.org", user="xxx", passwd="xxx", db="commonswiki_p" )
	cursor = connection.cursor() 
	cursor.execute( "select page_title, GROUP_CONCAT( DISTINCT cl_to SEPARATOR '|') from revision, page left join categorylinks on page_id = cl_from  where page_latest=rev_id and page_title like 'Valued_image_candidates/%' and page_namespace=4 and ( TO_DAYS(rev_timestamp) - TO_DAYS(CURRENT_DATE) ) > -2 group by page_id" )
 
 
except MySQLdb.OperationalError, message: 
	errorMessage = "Error %d:\n%s" % (message[ 0 ], message[ 1 ] ) 
 
else:
	data = cursor.fetchall() 
	fields = cursor.description
	cursor.close()
	connection.close()
 
 
page = wikipedia.Page(site, pageName + "/candidate_list" )
candidates = unescape( page.get() )
 
# abort if the qicbot marker is missing from the page 
if string.find(candidates, "<!-- VICBOT_ON -->") < 0:
	print "the string <!-- VICBOT_ON --> was not found on page " + pageName + "/candidate list"
	sys.exit(0)
 
 
#
# get potential candidates from db
#
 
for row in range(len(data)):
	name = data[row][0]
	cats = data[row][1]
 
	if cats == None :
		print "Candidate %s has no categories!" % name
		continue
 
	catlist = cats.split('|')
 
	status = 0
 
	for cat in catlist :
		if cat == 'Supported_valued_image_candidates' :
			status = 0
		if cat == 'Opposed_valued_image_candidates' :
			status = 0
		if cat == 'Promoted_valued_image_candidates' :
			status = 1
		if cat == 'Undecided_valued_image_candidates' :
			status = -1
		if cat == 'Declined_valued_image_candidates' :
			status = -1
		if cat == 'Discussed_valued_image_candidates' :
			status = 0
		if cat == 'Nominated_valued_image_candidates' :
			status = 0
 
	if status == 0 :
		print "Nothing to do here (%s, %s)" % ( name, cats )
		continue
 
	#
	# get nomination subpage
	#
 
	page = wikipedia.Page(site, 'Commons:' + name.decode('utf-8') )
	text = ""
	if page.exists() :
		text = page.get()
	else :
		print "Odd, VIC subpage does not exist!"
		continue
 
 
	#
	# extract parameters
	#
 
	subpage = ''
	image   = ''
	scope   = ''
	nominator = ''
	review = ''
	recordingReview = False
 
	for rawline in string.split(text, "\n") :
		line = rawline.lstrip(' ')
 
		if line[:9] == '|subpage=' and subpage == '' :
			subpage = unescape( line[9:] )
		if line[:7] == '|image=' and image == '' :
			image = line[7:]
		if line[:7] == '|scope=' and scope == '' :
			scope = line[7:]
		if line[:11] == '|nominator=' and nominator == '' :
			user = userRE.search(line)
			if user != None :
				nominator = user.group(1)
 
		if line[:8] == '|review=' :
			recordingReview = True
		if recordingReview :
			review += rawline + "\n"
 
	if subpage == '' or image == '' or scope == '' or nominator == '' :
		print "Candidate %s is missing cruicial parameters" % name
		continue
 
	if string.find( candidates, subpage ) < 0 :
		print "Candidate %s is not listed, I assume it was already handled!" % subpage
		continue
 
	if string.find( review, '}}' ) < 0 :
		print "Unable to extract the review"
		review = '}}'
 
	print "Handling (%d) %s on %s, nominated by %s" % ( status, image, subpage, nominator )
	numChanges += 1
 
	# queue for removal from candidate list
	removeCandidates.append(subpage)
 
	if status == 1:
 
		spParam = ''
		if subpage != image :
			spParam = '|subpage=' + subpage
 
		# queue user notification
		try:
			userNote[nominator] += "{{VICpromoted|%s|%s%s%s\n" % ( image, scope, spParam, review )
		except KeyError:
			userNote[nominator] = "{{VICpromoted|%s|%s%s%s\n" % ( image, scope, spParam, review ) 
 
		# queue image page tagging
		tagImages.append( [ image, "{{subst:VI-add|%s%s}}\n" % ( scope, spParam ) ] )
 
		# queue for insertion into alphabetical scope list
		scopeList.append( [ image, scope ] )
 
 
# no writing, just debugging
#for item in tagImages :
#	print ("Tag %s with %s" % ( item[0], item[1]) ).encode("utf-8")
#sys.exit(0)
 
if numChanges == 0 :
	print "No action taken"
	#sys.exit(0)
 
 
 
#
# removing candidates from candidate list
#
 
wikipedia.setAction( "extract processed nominations" )
newText = ''
page = wikipedia.Page(site, pageName + "/candidate_list" )
candidates = page.get()
for line in string.split(candidates, "\n") :
	keepLine = True
	uline = unescape( line )
 
	for remove in removeCandidates :
		if string.find( uline , remove ) >= 0:
			keepLine = False
			print "remove %s" % line.encode("utf-8")
			continue
 
	if keepLine :
		newText += line + "\n"
 
#print newText.encode("utf-8")
page.put(newText)
 
 
 
#
# Tag images
#
 
wikipedia.setAction("Tag promoted Valued Image")
for image in tagImages :
	page = wikipedia.Page(site, 'Image:' + image[0] )
 
	if page.exists() :
		# already tagged maybe?
		text = page.get()
		text += "\n" + image[1]
		page.put(text)
	else :
		print "Oops " + image[0].encode("utf-8") + " doesn't exist..."
 
 
 
#
# User notifications
#
 
# Post the collected promotion notices, one new section per nominator.
wikipedia.setAction("Notify user of promoted Valued Image(s)")
for key, note in userNote.items() :
	page = wikipedia.Page(site, "User talk:" + key )

	# a talk page that does not exist yet is started with a welcome line
	if not page.exists() :
		text = 'Welcome to commons ' + key + ". What better way than starting off with a Valued Image promotion could there be? :-) --~~~~\n\n"
	else :
		text = page.get()

	text += "\n==Valued Image Promotion==\n" + note
	page.put(text)
 
 
 
#
# Alphabetical scope list
#
 
# Merge the newly promoted images into the by-scope list and re-sort it.
page = wikipedia.Page(site, 'Commons:Valued_images_by_scope' )
wikipedia.setAction("Insert into and resort alphabetical VI list by scope")
if page.exists() :
	text = page.get()
	pageLines = text.split("\n")

	# sort key (scope without apostrophes, upper-cased) -> list line
	newList = {}
	for imageName, scopeName in scopeList :
		newList[ scopeName.replace("'","").upper() ] = "*[[:Image:%s|%s]]" % ( imageName, scopeName )

	# lines already on the page are added second, so they win on a key clash
	for line in pageLines :
		match = scopelistRE.search(line)
		if match is not None :
			newList[ match.group(2).replace("'","").upper() ] = line

	sortedList = "\n".join( [ newList[key] for key in sorted(newList) ] )

	# Copy every non-list line through unchanged; the first list line is
	# replaced by the complete sorted list and the remaining ones are dropped.
	listPrinted = False
	chunks = []
	for line in pageLines :
		if scopelistRE.search(line) is None :
			chunks.append( line + "\n" )
		elif not listPrinted :
			listPrinted = True
			chunks.append( sortedList + "\n" )
	newText = "".join(chunks)

	page.put(newText)
 
# done!