User:JarektBot/AddCreators2WaltersFiles.py

From Wikimedia Commons, the free media repository
Jump to: navigation, search
#!/usr/bin/python
# -*- coding: utf-8  -*-
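'''
Bot script: add Creator templates to Walters Art Museum files on Commons.

Reads a CSV table with the columns "old", "new" and "cat", then walks the
files in "Category:Media from the Walters Art Museum: no creator template".
For each file whose title has a known "old" artist name between "File:" and
the first hyphen, the "artist" field of the page text is rewritten to the
"new" text and the maintenance category is replaced by "cat".
Artist names that are not in the table are tallied, and the ones seen more
than once are written to "User:Jarekt/JarektBot Tasks".
'''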
import sys, wikipedia as pywikibot, csv, string, catlib, re
 
def _load_creator_table(csv_path, encoding):
    """Load the artist lookup table from the CSV file.

    The CSV is read through ``csv.DictReader`` and its ``old``, ``new`` and
    ``cat`` cells are decoded with *encoding*.  Returns a dict mapping each
    ``old`` value to a ``(new, cat)`` tuple.  When an ``old`` value occurs in
    several rows the first row wins.
    """
    table = {}
    # "rb" because the Python 2 csv module works on byte strings; the cells
    # are decoded explicitly below.  The with-block closes the file.
    with open(csv_path, "rb") as csv_handle:
        reader = csv.DictReader(csv_handle, dialect='excel', delimiter=',')
        for row in reader:
            old = unicode(row.get(u'old'), encoding)
            new = unicode(row.get(u'new'), encoding)
            cat = unicode(row.get(u'cat'), encoding)
            table.setdefault(old, (new, cat))
    return table


def _rewrite_artist(text, old, new, cat, no_creator_cat_re):
    """Return *text* with its artist field and maintenance category updated.

    The first ``artist = <old>`` field (optionally followed by a
    parenthesised remark, which is dropped) is replaced by
    ``artist           = <new>``.  If that changed the text, the first match
    of *no_creator_cat_re* is replaced by ``[[<cat>]]`` -- or simply removed
    when *cat* already occurs in the text -- and triple newlines are
    collapsed to double ones.  If the artist field was not found, *text* is
    returned unchanged.
    """
    # The name comes from a data file, so it must be matched literally.
    field = r"artist\s*=\s*" + re.escape(old)
    new_field = "artist           = " + new
    # A callable replacement is inserted verbatim; a plain string would have
    # its backslashes / group references interpreted by re.sub.
    literal_field = lambda match: new_field

    updated, hits = re.subn(field + r"\s*\([^\)]*\)", literal_field, text,
                            count=1)
    if not hits:
        # Only fall back to the bare form when the parenthesised form was
        # absent; otherwise a *new* value that starts with the *old* name
        # would be matched and rewritten a second time.
        updated = re.sub(field, literal_field, text, count=1)
    if updated == text:
        return text

    if cat not in updated:
        new_cat = "[[" + cat + "]]\n"
        updated = no_creator_cat_re.sub(lambda match: new_cat, updated,
                                        count=1)
    else:
        updated = no_creator_cat_re.sub("", updated, count=1)
    return updated.replace("\n\n\n", "\n\n")  # delete empty lines


def main(args):
    """Add Creator templates to files in the Walters "no creator" category.

    For every page in namespace 6 of the category, the text between
    ``File:`` and the first hyphen of the title is taken as the artist name.
    If the name is in the CSV table the page text is rewritten (see
    ``_rewrite_artist``), the diff is shown and, unless ``Debug`` is set, the
    page is saved.  Names missing from the table are counted, and those seen
    more than once are written to ``User:Jarekt/JarektBot Tasks``.

    *args* is accepted for the command-line entry point but is not used.
    """
    csvFile = "Jarek's/Walters_Creators1.csv"
    edit_sum = "Add Creator template"
    Debug = False
    max_edits = -1      # stop after this many changed pages; -1 never matches
    enc = 'utf-8'

    creators = _load_creator_table(csvFile, enc)
    Artists = {}        # artist name missing from the table -> file count

    site = pywikibot.getSite(u'commons', u'commons')
    old_cat = "Category:Media from the Walters Art Museum: no creator template"
    old_cat_re = re.compile(
        r"\[\[\s*Category:\s*Media from the Walters Art Museum: no creator template\s*\]\]\s*")
    title_re = re.compile(r"File:([^\-]*)\-")
    file_cat = catlib.Category(site, old_cat)
    iCount = 0
    for file_page in file_cat.articles(startFrom=None):
        if file_page.namespace() != 6:      # 6 is MediaWiki's File namespace
            continue
        m = title_re.search(file_page.title())
        if m is None:
            continue
        artist = m.group(1).strip()
        if artist not in creators:
            Artists[artist] = Artists.get(artist, 0) + 1
            continue

        new, cat = creators[artist]
        file_txt = file_page.get()
        new_txt = _rewrite_artist(file_txt, artist, new, cat, old_cat_re)
        if new_txt == file_txt:
            continue

        # save changed text
        pywikibot.showDiff(file_txt, new_txt)
        iCount += 1
        if iCount == max_edits:
            break
        try:
            if not Debug:
                status, reason, data = file_page.put(new_txt, edit_sum, False, True)
                if str(status) != '302':
                    # output() takes one text argument; its second
                    # positional parameter is a decoder, not more text.
                    pywikibot.output(u"%s %s" % (status, reason))
        except pywikibot.LockedPage:
            pywikibot.output(u"page is locked")

    # Save info on missing creator pages
    report = []
    for artist in sorted(Artists, key=Artists.get, reverse=True):
        if Artists[artist] == 1:
            break       # sorted by count, so everything after is 1 as well
        report.append('* [[:Category:' + artist + '|' + artist + ']] ! '
                      + str(Artists[artist]) + '\n')
    txt = ''.join(report)
    page = pywikibot.Page(site, u'User:Jarekt/JarektBot Tasks')
    page.put(txt, 'New Creators')
 
if __name__ == "__main__":
    # Script entry point: hand the command-line arguments (minus the script
    # name) to main(); the closing message is printed even if main() raises.
    try:
        main(sys.argv[1:])
    finally:
        print("All done!")