User:Pfctdayelise/daily-image-l
From Wikimedia Commons, the free media repository
cron job:
pfctdayelise@blants:0 ~$ crontab -l
#min hr mday month wday cmd
0 15 * * * ~pfctdayelise/dailyimagel.py
dailyimagel.py [edit]
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Build and send the daily-image-l mail for today's Commons Picture of the Day.

The script downloads the "Picture of the day/Today" page with wget, extracts
the image name and its multilingual captions, looks up the image's categories
(to describe its licence) and its Featured Picture / Quality Image backlink
(to describe why it was chosen), writes the resulting message to
``mailfilename`` and pipes it to sendmail.  If anything goes wrong while
building the message, the contents of ``mailerror`` are sent instead, with
the traceback appended.

The syntax is valid on both Python 2.6+ and Python 3.
"""

import os
import re
import subprocess
import sys
import traceback
from datetime import date

# Base wget command line; fetch() splits it into an argument vector.
wget = '''wget -S -erobots=off -q -O - '''
todaypotd = r'http://commons.wikimedia.org/w/index.php?title=Commons:Picture_of_the_day/Today&action=purge'
urlbase = r'http://commons.wikimedia.org/wiki/'
querycat = 'http://commons.wikimedia.org/w/query.php?what=categories&format=txt&titles='
querylinks = r'http://commons.wikimedia.org/w/query.php?what=imagelinks&ilnamespace=4&format=txt&illimit=300&titles='

# Patterns used to scrape the downloaded pages.
repotdcontent = re.compile(r'<!-- start content -->(.*?)<!-- end content -->', re.DOTALL)
reimagename = re.compile(r'<div class="magnify"><a href="/wiki/([^"]*)" class="internal"')
recats = re.compile(r'Category:(.*)')
refplinks = re.compile(r'Commons:Featured pictures/([^c].*)')
reqilinks = re.compile(r'Commons:Quality [Ii]mages/([^c].*)')
recaptions = re.compile(r'<ul>(.*?)</ul>', re.DOTALL)
reli = re.compile(r'</?li[^>]*>')
rea = re.compile(r'</?a[^>]*>')
rei = re.compile(r'</?i>')
# Raw string: the original non-raw literal relied on the invalid escape "\)".
renocaption = re.compile(r'\n[^:]*: Template:Potd[^)]*\)')

SENDMAIL = "/usr/sbin/sendmail"
mailfilename = "/users/blaugher/dailyimagel/dailyimagel.txt"
mailerror = "/users/blaugher/dailyimagel/mailerror.txt"
mailto = "brianna.laugher@gmail.com"
#mailto = "daily-image-l@lists.wikimedia.org"

# Licence category -> human readable name.  A tuple (not a dict) so that the
# sentences always appear in the same order when several categories match.
LICENSES = (
    ("GFDL", "GNU Free Documentation License"),
    ("CC-BY-SA-2.5,2.0,1.0", "Creative Commons Attribution ShareAlike license, all versions"),
    ("CC-BY-SA-1.0", "Creative Commons Attribution ShareAlike license, version 1.0"),
    ("CC-BY-SA-2.0", "Creative Commons Attribution ShareAlike license, version 2.0"),
    ("CC-BY-SA-2.5", "Creative Commons Attribution ShareAlike license, version 2.5"),
    ("CC-BY-1.0", "Creative Commons Attribution license, version 1.0"),
    ("CC-BY-2.0", "Creative Commons Attribution license, version 2.0"),
    ("CC-BY-2.5", "Creative Commons Attribution license, version 2.5"),
)


def _to_bytes(text):
    """Return *text* as UTF-8 bytes; byte strings are passed through."""
    if isinstance(text, bytes):
        return text
    return text.encode('utf-8')


def fetch(url, extra_args=()):
    """Download *url* with wget and return its output as text.

    The command is run from an argument list rather than through a shell, so
    characters such as ``$`` or ``(`` in a scraped image name cannot be
    interpreted by the shell.  stderr is merged into stdout, as the previous
    ``commands.getoutput`` call did (``-S`` prints the response headers
    there), and a single trailing newline is stripped for the same reason.
    """
    argv = wget.split() + list(extra_args) + [url]
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
    raw = proc.communicate()[0]
    text = raw.decode('utf-8', 'replace')
    if text.endswith('\n'):
        text = text[:-1]
    return text


def describe_license(categories):
    """Return the copyright-status sentence derived from *categories*.

    *categories* is the list of category names the image belongs to.  An
    empty string is returned when no known licence category is present.
    """
    lic = ""
    if "Self-published work" in categories:
        lic = "Created by a Wikimedian (see image page for details); "
    for tag, fullname in LICENSES:
        if tag in categories:
            lic += "Licensed under the " + fullname + '. '
    # Public-domain categories replace anything collected so far; when
    # several PD* categories are present the last one wins.
    if "Public domain" in categories:
        lic = "Public domain"
    for cat in categories:
        if not cat.startswith("PD"):
            continue
        if cat == "PD-self":
            lic = "Created by a Wikimedian (see image page for details); released into the public domain."
        elif cat == "PD Art":
            lic = "Reproduction of a two-dimensional work of art whose copyright has expired (public domain)."
        elif cat == "PD Old":
            lic = "Public domain (copyright expired due to the age of the work)."
        else:
            lic = "Public domain as a work of the " + cat[3:] + " organisation."
    return lic


def find_topic(linkstext):
    """Return ``(isFP, topics)`` from the image's backlink listing.

    ``isFP`` is True when a Featured Picture backlink is found, False when
    only a Quality Image backlink is found.  Raises IndexError when neither
    is present.
    """
    featured = refplinks.findall(linkstext)
    if featured:
        return True, featured[0]
    quality = reqilinks.findall(linkstext)
    if quality:
        return False, quality[0]
    print("Could not find FP or QI backlink, aborting")
    raise IndexError('Could not find FP or QI backlink')


def format_topic(topics):
    """Turn ``'A/B'`` into ``'A (B)'``; a topic without '/' is unchanged."""
    if '/' in topics:
        parts = topics.split('/')
        return parts[0] + ' (' + parts[1] + ')'
    return topics


def clean_captions(content):
    """Return the first ``<ul>`` list of *content* stripped of markup.

    ``<li>``, ``<a>`` and ``<i>`` tags are removed, as are entries that match
    the "Template:Potd..." placeholder pattern.  Raises IndexError when
    *content* has no ``<ul>`` list.
    """
    found = recaptions.findall(content)
    if not found:
        raise IndexError('no captions??')
    captions = found[0]
    for pattern in (reli, rea, rei, renocaption):
        captions = pattern.sub('', captions)
    return captions


def build_mail(imageurl, lic, isFP, topic, captions, today=None):
    """Assemble the complete mail text (headers and body).

    *today* defaults to the current date and is only a parameter so the
    subject line can be pinned down in tests.
    """
    if today is None:
        today = date.today()
    # NOTE(review): the header lines mix "\n" and "\r\n" endings.  They are
    # kept exactly as the original script wrote them because the receiving
    # sendmail's handling cannot be checked from here -- confirm and
    # normalise separately.
    # No From: header is written; the original author left it commented out
    # ("don't need this?").
    parts = [
        "To: " + mailto + '\n',
        'Content-Type: text/plain; charset=utf-8\r\n',
        "Subject: " + str(today) + '\r\n\r\n',
        "Body of email:\r\n",
        imageurl + '\n',
        'Copyright status: ' + lic + '\n',
    ]
    if isFP:
        parts.append('Featured Picture category: ' + topic + '\n\n')
    elif 'Subject' in topic:
        parts.append('Recognised as a Quality Image due to subject matter\n\n')
    else:
        parts.append('Recognised as a Quality Image due to technical merit\n\n')
    parts.append('Descriptions:\n')
    parts.append(captions)
    return ''.join(parts)


def createmail():
    '''
    Attempts to create an email at mailfilename.

    Raises IndexError when the page content, image name, captions or the
    FP/QI backlink cannot be found in the downloaded pages.
    '''
    page = fetch(todaypotd, ('--post-data', 'submit'))
    # Debug copy of the raw download, written to the current directory.
    with open('wget.txt', 'wb') as dump:
        dump.write(_to_bytes(page))
    content = repotdcontent.findall(page)

    # extract image name/url
    imagename = reimagename.findall(content[0])[0]
    imageurl = urlbase + imagename

    # attempt to determine license status from categories
    categories = recats.findall(fetch(querycat + imagename))
    lic = describe_license(categories)

    # determine FP category (or 'topic')
    isFP, topics = find_topic(fetch(querylinks + imagename))
    topic = format_topic(topics)

    # extract multilingual captions
    captions = clean_captions(content[0])

    # write info to file
    mail = build_mail(imageurl, lic, isFP, topic, captions)
    with open(mailfilename, 'wb') as out:
        out.write(_to_bytes(mail))
    return


def main():
    """Create today's mail (or the error mail) and pipe it to sendmail."""
    error = None
    path = mailfilename
    try:
        createmail()
    except Exception:
        # Send the canned error mail instead, with the full traceback so the
        # failure can be diagnosed from the message alone.
        error = traceback.format_exc()
        path = mailerror

    # get the email message from a file
    with open(path, 'rb') as handle:
        mail = handle.read()
    if error:
        mail += _to_bytes("Error information: " + error)

    # open a pipe to the mail program and write the data to the pipe
    proc = subprocess.Popen([SENDMAIL, '-t'], stdin=subprocess.PIPE)
    proc.communicate(mail)
    if proc.returncode:
        print("sendmail error: Exit code: %s" % proc.returncode)


if __name__ == '__main__':
    main()