User:Emijrpbot/Interwiki-updater

From Wikimedia Commons, the free media repository
Jump to: navigation, search

New version[edit]

  • Author: emijrp
  • License: GPL
# -*- coding: utf-8  -*-
 
import wikipedia, pagegenerators
import re
 
# Site handles: Commons is the wiki passed to the page generator below;
# the English Wikipedia handle is kept for looking up en: language links.
commons = wikipedia.Site('commons', 'commons')
wikipediaen = wikipedia.Site('en', 'wikipedia')

# Page source: namespace 0 of Commons, starting at the given title and
# excluding redirects.
gen = pagegenerators.AllpagesPageGenerator(
    site=commons,
    namespace=0,
    start=u'100 Great Paintings',
    includeredirects=False
)

# Wrap the source in a preloading generator (pageNumber=100, lookahead=100).
preloadingGen = pagegenerators.PreloadingGenerator(
    gen,
    pageNumber=100,
    lookahead=100
)
 
# For every page that carries an English Wikipedia language link, rebuild the
# page's language links from the links found on that English article (plus
# the English article itself) and save the page when the text grew.
for page in preloadingGen:
    text = page.get()
    iws = wikipedia.getLanguageLinks(text)
    # Pages without an en: link give us nothing to copy from.
    if wikipediaen not in iws:
        continue
    try:
        enPage = iws[wikipediaen]
        wikipedia.output(enPage.title())
        iwsen = enPage.interwiki()
        iwsen.append(enPage)
        # One entry per site; a later page for the same site replaces an
        # earlier one.
        dic = dict([(linked.site(), linked) for linked in iwsen])
        newtext = wikipedia.replaceLanguageLinks(text, dic)
        # Save only when the new text is more than 10 characters longer
        # than the old one.
        if len(text) + 10 < len(newtext):
            wikipedia.showDiff(text, newtext)
            page.put(newtext, u'Updating interwiki')
    except (KeyboardInterrupt, SystemExit):
        # Let the operator stop the bot; a bare "except" used to swallow this.
        raise
    except Exception:
        # Best effort: a failure on one page must not stop the run, but it
        # should be visible in the log.
        wikipedia.output(u'Skipping [[%s]]: could not update interwiki links'
                         % page.title())

Old version[edit]

  • Author: emijrp
  • License: GPL
# -*- coding: utf-8  -*-
 
from __future__ import generators
import sys, re
import wikipedia, pagegenerators,catlib, config
import string
import random
 
# Wiki used to resolve the namespace of the -start title.
site = wikipedia.Site("commons", "commons")

# Page source chosen on the command line. It stays None when neither
# -file nor -start is given, which is reported below instead of failing
# later with a NameError.
gen = None
for arg in wikipedia.handleArgs():
    if arg.startswith('-file'):
        # "-file:NAME" carries the name after the separator; a bare "-file"
        # (or "-file:") has none, so ask for it.
        textfilename = arg[6:]
        if not textfilename:
            textfilename = wikipedia.input(u'Please enter the filename:')
        gen = pagegenerators.TextfilePageGenerator(textfilename)
    elif arg.startswith('-start'):
        if len(arg) == 6:
            firstPageTitle = wikipedia.input(u'Which page do you want to chage?')
        else:
            firstPageTitle = arg[7:]
        # Split the requested title into namespace and bare title.
        startPage = wikipedia.Page(site, firstPageTitle)
        namespace = startPage.namespace()
        firstPageTitle = startPage.titleWithoutNamespace()
        gen = pagegenerators.AllpagesPageGenerator(firstPageTitle, namespace)

if gen is None:
    wikipedia.output(u'No pages to work on: use -file:NAME or -start:TITLE')
    sys.exit(1)

preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=250)
 
# First English Wikipedia link in a page, e.g. [[en:Title]].
# Group 1 is the language code, group 2 the linked title.
# Compiled once here instead of once per page.
enLinkRe = re.compile(r"(?i)\[\[(en):([^]]*?)\]\]")

# For each page that is neither a redirect nor a disambiguation page, take
# its first en: link, read the language links of that English article and
# rewrite the page's language links from them (English article included).
for page in preloadingGen:
    if page.isRedirectPage() or page.isDisambig():
        continue

    ctext = page.get()
    ctitle = page.title()

    wikipedia.output(u"-" * 50)
    wikipedia.output(u"Analizando: [[%s]]" % ctitle)

    # Only the first en: link is considered; an empty title means there is
    # nothing to look up.
    found = enLinkRe.search(ctext)
    if found is None or not found.group(2):
        continue

    try:
        # The pattern is case-insensitive, so normalise the language code.
        # Building the page inside the try means a malformed link only
        # skips this page.
        lang = found.group(1).lower()
        p = wikipedia.Page(wikipedia.Site(lang, "wikipedia"), found.group(2))
        if not p.exists() or p.isRedirectPage() or p.isDisambig():
            continue

        wiws = p.interwiki()
        wiws.append(p)
        wiws.sort()

        # Strip the existing language links, then append the new list after
        # a blank line, one link per line.
        links = [u"\n[[%s:%s]]" % (linked.site().lang, linked.title())
                 for linked in wiws]
        nuevo = wikipedia.removeLanguageLinks(ctext, p.site())
        nuevo += u"\n" + u"".join(links)

        # Save only when the text grew by more than 5 characters (which
        # also implies that it changed).
        if len(nuevo) > len(ctext) + 5:
            wikipedia.showDiff(ctext, nuevo)
            page.put(nuevo, u"updating interwikis")
        else:
            wikipedia.output(u"Los interwikis ya estan actualizados")
    except (KeyboardInterrupt, SystemExit):
        # Let the operator stop the bot; a bare "except" used to swallow this.
        raise
    except Exception:
        # Best effort: report the failure and carry on with the next page.
        wikipedia.output(u"Skipping [[%s]]: %r" % (ctitle, sys.exc_info()[1]))