Commons:National Archives and Records Administration/Categorize/Configuration
Jump to navigation
Jump to search
nara-cat.py[edit]
#! /usr/bin/env python
# Public domain; MZMcBride; 2011
import MySQLdb
import wikitools
import settings
# Title prefix shared by every on-wiki report page this script writes.
root_page = 'Commons:National Archives and Records Administration/Categorize/'

# Wikitext skeleton for a single per-category report. The two placeholders
# are, in order, the category name (used in the table caption) and the
# already-rendered table rows.
report_template = u'''\
{| class="wikitable sortable"
|+ [[:Category:%s]]
! No.
! File
! Categorized?
|-
%s
|}
'''

# Wiki session: API endpoint and account come from the local settings module.
wiki = wikitools.Wiki(settings.apiurl)
wiki.login(settings.username, settings.password)

# Database session: host and schema come from settings; ~/.my.cnf is handed
# to the driver as its option file (presumably supplying the credentials).
conn = MySQLdb.connect(
    read_default_file='~/.my.cnf',
    db=settings.dbname,
    host=settings.host,
)
cursor = conn.cursor()
# Look up every page in namespace 14 (MediaWiki's Category namespace) whose
# title matches the NARA series pattern. The query has no bound parameters,
# so the trailing '%' is sent to the server as the LIKE wildcard.
cursor.execute('''
/* nara-cat.py SLOW_OK */
SELECT
page_title
FROM page
WHERE page_namespace = 14
AND page_title LIKE 'US_National_Archives_series:_%';
''')

# Titles come back as UTF-8 byte strings; keep them as unicode from here on.
categories = [unicode(title_row[0], 'utf-8') for title_row in cursor.fetchall()]
# Build and publish one report page per series category, and record the
# per-category totals in `progress` for the summary page written afterwards.
#
# NOTE: this is Python 2 code (byte-string titles, `unicode`).

# Every file page (namespace 6) that is a member of the given category.
files_in_category_query = '''
/* nara-cat.py SLOW_OK */
SELECT
page_title
FROM page
JOIN categorylinks
ON cl_from = page_id
WHERE cl_to = %s
AND page_namespace = 6;
'''

# Returns a row only if the given file page still transcludes
# Template:Uncategorized (template namespace is 10).
uncategorized_check_query = '''
SELECT
1
FROM page
JOIN templatelinks
ON tl_from = page_id
WHERE tl_namespace = 10
AND tl_title = 'Uncategorized'
AND page_namespace = 6
AND page_title = %s;
'''

# One wikitable row: running number, file link, categorization status.
table_row_template = u'''\
| %d
| %s
| %s
|-'''

progress = []
for category in categories:
    category_name = category.replace('_', ' ')
    report_title = root_page + category.encode('utf-8')

    # Query parameters are passed as a one-element tuple. The DB-API
    # (PEP 249) specifies a sequence or mapping; a bare string is itself a
    # sequence of characters and is not handled uniformly across driver
    # versions.
    cursor.execute(files_in_category_query,
                   (category.encode('utf-8').replace(' ', '_'),))
    # Materialize the titles first: the same cursor is reused below for the
    # per-file lookup, which would otherwise discard this result set.
    file_titles = [row[0] for row in cursor.fetchall()]

    cat_count = 0
    output = []
    for i, page_title in enumerate(file_titles, 1):
        # Decode once and use the same text as both link target and label.
        display_title = unicode(page_title.replace('_', ' '), 'utf-8')
        formatted_page_title = u'[[:File:%s|%s]]' % (display_title,
                                                     display_title)

        cursor.execute(uncategorized_check_query, (page_title,))
        if cursor.fetchone():
            # Still tagged as uncategorized: leave the status cell empty.
            cat_status = u''
        else:
            cat_status = u'{{done}}'
            cat_count += 1

        output.append(table_row_template % (i, formatted_page_title,
                                            cat_status))

    total_count = len(file_titles)

    report = wikitools.Page(wiki, report_title)
    report_text = report_template % (category_name, u'\n'.join(output))
    report_text = report_text.encode('utf-8')
    report.edit(report_text, summary=settings.editsumm, bot=1)

    progress.append([category_name, total_count, cat_count])
# Publish the overall progress table (one row per category plus a totals
# row) to the 'Progress' subpage, then release the database resources.


def _percent_expr(done, total):
    """Return wikitext for the truncated percentage of `done` over `total`.

    For a non-zero `total` this is a ParserFunctions ``#expr`` call, exactly
    as the report has always emitted. For a zero `total` (an empty category,
    or no categories at all) a literal ``0`` is returned instead, because
    ``#expr`` would otherwise render a division-by-zero error on the page.
    """
    if not total:
        return u'0'
    return u'{{#expr:trunc((%s/%s) * 100)}}' % (done, total)


report = wikitools.Page(wiki, root_page+'Progress')

# Placeholders, in order: the category rows, grand total of images, grand
# total of categorized images, and the overall percentage cell.
report_template = u'''\
{| class="wikitable sortable"
! Category
! Total images
! Images categorized
! Percent done
|-
%s
|- class="sortbottom"
! Totals
! style="text-align:left;" | %s
! style="text-align:left;" | %s
! style="text-align:left;" | %s
|}
'''

output = []
total_total_count = 0
total_cat_count = 0
for name, total_count, cat_count in progress:
    # Link to the full category name but display it without the common
    # series prefix.
    category = u'[[:Category:%s|%s]]' % (
        name, name.replace('US National Archives series: ', ''))
    total_total_count += total_count
    total_cat_count += cat_count
    percent = _percent_expr(cat_count, total_count)
    table_row = u'''\
| %s
| %s
| %s
| %s
|-''' % (category, total_count, cat_count, percent)
    output.append(table_row)

report_text = report_template % (u'\n'.join(output),
                                 total_total_count,
                                 total_cat_count,
                                 _percent_expr(total_cat_count,
                                               total_total_count))
report_text = report_text.encode('utf-8')
report.edit(report_text, summary=settings.editsumm, bot=1)

cursor.close()
conn.close()
crontab[edit]
30 * * * * PYTHONPATH=$HOME/scripts python $HOME/scripts/commons/nara-cat.py > /dev/null