Commons:National Archives and Records Administration/Categorize/Configuration

From Wikimedia Commons, the free media repository
Jump to navigation Jump to search

nara-cat.py

#! /usr/bin/env python
# Public domain; MZMcBride; 2011

import MySQLdb
import wikitools
import settings

# Title prefix shared by every report page this script writes.
root_page = 'Commons:National Archives and Records Administration/Categorize/'

# Wikitext skeleton for a per-category report table. The first %s receives
# the category name and the second %s receives the pre-rendered table rows.
report_template = (
    u'{| class="wikitable sortable"\n'
    u'|+ [[:Category:%s]]\n'
    u'! No.\n'
    u'! File\n'
    u'! Categorized?\n'
    u'|-\n'
    u'%s\n'
    u'|}\n'
)

# Open an authenticated session against the wiki API; the endpoint and the
# account come from the local settings module.
wiki = wikitools.Wiki(settings.apiurl)
wiki.login(settings.username, settings.password)

# Connect to the database replica. No user or password is passed here;
# presumably they are supplied by the MySQL option file (TODO confirm).
conn = MySQLdb.connect(
    host=settings.host,
    db=settings.dbname,
    read_default_file='~/.my.cnf'
)
cursor = conn.cursor()

# Fetch the title of every category page (namespace 14) whose name starts
# with the NARA series prefix.
# NOTE: "_" inside a LIKE pattern matches any single character, so the
# pattern is slightly broader than the literal prefix it spells out.
series_categories_query = '''
/* nara-cat.py SLOW_OK */
SELECT
  page_title
FROM page
WHERE page_namespace = 14
AND page_title LIKE 'US_National_Archives_series:_%';
'''
cursor.execute(series_categories_query)

# Titles are decoded from UTF-8 once here and kept as unicode (underscores
# are left in place) for the rest of the script.
categories = [unicode(title_row[0], 'utf-8')
              for title_row in cursor.fetchall()]

# Lists the titles of the files (namespace 6) that are members of one
# category; the category title is bound to the single %s parameter.
category_files_query = '''
/* nara-cat.py SLOW_OK */
SELECT
  page_title
FROM page
JOIN categorylinks
ON cl_from = page_id
WHERE cl_to = %s
AND page_namespace = 6;
'''

# Returns a row when the file whose title is bound to %s has a template link
# to "Uncategorized" (namespace 10).
uncategorized_query = '''
SELECT
  1
FROM page
JOIN templatelinks
ON tl_from = page_id
WHERE tl_namespace = 10
AND tl_title = 'Uncategorized'
AND page_namespace = 6
AND page_title = %s;
'''

# One [category name, total files, categorized files] entry per category;
# read back after this loop to build the overall progress page.
progress = []
for category in categories:
    display_category = category.replace('_', ' ')
    cat_count = 0

    # Parameters go in as a one-element tuple (the sequence form the DB-API
    # specifies) instead of a bare string, which some driver versions would
    # iterate character by character.
    cursor.execute(category_files_query,
                   (category.encode('utf-8').replace(' ', '_'),))
    # fetchall() materializes the result, so the cursor can safely be reused
    # for the per-file lookups below.
    file_rows = cursor.fetchall()

    output = []
    for i, row in enumerate(file_rows, 1):
        page_title = row[0]
        display_title = unicode(page_title.replace('_', ' '), 'utf-8')
        formatted_page_title = u'[[:File:%s|%s]]' % (display_title,
                                                     display_title)

        cursor.execute(uncategorized_query, (page_title,))
        if cursor.fetchone():
            # Still linked to the Uncategorized template: leave status blank.
            cat_status = u''
        else:
            cat_status = u'{{done}}'
            cat_count += 1

        # Each row ends with its own "|-" separator line.
        output.append(u'| %d\n| %s\n| %s\n|-'
                      % (i, formatted_page_title, cat_status))
    total_count = len(file_rows)

    # Write this category's table to <root_page><category title>.
    report_title = root_page + category.encode('utf-8')
    report = wikitools.Page(wiki, report_title)
    report_text = report_template % (display_category, '\n'.join(output))
    report.edit(report_text.encode('utf-8'),
                summary=settings.editsumm, bot=1)

    progress.append([display_category, total_count, cat_count])

def _percent_done(done, total):
    """Return wikitext that renders the truncated percentage done / total.

    When total is zero the #expr parser function would render a
    division-by-zero error on the page, so a literal u'0' is returned
    instead of the expression.
    """
    if not total:
        return u'0'
    return u'{{#expr:trunc((%s/%s) * 100)}}' % (done, total)


# Summary page listing every category with its counts and percent done.
report = wikitools.Page(wiki, root_page+'Progress')

# Placeholders, in order: table rows, total files, total categorized files,
# overall percent-done wikitext.
report_template = u'''\
{| class="wikitable sortable"
! Category
! Total images
! Images categorized
! Percent done
|-
%s
|- class="sortbottom"
! Totals
! style="text-align:left;" | %s
! style="text-align:left;" | %s
! style="text-align:left;" | %s
|}
'''

output = []
total_total_count = 0
total_cat_count = 0
for name, total_count, cat_count in progress:
    # Link to the category but display it without the common series prefix.
    category = u'[[:Category:%s|%s]]' % (
        name, name.replace('US National Archives series: ', ''))
    total_total_count += total_count
    total_cat_count += cat_count
    percent = _percent_done(cat_count, total_count)
    # Each row ends with its own "|-" separator line.
    output.append(u'| %s\n| %s\n| %s\n| %s\n|-'
                  % (category, total_count, cat_count, percent))

report_text = report_template % ('\n'.join(output),
                                 total_total_count,
                                 total_cat_count,
                                 _percent_done(total_cat_count,
                                               total_total_count))
report_text = report_text.encode('utf-8')
report.edit(report_text, summary=settings.editsumm, bot=1)

# All reports are written; release the database resources.
cursor.close()
conn.close()

crontab

# Run nara-cat.py at minute 30 of every hour. PYTHONPATH is set to
# $HOME/scripts for the run (presumably so the script's local imports
# resolve). Only stdout is discarded; stderr is not redirected.
30 * * * * PYTHONPATH=$HOME/scripts python $HOME/scripts/commons/nara-cat.py > /dev/null