User:AzaToth/wikimgrab.pl
From Wikimedia Commons, the free media repository
A simple script that downloads images from Commons based on their Commons file name.
#!/usr/bin/perl
# wikimgrab.pl - download files from Wikimedia Commons by their Commons name.
#
# Usage: wikimgrab.pl 'File:Some name.jpg' ['Another name.png' ...]
#
# Each argument may be a bare file name, a "File:"/"Image:"-prefixed title,
# or a URL-escaped title.  Every file is saved in the current directory
# under its normalised Commons name (spaces become underscores, first letter
# upper-cased).  If the direct download fails, the file description page is
# consulted for a canonical/redirected name and the download is retried once.
use strict;
use warnings;

use URI::Escape;
use Digest::MD5 qw(md5_hex);
use LWP::UserAgent;

use constant UPLOAD_BASE => 'http://upload.wikimedia.org/wikipedia/commons';
use constant WIKI_BASE   => 'http://commons.wikimedia.org/wiki';

# Run only when executed as a script, so the helpers can be loaded and
# exercised without touching the network.
main(@ARGV) unless caller;

# Download every file named in the argument list.
sub main {
    my @requested = @_;    # copy: never modify the caller's @ARGV in place

    my $ua = LWP::UserAgent->new;
    $ua->timeout(15);
    $ua->env_proxy;
    $ua->show_progress(1);

    for my $requested (@requested) {
        my $image = normalize_name($requested);
        if ( $image eq '' ) {
            warn "Skipping empty file name\n";
            next;
        }

        # mirror() also succeeds with "304 Not Modified" when the local
        # copy is already up to date.
        next unless $ua->mirror( upload_url($image), $image )->is_error;

        #if failed, look for redirects
        warn("Could not get image directly - looking for alternative name on main image page");
        my $canonical = find_canonical_name( $ua, $image );
        if ( !defined $canonical || $canonical eq '' || $canonical eq $image ) {
            warn "No alternative name found for '$image'\n";
            next;
        }

        my $retry = $ua->mirror( upload_url($canonical), $canonical );
        warn "Could not download '$canonical': ", $retry->status_line, "\n"
            if $retry->is_error;
    }
    return;
}

# Turn a user-supplied or page-supplied title into the name Commons stores
# the file under: URL-unescaped, spaces replaced by underscores, any leading
# "File:"/"Image:" namespace removed, first character upper-cased.
# Returns '' for undefined input.
sub normalize_name {
    my ($name) = @_;
    return '' unless defined $name;

    $name = uri_unescape($name);

    # md5_hex() and uri_escape() need bytes; Commons hashes the UTF-8 form.
    utf8::encode($name) if utf8::is_utf8($name);

    $name =~ tr/ /_/;
    $name =~ s/\A(?:File|Image)://i;
    return ucfirst $name;
}

# Build the upload-server URL for an already normalised file name.  Commons
# shards files by the first one and first two hex digits of the MD5 of the
# name.  The name is percent-escaped so characters such as '%', '?' or '#'
# are sent as part of the path rather than being reinterpreted.
sub upload_url {
    my ($name) = @_;
    my $digest = md5_hex($name);    # already lower-case hex
    return join '/', UPLOAD_BASE,
        substr( $digest, 0, 1 ),
        substr( $digest, 0, 2 ),
        uri_escape($name);
}

# Ask the file description page for the real name of a file.  Prefers the
# page's <link rel="canonical"> (relative or absolute href); otherwise falls
# back to the file name of the final response, which differs from the
# request when the page was an HTTP redirect.
# Returns the normalised name, or nothing if the page could not be fetched
# or offers no name.
sub find_canonical_name {
    my ( $ua, $name ) = @_;

    my $response = $ua->get( WIKI_BASE . '/File:' . uri_escape($name) );
    if ( !$response->is_success ) {
        warn "Could not fetch description page for '$name': ",
            $response->status_line, "\n";
        return;
    }

    if ( $response->content
        =~ m{<link\s+rel="canonical"\s+href="[^"]*?/wiki/([^"]+)"} )
    {
        return normalize_name($1);    #found an alternative "canonical" link
    }

    my $filename = $response->filename;    #this is a redirect
    return unless defined $filename;
    return normalize_name($filename);
}
Make the script executable and install Bundle::LWP if necessary.
Example:
$ perl wikimgrab.pl 'file name'
$ wikimgrab.pl 'File:Battery Park City 895'{2..4}'.JPG'
** GET http://upload.wikimedia.org/wikipedia/commons/f/f6/Battery_Park_City_8952.JPG ==> 200 OK (5s)
** GET http://upload.wikimedia.org/wikipedia/commons/0/0c/Battery_Park_City_8953.JPG ==> 200 OK (5s)
** GET http://upload.wikimedia.org/wikipedia/commons/1/12/Battery_Park_City_8954.JPG ==> 200 OK (6s)