credits/scripts/check-names.pl (94 lines of code) (raw):
#!/usr/bin/perl -w
#
use LWP::UserAgent;
use Text::CSV;
use IO::String;
use Data::Dumper;
use Text::Unaccent;
binmode STDOUT, ":utf8";
binmode STDERR, ":utf8";
my $csv = Text::CSV->new({binary => 1});
my @emails = ();
# Read inclusive range from command line
my ($from, $to) = @ARGV;
($from) || usage();
$to ||= $from;
$from =~ /^\d+$/ || usage();
$to =~ /^\d+$/ || usage();
if ($from > $to) {
abort("Please specify a line number range, lowest number first.");
}
if ($from == 1) {
abort("First line included in range, but first line is column headers.");
}
# Download file
my $ua = LWP::UserAgent->new;
$ua->cookie_jar({});
my $response = $ua->get("https://docs.google.com/spreadsheet/ccc?key=__KEY__&output=csv");
if (!$response->is_success) {
abort("Couldn't get credits data from Google Docs.");
}
# Parse into CSV
my $credits_data_io = IO::String->new($response->decoded_content);
$csv->column_names("timestamp",
"name",
"sortkey",
"email",
"citation",
"evidence",
"notes");
$entries = $csv->getline_hr_all($credits_data_io);
# Make sure range is present
my $num_lines = scalar(@$entries);
if ($num_lines < $to) {
abort("Range exceed size of sheet. Requested line $to but only $num_lines available.");
}
for (my $i = $from; $i <= $to; $i++) {
my $entry = $entries->[$i - 1]; # Lines are 1-based, but array is 0-based
my $name = trim($entry->{'name'});
my $sortkey = trim($entry->{'sortkey'});
my $email = trim(lc($entry->{'email'}));
my $citation = trim($entry->{'citation'});
my $notes = trim($entry->{'notes'});
next if $notes ne "Y";
# Deal with errant ' characters by stopping quote, starting new quote
# with double quote characters, doing the single quote, then switching
# back.
# http://stackoverflow.com/questions/1250079/escaping-single-quotes-within-single-quoted-strings
$citation =~ s/'/'"'"'/g;
my $checkin = $name . ' <' . $email . '>: "' . $citation . '"';
if (!$sortkey) {
# No specific sortkey given.
# Names with > 2 parts default to assuming single family name for sort.
$name =~ m/^(.*) ([^ ]+)$/;
$sortkey = $2;
}
if (!$name || !$sortkey) {
abort("Name or sortkey missing.");
}
# Make sortkey ASCII
$sortkey = lc(unac_string('utf-8', $sortkey));
# Check for duplicate
open(my $NAMES, '<:encoding(utf8)', "../names.csv") || abort("Can't open names.csv");
my %names;
while (<$NAMES>) {
chomp;
my ($existing_name) = split(",");
$names{$existing_name} = 1;
}
if ($names{$name}) {
abort($name . " is a duplicate");
}
my $line = "$name,$sortkey";
# Add name and commit
system("echo $line >> ../names.csv");
print "($sortkey) " . $checkin . "\n";
system("git commit ../names.csv -m '$checkin'");
push(@emails, $email);
}
print "Send email to: \n" . join(", ", @emails) . "\n\n";
sub usage {
print "Usage: import <from> <to>\n";
print " where <from> and <to> are integer line numbers in the credits sheet.\n";
exit(1);
}
sub trim {
my ($string) = @_;
$string =~ s/^\s+//;
$string =~ s/\s+$//;
return $string;
}
sub abort {
print STDERR $_[0] . "\n";
if (@emails) {
print "Send email to: \n" . join(", ", @emails) . "\n\n";
}
exit(1);
}