in lib/Mail/SpamAssassin/BayesStore/SQL.pm [1414:1616]
# Restore the bayes database from the dump file $filename.
#
# Parameters:
#   $filename - path of the dump file to read
#   $showdots - when true, print a "." to STDERR for every 1000 lines read
#               and a newline once the whole file has been consumed
#
# Returns 1 on success and 0 on failure.  Dies if the dump file cannot be
# read or closed.
#
# Expected file layout:
#   - the first line must be "v <n> db_version"; only versions 2 and 3 are
#     accepted
#   - "v <value> num_spam" / "v <value> num_nonspam" give the message counts
#   - "t <spam_count> <ham_count> <atime> <token>" describes one token
#   - "s <h|s> <msgid>" describes one seen-table entry
#   - anything else is logged and skipped
#
# The header line is validated BEFORE the existing data is cleared, so an
# empty, truncated or unsupported dump file leaves the current database
# untouched.  Once loading has started, any fatal problem (20 or more failed
# token inserts, 20 or more failed seen inserts, missing num_spam/num_nonspam,
# failure to store the counts) clears the database again so that a partial
# restore is never left behind.
sub restore_database {
  my ($self, $filename, $showdots) = @_;

  local *DUMPFILE;
  if (!open(DUMPFILE, '<', $filename)) {
    dbg("bayes: unable to open backup file $filename: $!");
    return 0;
  }

  # This is the critical phase (moving sql around), so don't allow it
  # to be interrupted.
  local $SIG{'INT'} = 'IGNORE';
  local $SIG{'HUP'} = 'IGNORE' if (!Mail::SpamAssassin::Util::am_running_on_windows());
  local $SIG{'TERM'} = 'IGNORE';

  my $token_count = 0;
  my $db_version;
  my $num_spam;
  my $num_ham;
  my $error_p = 0;
  my $line_count = 0;

  # Reset errno before reading so that a plain end-of-file (empty dump file)
  # is not reported with an unrelated, stale error string.
  $! = 0;
  my $line = <DUMPFILE>;
  defined $line
    or die "Error reading dump file: " . ($! ? "$!" : "file is empty");
  $line_count++;

  # We require the database version line to be the first in the file so we can
  # figure out how to properly deal with the file.  If it is not the first
  # line then fail
  if ($line =~ m/^v\s+(\d+)\s+db_version/) {
    $db_version = $1;
  }
  else {
    dbg("bayes: database version must be the first line in the backup file, correct and re-run");
    return 0;
  }

  unless ($db_version == 2 || $db_version == 3) {
    warn("bayes: database version $db_version is unsupported, must be version 2 or 3");
    return 0;
  }

  # Only now that the dump file looks usable do we throw away the old data.
  unless ($self->clear_database()) {
    return 0;
  }

  # we need to go ahead close the db connection so we can then open it up
  # in a fresh state after clearing
  $self->untie_db();

  unless ($self->tie_db_writable()) {
    return 0;
  }

  my $token_error_count = 0;
  my $seen_error_count = 0;

  # $! is zeroed before every read so that, once the loop ends, a real read
  # error can be told apart from a normal end-of-file.
  for ($!=0; defined($line=<DUMPFILE>); $!=0) {
    chomp($line);
    $line_count++;

    if ($line_count % 1000 == 0) {
      print STDERR "." if ($showdots);
    }

    if ($line =~ /^v\s+/) { # variable line
      my @parsed_line = split(/\s+/, $line, 3);
      my $value = $parsed_line[1] + 0;
      # the name is absent on a truncated line; treat that as "unknown"
      my $name = defined $parsed_line[2] ? $parsed_line[2] : '';

      if ($name eq 'num_spam') {
        $num_spam = $value;
      }
      elsif ($name eq 'num_nonspam') {
        $num_ham = $value;
      }
      else {
        dbg("bayes: restore_database: skipping unknown line: $line");
      }
    }
    elsif ($line =~ /^t\s+/) { # token line
      my @parsed_line = split(/\s+/, $line, 5);

      # A token line needs all five fields; without the token itself there is
      # nothing meaningful to store.
      if (@parsed_line < 5) {
        dbg("bayes: malformed token line, skipping: $line");
        next;
      }

      my $spam_count = $parsed_line[1] + 0;
      my $ham_count = $parsed_line[2] + 0;
      my $atime = $parsed_line[3] + 0;
      my $token = $parsed_line[4];

      my $token_warn_p = 0;
      my @warnings;

      if ($spam_count < 0) {
        $spam_count = 0;
        push(@warnings, 'spam count < 0, resetting');
        $token_warn_p = 1;
      }
      if ($ham_count < 0) {
        $ham_count = 0;
        push(@warnings, 'ham count < 0, resetting');
        $token_warn_p = 1;
      }

      if ($spam_count == 0 && $ham_count == 0) {
        dbg("bayes: token has zero spam and ham count, skipping");
        next;
      }

      if ($atime > time()) {
        $atime = time();
        push(@warnings, 'atime > current time, resetting');
        $token_warn_p = 1;
      }

      if ($token_warn_p) {
        dbg("bayes: token ($token) has the following warnings:\n".join("\n",@warnings));
      }

      if ($db_version < 3) {
        # versions < 3 use plain text tokens, so we need to convert to hash
        $token = substr(sha1($token), -5);
      }
      else {
        # turn unpacked binary token back into binary value
        $token = pack("H*",$token);
      }

      # Count only tokens that were really stored, so the summary logged at
      # the end reflects the database contents.
      if ($self->_put_token($token, $spam_count, $ham_count, $atime)) {
        $token_count++;
      }
      else {
        dbg("bayes: error inserting token for line: $line");
        $token_error_count++;
      }
    }
    elsif ($line =~ /^s\s+/) { # seen line
      my @parsed_line = split(/\s+/, $line, 3);
      my $flag = $parsed_line[1];
      my $msgid = $parsed_line[2];

      unless ($flag eq 'h' || $flag eq 's') {
        dbg("bayes: unknown seen flag ($flag) for line: $line, skipping");
        next;
      }

      # test for presence rather than truth so that a msgid of "0" is kept
      unless (defined $msgid && $msgid ne '') {
        dbg("bayes: blank msgid for line: $line, skipping");
        next;
      }

      unless ($self->seen_put($msgid, $flag)) {
        dbg("bayes: error inserting msgid in seen table for line: $line");
        $seen_error_count++;
      }
    }
    else {
      dbg("bayes: skipping unknown line: $line");
      next;
    }

    if ($token_error_count >= 20) {
      warn "bayes: encountered too many errors (20) while parsing token line, reverting to empty database and exiting\n";
      $self->clear_database();
      return 0;
    }

    if ($seen_error_count >= 20) {
      warn "bayes: encountered too many errors (20) while parsing seen lines, reverting to empty database and exiting\n";
      $self->clear_database();
      return 0;
    }
  }

  # The loop ended because readline returned undef: errno 0 means a normal
  # end-of-file, EBADF is only logged, anything else is fatal.
  defined $line || $!==0  or
    $!==EBADF ? dbg("bayes: error reading dump file: $!")
              : die "error reading dump file: $!";
  close(DUMPFILE) or die "Can't close dump file: $!";

  print STDERR "\n" if ($showdots);

  unless (defined($num_spam)) {
    dbg("bayes: unable to find num spam, please check file");
    $error_p = 1;
  }

  unless (defined($num_ham)) {
    dbg("bayes: unable to find num ham, please check file");
    $error_p = 1;
  }

  if ($error_p) {
    dbg("bayes: error(s) while attempting to load $filename, clearing database, correct and re-run");
    $self->clear_database();
    return 0;
  }

  # nothing to record when both counts are zero
  if ($num_spam || $num_ham) {
    unless ($self->nspam_nham_change($num_spam, $num_ham)) {
      dbg("bayes: error updating num spam and num ham, clearing database");
      $self->clear_database();
      return 0;
    }
  }

  dbg("bayes: parsed $line_count lines");
  dbg("bayes: created database with $token_count tokens based on $num_spam spam messages and $num_ham ham messages");

  $self->untie_db();

  return 1;
}