in lib/Mail/SpamAssassin/BayesStore/DBM.pm [1625:1877]
# Rebuild the Bayes toks and seen databases from a backup (dump) file.
#
# Arguments:
#   $filename - path of the dump file to read
#   $showdots - if true, print a "." to STDERR every 1000 lines read
#
# The dump file must start with a "v <n> db_version" line (version 2 or 3).
# The remaining lines are dispatched on their first field:
#   v <value> <name>                  - variable; num_spam and num_nonspam are used
#   t <spam> <ham> <atime> <token>    - token
#   s <flag> <msgid>                  - seen entry, flag is 'h' or 's'
#
# The data is first written to temporary, PID-suffixed database files which
# are renamed over the real ones only once the whole file has been parsed.
#
# Returns 1 on success and 0 on failure.  Dies if reading or closing the dump
# file fails; the temporary databases are removed before dying.
sub restore_database {
  my ($self, $filename, $showdots) = @_;

  my $dumpfile;
  if (!open($dumpfile, '<', $filename)) {
    dbg("bayes: unable to open backup file $filename: $!");
    return 0;
  }

  if (!$self->tie_db_writable()) {
    dbg("bayes: failed to tie db writable");
    return 0;
  }

  my $main = $self->{bayes}->{main};
  my $path = $main->sed_path($main->{conf}->{bayes_path});

  # use a temporary PID-based suffix just in case another one was
  # created previously by an interrupted expire
  my $tmpsuffix = "convert$$";
  my $tmptoksdbname = $path.'_toks.'.$tmpsuffix;
  my $tmpseendbname = $path.'_seen.'.$tmpsuffix;
  my $toksdbname = $path.'_toks';
  my $seendbname = $path.'_seen';

  my %new_toks;
  my %new_seen;

  # The umask is cleared while the temporary files are created so that the
  # mode passed to tie is applied as given; it is restored right afterwards.
  my $umask = umask 0;
  unless (tie %new_toks, $self->DBM_MODULE, $tmptoksdbname, O_RDWR|O_CREAT|O_EXCL,
          (oct ($main->{conf}->{bayes_file_mode}) & 0666)) {
    dbg("bayes: failed to tie temp toks db: $!");
    $self->untie_db();
    umask $umask;
    return 0;
  }
  unless (tie %new_seen, $self->DBM_MODULE, $tmpseendbname, O_RDWR|O_CREAT|O_EXCL,
          (oct ($main->{conf}->{bayes_file_mode}) & 0666)) {
    dbg("bayes: failed to tie temp seen db: $!");
    untie %new_toks;
    $self->_unlink_file($tmptoksdbname);
    $self->untie_db();
    umask $umask;
    return 0;
  }
  umask $umask;

  # Shared failure cleanup, valid from here until the temporary databases
  # are untied for the final rename: drop both temporary databases and
  # release the live one.
  my $discard_tmp_dbs = sub {
    untie %new_toks;
    untie %new_seen;
    $self->_unlink_file($tmptoksdbname);
    $self->_unlink_file($tmpseendbname);
    $self->untie_db();
  };

  my $line_count = 0;
  my $db_version;
  my $token_count = 0;
  my $num_spam;
  my $num_ham;
  my $error_p = 0;
  my $newest_token_age = 0;
  # Kinda weird I know, but we need a nice big value and we know there will be
  # no tokens > time() since we reset atime if > time(), so use that with a
  # little buffer just in case.
  my $oldest_token_age = time() + 100000;

  # Clear $! before reading so that an empty file is not reported with
  # whatever error code an earlier call happened to leave behind.
  $! = 0;
  my $line = <$dumpfile>;
  unless (defined $line) {
    # capture the reason first, the cleanup may overwrite $!
    my $reason = $! ? "$!" : 'file is empty';
    $discard_tmp_dbs->();
    die "Error reading dump file: $reason";
  }
  $line_count++;

  # We require the database version line to be the first in the file so we can
  # figure out how to properly deal with the file.  If it is not the first
  # line then fail
  if ($line =~ m/^v\s+(\d+)\s+db_version/) {
    $db_version = $1;
  }
  else {
    dbg("bayes: database version must be the first line in the backup file, correct and re-run");
    $discard_tmp_dbs->();
    return 0;
  }

  unless ($db_version == 2 || $db_version == 3) {
    warn("bayes: database version $db_version is unsupported, must be version 2 or 3");
    $discard_tmp_dbs->();
    return 0;
  }

  # $! is reset before every read so that, once the loop ends, a non-zero $!
  # means the read itself failed rather than end-of-file being reached.
  for ($!=0; defined($line=<$dumpfile>); $!=0) {
    chomp($line);
    $line_count++;

    if ($line_count % 1000 == 0) {
      print STDERR "." if ($showdots);
    }

    if ($line =~ /^v\s+/) {       # variable line
      my @parsed_line = split(/\s+/, $line, 3);
      my $value = $parsed_line[1] + 0;
      # a truncated line has no name field; treat it as an unknown variable
      my $name = defined $parsed_line[2] ? $parsed_line[2] : '';
      if ($name eq 'num_spam') {
        $num_spam = $value;
      }
      elsif ($name eq 'num_nonspam') {
        $num_ham = $value;
      }
      else {
        dbg("bayes: restore_database: skipping unknown line: $line");
      }
    }
    elsif ($line =~ /^t\s+/) {    # token line
      my @parsed_line = split(/\s+/, $line, 5);
      my $token = $parsed_line[4];

      # A truncated line has no token field; storing it would create a bogus
      # entry under a garbage key, so drop the line instead.
      if (!defined $token || $token eq '') {
        dbg("bayes: token line has no token, skipping: $line");
        next;
      }

      my $spam_count = $parsed_line[1] + 0;
      my $ham_count = $parsed_line[2] + 0;
      my $atime = $parsed_line[3] + 0;

      my $token_warn_p = 0;
      my @warnings;

      if ($spam_count < 0) {
        $spam_count = 0;
        push(@warnings, 'spam count < 0, resetting');
        $token_warn_p = 1;
      }
      if ($ham_count < 0) {
        $ham_count = 0;
        push(@warnings, 'ham count < 0, resetting');
        $token_warn_p = 1;
      }

      if ($spam_count == 0 && $ham_count == 0) {
        dbg("bayes: token has zero spam and ham count, skipping");
        next;
      }

      if ($atime > time()) {
        $atime = time();
        push(@warnings, 'atime > current time, resetting');
        $token_warn_p = 1;
      }

      if ($token_warn_p) {
        dbg("bayes: token (%s) has the following warnings:\n%s",
            $token, join("\n",@warnings));
      }

      # database versions < 3 did not encode their token values
      if ($db_version < 3) {
        $token = substr(sha1($token), -5);
      }
      else {
        # turn unpacked binary token back into binary value
        $token = pack("H*",$token);
      }

      $new_toks{$token} = $self->tok_pack($spam_count, $ham_count, $atime);

      if ($atime < $oldest_token_age) {
        $oldest_token_age = $atime;
      }
      if ($atime > $newest_token_age) {
        $newest_token_age = $atime;
      }
      $token_count++;
    }
    elsif ($line =~ /^s\s+/) {    # seen line
      my @parsed_line = split(/\s+/, $line, 3);
      my $flag = $parsed_line[1];
      my $msgid = $parsed_line[2];

      unless ($flag eq 'h' || $flag eq 's') {
        dbg("bayes: unknown seen flag ($flag) for line: $line, skipping");
        next;
      }

      unless ($msgid) {
        dbg("bayes: blank msgid for line: $line, skipping");
        next;
      }

      $new_seen{$msgid} = $flag;
    }
    else {
      dbg("bayes: skipping unknown line: $line");
      next;
    }
  }

  # The loop only ends once a read returned undef, so a non-zero $! here
  # means that read failed.
  if ($! != 0) {
    my $reason = "$!";
    $discard_tmp_dbs->();
    die "Error reading dump file: $reason";
  }

  unless (close($dumpfile)) {
    my $reason = "$!";
    $discard_tmp_dbs->();
    die "Can't close dump file: $reason";
  }

  print STDERR "\n" if ($showdots);

  unless (defined($num_spam)) {
    dbg("bayes: unable to find num spam, please check file");
    $error_p = 1;
  }

  unless (defined($num_ham)) {
    dbg("bayes: unable to find num ham, please check file");
    $error_p = 1;
  }

  if ($error_p) {
    dbg("bayes: error(s) while attempting to load $filename, correct and re-run");
    $discard_tmp_dbs->();
    return 0;
  }

  # set the calculated magic tokens
  $new_toks{$DB_VERSION_MAGIC_TOKEN} = $self->DB_VERSION();
  $new_toks{$NTOKENS_MAGIC_TOKEN} = $token_count;
  $new_toks{$NSPAM_MAGIC_TOKEN} = $num_spam;
  $new_toks{$NHAM_MAGIC_TOKEN} = $num_ham;
  $new_toks{$NEWEST_TOKEN_AGE_MAGIC_TOKEN} = $newest_token_age;
  $new_toks{$OLDEST_TOKEN_AGE_MAGIC_TOKEN} = $oldest_token_age;

  # go ahead and zero out these, chances are good that they are bogus anyway.
  $new_toks{$LAST_EXPIRE_MAGIC_TOKEN} = 0;
  $new_toks{$LAST_JOURNAL_SYNC_MAGIC_TOKEN} = 0;
  $new_toks{$LAST_ATIME_DELTA_MAGIC_TOKEN} = 0;
  $new_toks{$LAST_EXPIRE_REDUCE_MAGIC_TOKEN} = 0;

  # Ignore interrupting signals for the rest of this sub so the two renames
  # below are not cut short half way through.
  local $SIG{'INT'} = 'IGNORE';
  local $SIG{'TERM'} = 'IGNORE';
  local $SIG{'HUP'} = 'IGNORE' if !am_running_on_windows();

  untie %new_toks;
  untie %new_seen;
  $self->untie_db();

  # Here is where something can go horribly wrong and screw up the bayes
  # database files.  If we are able to copy one and not the other then it
  # will leave the database in an inconsistent state.  Since this is an
  # edge case, and they're trying to replace the DB anyway we should be ok.
  unless ($self->_rename_file($tmptoksdbname, $toksdbname)) {
    dbg("bayes: error while renaming $tmptoksdbname to $toksdbname: $!");
    # Nothing has been replaced yet, so the temporary files are of no
    # further use; remove them as every other failure path does.
    $self->_unlink_file($tmptoksdbname);
    $self->_unlink_file($tmpseendbname);
    return 0;
  }
  unless ($self->_rename_file($tmpseendbname, $seendbname)) {
    dbg("bayes: error while renaming $tmpseendbname to $seendbname: $!");
    dbg("bayes: database now in inconsistent state");
    # The temporary seen database is deliberately left in place here.
    return 0;
  }

  dbg("bayes: parsed $line_count lines");
  dbg("bayes: created database with $token_count tokens based on $num_spam spam messages and $num_ham ham messages");

  return 1;
}