sub restore_database()

in lib/Mail/SpamAssassin/BayesStore/SQL.pm [1414:1616]


sub restore_database {
  my ($self, $filename, $showdots) = @_;

  local *DUMPFILE;
  if (!open(DUMPFILE, '<', $filename)) {
    dbg("bayes: unable to open backup file $filename: $!");
    return 0;
  }

  # This is the critical phase (moving sql around), so don't allow it
  # to be interrupted.
  local $SIG{'INT'} = 'IGNORE';
  local $SIG{'HUP'} = 'IGNORE' if (!Mail::SpamAssassin::Util::am_running_on_windows());
  local $SIG{'TERM'} = 'IGNORE';

  unless ($self->clear_database()) {
    return 0;
  }

  # we need to go ahead close the db connection so we can then open it up
  # in a fresh state after clearing
  $self->untie_db();

  unless ($self->tie_db_writable()) {
    return 0;
  }

  my $token_count = 0;
  my $db_version;
  my $num_spam;
  my $num_ham;
  my $error_p = 0;
  my $line_count = 0;

  my $line = <DUMPFILE>;
  defined $line  or die "Error reading dump file: $!";
  $line_count++;
  # We require the database version line to be the first in the file so we can
  # figure out how to properly deal with the file.  If it is not the first
  # line then fail
  if ($line =~ m/^v\s+(\d+)\s+db_version/) {
    $db_version = $1;
  }
  else {
    dbg("bayes: database version must be the first line in the backup file, correct and re-run");
    return 0;
  }

  unless ($db_version == 2 || $db_version == 3) {
    warn("bayes: database version $db_version is unsupported, must be version 2 or 3");
    return 0;
  }

  my $token_error_count = 0;
  my $seen_error_count = 0;

  for ($!=0; defined($line=<DUMPFILE>); $!=0) {
    chomp($line);
    $line_count++;

    if ($line_count % 1000 == 0) {
      print STDERR "." if ($showdots);
    }

    if ($line =~ /^v\s+/) { # variable line
      my @parsed_line = split(/\s+/, $line, 3);
      my $value = $parsed_line[1] + 0;
      if ($parsed_line[2] eq 'num_spam') {
	$num_spam = $value;
      }
      elsif ($parsed_line[2] eq 'num_nonspam') {
	$num_ham = $value;
      }
      else {
	dbg("bayes: restore_database: skipping unknown line: $line");
      }
    }
    elsif ($line =~ /^t\s+/) { # token line
      my @parsed_line = split(/\s+/, $line, 5);
      my $spam_count = $parsed_line[1] + 0;
      my $ham_count = $parsed_line[2] + 0;
      my $atime = $parsed_line[3] + 0;
      my $token = $parsed_line[4];

      my $token_warn_p = 0;
      my @warnings;

      if ($spam_count < 0) {
	$spam_count = 0;
	push(@warnings, 'spam count < 0, resetting');
	$token_warn_p = 1;
      }
      if ($ham_count < 0) {
	$ham_count = 0;
	push(@warnings, 'ham count < 0, resetting');
	$token_warn_p = 1;
      }

      if ($spam_count == 0 && $ham_count == 0) {
	dbg("bayes: token has zero spam and ham count, skipping");
	next;
      }

      if ($atime > time()) {
	$atime = time();
	push(@warnings, 'atime > current time, resetting');
	$token_warn_p = 1;
      }

      if ($token_warn_p) {
	dbg("bayes: token ($token) has the following warnings:\n".join("\n",@warnings));
      }

      if ($db_version < 3) {
	# versions < 3 use plain text tokens, so we need to convert to hash
	$token = substr(sha1($token), -5);
      }
      else {
	# turn unpacked binary token back into binary value
	$token = pack("H*",$token);
      }

      unless ($self->_put_token($token, $spam_count, $ham_count, $atime)) {
	dbg("bayes: error inserting token for line: $line");
	$token_error_count++;
      }
      $token_count++;
    }
    elsif ($line =~ /^s\s+/) { # seen line
      my @parsed_line = split(/\s+/, $line, 3);
      my $flag = $parsed_line[1];
      my $msgid = $parsed_line[2];

      unless ($flag eq 'h' || $flag eq 's') {
	dbg("bayes: unknown seen flag ($flag) for line: $line, skipping");
	next;
      }

      unless ($msgid) {
	dbg("bayes: blank msgid for line: $line, skipping");
	next;
      }

      unless ($self->seen_put($msgid, $flag)) {
	dbg("bayes: error inserting msgid in seen table for line: $line");
	$seen_error_count++;
      }
    }
    else {
      dbg("bayes: skipping unknown line: $line");
      next;
    }

    if ($token_error_count >= 20) {
      warn "bayes: encountered too many errors (20) while parsing token line, reverting to empty database and exiting\n";
      $self->clear_database();
      return 0;
    }

    if ($seen_error_count >= 20) {
      warn "bayes: encountered too many errors (20) while parsing seen lines, reverting to empty database and exiting\n";
      $self->clear_database();
      return 0;
    }
  }
  defined $line || $!==0  or
    $!==EBADF ? dbg("bayes: error reading dump file: $!")
              : die "error reading dump file: $!";
  close(DUMPFILE) or die "Can't close dump file: $!";

  print STDERR "\n" if ($showdots);

  unless (defined($num_spam)) {
    dbg("bayes: unable to find num spam, please check file");
    $error_p = 1;
  }

  unless (defined($num_ham)) {
    dbg("bayes: unable to find num ham, please check file");
    $error_p = 1;
  }

  if ($error_p) {
    dbg("bayes: error(s) while attempting to load $filename, clearing database, correct and re-run");
    $self->clear_database();
    return 0;
  }

  if ($num_spam || $num_ham) {
    unless ($self->nspam_nham_change($num_spam, $num_ham)) {
      dbg("bayes: error updating num spam and num ham, clearing database");
      $self->clear_database();
      return 0;
    }
  }

  dbg("bayes: parsed $line_count lines");
  dbg("bayes: created database with $token_count tokens based on $num_spam spam messages and $num_ham ham messages");

  $self->untie_db();

  return 1;
}