sub _upgrade_db()

in lib/Mail/SpamAssassin/BayesStore/DBM.pm [350:598]


# _upgrade_db()
#
# Brings the tied token database ($self->{db_toks}) up to the newest format
# handled here (v3), one step at a time: anything below v2 is first rewritten
# as v2, and a v2 database is then rewritten as v3.
#
# Each step builds a brand new "<bayes_path>_toks.new" database, copies the
# magic (bookkeeping) tokens and the data tokens into it, renames the new
# file(s) over the old one(s) and re-ties $self->{db_toks} read-write.
#
# Side effects visible in this sub: an existing journal file is removed, the
# old "<bayes_path>_msgcount" file is removed during the pre-v2 step, and
# $self->{db_version} is updated after every completed step.
#
# Returns 1 if the database already is current or was upgraded, and 0 if the
# database is newer than understood, a tie() fails, or a rename fails.  Every
# failure path emits a warning.
sub _upgrade_db {
  my ($self) = @_;

  my $verschk = $self->_check_db_version();
  my $res = 0; # used later on for tie() checks
  my $umask; # used later for umask modifications
  my $tie_err; # text of $! captured immediately after each tie()

  # If the DB is the latest version, no problem.
  return 1 if ($verschk == 0);

  # If the DB is a newer version that we know what to do with ... abort!
  if ($verschk == 1) {
    warn("bayes: bayes db version ".$self->{db_version}." is newer than we understand, aborting!");
    return 0;
  }

  # If the current DB version is lower than the new version, upgrade!
  # Do conversions in order so we can go 1 -> 3, make sure to update
  #   $self->{db_version} along the way

  dbg("bayes: detected bayes db format %s, upgrading", $self->{db_version});

  # since DB_File will not shrink a database (!!), we need to *create*
  # a new one instead.
  my $main = $self->{bayes}->{main};
  my $path = $main->sed_path($main->{conf}->{bayes_path});
  my $name = $path.'_toks';

  # older version's journal files are likely not in the same format as the
  # new ones, so remove it.  Failure to remove is reported but not fatal.
  my $jpath = $self->_get_journal_filename();
  if (-f $jpath) {
    dbg("bayes: old journal file found, removing");
    warn "bayes: couldn't remove $jpath: $!" if (!unlink $jpath);
  }

  # ---------------------------------------------------------------------
  # Step 1: v0/v1 -> v2
  # ---------------------------------------------------------------------
  if ($self->{db_version} < 2) {
    dbg("bayes: upgrading database format from v%s to v2", $self->{db_version});
    $self->set_running_expire_tok();

    # Keys under which the *old* database stores its counters; they differ
    # between v0 and v1.
    my ($DB_NSPAM_MAGIC_TOKEN, $DB_NHAM_MAGIC_TOKEN, $DB_NTOKENS_MAGIC_TOKEN);

    # Magic tokens for version 0, defined as '**[A-Z]+'
    if ($self->{db_version} == 0) {
      $DB_NSPAM_MAGIC_TOKEN   = '**NSPAM';
      $DB_NHAM_MAGIC_TOKEN    = '**NHAM';
      $DB_NTOKENS_MAGIC_TOKEN = '**NTOKENS';
      #$DB_OLDEST_TOKEN_AGE_MAGIC_TOKEN		= '**OLDESTAGE';
      #$DB_LAST_EXPIRE_MAGIC_TOKEN		= '**LASTEXPIRE';
      #$DB_SCANCOUNT_BASE_MAGIC_TOKEN		= '**SCANBASE';
      #$DB_RUNNING_EXPIRE_MAGIC_TOKEN		= '**RUNNINGEXPIRE';
    }
    else {
      $DB_NSPAM_MAGIC_TOKEN   = "\015\001\007\011\003NSPAM";
      $DB_NHAM_MAGIC_TOKEN    = "\015\001\007\011\003NHAM";
      $DB_NTOKENS_MAGIC_TOKEN = "\015\001\007\011\003NTOKENS";
      #$DB_OLDEST_TOKEN_AGE_MAGIC_TOKEN		= "\015\001\007\011\003OLDESTAGE";
      #$DB_LAST_EXPIRE_MAGIC_TOKEN		= "\015\001\007\011\003LASTEXPIRE";
      #$DB_SCANCOUNT_BASE_MAGIC_TOKEN		= "\015\001\007\011\003SCANBASE";
      #$DB_RUNNING_EXPIRE_MAGIC_TOKEN		= "\015\001\007\011\003RUNNINGEXPIRE";
    }

    # remember when we started ...
    my $started = time;
    my $newatime = $started;

    # use O_EXCL to avoid races (bonus paranoia, since we should be locked
    # anyway).  Note that O_EXCL also makes this fail if a "<name>.new" file
    # already exists, hence the explicit warning below.
    my %new_toks;
    $umask = umask 0;

    $res = tie %new_toks, $self->DBM_MODULE, "${name}.new",
             O_RDWR|O_CREAT|O_EXCL,
             (oct($main->{conf}->{bayes_file_mode}) & 0666);
    $tie_err = "$!"; # grab it before anything else can touch errno
    umask $umask;
    if (!$res) {
      warn "bayes: cannot create ${name}.new for upgrade: tie failed: $tie_err\n";
      return 0;
    }
    # drop our reference to the tie object so the later untie is clean
    undef $res;

    # add the magic tokens to the new db.  Counters are carried over from
    # the old database; all timestamps are reset to the upgrade time.
    $new_toks{$NSPAM_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NSPAM_MAGIC_TOKEN};
    $new_toks{$NHAM_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NHAM_MAGIC_TOKEN};
    $new_toks{$NTOKENS_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NTOKENS_MAGIC_TOKEN};
    $new_toks{$DB_VERSION_MAGIC_TOKEN} = 2; # we're now a DB version 2 file
    $new_toks{$OLDEST_TOKEN_AGE_MAGIC_TOKEN} = $newatime;
    $new_toks{$LAST_EXPIRE_MAGIC_TOKEN} = $newatime;
    $new_toks{$NEWEST_TOKEN_AGE_MAGIC_TOKEN} = $newatime;
    $new_toks{$LAST_JOURNAL_SYNC_MAGIC_TOKEN} = $newatime;
    $new_toks{$LAST_ATIME_DELTA_MAGIC_TOKEN} = 0;
    $new_toks{$LAST_EXPIRE_REDUCE_MAGIC_TOKEN} = 0;

    # deal with the data tokens
    my ($tok, $packed);
    my $count = 0;
    while (($tok, $packed) = each %{$self->{db_toks}}) {
      next if ($tok =~ /^(?:\*\*[A-Z]+$|\015\001\007\011\003)/); # skip magic tokens

      # keep the spam/ham counts, but stamp every token with the upgrade
      # time instead of whatever atime the old record carried
      my ($ts, $th) = $self->tok_unpack($packed);
      $new_toks{$tok} = $self->tok_pack($ts, $th, $newatime);

      # Refresh the lock every so often...
      if (($count++ % 1000) == 0) {
        $self->set_running_expire_tok();
      }
    }


    # now untie so we can do renames
    untie %{$self->{db_toks}};
    untie %new_toks;

    # This is the critical phase (moving files around), so don't allow
    # it to be interrupted.  The handlers are restored when this block exits.
    local $SIG{'INT'} = 'IGNORE';
    local $SIG{'TERM'} = 'IGNORE';
    local $SIG{'HUP'} = 'IGNORE' if !am_running_on_windows();

    # older versions used scancount, so kill the stupid little file ...
    my $msgc = $path.'_msgcount';
    if (-f $msgc) {
      dbg("bayes: old msgcount file found, removing");
      if (!unlink $msgc) {
        warn "bayes: couldn't remove $msgc: $!";
      }
    }

    # now rename in the new one.  Try several extensions
    # NOTE(review): on a rename failure we return with $self->{db_toks}
    # already untied; the caller is presumably expected to clean up.
    for my $ext ($self->DB_EXTENSIONS) {
      my $newf = $name.'.new'.$ext;
      my $oldf = $name.$ext;
      next unless (-f $newf);
      if (!rename ($newf, $oldf)) {
        warn "bayes: rename $newf to $oldf failed: $!\n";
        return 0;
      }
    }

    # re-tie to the new db in read-write mode ...
    $umask = umask 0;
    # Bug 6901, [rt.cpan.org #83060]
    untie %{$self->{db_toks}};  # has no effect if the variable is not tied
    $res = tie %{$self->{db_toks}}, $self->DBM_MODULE, $name, O_RDWR|O_CREAT,
             (oct($main->{conf}->{bayes_file_mode}) & 0666);
    $tie_err = "$!";
    umask $umask;
    if (!$res) {
      warn "bayes: cannot open upgraded db $name R/W: tie failed: $tie_err\n";
      return 0;
    }
    undef $res;

    dbg("bayes: upgraded database format from v%s to v2 in %d seconds",
        $self->{db_version}, time - $started);
    $self->{db_version} = 2; # need this for other functions which check
  }

  # ---------------------------------------------------------------------
  # Step 2: v2 -> v3
  # Version 3 of the database converts all existing tokens to SHA1 hashes
  # ---------------------------------------------------------------------
  if ($self->{db_version} == 2) {
    dbg("bayes: upgrading database format from v%s to v3", $self->{db_version});
    $self->set_running_expire_tok();

    # Keys under which the v2 database stores its bookkeeping values.
    my $DB_NSPAM_MAGIC_TOKEN              = "\015\001\007\011\003NSPAM";
    my $DB_NHAM_MAGIC_TOKEN               = "\015\001\007\011\003NHAM";
    my $DB_NTOKENS_MAGIC_TOKEN            = "\015\001\007\011\003NTOKENS";
    my $DB_OLDEST_TOKEN_AGE_MAGIC_TOKEN   = "\015\001\007\011\003OLDESTAGE";
    my $DB_LAST_EXPIRE_MAGIC_TOKEN        = "\015\001\007\011\003LASTEXPIRE";
    my $DB_NEWEST_TOKEN_AGE_MAGIC_TOKEN   = "\015\001\007\011\003NEWESTAGE";
    my $DB_LAST_JOURNAL_SYNC_MAGIC_TOKEN  = "\015\001\007\011\003LASTJOURNALSYNC";
    my $DB_LAST_ATIME_DELTA_MAGIC_TOKEN   = "\015\001\007\011\003LASTATIMEDELTA";
    my $DB_LAST_EXPIRE_REDUCE_MAGIC_TOKEN = "\015\001\007\011\003LASTEXPIREREDUCE";

    # remember when we started ...
    my $started = time;

    # use O_EXCL to avoid races (bonus paranoia, since we should be locked
    # anyway).  As above, a pre-existing "<name>.new" makes this tie fail.
    my %new_toks;
    $umask = umask 0;
    $res = tie %new_toks, $self->DBM_MODULE, "${name}.new", O_RDWR|O_CREAT|O_EXCL,
             (oct($main->{conf}->{bayes_file_mode}) & 0666);
    $tie_err = "$!"; # grab it before anything else can touch errno
    umask $umask;
    if (!$res) {
      warn "bayes: cannot create ${name}.new for upgrade: tie failed: $tie_err\n";
      return 0;
    }
    # drop our reference to the tie object so the later untie is clean
    undef $res;

    # add the magic tokens to the new db.  Unlike the v2 step, every
    # bookkeeping value is carried over unchanged from the old database.
    $new_toks{$NSPAM_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NSPAM_MAGIC_TOKEN};
    $new_toks{$NHAM_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NHAM_MAGIC_TOKEN};
    $new_toks{$NTOKENS_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NTOKENS_MAGIC_TOKEN};
    $new_toks{$DB_VERSION_MAGIC_TOKEN} = 3; # we're now a DB version 3 file
    $new_toks{$OLDEST_TOKEN_AGE_MAGIC_TOKEN} = $self->{db_toks}->{$DB_OLDEST_TOKEN_AGE_MAGIC_TOKEN};
    $new_toks{$LAST_EXPIRE_MAGIC_TOKEN} = $self->{db_toks}->{$DB_LAST_EXPIRE_MAGIC_TOKEN};
    $new_toks{$NEWEST_TOKEN_AGE_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NEWEST_TOKEN_AGE_MAGIC_TOKEN};
    $new_toks{$LAST_JOURNAL_SYNC_MAGIC_TOKEN} = $self->{db_toks}->{$DB_LAST_JOURNAL_SYNC_MAGIC_TOKEN};
    $new_toks{$LAST_ATIME_DELTA_MAGIC_TOKEN} = $self->{db_toks}->{$DB_LAST_ATIME_DELTA_MAGIC_TOKEN};
    $new_toks{$LAST_EXPIRE_REDUCE_MAGIC_TOKEN} = $self->{db_toks}->{$DB_LAST_EXPIRE_REDUCE_MAGIC_TOKEN};

    # deal with the data tokens
    my $count = 0;
    while (my ($tok, $packed) = each %{$self->{db_toks}}) {
      next if ($tok =~ /^\015\001\007\011\003/); # skip magic tokens

      # the new key is the last 5 bytes of sha1($tok); the packed value is
      # copied across untouched
      my $tok_hash = substr(sha1($tok), -5);
      $new_toks{$tok_hash} = $packed;

      # Refresh the lock every so often...
      if (($count++ % 1000) == 0) {
        $self->set_running_expire_tok();
      }
    }

    # now untie so we can do renames
    untie %{$self->{db_toks}};
    untie %new_toks;

    # This is the critical phase (moving files around), so don't allow
    # it to be interrupted.  The handlers are restored when this block exits.
    local $SIG{'INT'} = 'IGNORE';
    local $SIG{'TERM'} = 'IGNORE';
    local $SIG{'HUP'} = 'IGNORE' if !am_running_on_windows();

    # now rename in the new one.  Try several extensions
    # NOTE(review): on a rename failure we return with $self->{db_toks}
    # already untied; the caller is presumably expected to clean up.
    for my $ext ($self->DB_EXTENSIONS) {
      my $newf = $name.'.new'.$ext;
      my $oldf = $name.$ext;
      next unless (-f $newf);
      if (!rename($newf, $oldf)) {
        warn "bayes: rename $newf to $oldf failed: $!\n";
        return 0;
      }
    }

    # re-tie to the new db in read-write mode ...
    $umask = umask 0;
    # Bug 6901, [rt.cpan.org #83060]
    untie %{$self->{db_toks}};  # has no effect if the variable is not tied
    $res = tie %{$self->{db_toks}}, $self->DBM_MODULE, $name, O_RDWR|O_CREAT,
             (oct ($main->{conf}->{bayes_file_mode}) & 0666);
    $tie_err = "$!";
    umask $umask;
    if (!$res) {
      warn "bayes: cannot open upgraded db $name R/W: tie failed: $tie_err\n";
      return 0;
    }
    undef $res;

    dbg("bayes: upgraded database format from v%s to v3 in %d seconds",
        $self->{db_version}, time - $started);

    $self->{db_version} = 3; # need this for other functions which check
  }

  # if ($self->{db_version} == 3) {
  #   ...
  #   $self->{db_version} = 4; # need this for other functions which check
  # }
  # ... and so on.

  return 1;
}