t/bayessql.t (343 lines of code) (raw):

#!/usr/bin/perl -T use File::Find qw(find); use lib '.'; use lib 't'; use SATest; sa_t_init("bayessql"); use Test::More; use Mail::SpamAssassin; use constant HAS_DBI => eval { require DBI; }; # for our cleanup stuff use constant SQLITE => eval { require DBD::SQLite; DBD::SQLite->VERSION(1.59_01); }; use constant SQL => conf_bool('run_bayes_sql_tests'); plan skip_all => "Long running tests disabled" unless conf_bool('run_long_tests'); plan skip_all => "DBI is unavailable on this system" unless (HAS_DBI); plan skip_all => "Bayes SQL tests are disabled or DBD::SQLite not found" unless (SQLITE || SQL); my $tests = 0; $tests += 59 if (SQLITE); $tests += 59 if (SQL); plan tests => $tests; diag "Note: If there is a failure it may be due to an incorrect SQL configuration." if (SQL); my ($dbconfig, $dbdsn, $dbusername, $dbpassword); if (SQLITE) { my $dbdir = tempdir("bayessql.XXXXXX", DIR => "log"); die "FATAL: failed to create dbdir: $!" unless -d $dbdir; # Bug 8033 - undocumented extension to dsn format we added for this test $dbdsn = "dbi:SQLite:dbname=$dbdir/bayes.db;synchronous=OFF"; $dbusername = ""; $dbpassword = ""; my $dbh = DBI->connect($dbdsn,$dbusername,$dbpassword); $dbh->do("PRAGMA synchronous = OFF"); $dbh->do(" CREATE TABLE bayes_expire ( id int(11) NOT NULL default '0', runtime int(11) NOT NULL default '0', PRIMARY KEY (id) ); ") or die "Failed to create $dbfile"; $dbh->do(" CREATE TABLE bayes_global_vars ( variable varchar(30) NOT NULL default '', value varchar(200) NOT NULL default '', PRIMARY KEY (variable) ); ") or die "Failed to create $dbfile"; $dbh->do(" INSERT INTO bayes_global_vars VALUES ('VERSION','3'); ") or die "Failed to create $dbfile"; $dbh->do(" CREATE TABLE bayes_seen ( id int(11) NOT NULL default '0', msgid varchar(200) NOT NULL default '' COLLATE binary, flag char(1) NOT NULL default '', PRIMARY KEY (id,msgid) ); ") or die "Failed to create $dbfile"; $dbh->do(" CREATE TABLE bayes_token ( id int(11) NOT NULL default '0', token char(5) NOT NULL default '' COLLATE binary, spam_count int(11) NOT NULL default '0', ham_count int(11) NOT NULL default '0', atime int(11) NOT NULL default '0', PRIMARY KEY (id, token) ); ") or die "Failed to create $dbfile"; $dbh->do(" CREATE INDEX idx_id_atime ON bayes_token (id, atime); ") or die "Failed to create $dbfile"; $dbh->do(" CREATE TABLE bayes_vars ( id INTEGER PRIMARY KEY AUTOINCREMENT, username varchar(200) NOT NULL default '', spam_count int(11) NOT NULL default '0', ham_count int(11) NOT NULL default '0', token_count int(11) NOT NULL default '0', last_expire int(11) NOT NULL default '0', last_atime_delta int(11) NOT NULL default '0', last_expire_reduce int(11) NOT NULL default '0', oldest_token_age int(11) NOT NULL default '2147483647', newest_token_age int(11) NOT NULL default '0' ); ") or die "Failed to create $dbfile"; $dbh->do(" CREATE UNIQUE INDEX idx_username ON bayes_vars (username); ") or die "Failed to create $dbfile"; $dbh->disconnect; undef $dbh; $dbconfig = " bayes_store_module Mail::SpamAssassin::BayesStore::SQL bayes_sql_dsn $dbdsn "; run_bayes(); rmtree($dbdir); } if (SQL) { $dbdsn = conf('bayes_sql_dsn'); $dbusername = conf('bayes_sql_username'); $dbpassword = conf('bayes_sql_password'); $dbconfig = ''; foreach my $setting (qw( bayes_store_module bayes_sql_dsn bayes_sql_username bayes_sql_password )) { my $val = conf($setting); $dbconfig .= "$setting $val\n" if $val; } run_bayes(); } #--------------------------------------------------------------------------- sub run_bayes { my $testuser = 'tstusr.'.$$.'.'.time(); tstprefs (" $dbconfig bayes_sql_override_username $testuser loadplugin validuserplugin ../../../data/validuserplugin.pm bayes_sql_username_authorized 1 "); my $sa = create_saobj(); $sa->init(); ok($sa); my $learner = $sa->call_plugins("learner_get_implementation"); ok($sa->{bayes_scanner} && $learner); ok($learner->{store}->tie_db_writable()); # This bit breaks abstraction a bit, the userid is an implementation detail, # but is necessary to perform some of the tests. Perhaps in the future we # can add some sort of official API for this sort of thing. my $testuserid = $learner->{store}->{_userid}; ok(defined($testuserid)); ok($learner->{store}->clear_database()); ok(database_clear_p($testuser, $testuserid)); $sa->finish_learner(); undef $sa; sa_t_init("bayessql"); tstprefs (" $dbconfig bayes_sql_override_username iwillfail loadplugin validuserplugin ../../../data/validuserplugin.pm bayes_sql_username_authorized 1 "); $sa = create_saobj(); $sa->init(); ok($sa); $learner = $sa->call_plugins("learner_get_implementation"); ok($sa->{bayes_scanner}); ok(!$learner->{store}->tie_db_writable()); $sa->finish_learner(); undef $sa; sa_t_init("bayessql"); tstprefs (" $dbconfig bayes_sql_override_username $testuser "); $sa = create_saobj(); $sa->init(); ok($sa); $learner = $sa->call_plugins("learner_get_implementation"); ok($sa->{bayes_scanner}); ok(!$sa->{bayes_scanner}->is_scan_available()); open(MAIL,"< data/spam/001"); my $raw_message = do { local $/; <MAIL>; }; close(MAIL); ok($raw_message); my @msg; foreach my $line (split(/^/m,$raw_message)) { $line =~ s/\r$//; push(@msg, $line); } my $mail = $sa->parse( \@msg ); ok($mail); my $body = $learner->get_body_from_msg($mail); ok($body); my $toks = $learner->tokenize($mail, $body); ok(scalar(keys %{$toks}) > 0); my $msgid = $mail->generate_msgid(); my $msgid_hdr = $mail->get_msgid(); # $msgid is the generated hash messageid # $msgid_hdr is the Message-Id header ok($msgid eq '71f849915d7e469ddc1890cd8175f6876843f99e@sa_generated'); ok($msgid_hdr eq '9PS291LhupY'); ok($learner->{store}->tie_db_writable()); ok(!$learner->{store}->seen_get($msgid)); $learner->{store}->untie_db(); ok($sa->{bayes_scanner}->learn(1, $mail)); ok(!$sa->{bayes_scanner}->learn(1, $mail)); ok($learner->{store}->tie_db_writable()); ok($learner->{store}->seen_get($msgid) eq 's'); $learner->{store}->untie_db(); ok($learner->{store}->tie_db_writable()); my $tokerror = 0; foreach my $tok (keys %{$toks}) { my ($spam, $ham, $atime) = $learner->{store}->tok_get($tok); if ($spam == 0 || $ham > 0) { $tokerror = 1; } } ok(!$tokerror); my $tokens = $learner->{store}->tok_get_all(keys %{$toks}); ok($tokens); $tokerror = 0; foreach my $tok (@{$tokens}) { my ($token, $tok_spam, $tok_ham, $atime) = @{$tok}; if ($tok_spam == 0 || $tok_ham > 0) { $tokerror = 1; } } ok(!$tokerror); $learner->{store}->untie_db(); ok($sa->{bayes_scanner}->learn(0, $mail)); ok($learner->{store}->tie_db_writable()); ok($learner->{store}->seen_get($msgid) eq 'h'); $learner->{store}->untie_db(); ok($learner->{store}->tie_db_writable()); $tokerror = 0; foreach my $tok (keys %{$toks}) { my ($spam, $ham, $atime) = $learner->{store}->tok_get($tok); if ($spam > 0 || $ham == 0) { $tokerror = 1; } } ok(!$tokerror); $learner->{store}->untie_db(); ok($sa->{bayes_scanner}->forget($mail)); ok($learner->{store}->tie_db_writable()); ok(!$learner->{store}->seen_get($msgid)); $learner->{store}->untie_db(); # This bit breaks abstraction a bit, the userid is an implementation detail, # but is necessary to perform some of the tests. Perhaps in the future we # can add some sort of official API for this sort of thing. $testuserid = $learner->{store}->{_userid}; ok(defined($testuserid)); ok($learner->{store}->clear_database()); ok(database_clear_p($testuser, $testuserid)); $sa->finish_learner(); undef $sa; sa_t_init("bayessql"); # this wipes out what is there and begins anew # make sure we learn to a journal tstprefs (" $dbconfig bayes_min_spam_num 10 bayes_min_ham_num 10 bayes_sql_override_username $testuser "); # we get to bastardize the existing pattern matching code here. It lets us provide # our own checking callback and keep using the existing ok_all_patterns call %patterns = ( 1 => 'Acted on message' ); $wanted_examined = count_files("data/spam"); ok(salearnrun("--spam data/spam", \&check_examined)); ok_all_patterns(); $wanted_examined = count_files("data/nice"); ok(salearnrun("--ham data/nice", \&check_examined)); ok_all_patterns(); $wanted_examined = count_files("data/welcomelists"); ok(salearnrun("--ham data/welcomelists", \&check_examined)); ok_all_patterns(); $wanted_examined = 3; ok(salearnrun("--ham --mbox data/nice.mbox", \&check_examined)); ok_all_patterns(); $wanted_examined = 3; ok(salearnrun("--ham --mbox < data/nice.mbox", \&check_examined)); ok_all_patterns(); $wanted_examined = 3; ok(salearnrun("--forget --mbox data/nice.mbox", \&check_examined)); ok_all_patterns(); %patterns = ( 'non-token data: bayes db version' => 'db version' ); ok(salearnrun("--dump magic", \&patterns_run_cb)); ok_all_patterns(); use constant SCAN_USING_PERL_CODE_TEST => 1; # jm: off! not working for some reason. Mind you, this is # not a supported way to call these APIs! so no biggie if (SCAN_USING_PERL_CODE_TEST) { $sa = create_saobj(); $sa->init(); $learner = $sa->call_plugins("learner_get_implementation"); open(MAIL,"< ../sample-nonspam.txt"); $raw_message = do { local $/; <MAIL>; }; close(MAIL); @msg = (); foreach my $line (split(/^/m,$raw_message)) { $line =~ s/\r$//; push(@msg, $line); } $mail = $sa->parse( \@msg ); $body = $learner->get_body_from_msg($mail); my $msgstatus = Mail::SpamAssassin::PerMsgStatus->new($sa, $mail); ok($msgstatus); my $score = $learner->scan($msgstatus, $mail, $body); # Pretty much we can't count on the data returned with such little training # so just make sure that the score wasn't equal to .5 which is the default # return value. print "\treturned score: $score\n"; ok($score =~ /\d/ && $score <= 1.0 && $score != .5); open(MAIL,"< ../sample-spam.txt"); $raw_message = do { local $/; <MAIL>; }; close(MAIL); @msg = (); foreach my $line (split(/^/m,$raw_message)) { $line =~ s/\r$//; push(@msg, $line); } $mail = $sa->parse( \@msg ); $body = $learner->get_body_from_msg($mail); $msgstatus = Mail::SpamAssassin::PerMsgStatus->new($sa, $mail); $score = $learner->scan($msgstatus, $mail, $body); # Pretty much we can't count on the data returned with such little training # so just make sure that the score wasn't equal to .5 which is the default # return value. print "\treturned score: $score\n"; ok($score =~ /\d/ && $score <= 1.0 && $score != .5); } # This bit breaks abstraction a bit, the userid is an implementation detail, # but is necessary to perform some of the tests. Perhaps in the future we # can add some sort of official API for this sort of thing. $testuserid = $learner->{store}->{_userid}; ok(defined($testuserid)); ok($learner->{store}->clear_database()); ok(database_clear_p($testuser, $testuserid)); $sa->finish_learner(); } #--------------------------------------------------------------------------- sub check_examined { local ($_); my $string = shift; if (defined $string) { $_ = $string; } else { $_ = join ('', <IN>); } if ($_ =~ /(?:Forgot|Learned) tokens from \d+ message\(s\) \((\d+) message\(s\) examined\)/) { #print STDERR "examined $1 messages\n"; if (defined $wanted_examined && $wanted_examined == $1) { $found{'Acted on message'}++; } } } sub count_files { my $cnt = 0; find({wanted => sub { $cnt++ if -f $_; }, no_chdir => 1}, $_[0]); return $cnt; } # WARNING! Do not use this as an example, this breaks abstraction # and is here strictly to help the regression tests. sub database_clear_p { my ($username, $userid) = @_; my $dbh = DBI->connect($dbdsn,$dbusername,$dbpassword); if (!defined($dbh)) { return 0; } my @row_ary; my $sql = "SELECT count(*) from bayes_vars where username = ?"; @row_ary = $dbh->selectrow_array($sql, undef, $username); return 0 if ($row_ary[0] != 0); $sql = "SELECT count(*) from bayes_token where id = ?"; @row_ary = $dbh->selectrow_array($sql, undef, $userid); return 0 if ($row_ary[0] != 0); $sql = "SELECT count(*) from bayes_seen where id = ?"; @row_ary = $dbh->selectrow_array($sql, undef, $userid); return 0 if ($row_ary[0] != 0); $sql = "SELECT count(*) from bayes_expire where id = ?"; @row_ary = $dbh->selectrow_array($sql, undef, $userid); return 0 if ($row_ary[0] != 0); $dbh->disconnect(); return 1; }