in t/bayessql.t [126:456]
sub run_bayes {
my $testuser = 'tstusr.'.$$.'.'.time();
tstprefs ("
$dbconfig
bayes_sql_override_username $testuser
loadplugin validuserplugin ../../../data/validuserplugin.pm
bayes_sql_username_authorized 1
");
my $sa = create_saobj();
$sa->init();
ok($sa);
my $learner = $sa->call_plugins("learner_get_implementation");
ok($sa->{bayes_scanner} && $learner);
ok($learner->{store}->tie_db_writable());
# This bit breaks abstraction a bit, the userid is an implementation detail,
# but is necessary to perform some of the tests. Perhaps in the future we
# can add some sort of official API for this sort of thing.
my $testuserid = $learner->{store}->{_userid};
ok(defined($testuserid));
ok($learner->{store}->clear_database());
ok(database_clear_p($testuser, $testuserid));
$sa->finish_learner();
undef $sa;
sa_t_init("bayessql");
tstprefs ("
$dbconfig
bayes_sql_override_username iwillfail
loadplugin validuserplugin ../../../data/validuserplugin.pm
bayes_sql_username_authorized 1
");
$sa = create_saobj();
$sa->init();
ok($sa);
$learner = $sa->call_plugins("learner_get_implementation");
ok($sa->{bayes_scanner});
ok(!$learner->{store}->tie_db_writable());
$sa->finish_learner();
undef $sa;
sa_t_init("bayessql");
tstprefs ("
$dbconfig
bayes_sql_override_username $testuser
");
$sa = create_saobj();
$sa->init();
ok($sa);
$learner = $sa->call_plugins("learner_get_implementation");
ok($sa->{bayes_scanner});
ok(!$sa->{bayes_scanner}->is_scan_available());
open(MAIL,"< data/spam/001");
my $raw_message = do {
local $/;
<MAIL>;
};
close(MAIL);
ok($raw_message);
my @msg;
foreach my $line (split(/^/m,$raw_message)) {
$line =~ s/\r$//;
push(@msg, $line);
}
my $mail = $sa->parse( \@msg );
ok($mail);
my $body = $learner->get_body_from_msg($mail);
ok($body);
my $toks = $learner->tokenize($mail, $body);
ok(scalar(keys %{$toks}) > 0);
my $msgid = $mail->generate_msgid();
my $msgid_hdr = $mail->get_msgid();
# $msgid is the generated hash messageid
# $msgid_hdr is the Message-Id header
ok($msgid eq '71f849915d7e469ddc1890cd8175f6876843f99e@sa_generated');
ok($msgid_hdr eq '9PS291LhupY');
ok($learner->{store}->tie_db_writable());
ok(!$learner->{store}->seen_get($msgid));
$learner->{store}->untie_db();
ok($sa->{bayes_scanner}->learn(1, $mail));
ok(!$sa->{bayes_scanner}->learn(1, $mail));
ok($learner->{store}->tie_db_writable());
ok($learner->{store}->seen_get($msgid) eq 's');
$learner->{store}->untie_db();
ok($learner->{store}->tie_db_writable());
my $tokerror = 0;
foreach my $tok (keys %{$toks}) {
my ($spam, $ham, $atime) = $learner->{store}->tok_get($tok);
if ($spam == 0 || $ham > 0) {
$tokerror = 1;
}
}
ok(!$tokerror);
my $tokens = $learner->{store}->tok_get_all(keys %{$toks});
ok($tokens);
$tokerror = 0;
foreach my $tok (@{$tokens}) {
my ($token, $tok_spam, $tok_ham, $atime) = @{$tok};
if ($tok_spam == 0 || $tok_ham > 0) {
$tokerror = 1;
}
}
ok(!$tokerror);
$learner->{store}->untie_db();
ok($sa->{bayes_scanner}->learn(0, $mail));
ok($learner->{store}->tie_db_writable());
ok($learner->{store}->seen_get($msgid) eq 'h');
$learner->{store}->untie_db();
ok($learner->{store}->tie_db_writable());
$tokerror = 0;
foreach my $tok (keys %{$toks}) {
my ($spam, $ham, $atime) = $learner->{store}->tok_get($tok);
if ($spam > 0 || $ham == 0) {
$tokerror = 1;
}
}
ok(!$tokerror);
$learner->{store}->untie_db();
ok($sa->{bayes_scanner}->forget($mail));
ok($learner->{store}->tie_db_writable());
ok(!$learner->{store}->seen_get($msgid));
$learner->{store}->untie_db();
# This bit breaks abstraction a bit, the userid is an implementation detail,
# but is necessary to perform some of the tests. Perhaps in the future we
# can add some sort of official API for this sort of thing.
$testuserid = $learner->{store}->{_userid};
ok(defined($testuserid));
ok($learner->{store}->clear_database());
ok(database_clear_p($testuser, $testuserid));
$sa->finish_learner();
undef $sa;
sa_t_init("bayessql"); # this wipes out what is there and begins anew
# make sure we learn to a journal
tstprefs ("
$dbconfig
bayes_min_spam_num 10
bayes_min_ham_num 10
bayes_sql_override_username $testuser
");
# we get to bastardize the existing pattern matching code here. It lets us provide
# our own checking callback and keep using the existing ok_all_patterns call
%patterns = ( 1 => 'Acted on message' );
$wanted_examined = count_files("data/spam");
ok(salearnrun("--spam data/spam", \&check_examined));
ok_all_patterns();
$wanted_examined = count_files("data/nice");
ok(salearnrun("--ham data/nice", \&check_examined));
ok_all_patterns();
$wanted_examined = count_files("data/welcomelists");
ok(salearnrun("--ham data/welcomelists", \&check_examined));
ok_all_patterns();
$wanted_examined = 3;
ok(salearnrun("--ham --mbox data/nice.mbox", \&check_examined));
ok_all_patterns();
$wanted_examined = 3;
ok(salearnrun("--ham --mbox < data/nice.mbox", \&check_examined));
ok_all_patterns();
$wanted_examined = 3;
ok(salearnrun("--forget --mbox data/nice.mbox", \&check_examined));
ok_all_patterns();
%patterns = ( 'non-token data: bayes db version' => 'db version' );
ok(salearnrun("--dump magic", \&patterns_run_cb));
ok_all_patterns();
use constant SCAN_USING_PERL_CODE_TEST => 1;
# jm: off! not working for some reason. Mind you, this is
# not a supported way to call these APIs! so no biggie
if (SCAN_USING_PERL_CODE_TEST) {
$sa = create_saobj();
$sa->init();
$learner = $sa->call_plugins("learner_get_implementation");
open(MAIL,"< ../sample-nonspam.txt");
$raw_message = do {
local $/;
<MAIL>;
};
close(MAIL);
@msg = ();
foreach my $line (split(/^/m,$raw_message)) {
$line =~ s/\r$//;
push(@msg, $line);
}
$mail = $sa->parse( \@msg );
$body = $learner->get_body_from_msg($mail);
my $msgstatus = Mail::SpamAssassin::PerMsgStatus->new($sa, $mail);
ok($msgstatus);
my $score = $learner->scan($msgstatus, $mail, $body);
# Pretty much we can't count on the data returned with such little training
# so just make sure that the score wasn't equal to .5 which is the default
# return value.
print "\treturned score: $score\n";
ok($score =~ /\d/ && $score <= 1.0 && $score != .5);
open(MAIL,"< ../sample-spam.txt");
$raw_message = do {
local $/;
<MAIL>;
};
close(MAIL);
@msg = ();
foreach my $line (split(/^/m,$raw_message)) {
$line =~ s/\r$//;
push(@msg, $line);
}
$mail = $sa->parse( \@msg );
$body = $learner->get_body_from_msg($mail);
$msgstatus = Mail::SpamAssassin::PerMsgStatus->new($sa, $mail);
$score = $learner->scan($msgstatus, $mail, $body);
# Pretty much we can't count on the data returned with such little training
# so just make sure that the score wasn't equal to .5 which is the default
# return value.
print "\treturned score: $score\n";
ok($score =~ /\d/ && $score <= 1.0 && $score != .5);
}
# This bit breaks abstraction a bit, the userid is an implementation detail,
# but is necessary to perform some of the tests. Perhaps in the future we
# can add some sort of official API for this sort of thing.
$testuserid = $learner->{store}->{_userid};
ok(defined($testuserid));
ok($learner->{store}->clear_database());
ok(database_clear_p($testuser, $testuserid));
$sa->finish_learner();
}