in lib/Mail/SpamAssassin/ArchiveIterator.pm [1088:1221]
# Scan an mbx-format mailbox file, or a directory of such files, and hand
# every message found to a callback.
#
# Arguments:
#   $self   - the iterator object; its opt_cache, opt_skip_empty_messages
#             and determine_receive_date fields are consulted here
#   $class  - opaque value passed through unchanged to $bkfunc
#   $folder - path of a single mbx file, or of a directory whose entries
#             (non-dot names without whitespace) are each treated as mbx
#             files
#   $bkfunc - code ref, called once per selected message as
#             $bkfunc->($self, $value, $class, 'b', "$file.$offset")
#             where $value is what receive_date() returned for the message
#             headers (or what the cache supplied) and $offset is the file
#             position just after the message's mbx separator line
#
# Returns nothing meaningful.  Inaccessible paths and compressed files are
# reported with warn() and skipped.  Dies if a file is not in mbx format,
# on read errors other than EBADF, and on seek/tell/close failures.
sub _scan_mbx {
  my ($self, $class, $folder, $bkfunc) = @_;
  my @files;

  # Numeric errno of the stat, 0 on success.  The stat result stays
  # available through the special "_" filehandle for the -f/-d tests below.
  my $stat_errn = stat($folder) ? 0 : 0+$!;
  if ($stat_errn == ENOENT) {
    # no longer there?
  }
  elsif ($stat_errn != 0) {
    warn "archive-iterator: no access to $folder: $!";
  }
  elsif (-f _) {
    push(@files, $folder);
  }
  elsif (-d _) {
    # got passed a directory full of mbx folders.
    $folder =~ s/\/\s*$//; # remove trailing slash, if there is one

    # Lexical dirhandle: does not clobber a global DIR handle that some
    # other code may have open.
    my $dh;
    if (!opendir($dh, $folder)) {
      warn "archive-iterator: can't open '$folder' dir: $!\n";
      return;
    }
    while (defined(my $entry = readdir($dh))) {
      # hmmm, ignores folders with spaces in the name???
      next if $entry eq '.' || $entry eq '..' || $entry !~ /^[^\.]\S*$/;

      my $path = "$folder/$entry";
      $stat_errn = stat($path) ? 0 : 0+$!;
      if ($stat_errn == ENOENT) {
        # no longer there?
      }
      elsif ($stat_errn != 0) {
        warn "archive-iterator: no access to $path: $!";
      }
      elsif (-f _) {
        push(@files, $path);
      }
    }
    closedir($dh) or die "error closing directory $folder: $!";
  }
  else {
    # NOTE(review): stat succeeded on this path, so $! carries no
    # information about it; the message text is left as it was.
    warn "archive-iterator: $folder is not a plain file or directory: $!";
  }

  foreach my $file (@files) {
    $self->_bump_scan_progress();

    # Test the individual file, not the argument we were called with:
    # when $folder is a directory the two differ, and a compressed file
    # inside it must be skipped here rather than failing further down.
    if ($file =~ /\.(?:gz|bz2|xz|lz[o4]?)$/i) {
      warn "archive-iterator: compressed mbx folders are not supported at this time\n";
      next;
    }

    my @s = stat($file);
    # NOTE(review): on stat failure we only warn and carry on with an
    # undefined mtime; whether the helper below copes with undef is not
    # visible from here -- confirm.
    @s or warn "archive-iterator: no access to $file: $!";
    next unless $self->_message_is_useful_by_file_modtime($s[9]);

    my $info = {};   # message offset => per-message value
    my $count;       # number of entries the cache already holds, if any

    $self->_create_cache('mbx', $file);

    if ($self->{opt_cache}) {
      if ($count = $AICache->count()) {
        $info = $AICache->check();
      }
    }

    # Nothing usable from the cache: index the mailbox by reading it.
    unless ($count) {
      my $fh = $self->_mail_open($file);
      # NOTE(review): this skips the $AICache->finish() call at the bottom
      # of the loop -- presumably harmless, verify against _create_cache.
      next unless $fh;

      # check the mailbox is in mbx format
      $! = 0;
      my $fp = <$fh>;
      defined $fp || $!==0  or
        $!==EBADF ? dbg("archive-iterator: error reading: $!")
                  : die "error reading: $!";
      if (!defined $fp) {
        die "archive-iterator: error: mailbox not in mbx format - empty!\n";
      } elsif ($fp !~ /\*mbx\*/) {
        die "archive-iterator: error: mailbox not in mbx format!\n";
      }

      # skip mbx headers to the first email...
      seek($fh,2048,0)  or die "cannot reposition file to 2048: $!";

      # A bare <$fh> as the condition of a C-style for is treated like
      # while (<$fh>): the line lands in $_ and is tested for definedness.
      # $! is cleared before every read so that it can be told apart from
      # plain EOF once the loop ends.
      for ($!=0; <$fh>; $!=0) {
        if ($_ =~ MBX_SEPARATOR) {
          my $offset = tell $fh;
          $offset >= 0  or die "cannot obtain file position: $!";
          my $size = $2;   # second capture group of MBX_SEPARATOR

          # gather up the headers...
          my $header = '';
          for ($!=0; <$fh>; $!=0) {
            last if (/^\015?$/s);   # empty line ends the header
            $header .= $_;
          }
          defined $_ || $!==0  or
            $!==EBADF ? dbg("archive-iterator: error reading: $!")
                      : die "error reading: $!";

          if (!($self->{opt_skip_empty_messages} && $header eq '')) {
            $self->_bump_scan_progress();
            $info->{$offset} = Mail::SpamAssassin::Util::receive_date($header);
          }

          # go onto the next message
          seek($fh, $offset + $size, 0)
            or die "cannot reposition file to $offset + $size: $!";
        }
        else {
          # After each seek the next line read must be a separator.
          die "archive-iterator: error: failure to read message body!\n";
        }
      }
      defined $_ || $!==0  or
        $!==EBADF ? dbg("archive-iterator: error reading: $!")
                  : die "error reading: $!";
      close $fh  or die "error closing input file: $!";
    }

    while (my ($k, $v) = each %{$info}) {
      # Entries that were just computed (not loaded from the cache) are
      # stored into the cache, when there is one.
      if (defined $AICache && !$count) {
        $AICache->update($k, $v);
      }

      if ($self->{determine_receive_date}) {
        next if !$self->_message_is_useful_by_date($v);
      }
      next if !$self->_scanprob_says_scan();

      $bkfunc->($self, $v, $class, 'b', "$file.$k");
    }

    if (defined $AICache) {
      $AICache = $AICache->finish();
    }
  }
}