sub _scan_mbx()

in lib/Mail/SpamAssassin/ArchiveIterator.pm [1088:1221]


# Scan an mbx-format mailbox (or a directory of them) and hand every message
# selected for scanning to the caller's callback.
#
# Arguments:
#   $self   - the iterator object
#   $class  - passed through unchanged to $bkfunc
#   $folder - path of a single mbx file, or of a directory holding mbx files
#   $bkfunc - code ref, invoked as
#               $bkfunc->($self, $date, $class, 'b', "$file.$offset")
#             where $offset is the file position just after the message's
#             separator line and $date is the value recorded for that
#             message (from receive_date() on its headers, or from the cache)
#
# When $folder is a directory, only plain files whose names do not start with
# a dot and contain no whitespace are considered.  Files whose names look
# compressed (.gz, .bz2, .xz, .lz, .lzo, .lz4) are skipped with a warning.
#
# Dies when a mailbox is empty or its first line lacks the "*mbx*" marker,
# when a line at a message boundary does not match MBX_SEPARATOR, and on
# seek/tell/read/close failures.  Returns early (after a warning) if a
# directory cannot be opened.
sub _scan_mbx {
  my ($self, $class, $folder, $bkfunc) = @_;
  my @files;

  my $stat_errn = stat($folder) ? 0 : 0+$!;
  if ($stat_errn == ENOENT) {
    # no longer there?
  }
  elsif ($stat_errn != 0) {
    warn "archive-iterator: no access to $folder: $!";
  }
  elsif (-f _) {
    push(@files, $folder);
  }
  elsif (-d _) {
    # got passed a directory full of mbx folders.
    $folder =~ s/\/\s*$//; # remove trailing slash, if there is one

    # Lexical dirhandle and a named loop variable: no package-global DIR
    # handle, and the caller's $_ is not clobbered by the directory scan.
    my $dh;
    if (!opendir($dh, $folder)) {
      warn "archive-iterator: can't open '$folder' dir: $!\n";
      return;
    }
    while (defined(my $entry = readdir($dh))) {
      next if $entry eq '.' || $entry eq '..' || $entry !~ /^[^\.]\S*$/;
      # hmmm, ignores folders with spaces in the name???
      $stat_errn = stat("$folder/$entry") ? 0 : 0+$!;
      if ($stat_errn == ENOENT) {
        # no longer there?
      }
      elsif ($stat_errn != 0) {
        warn "archive-iterator: no access to $folder/$entry: $!";
      }
      elsif (-f _) {
        push(@files, "$folder/$entry");
      }
    }
    closedir($dh)  or die "error closing directory $folder: $!";
  }
  else {
    warn "archive-iterator: $folder is not a plain file or directory: $!";
  }

  foreach my $file (@files) {
    $self->_bump_scan_progress();

    # Test the individual file, not $folder: when $folder is a directory the
    # compressed entries inside it must be skipped one by one, and a
    # directory that merely has a compressed-looking name must not cause
    # all of its files to be skipped.
    if ($file =~ /\.(?:gz|bz2|xz|lz[o4]?)$/i) {
      warn "archive-iterator: compressed mbx folders are not supported at this time\n";
      next;
    }

    my @s = stat($file);
    @s  or warn "archive-iterator: no access to $file: $!";
    next unless $self->_message_is_useful_by_file_modtime($s[9]);

    # message offset => date value, either loaded from the cache or built
    # by parsing the mailbox below
    my $info = {};
    my $count;

    $self->_create_cache('mbx', $file);

    if ($self->{opt_cache}) {
      if ($count = $AICache->count()) {
        $info = $AICache->check();
      }
    }

    # nothing usable in the cache: parse the mailbox itself
    unless ($count) {
      my $fh = $self->_mail_open($file);
      if (!$fh) {
        # Skipping this file must not bypass the end-of-loop cleanup,
        # otherwise the cache object set up for it stays in $AICache.
        # NOTE(review): assumes finish() is harmless on an untouched cache
        # -- confirm against AICache.
        if (defined $AICache) {
          $AICache = $AICache->finish();
        }
        next;
      }

      # check the mailbox is in mbx format
      $! = 0;
      my $first_line = <$fh>;
      defined $first_line || $!==0  or
        $!==EBADF ? dbg("archive-iterator: error reading: $!")
                  : die "error reading: $!";
      if (!defined $first_line) {
        die "archive-iterator: error: mailbox not in mbx format - empty!\n";
      } elsif ($first_line !~ /\*mbx\*/) {
        die "archive-iterator: error: mailbox not in mbx format!\n";
      }

      # skip mbx headers to the first email...
      seek($fh,2048,0)  or die "cannot reposition file to 2048: $!";

      # $! is cleared before every read so that, once the loop ends, an
      # undefined $_ with $! still 0 means plain end-of-file, not an error.
      for ($!=0; <$fh>; $!=0) {
        if ($_ =~ MBX_SEPARATOR) {
          my $offset = tell $fh;
          $offset >= 0  or die "cannot obtain file position: $!";
          # second capture group of MBX_SEPARATOR; used below as the
          # distance from $offset to the next separator line
          my $size = $2;

          # gather up the headers...
          my $header = '';
          for ($!=0; <$fh>; $!=0) {
            last if (/^\015?$/s);
            $header .= $_;
          }
          defined $_ || $!==0  or
            $!==EBADF ? dbg("archive-iterator: error reading: $!")
                      : die "error reading: $!";
          if (!($self->{opt_skip_empty_messages} && $header eq '')) {
            $self->_bump_scan_progress();
            $info->{$offset} = Mail::SpamAssassin::Util::receive_date($header);
          }

          # go onto the next message
          seek($fh, $offset + $size, 0)
            or die "cannot reposition file to $offset + $size: $!";
        }
        else {
          die "archive-iterator: error: failure to read message body!\n";
        }
      }
      defined $_ || $!==0  or
        $!==EBADF ? dbg("archive-iterator: error reading: $!")
                  : die "error reading: $!";
      close $fh  or die "error closing input file: $!";
    }

    while (my ($k, $v) = each %{$info}) {
      # freshly parsed entries (no cached count) are written back to the cache
      if (defined $AICache && !$count) {
        $AICache->update($k, $v);
      }

      if ($self->{determine_receive_date}) {
        next if !$self->_message_is_useful_by_date($v);
      }
      next if !$self->_scanprob_says_scan();

      $bkfunc->($self, $v, $class, 'b', "$file.$k");
    }

    if (defined $AICache) {
      $AICache = $AICache->finish();
    }
  }
}