in lib/Mail/SpamAssassin/Plugin/MIMEEval.pm [300:505]
sub _check_attachments {
my ($self, $pms) = @_;
# MIME status
my $where = -1; # -1 = start, 0 = nowhere, 1 = header, 2 = body
my $qp_bytes = 0; # total bytes in QP regions
my $qp_count = 0; # QP-encoded bytes in QP regions
my @part_bytes; # MIME part total bytes
my @part_type; # MIME part types
my $normal_chars = 0; # MIME text bytes that aren't encoded
my $unicode_chars = 0; # MIME text bytes that are unicode entities
# MIME header information
my $part = -1; # MIME part index
# indicate the scan has taken place
$pms->{mime_checked_attachments} = 1;
# results
# $pms->{mime_base64_blanks} = 0; # expensive to determine, no longer avail
$pms->{mime_base64_count} = 0;
$pms->{mime_base64_encoded_text} = 0;
# $pms->{mime_base64_illegal} = 0;
# $pms->{mime_base64_latin} = 0;
$pms->{mime_body_html_count} = 0;
$pms->{mime_body_text_count} = 0;
$pms->{mime_faraway_charset} = 0;
# $pms->{mime_html_no_charset} = 0;
$pms->{mime_missing_boundary} = 0;
$pms->{mime_multipart_alternative} = 0;
$pms->{mime_multipart_ratio} = 1.0;
$pms->{mime_qp_count} = 0;
# $pms->{mime_qp_illegal} = 0;
# $pms->{mime_qp_inline_no_charset} = 0;
$pms->{mime_qp_long_line} = 0;
$pms->{mime_qp_ratio} = 0;
$pms->{mime_ascii_text_illegal} = 0;
$pms->{mime_text_unicode_ratio} = 0;
# Get all parts ...
foreach my $p ($pms->{msg}->find_parts(qr/./)) {
# message headers
my ($ctype, $boundary, $charset, $name) = Mail::SpamAssassin::Util::parse_content_type($p->get_header("content-type"));
if ($ctype eq 'multipart/alternative') {
$pms->{mime_multipart_alternative} = 1;
}
my $cte = $p->get_header('Content-Transfer-Encoding') || '';
chomp($cte = defined($cte) ? lc $cte : "");
my $cd = $p->get_header('Content-Disposition') || '';
chomp($cd = defined($cd) ? lc $cd : "");
$charset = lc $charset if ($charset);
$name = lc $name if ($name);
$self->_check_mime_header($pms, $ctype, $cte, $cd, $charset, $name);
# If we're not in a leaf node in the tree, there will be no raw
# section, so skip it.
if (! $p->is_leaf()) {
next;
}
$part++;
$part_type[$part] = $ctype;
$part_bytes[$part] = 0 if index($cd, 'attachment') == -1;
my $cte_is_base64 = index($cte, 'base64') >= 0;
my $previous = '';
foreach (@{$p->raw()}) {
# if ($cte_is_base64) {
# if ($previous =~ /^\s*$/ && /^\s*$/) { # expensive, avoid!
# $pms->{mime_base64_blanks} = 1; # never used, don't bother
# }
# # MIME_BASE64_ILLEGAL: now a zero-hitter
# # if (m@[^A-Za-z0-9+/=\n]@ || /=[^=\s]/) {
# # $pms->{mime_base64_illegal} = 1;
# # }
# }
# if ($pms->{mime_html_no_charset} && $ctype eq 'text/html' && defined $charset) {
# $pms->{mime_html_no_charset} = 0;
# }
if ($pms->{mime_multipart_alternative} && index($cd, 'attachment') == -1 &&
($ctype eq 'text/plain' || $ctype eq 'text/html')) {
$part_bytes[$part] += length;
}
if ($where != 1 && $cte eq "quoted-printable" && index($_, 'SPAM: ') != 0) {
# RFC 5322: Each line SHOULD be no more than 78 characters,
# excluding the CRLF.
# RFC 2045: The Quoted-Printable encoding REQUIRES that
# encoded lines be no more than 76 characters long.
# Bug 5491: 6% of email classified as HAM by SA triggered the
# MIME_QP_LONG_LINE rule. Apple Mail can generate a QP-line
# that is 2 chars too long. Same goes for Outlook Web Access.
# lines include one trailing \n character
# if (length > 76+1) { # conforms to RFC 5322 and RFC 2045
if (length > 78+1) { # conforms to RFC 5322 only, not RFC 2045
$pms->{mime_qp_long_line} = 1;
}
$qp_bytes += length;
# MIME_QP_DEFICIENT: zero-hitter now
# check for illegal substrings (RFC 2045), hexadecimal values 7F-FF and
# control characters other than TAB, or CR and LF as parts of CRLF pairs
# if (!$pms->{mime_qp_illegal} && /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff]/)
# {
# $pms->{mime_qp_illegal} = 1;
# }
# count excessive QP bytes
if (index($_, '=') >= 0) {
## no critic (Perlsecret)
# whoever wrote this next line is an evil hacker -- jm
my $qp = () = m/=(?:09|3[0-9ABCEF]|[2456][0-9A-F]|7[0-9A-E])/g;
## use critic
if ($qp) {
$qp_count += $qp;
# tabs and spaces at end of encoded line are okay. Also, multiple
# whitespace at the end of a line are OK, like ">=20=20=20=20=20=20".
my ($trailing) = m/((?:=09|=20)+)\s*$/g;
if ($trailing) {
$qp_count -= (length($trailing) / 3);
}
}
}
}
# if our charset is ASCII, this should only contain 7-bit characters
# except NUL or a free-standing CR. anything else is a violation of
# the definition of charset="us-ascii".
if ($ctype eq 'text/plain' && (!defined $charset || $charset eq 'us-ascii')) {
# no re "strict"; # since perl 5.21.8: Ranges of ASCII printables...
if (m/[\x00\x0d\x80-\xff]+/) {
if (would_log('dbg', 'eval')) {
my $str = $_;
$str =~ s/([\x00\x0d\x80-\xff]+)/'<' . unpack('H*', $1) . '>'/eg;
dbg("check: ascii_text_illegal: matches " . $str . "\n");
}
$pms->{mime_ascii_text_illegal}++;
}
}
# if we're text/plain, we should never see unicode escapes in this
# format, especially not for 7bit or 8bit.
if ($ctype eq 'text/plain' && ($cte eq '' || $cte eq '7bit' || $cte eq '8bit')) {
my ($text, $subs) = $_;
$subs = $text =~ s/&#x[0-9A-F]{4};//g;
$normal_chars += length($text);
$unicode_chars += $subs;
if ($subs && would_log('dbg', 'eval')) {
my $str = $_;
$str = substr($str, 0, 512) . '...' if (length($str) > 512);
dbg("check: abundant_unicode: " . $str . " (" . $subs . ")\n");
}
}
$previous = $_;
}
}
if ($qp_bytes) {
$pms->{mime_qp_ratio} = $qp_count / $qp_bytes;
$pms->{mime_qp_count} = $qp_count;
$pms->{mime_qp_bytes} = $qp_bytes;
}
if ($normal_chars) {
$pms->{mime_text_unicode_ratio} = $unicode_chars / $normal_chars;
}
if ($pms->{mime_multipart_alternative}) {
my $text;
my $html;
# bug 4207: we want the size of the last parts
for (my $i = $part; $i >= 0; $i--) {
next if !defined $part_bytes[$i];
if (!defined($html) && $part_type[$i] eq 'text/html') {
$html = $part_bytes[$i];
}
elsif (!defined($text) && $part_type[$i] eq 'text/plain') {
$text = $part_bytes[$i];
}
last if (defined($html) && defined($text));
}
if (defined($text) && defined($html) && $html > 0) {
$pms->{mime_multipart_ratio} = ($text / $html);
}
}
# Look to see if any multipart boundaries are not "balanced"
foreach my $val (values %{$pms->{msg}->{mime_boundary_state}}) {
if ($val != 0) {
$pms->{mime_missing_boundary} = 1;
last;
}
}
}