in lib/Mail/SpamAssassin/HTML.pm [844:920]
# Per-tag heuristics: inspects one tag and its attributes and updates the
# parser state kept in $self.
# NOTE(review): appears to be invoked once per start tag - confirm at caller.
#
# Arguments:
#   $self - parser object (hash based)
#   $tag  - tag name; compared against lowercase literals such as "img"
#   $attr - hashref of the tag's attributes
#   $num  - accepted but not used in this sub
#
# Effects, depending on $tag:
#   font         - reports font_face_bad via put_results() when the face
#                  attribute does not match the plain font-list pattern
#   img          - inside <a>: appends "<img>\n" to the current anchor text;
#                  with width and height: adds an estimated pixel area to
#                  $self->{image_area}
#   form         - reports form_action_mailto when action contains "mailto:"
#   object/embed - reports embeds
#   a            - starts a new, empty anchor-text entry (keyed by the href
#                  after canon_uri(), or by "" when there is no href)
#   title        - starts a new, empty entry in $self->{title}
#   meta         - collects charsets declared through http-equiv Content-Type
#                  as a space separated string in $self->{charsets}
#
# There is no explicit return; the result is whatever the last evaluated
# statement yields.
sub html_tests {
  my ($self, $tag, $attr, $num) = @_;

  if ($tag eq "font" && exists $attr->{face}) {
    # Fixes from Bug 5956, 7312
    # Examples seen in ham:
    #  "Tahoma", Verdana, Arial, sans-serif
    #  'Montserrat', sans-serif
    #  Arial,Helvetica,Sans-Serif;
    #  .SFUIDisplay
    #  hirakakupro-w3
    # TODO: There's still the problem of completely foreign unicode strings,
    # probably this rule should be deprecated.
    if ($attr->{face} !~ /^\s*["'.]?[a-z ][a-z -]*[a-z]\d?["']?(?:,\s*["']?[a-z][a-z -]*[a-z]\d?["']?)*;?$/i) {
      $self->put_results(font_face_bad => 1);
    }
  }

  # an image inside a link becomes part of that link's anchor text
  if ($tag eq "img" && exists $self->{inside}{a} && $self->{inside}{a} > 0) {
    my $uri = $self->{anchor_last};
    utf8::encode($uri) if $self->{SA_encode_results};
    $self->{uri}->{$uri}->{anchor_text}->[-1] .= "<img>\n";
    $self->{anchor}->[-1] .= "<img>\n";
  }

  if ($tag eq "img" && exists $attr->{width} && exists $attr->{height}) {
    my $width = 0;
    my $height = 0;

    # assume 800x600 screen for percentage values
    # ($2 is only defined when the optional "%" suffix matched)
    if ($attr->{width} =~ /^(\d+)(\%)?$/) {
      $width = defined $2 ? $1 * 8 : $1;
    }
    if ($attr->{height} =~ /^(\d+)(\%)?$/) {
      $height = defined $2 ? $1 * 6 : $1;
    }

    # guess size when the value was zero or not parseable; after this both
    # dimensions are always positive, so the area can be added directly
    $width = 200 if $width <= 0;
    $height = 200 if $height <= 0;
    $self->{image_area} += $width * $height;
  }

  if ($tag eq "form" && exists $attr->{action}) {
    $self->put_results(form_action_mailto => 1) if $attr->{action} =~ /mailto:/i;
  }

  if ($tag eq "object" || $tag eq "embed") {
    $self->put_results(embeds => 1);
  }

  # special text delimiters - <a> and <title>
  if ($tag eq "a") {
    my $uri = $self->{anchor_last} =
      (exists $attr->{href} ? $self->canon_uri($attr->{href}) : "");
    utf8::encode($uri) if $self->{SA_encode_results};
    push(@{$self->{uri}->{$uri}->{anchor_text}}, '');
    push(@{$self->{anchor}}, '');
  }
  if ($tag eq "title") {
    $self->{title_index}++;
    $self->{title}->[$self->{title_index}] = "";
  }

  if ($tag eq "meta" &&
      exists $attr->{'http-equiv'} &&
      exists $attr->{content} &&
      $attr->{'http-equiv'} =~ /Content-Type/i &&
      $attr->{content} =~ /\bcharset\s*=\s*["']?([^"']+)/i)
  {
    my $charset = $1;
    # Decide on the separator BEFORE touching the slot as an lvalue: the left
    # operand of ".=" is evaluated first and creates the hash key, so an
    # "exists" test on the right-hand side would always be true and the very
    # first charset would be stored with a leading space.
    if (defined $self->{charsets} && $self->{charsets} ne "") {
      $self->{charsets} .= " $charset";
    }
    else {
      $self->{charsets} = $charset;
    }
  }

  # todo: capture URI from meta refresh tag
}