in lib/Mail/SpamAssassin/PerMsgStatus.pm [2237:2467]
sub _get {
my ($self, $request) = @_;
my @results;
my $getaddr = 0;
my $getname = 0;
my $getraw = 0;
my $needraw = 0;
my $gethost = 0;
my $getdomain = 0;
my $getip = 0;
my $getrevip = 0;
my $getfirst = 0;
my $getlast = 0;
# special queries - process and strip modifiers
if (index($request,':') >= 0) { # triage
local $1;
while ($request =~ s/:([^:]*)//) {
if ($1 eq 'raw') { $getraw = 1 }
elsif ($1 eq 'addr') { $getaddr = $needraw = 1 }
elsif ($1 eq 'name') { $getname = $needraw = 1 }
elsif ($1 eq 'host') { $gethost = 1 }
elsif ($1 eq 'domain') { $gethost = $getdomain = 1 }
elsif ($1 eq 'ip') { $getip = 1 }
elsif ($1 eq 'revip') { $getip = $getrevip = 1 }
elsif ($1 eq 'first') { $getfirst = 1 }
elsif ($1 eq 'last') { $getlast = 1 }
}
}
my $request_lc = lc $request;
# ALL: entire pristine or semi-raw headers
if ($request eq 'ALL') {
if ($getraw) {
@results = $self->{msg}->get_pristine_header() =~ /^([^ \t].*?\n)(?![ \t])/smgi;
} else {
@results = $self->{msg}->get_all_headers(0);
}
return \@results;
}
# ALL-TRUSTED: entire trusted raw headers
elsif ($request eq 'ALL-TRUSTED') {
# '+1' since we added the received header even though it's not considered
# trusted, so we know that those headers can be trusted too
@results = $self->get_all_hdrs_in_rcvd_index_range(
undef, $self->{last_trusted_relay_index}+1,
undef, undef, $getraw);
return \@results;
}
# ALL-INTERNAL: entire internal raw headers
elsif ($request eq 'ALL-INTERNAL') {
# '+1' for the same reason as in ALL-TRUSTED above
@results = $self->get_all_hdrs_in_rcvd_index_range(
undef, $self->{last_internal_relay_index}+1,
undef, undef, $getraw);
return \@results;
}
# ALL-UNTRUSTED: entire untrusted raw headers
elsif ($request eq 'ALL-UNTRUSTED') {
# '+1' for the same reason as in ALL-TRUSTED above
@results = $self->get_all_hdrs_in_rcvd_index_range(
$self->{last_trusted_relay_index}+1, undef,
undef, undef, $getraw);
return \@results;
}
# ALL-EXTERNAL: entire external raw headers
elsif ($request eq 'ALL-EXTERNAL') {
# '+1' for the same reason as in ALL-TRUSTED above
@results = $self->get_all_hdrs_in_rcvd_index_range(
$self->{last_internal_relay_index}+1, undef,
undef, undef, $getraw);
return \@results;
}
# EnvelopeFrom: the SMTP MAIL FROM: address
elsif ($request_lc eq "\LEnvelopeFrom") {
push @results, $self->get_envelope_from();
}
# untrusted relays list, as string
elsif ($request_lc eq "\LX-Spam-Relays-Untrusted") {
push @results, $self->{relays_untrusted_str};
}
# trusted relays list, as string
elsif ($request_lc eq "\LX-Spam-Relays-Trusted") {
push @results, $self->{relays_trusted_str};
}
# external relays list, as string
elsif ($request_lc eq "\LX-Spam-Relays-External") {
push @results, $self->{relays_external_str};
}
# internal relays list, as string
elsif ($request_lc eq "\LX-Spam-Relays-Internal") {
push @results, $self->{relays_internal_str};
}
# ToCc: the combined recipients list
elsif ($request_lc eq "\LToCc") {
push @results, $self->{msg}->get_header('To', $getraw);
push @results, $self->{msg}->get_header('Cc', $getraw);
}
# MESSAGEID: handle lists which move the real message-id to another
# header for resending.
elsif ($request eq 'MESSAGEID') {
push @results, grep { defined($_) && $_ ne '' } (
$self->{msg}->get_header('X-Message-Id', $getraw),
$self->{msg}->get_header('Resent-Message-Id', $getraw),
$self->{msg}->get_header('X-Original-Message-ID', $getraw),
$self->{msg}->get_header('Message-Id', $getraw));
}
# a conventional header
else {
my @res = $getraw||$needraw ? $self->{msg}->raw_header($request)
: $self->{msg}->get_header($request);
if (!@res) {
if (defined(my $m = $self->{msg}->get_metadata($request))) {
push @res, $m;
}
}
push @results, @res if @res;
}
# Nothing found to process further, bail out quick
if (!@results) {
return \@results;
}
# Continue processing only first (topmost) or last header
if ($getfirst) {
@results = ($results[0]);
} elsif ($getlast) {
@results = ($results[-1]);
}
# special addr/name
if ($getaddr || $getname) {
my @res;
foreach my $line (@results) {
next unless defined $line;
# Note: parse_header_addresses always called with raw undecoded value
# Skip invalid addresses here
my @addrs = parse_header_addresses($line);
if (@addrs) {
if ($getaddr) {
foreach my $addr (@addrs) {
push @res, $addr->{address} if defined $addr->{address};
}
}
elsif ($getname) {
foreach my $addr (@addrs) {
next unless defined $addr->{phrase};
if ($getraw) {
# phrase=name, could also be username or comment unless name found
push @res, $addr->{phrase};
} else {
# If :raw was not specifically asked, decode mimewords
# TODO: silly call to Node module, should probably be in Util
my $decoded = Mail::SpamAssassin::Message::Node::_decode_header(
$addr->{phrase}, "PMS:get:$request");
# Normalize whitespace, unless it's all white-space
if ($decoded =~ /\S/) {
$decoded =~ s/\s+/ /gs;
$decoded =~ s/^\s+//;
$decoded =~ s/\s+$//;
$decoded =~ s/^'(.*?)'$/$1/; # remove single quotes
}
push @res, $decoded if defined $decoded;
}
}
}
}
}
@results = @res;
}
# special host/domain
if (@results && ($gethost || $getdomain || $getip)) {
my @res;
if ($gethost) {
# TODO: IDN matching needs honing
my $tldsRE = $self->{main}->{registryboundaries}->{valid_tlds_re};
#my $hostRE = qr/(?<![._-])\b([a-z\d][a-z\d._-]{0,251}\.${tldsRE})\b(?![._-])/i;
my $hostRE = qr/(?<![._-])(\S{1,251}\.${tldsRE})(?![._-])/i;
foreach my $line (@results) {
next unless defined $line;
my $host;
if ($getaddr) {
# If :addr already preparsed the line, just grab domain liberally
if ($line =~ /.*\@(\S+)/) {
$host = $1;
}
}
else {
# try grabbing email/msgid domain first, because user part might look like
# a valid host..
if ($line =~ /.*\@${hostRE}/i) {
if (is_fqdn_valid(idn_to_ascii($1), 1)) {
$host = $1;
}
}
# otherwise try hard to find a valid host
if (!$host) {
while ($line =~ /${hostRE}/ig) {
if (is_fqdn_valid(idn_to_ascii($1), 1)) {
$host = $1;
last;
}
}
}
}
if ($host) {
if ($getdomain) {
$host = $self->{main}->{registryboundaries}->trim_domain($host, 1);
}
push @res, $host;
}
}
} else {
my $ipRE = qr/(?<!\.)\b(${IP_ADDRESS})\b(?!\.)/;
foreach my $line (@results) {
next unless defined $line;
my $host;
if ($line =~ $ipRE) {
$host = $getrevip ? reverse_ip_address($1) : $1;
}
push @res, $host if defined $host;
}
}
@results = @res;
}
return \@results;
}