sub _get()

in lib/Mail/SpamAssassin/PerMsgStatus.pm [2237:2467]


sub _get {
  my ($self, $request) = @_;

  my @results;
  my $getaddr = 0;
  my $getname = 0;
  my $getraw = 0;
  my $needraw = 0;
  my $gethost = 0;
  my $getdomain = 0;
  my $getip = 0;
  my $getrevip = 0;
  my $getfirst = 0;
  my $getlast = 0;

  # special queries - process and strip modifiers
  if (index($request,':') >= 0) {  # triage
    local $1;
    while ($request =~ s/:([^:]*)//) {
      if    ($1 eq 'raw')    { $getraw  = 1 }
      elsif ($1 eq 'addr')   { $getaddr = $needraw = 1 }
      elsif ($1 eq 'name')   { $getname = $needraw = 1 }
      elsif ($1 eq 'host')   { $gethost = 1 }
      elsif ($1 eq 'domain') { $gethost = $getdomain = 1 }
      elsif ($1 eq 'ip')     { $getip = 1 }
      elsif ($1 eq 'revip')  { $getip = $getrevip = 1 }
      elsif ($1 eq 'first')  { $getfirst = 1 }
      elsif ($1 eq 'last')   { $getlast = 1 }
    }
  }
  my $request_lc = lc $request;

  # ALL: entire pristine or semi-raw headers
  if ($request eq 'ALL') {
    if ($getraw) {
      @results = $self->{msg}->get_pristine_header() =~ /^([^ \t].*?\n)(?![ \t])/smgi;
    } else {
      @results = $self->{msg}->get_all_headers(0);
    }
    return \@results;
  }
  # ALL-TRUSTED: entire trusted raw headers
  elsif ($request eq 'ALL-TRUSTED') {
    # '+1' since we added the received header even though it's not considered
    # trusted, so we know that those headers can be trusted too
    @results = $self->get_all_hdrs_in_rcvd_index_range(
			undef, $self->{last_trusted_relay_index}+1,
			undef, undef, $getraw);
    return \@results;
  }
  # ALL-INTERNAL: entire internal raw headers
  elsif ($request eq 'ALL-INTERNAL') {
    # '+1' for the same reason as in ALL-TRUSTED above
    @results = $self->get_all_hdrs_in_rcvd_index_range(
			undef, $self->{last_internal_relay_index}+1,
			undef, undef, $getraw);
    return \@results;
  }
  # ALL-UNTRUSTED: entire untrusted raw headers
  elsif ($request eq 'ALL-UNTRUSTED') {
    # '+1' for the same reason as in ALL-TRUSTED above
    @results = $self->get_all_hdrs_in_rcvd_index_range(
			$self->{last_trusted_relay_index}+1, undef,
			undef, undef, $getraw);
    return \@results;
  }
  # ALL-EXTERNAL: entire external raw headers
  elsif ($request eq 'ALL-EXTERNAL') {
    # '+1' for the same reason as in ALL-TRUSTED above
    @results = $self->get_all_hdrs_in_rcvd_index_range(
			$self->{last_internal_relay_index}+1, undef,
			undef, undef, $getraw);
    return \@results;
  }
  # EnvelopeFrom: the SMTP MAIL FROM: address
  elsif ($request_lc eq "\LEnvelopeFrom") {
    push @results, $self->get_envelope_from();
  }
  # untrusted relays list, as string
  elsif ($request_lc eq "\LX-Spam-Relays-Untrusted") {
    push @results, $self->{relays_untrusted_str};
  }
  # trusted relays list, as string
  elsif ($request_lc eq "\LX-Spam-Relays-Trusted") {
    push @results, $self->{relays_trusted_str};
  }
  # external relays list, as string
  elsif ($request_lc eq "\LX-Spam-Relays-External") {
    push @results, $self->{relays_external_str};
  }
  # internal relays list, as string
  elsif ($request_lc eq "\LX-Spam-Relays-Internal") {
    push @results, $self->{relays_internal_str};
  }
  # ToCc: the combined recipients list
  elsif ($request_lc eq "\LToCc") {
    push @results, $self->{msg}->get_header('To', $getraw);
    push @results, $self->{msg}->get_header('Cc', $getraw);
  }
  # MESSAGEID: handle lists which move the real message-id to another
  # header for resending.
  elsif ($request eq 'MESSAGEID') {
    push @results, grep { defined($_) && $_ ne '' } (
		   $self->{msg}->get_header('X-Message-Id', $getraw),
		   $self->{msg}->get_header('Resent-Message-Id', $getraw),
		   $self->{msg}->get_header('X-Original-Message-ID', $getraw),
		   $self->{msg}->get_header('Message-Id', $getraw));
  }
  # a conventional header
  else {
    my @res = $getraw||$needraw ? $self->{msg}->raw_header($request)
                                : $self->{msg}->get_header($request);
    if (!@res) {
      if (defined(my $m = $self->{msg}->get_metadata($request))) {
        push @res, $m;
      }
    }
    push @results, @res if @res;
  }

  # Nothing found to process further, bail out quick
  if (!@results) {
    return \@results;
  }

  # Continue processing only first (topmost) or last header
  if ($getfirst) {
    @results = ($results[0]);
  } elsif ($getlast) {
    @results = ($results[-1]);
  }

  # special addr/name
  if ($getaddr || $getname) {
    my @res;
    foreach my $line (@results) {
      next unless defined $line;
      # Note: parse_header_addresses always called with raw undecoded value
      # Skip invalid addresses here
      my @addrs = parse_header_addresses($line);
      if (@addrs) {
        if ($getaddr) {
          foreach my $addr (@addrs) {
            push @res, $addr->{address} if defined $addr->{address};
          }
        }
        elsif ($getname) {
          foreach my $addr (@addrs) {
            next unless defined $addr->{phrase};
            if ($getraw) {
              # phrase=name, could also be username or comment unless name found
              push @res, $addr->{phrase};
            } else {
              # If :raw was not specifically asked, decode mimewords
              # TODO: silly call to Node module, should probably be in Util
              my $decoded = Mail::SpamAssassin::Message::Node::_decode_header(
                              $addr->{phrase}, "PMS:get:$request");
              # Normalize whitespace, unless it's all white-space
              if ($decoded =~ /\S/) {
                $decoded =~ s/\s+/ /gs;
                $decoded =~ s/^\s+//;
                $decoded =~ s/\s+$//;
                $decoded =~ s/^'(.*?)'$/$1/; # remove single quotes
              }
              push @res, $decoded if defined $decoded;
            }
          }
        }
      }
    }
    @results = @res;
  }

  # special host/domain
  if (@results && ($gethost || $getdomain || $getip)) {
    my @res;
    if ($gethost) {
      # TODO: IDN matching needs honing
      my $tldsRE = $self->{main}->{registryboundaries}->{valid_tlds_re};
      #my $hostRE = qr/(?<![._-])\b([a-z\d][a-z\d._-]{0,251}\.${tldsRE})\b(?![._-])/i;
      my $hostRE = qr/(?<![._-])(\S{1,251}\.${tldsRE})(?![._-])/i;
      foreach my $line (@results) {
        next unless defined $line;
        my $host;
        if ($getaddr) {
          # If :addr already preparsed the line, just grab domain liberally
          if ($line =~ /.*\@(\S+)/) {
            $host = $1;
          }
        }
        else {
          # try grabbing email/msgid domain first, because user part might look like
          # a valid host..
          if ($line =~ /.*\@${hostRE}/i) {
            if (is_fqdn_valid(idn_to_ascii($1), 1)) {
              $host = $1;
            }
          }
          # otherwise try hard to find a valid host
          if (!$host) {
            while ($line =~ /${hostRE}/ig) {
              if (is_fqdn_valid(idn_to_ascii($1), 1)) {
                $host = $1;
                last;
              }
            }
          }
        }
        if ($host) {
          if ($getdomain) {
            $host = $self->{main}->{registryboundaries}->trim_domain($host, 1);
          }
          push @res, $host;
        }
      }
    } else {
      my $ipRE = qr/(?<!\.)\b(${IP_ADDRESS})\b(?!\.)/;
      foreach my $line (@results) {
        next unless defined $line;
        my $host;
        if ($line =~ $ipRE) {
          $host = $getrevip ? reverse_ip_address($1) : $1;
        }
        push @res, $host  if defined $host;
      }
    }
    @results = @res;
  }

  return \@results;
}