sub get_envelope_from()

in lib/Mail/SpamAssassin/PerMsgStatus.pm [3282:3424]


# Determine the envelope sender (the SMTP MAIL FROM address) for the message
# held by this object.
#
# Sources are consulted in this order, and the first usable one wins:
#   1. a result already stored in $self->{envelopefrom};
#   2. 'return_path' passed in by the caller via the message's suppl_attrib;
#   3. the header named by the 'envelope_sender_header' setting -- when that
#      setting is defined, no other source is tried afterwards;
#   4. the 'envfrom' recorded for the last untrusted relay, or for the first
#      trusted relay when there are no untrusted relays;
#   5. the X-Envelope-From, Envelope-Sender and Return-Path headers, unless
#      an X-Sender header plus a fetchmail Received line make them suspect.
#
# Returns the address (which may be '' for a null "<>" sender).  When nothing
# trustworthy is found it returns undef in scalar context (empty list in list
# context).  Every outcome, failure included, is stored in
# $self->{envelopefrom} so later calls return immediately.
sub get_envelope_from {
  my ($self) = @_;

  # Reuse an earlier answer; a stored undef counts as an answer too.
  return $self->{envelopefrom} if exists $self->{envelopefrom};

  # An address is usable when it is the null sender '' (ie. "<>" without
  # the < and >) or has an "@" somewhere after its first character.
  my $is_usable = sub {
    my ($addr) = @_;
    return defined $addr && ($addr eq '' || index($addr, '@') > 0);
  };

  # --- 1. envelope information handed over by the calling application ---
  my $caller_info = $self->{msg}->{suppl_attrib};
  if (exists($caller_info->{return_path})) {
    my $supplied = $caller_info->{return_path};
    if ($supplied =~ /^<(.*)>$/) {
      $supplied = $1;  # drop the enclosing '<' and '>'
    }
    dbg("message: using EnvelopeFrom provided by suppl_attrib: '%s'", $supplied);
    $self->{envelopefrom} = $supplied;
    return $supplied;
  }

  # bug 2142: MTAs and calling apps disagree on which header carries the
  # envelope sender, so everything below is a prioritised series of guesses.
  my $envf;

  # --- 2. header explicitly configured by the user ---
  # If a header was configured we assume the MTA always adds it, and we do
  # not fall through to other, possibly inappropriate, headers.
  my $conf_hdr = $self->{conf}->{envelope_sender_header};
  if (defined $conf_hdr) {
    # only the topmost (most recent) copy is considered
    $envf = ($self->get($conf_hdr.":first:addr"))[0];

    if ($is_usable->($envf)) {
      dbg("message: using envelope_sender_header '%s' as EnvelopeFrom: '%s'",
          $conf_hdr, $envf);
      $self->{envelopefrom} = $envf;
      return $envf;
    }

    # Configured, but absent or unusable: say which, then give up.
    if (!defined $envf) {
      dbg("message: envelope_sender_header '%s' not found in message",
          $conf_hdr);
    } else {
      dbg("message: envelope_sender_header '%s': '%s' is not valid, ignoring",
          $conf_hdr, $envf);
    }
    $self->{envelopefrom} = undef;
    return;
  }

  # --- 3. envelope-from recorded in the relay (Received) data ---
  # Prefer the last untrusted relay; with no untrusted relays at all the
  # message is ALL_TRUSTED, so use the first (oldest) trusted relay instead.
  my $hop_label = 'last untrusted';
  my $hop = $self->{relays_untrusted}->[0];
  if (!defined $hop) {
    $hop_label = 'first trusted';
    $hop = $self->{relays_trusted}->[-1];
  }

  if (defined $hop) {
    $envf = $hop->{envfrom};
    if ($is_usable->($envf)) {
      dbg("message: using $hop_label relay envelope-from as EnvelopeFrom: '$envf'");
      $self->{envelopefrom} = $envf;
      return $envf;
    }
  }

  # --- 4. fetchmail sanity check ---
  # List software (Yahoo! Groups among others) often adds X-Sender holding
  # the original env-from.  fetchmail picks that up and reuses it as the
  # env-from of its own delivery, even though the list used a different one
  # in between.  So X-Sender together with a fetchmail signature in the
  # Received lines means no Envelope-From style header can be trusted.
  my $x_sender = ($self->get("X-Sender:first:addr"))[0];
  if (defined $x_sender && index($x_sender, '@') >= 0) {
    for my $received ($self->get("Received")) {
      next if index($received, '(fetchmail') == -1;
      dbg("message: X-Sender and fetchmail signatures found, cannot trust envelope-from");
      $self->{envelopefrom} = undef;
      return;
    }
  }

  # --- 5. well-known envelope headers, most specific first ---
  # Each entry: header name, pattern detecting that header *below* a
  # Received line (a sign that a list relayed the mail with a new
  # envelope-from), and whether such a detection ends the search outright
  # or merely skips to the next header.
  my @fallbacks = (
    # procmailrc notes this (adding it to Received is now recommended)
    [ 'X-Envelope-From', qr/^Received:.*?^X-Envelope-From:/smi, 1 ],
    # qmail, new-inject(1)
    [ 'Envelope-Sender', qr/^Received:.*?^Envelope-Sender:/smi, 0 ],
    # Postfix, sendmail, amavisd-new, ...  RFC 2821 requires the delivering
    # SMTP server to insert a return-path line at final delivery, preserving
    # the <reverse-path> from the MAIL command.
    [ 'Return-Path',     qr/^Received:.*?^Return-Path:/smi,     0 ],
  );

  for my $entry (@fallbacks) {
    my ($hdr, $below_received, $give_up) = @$entry;

    $envf = ($self->get($hdr . ":first:addr"))[0];
    next if !defined $envf;

    if ($self->get("ALL") =~ $below_received) {
      dbg("message: $hdr header found after 1 or more Received lines, cannot trust envelope-from");
      next if !$give_up;
      $self->{envelopefrom} = undef;
      return;
    }

    dbg("message: using $hdr header as EnvelopeFrom: '$envf'");
    $self->{envelopefrom} = $envf;
    return $envf;
  }

  # Nothing usable anywhere: remember the failure.
  $self->{envelopefrom} = undef;
  return;
}