sub _check_spf()

in lib/Mail/SpamAssassin/Plugin/SPF.pm [405:746]


# Work out the SPF verdict for one identity of the scanned message and record
# it on the scanner object.
#
# Arguments:
#   $self    - the plugin object; spf_server, has_mail_spf and no_spf_module
#              are cached on it between calls
#   $scanner - the scanner object; its spf_* (mfrom) or spf_helo_* (helo)
#              fields receive the outcome
#   $ishelo  - true to evaluate the HELO identity, false for the envelope sender
#
# Two sources are tried in order: a verdict already written into the message by
# an internal host (Received-SPF or Authentication-Results header), and then a
# live query through Mail::SPF.  The verdict is only ever communicated through
# the scanner fields; the return value carries no verdict.
sub _check_spf {
  my ($self, $scanner, $ishelo) = @_;

  # keep whatever time_method() hands back in scope until we return
  my $timer = $self->{main}->time_method("check_spf");

  # names of the per-identity verdict flags kept on the scanner
  my @result_flags = qw(pass neutral none fail softfail temperror permerror);

  # Mark one identity ('' => mfrom, 'helo_' => helo) as checked, with every
  # verdict flag cleared and no failure comment.
  my $reset_results = sub {
    my ($prefix) = @_;
    $scanner->{"spf_${prefix}checked"} = 1;
    $scanner->{"spf_${prefix}$_"} = 0  for @result_flags;
    $scanner->{"spf_${prefix}failure_comment"} = undef;
  };

  # Try to adopt a verdict that was parsed out of a header.
  #   $result    - lower-cased verdict name
  #   $claimed   - identity named by the header, or undef if it named none
  #   $reuse_fmt - dbg() format used to report a successful re-use
  # Returns 1 when the verdict was stored, 0 when this header has to be skipped
  # (that identity already has a result, or the identity is not one we know).
  my $adopt_header_result = sub {
    my ($result, $claimed, $reuse_fmt) = @_;

    my $identity = '';  # no identity given: treat it as a mfrom check
    if (defined $claimed) {
      $identity = lc($claimed);
      if ($identity eq 'mfrom' || $identity eq 'mailfrom') {
        return 0  if $scanner->{spf_checked};
        $identity = '';
      }
      elsif ($identity eq 'helo') {
        return 0  if $scanner->{spf_helo_checked};
        $identity = 'helo_';
      }
      else {
        dbg("spf: found unknown identity value, cannot use: $identity");
        return 0;
      }
    }
    elsif ($scanner->{spf_checked}) {
      return 0;
    }

    # same fields we would have set had we run the check ourselves...
    $reset_results->($identity);
    # ...plus the verdict itself
    $scanner->{"spf_${identity}${result}"} = 1;
    dbg($reuse_fmt, ($identity ? 'helo' : 'mfrom'), $result);
    return 1;
  };

  # Only headers added by *INTERNAL* hosts are eligible for re-use.  Headers
  # from hosts that are trusted but external are not, because:
  #  (i)   SPF is meant to be evaluated on the domain boundary;
  #  (ii)  even when such a header agrees with what we would compute, it was
  #        probably produced for a different envelope (e.g. the apache.org list
  #        servers check the ORCPT and then send with a new envelope);
  #  (iii) if the check was done properly on an unchanged envelope, then that
  #        host almost certainly belongs in the internal network definition.
  if ($scanner->{conf}->{ignore_received_spf_header}) {
    dbg("spf: ignoring any Received-SPF headers from internal hosts, by admin setting");
  }
  elsif ($scanner->{checked_for_received_spf_header}) {
    dbg("spf: already checked for Received-SPF headers, proceeding with DNS based checks");
  }
  else {
    $scanner->{checked_for_received_spf_header} = 1;
    dbg("spf: checking to see if the message has a Received-SPF header that we can use");

    my @internal_hdrs = $scanner->get('ALL-INTERNAL');
    if ($scanner->{conf}->{use_newest_received_spf_header}) {
      dbg("spf: starting with the newest Received-SPF headers first");
    } else {
      # default: begin with the LAST header (earliest in time), it is the
      # most accurate one
      @internal_hdrs = reverse(@internal_hdrs);
    }

    HEADER:
    foreach my $hdr (@internal_hdrs) {
      local($1,$2);
      my ($result, $claimed, $reuse_fmt);

      if ($hdr =~ /^received-spf:/i) {
        dbg("spf: found a Received-SPF header added by an internal host: $hdr");

        # Header shapes seen in practice (http://www.openspf.org/RFC_4408#header-field)
        #
        # older style, no identity= key:
        #   Received-SPF: pass (herse.apache.org: domain of spamassassin@dostech.ca
        #     designates 69.61.78.188 as permitted sender)
        #
        # newer style, mfrom identity:
        #   Received-SPF: pass (dostech.ca: 69.61.78.188 is authorized to use
        #     'spamassassin@dostech.ca' in 'mfrom' identity (mechanism 'mx' matched))
        #     receiver=FC5-VPC; identity=mfrom; envelope-from="spamassassin@dostech.ca";
        #     helo=smtp.dostech.net; client-ip=69.61.78.188
        #
        # newer style, helo identity:
        #   Received-SPF: pass (dostech.ca: 69.61.78.188 is authorized to use 'dostech.ca'
        #     in 'helo' identity (mechanism 'mx' matched)) receiver=FC5-VPC; identity=helo;
        #     helo=dostech.ca; client-ip=69.61.78.188
        #
        # The \s* after the colon is there because an extra space has been
        # observed between the header name and the field value.
        ($result, $claimed) =
          ($hdr =~ /^received-spf:\s*(pass|neutral|(?:soft)?fail|(?:temp|perm)error|none)\b(?:.*\bidentity=(\S+?);?\b)?/i);

        if (!defined $result) {
          dbg("spf: could not parse result from existing Received-SPF header");
          next HEADER;
        }
        $result = lc($result);
        $reuse_fmt = "spf: re-using %s result from Received-SPF header: %s";
      }
      elsif ($hdr =~ /^(?:Arc\-)?Authentication-Results:.*;\s*SPF\s*=\s*([^;]*)/i) {
        dbg("spf: found an Authentication-Results header added by an internal host: $hdr");

        # RFC 5451 style header (parser added by D. Stussy 2010-09-09), e.g.
        #   Authentication-Results: mail.example.com; SPF=none smtp.mailfrom=example.org (comment)
        # $1 is the text following "SPF=" up to the next ';'
        my $tmphdr = $1;
        ($result, $claimed) =
          ($tmphdr =~ /^(pass|neutral|(?:hard|soft)?fail|(?:temp|perm)error|none)(?:[^;]*?\bsmtp\.(\S+)\s*=[^;]+)?/i);

        if (!defined $result) {
          dbg("spf: could not parse result from existing Authentication-Results header");
          next HEADER;
        }
        $result = lc($result);
        $result = 'fail'  if $result eq 'hardfail';  # RFC5451 permits this
        $reuse_fmt = "spf: re-using %s result from Authentication-Results header: %s";
      }
      else {
        next HEADER;  # not a header we know how to read
      }

      # on a skip, move on to the next candidate header, if any
      next HEADER  unless $adopt_header_result->($result, $claimed, $reuse_fmt);

      # once *both* the mfrom and the helo result are known there is nothing
      # left to look for
      return if ($scanner->{spf_checked} && $scanner->{spf_helo_checked});
    }

    # finished if the headers supplied the identity we were asked about
    my $wanted = $ishelo ? 'spf_helo_checked' : 'spf_checked';
    return if $scanner->{$wanted};
  }

  # a live check needs both DNS and an SPF module
  return unless $scanner->is_dns_available();
  return if $self->{no_spf_module};

  # first live check: load Mail::SPF and build the server object
  if (!defined $self->{has_mail_spf}) {
    my $eval_stat;
    my $loaded = eval {
      require Mail::SPF;
      if (!defined $Mail::SPF::VERSION || $Mail::SPF::VERSION < 2.001) {
        die "Mail::SPF 2.001 or later required, this is ".
          (defined $Mail::SPF::VERSION ? $Mail::SPF::VERSION : 'unknown')."\n";
      }
      # A Mail::SPF::Server is reusable and accepts our own resolver object.
      # max_dns_interactive_terms defaults to 10; Bug 7112 found that even 14
      # was not enough for ebay.com, and bug 7182 raised the value to 20.
      $self->{spf_server} = Mail::SPF::Server->new(
        hostname                  => $scanner->get_tag('HOSTNAME'),
        dns_resolver              => $self->{main}->{resolver},
        max_dns_interactive_terms => 20,
      );
      1;
    };
    if (!$loaded) {
      $eval_stat = $@ ne '' ? $@ : "errno=$!";
      chomp $eval_stat;
    }

    if (defined $eval_stat) {
      dbg("spf: cannot load Mail::SPF: module: $eval_stat");
      dbg("spf: Mail::SPF is required for SPF checks, SPF checks disabled");
      $self->{no_spf_module} = 1;
      return;
    }
    dbg("spf: using Mail::SPF for SPF checks");
    $self->{has_mail_spf} = 1;
  }

  # Bug 3016: do not bother with SPF when the From domain has no A/MX records,
  # so that junk messages do not slow us down.
  # TODO: this will only work if the queries are ready before SPF, so never?
  return if $scanner->{sender_host_fail} && $scanner->{sender_host_fail} == 2;

  # From here on the selected identity counts as checked, with every verdict
  # flag cleared, even if one of the early exits below is taken.
  my $prefix = $ishelo ? 'helo_' : '';
  $reset_results->($prefix);

  my $lasthop = $scanner->{relays_external}->[0];
  unless (defined $lasthop) {
    dbg("spf: no suitable relay for spf use found, skipping SPF%s check",
        $ishelo ? '-helo' : '');
    return;
  }

  my $ip   = $lasthop->{ip};    # always present
  my $helo = $lasthop->{helo};  # could be missing

  if ($ishelo) {
    if (!$helo) {
      dbg("spf: cannot check HELO, HELO value unknown");
      return;
    }
    dbg("spf: checking HELO (helo=$helo, ip=$ip)");
  }
  else {
    # TODO: for the null sender the helo domain is supposed to serve as the
    # mfrom identity, but telling a null sender apart from a failure to obtain
    # the envelope is not exactly trivial... so for now the check is skipped.
    # (the missing Envelope-From has already been reported via dbg)
    return if !$scanner->{spf_sender};

    dbg("spf: checking EnvelopeFrom (helo=%s, ip=%s, envfrom=%s)",
        ($helo || ''), $ip, $scanner->{spf_sender});
  }

  # A HELO that is a bare or bracketed IPv4 literal, or that contains no dot,
  # cannot be checked.  This could probably be stricter; other malformed HELO
  # names are better tested with a header rule.
  if ($ishelo) {
    my $is_ip_literal = $helo =~ /^[\[!]?\d+\.\d+\.\d+\.\d+[\]!]?$/;
    if ($is_ip_literal || $helo =~ /^[^.]+$/) {
      dbg("spf: cannot check HELO of '$helo', skipping");
      return;
    }
  }

  if ($helo && $scanner->server_failed_to_respond_for_domain($helo)) {
    dbg("spf: we had a previous timeout on '$helo', skipping");
    return;
  }

  my ($result, $comment, $text, $err);

  # TODO: currently we won't get to here for a mfrom check with a null sender
  my $identity = $ishelo ? $helo : ($scanner->{spf_sender}); # || $helo);

  if (!$identity) {
    dbg("spf: cannot determine %s identity, skipping %s SPF check",
        ($ishelo ? 'helo' : 'mfrom'),  ($ishelo ? 'helo' : 'mfrom') );
    return;
  }
  $helo ||= 'unknown';  # only used for macro expansion in the mfrom explanation

  my $request;
  my $built = eval {
    $request = Mail::SPF::Request->new(
      scope         => ($ishelo ? 'helo' : 'mfrom'),
      identity      => $identity,
      ip_address    => $ip,
      helo_identity => $helo,
    );
    1;
  };
  if (!$built) {
    my $eval_stat = $@ ne '' ? $@ : "errno=$!";
    chomp $eval_stat;
    dbg("spf: cannot create Mail::SPF::Request object: $eval_stat");
    return;
  }

  # run the query under the configured spf_timeout and the master deadline
  my $timeout = $scanner->{conf}->{spf_timeout};
  my $timer_spf = Mail::SpamAssassin::Timeout->new(
              { secs => $timeout, deadline => $scanner->{master_deadline} });
  $err = $timer_spf->run_and_catch(sub {
    my $query = $self->{spf_server}->process($request);
    $result  = $query->code;
    $comment = $query->authority_explanation if $query->can("authority_explanation");
    $text    = $query->text;
  });

  if ($err) {
    chomp $err;
    warn("spf: lookup failed: $err\n");
    return 0;
  }

  $result ||= 'timeout';  # bug 5077
  for ($comment, $text) {
    $_ ||= '';
    s/\s+/ /gs;           # no newlines please
  }

  # Translate the result code into its scanner flag; 'error' is recorded as
  # temperror, and any other code (such as 'timeout') sets no flag at all.
  my %flag_for = ((map { ($_ => $_) } @result_flags), error => 'temperror');
  if (exists $flag_for{$result}) {
    my $flag_key = 'spf_' . $prefix . $flag_for{$result};
    $scanner->{$flag_key} = 1;
  }
  if ($result eq 'fail') {  # RFC 7208 6.2
    $scanner->{"spf_${prefix}failure_comment"} = "SPF failed: $comment";
  }

  if ($ishelo) {
    dbg("spf: query for $ip/$helo: result: $result, comment: $comment, text: $text");
  } else {
    dbg("spf: query for $scanner->{spf_sender}/$ip/$helo: result: $result, comment: $comment, text: $text");
  }
}