sub extract_metadata()

in lib/Mail/SpamAssassin/Plugin/ASN.pm [338:461]


# Plugin hook: look up the ASN (and route) of the first external relay and
# publish the result as message tags / metadata.
#
# Arguments:
#   $self - plugin instance; $self->{main} supplies {geodb} and {learning}
#   $opts - hook options; reads $opts->{permsgstatus} and
#           $opts->{msg}->{metadata}->{relays_external}
#
# Flow:
#   1. When GeoDB ASN lookups are enabled and available, query GeoDB and set
#      the ASN tag plus the X-ASN metadata header.
#   2. Unless a GeoDB hit is preferred (asn_prefer_geodb), start one
#      background DNS TXT query per configured lookup zone; each answer is
#      handed to process_dns_result().
#
# DNS queries are never started for learning messages, regardless of whether
# GeoDB is in use.  The return value is not meaningful; results are stored
# on the per-message status object.
sub extract_metadata {
  my ($self, $opts) = @_;

  my $pms = $opts->{permsgstatus};
  my $conf = $pms->{conf};
  my $learning = $self->{main}->{learning};

  my $geodb = $self->{main}->{geodb};
  my $has_geodb = $conf->{asn_use_geodb} && $geodb && $geodb->can('asn');
  if ($has_geodb) {
    dbg("asn: using GeoDB ASN for lookups");
  } else {
    dbg("asn: GeoDB ASN not available");
    # Without GeoDB, DNS is the only data source: bail out early (before
    # touching any tag data) when it cannot or must not be used.
    if (!$conf->{asn_use_dns} || !$pms->is_dns_available()) {
      dbg("asn: DNS is not available, skipping ASN check");
      return;
    }
    if ($learning) {
      dbg("asn: learning message, skipping DNS-based ASN check");
      return;
    }
  }

  # initialize the tag data so that if no result is returned from the DNS
  # query we won't end up with a missing tag.  Don't use $pms->set_tag()
  # here to avoid triggering any tag-dependent action unnecessarily.
  # The lists are read by key (not aliased) so that no config slot gets
  # autovivified as a side effect.
  foreach my $list_name (qw(asnlookups asnlookups_ipv6)) {
    my $lookups = $conf->{$list_name};
    next if !$lookups;
    foreach my $entry (@{$lookups}) {
      $pms->{tag_data}->{$entry->{asn_tag}} ||= '';
      $pms->{tag_data}->{$entry->{route_tag}} ||= '';
    }
  }

  # Initialize status: clear the per-message result slot
  $pms->{asn_results} = undef;

  # get IP address of last external relay to lookup
  my $relay = $opts->{msg}->{metadata}->{relays_external}->[0];
  if (!defined $relay) {
    dbg("asn: no first external relay IP available, skipping ASN check");
    return;
  } elsif ($relay->{ip_private}) {
    dbg("asn: first external relay is a private IP, skipping ASN check");
    return;
  }
  my $ip = $relay->{ip};
  dbg("asn: using first external relay IP for lookups: %s", $ip);

  # GeoDB lookup
  my $asn_found;
  if ($has_geodb) {
    my $asn = $geodb->get_asn($ip);
    if (!defined $asn) {
      dbg("asn: GeoDB ASN lookup failed");
    } else {
      $asn_found = 1;
      dbg("asn: GeoDB found ASN $asn");
      # The organisation name is only needed once an ASN was found
      my $org = $geodb->get_asn_org($ip);
      # Prevent double prefix: only prepend asn_prefix when one is
      # configured and the ASN does not already start with it
      my $prefix = $conf->{asn_prefix};
      my $asn_value = $asn;
      if (defined $prefix && length($prefix) && index($asn, $prefix) != 0) {
        $asn_value = $prefix.$asn;
      }
      $asn_value .= ' '.$org if defined $org && length($org);
      $pms->set_tag('ASN', $asn_value);
      # For Bayes
      $pms->{msg}->put_metadata('X-ASN', $asn);
    }
  }

  # Skip DNS if GeoDB was successful and preferred
  if ($asn_found && $conf->{asn_prefer_geodb}) {
    dbg("asn: GeoDB lookup successful, skipping DNS lookups");
    return;
  }

  # No point continuing without DNS from now on
  if (!$conf->{asn_use_dns} || !$pms->is_dns_available()) {
    dbg("asn: skipping disabled DNS lookups");
    return;
  }

  # The early learning check above is only reached when GeoDB is unavailable;
  # repeat it here so a learning message never starts DNS queries when GeoDB
  # is in use but did not settle the lookup.
  if ($learning) {
    dbg("asn: learning message, skipping DNS-based ASN check");
    return;
  }

  dbg("asn: using DNS for lookups");
  my $lookup_zone;
  if ($ip =~ IS_IPV4_ADDRESS) {
    if (!defined $conf->{asnlookups}) {
      dbg("asn: asn_lookup for IPv4 not defined, skipping");
      return;
    }
    $lookup_zone = "asnlookups";
  } else {
    if (!defined $conf->{asnlookups_ipv6}) {
      dbg("asn: asn_lookup_ipv6 for IPv6 not defined, skipping");
      return;
    }
    $lookup_zone = "asnlookups_ipv6";
  }

  my $reversed_ip = reverse_ip_address($ip);
  if (!defined $reversed_ip) {
    dbg("asn: could not parse IP: %s, skipping", $ip);
    return;
  }

  # we use arrays and array indices rather than hashes and hash keys
  # in case someone wants the same zone added to multiple sets of tags
  my $index = 0;
  foreach my $entry (@{$conf->{$lookup_zone}}) {
    # do the DNS query, have the callback process the result.
    # $zone_index is a per-iteration copy so each callback closure keeps
    # the index of its own zone entry.
    my $zone_index = $index;
    my $zone = $reversed_ip . '.' . $entry->{zone};
    $pms->{async}->bgsend_and_start_lookup($zone, 'TXT', undef,
      { rulename => 'asn_lookup', type => 'ASN' },
      sub { my($ent, $pkt) = @_;
            $self->process_dns_result($pms, $pkt, $zone_index, $lookup_zone) },
      master_deadline => $pms->{master_deadline}
    );
    $index++;
  }
}