You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2018/10/23 10:07:46 UTC

svn commit: r1844628 - in /spamassassin/trunk/lib/Mail/SpamAssassin: Message/Metadata/Received.pm PerMsgStatus.pm

Author: hege
Date: Tue Oct 23 10:07:45 2018
New Revision: 1844628

URL: http://svn.apache.org/viewvc?rev=1844628&view=rev
Log:
Improve EnvelopeFrom parsing, differentiate empty and missing

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata/Received.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata/Received.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata/Received.pm?rev=1844628&r1=1844627&r2=1844628&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata/Received.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata/Received.pm Tue Oct 23 10:07:45 2018
@@ -53,7 +53,7 @@ use Mail::SpamAssassin::Constants qw(:ip
 # ---------------------------------------------------------------------------
 
 sub parse_received_headers {
-  my ($self, $permsgstatus, $msg) = @_;
+  my ($self, $pms, $msg) = @_;
 
   my $suppl_attrib = $msg->{suppl_attrib};  # out-of-band info from a caller
 
@@ -87,11 +87,11 @@ sub parse_received_headers {
   $self->{allow_mailfetch_markers} = 1;         # This needs to be set for the
                                                 # first Received: header
   # now figure out what relays are trusted...
-  my $trusted = $permsgstatus->{main}->{conf}->{trusted_networks};
-  my $internal = $permsgstatus->{main}->{conf}->{internal_networks};
-  my $msa = $permsgstatus->{main}->{conf}->{msa_networks};
-  my $did_user_specify_trust = $permsgstatus->{main}->{conf}->{trusted_networks_configured};
-  my $did_user_specify_internal = $permsgstatus->{main}->{conf}->{internal_networks_configured};
+  my $trusted = $pms->{main}->{conf}->{trusted_networks};
+  my $internal = $pms->{main}->{conf}->{internal_networks};
+  my $msa = $pms->{main}->{conf}->{msa_networks};
+  my $did_user_specify_trust = $pms->{main}->{conf}->{trusted_networks_configured};
+  my $did_user_specify_internal = $pms->{main}->{conf}->{internal_networks_configured};
   my $in_trusted = 1;
   my $in_internal = 1;
   my $found_msa = 0;
@@ -125,8 +125,7 @@ sub parse_received_headers {
   # Now add the single line headers like X-Originating-IP. (bug 5680)
   # we convert them into synthetic "Received" headers so we can share
   # code below.
-  for my $header (@{$permsgstatus->{main}->{conf}->{originating_ip_headers}})
-  {
+  foreach my $header (@{$pms->{main}->{conf}->{originating_ip_headers}}) {
     my $str = $msg->get_header($header);
     next unless ($str && $str =~ m/($IP_ADDRESS)/);
     push @hdrs, "from X-Originating-IP: $1\n";
@@ -334,7 +333,7 @@ sub parse_received_line {
   my $by = '';
   my $id = '';
   my $ident = '';
-  my $envfrom = '';
+  my $envfrom = undef;
   my $mta_looked_up_dns = 0;
   my $IP_ADDRESS = IP_ADDRESS;
   my $IP_PRIVATE = IP_PRIVATE;
@@ -1189,8 +1188,11 @@ sub parse_received_line {
     # details of the handover described here, it's just qmail-scanner
     # logging a little more.
     if (/^\S+ by \S+ \(.{0,100}\) with qmail-scanner/) {
-      $envfrom =~ s/^\s*<*//gs; $envfrom =~ s/>*\s*$//gs;
-      $envfrom =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
+      if (defined $envfrom) {
+        $envfrom =~ s/^\s*<*//gs;
+        $envfrom =~ s/>*\s*$//gs;
+        $envfrom =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
+      }
       $self->{qmail_scanner_env_from} = $envfrom; # hack!
       return 0;
     }
@@ -1318,19 +1320,23 @@ enough:
   # (only handles 'alternative form', not 'preferred form' - to be improved)
   $ip =~ s/^0*:0*:(?:0*:)*ffff:(\d+\.\d+\.\d+\.\d+)$/$1/i;
 
-  $envfrom =~ s/^\s*<*//gs; $envfrom =~ s/>*\s*$//gs;
   $by =~ s/\;$//;
 
   # ensure invalid chars are stripped.  Replace with '!' to flag their
   # presence, though.  NOTE: this means "[1.2.3.4]" IP addr HELO
   # strings, which are legit by RFC-2821, look like "!1.2.3.4!".
   # still useful though.
-  $ip =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
-  $rdns =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
-  $helo =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
-  $by =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
-  $ident =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
-  $envfrom =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
+  my $strip_chars = qr/[\s\000\#\[\]\(\)\<\>\|]/;
+  $ip =~ s/$strip_chars/!/gs;
+  $rdns =~ s/$strip_chars/!/gs;
+  $helo =~ s/$strip_chars/!/gs;
+  $by =~ s/$strip_chars/!/gs;
+  $ident =~ s/$strip_chars/!/gs;
+  if (defined $envfrom) {
+    $envfrom =~ s/^\s*<*//gs;
+    $envfrom =~ s/>*\s*$//gs;
+    $envfrom =~ s/$strip_chars/!/gs;
+  }
 
   my $relay = {
     ip => $ip,
@@ -1377,7 +1383,10 @@ sub make_relay_as_string {
   # of entries must be preserved, so that regexps that assume that
   # e.g. "ip" comes before "helo" will still work.
   #
-  my $asstr = "[ ip=$relay->{ip} rdns=$relay->{rdns} helo=$relay->{helo} by=$relay->{by} ident=$relay->{ident} envfrom=$relay->{envfrom} intl=0 id=$relay->{id} auth=$relay->{auth} msa=0 ]";
+
+  # we could mark envfrom as "undef" if missing? dunno if needed?
+  my $envfrom = $relay->{envfrom} || '';
+  my $asstr = "[ ip=$relay->{ip} rdns=$relay->{rdns} helo=$relay->{helo} by=$relay->{by} ident=$relay->{ident} envfrom=$envfrom intl=0 id=$relay->{id} auth=$relay->{auth} msa=0 ]";
   dbg("received-header: parsed as $asstr");
   $relay->{as_string} = $asstr;
 }

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=1844628&r1=1844627&r2=1844628&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Tue Oct 23 10:07:45 2018
@@ -2836,7 +2836,7 @@ sub get_envelope_from {
     # make sure we get the most recent copy - there can be only one EnvelopeSender.
     $envf = $self->get($self->{conf}->{envelope_sender_header}.":addr",undef);
     # ok if it contains an "@" sign, or is "" (ie. "<>" without the < and >)
-    goto ok if defined $envf && ($envf =~ /\@/ || $envf eq '');
+    goto ok if defined $envf && (index($envf, '@') > 0 || $envf eq '');
     # Warn them if it's configured, but not there or not usable.
     if (defined $envf) {
       chomp $envf;
@@ -2856,17 +2856,19 @@ sub get_envelope_from {
   # if possible... use the last untrusted header, in case there's
   # trusted headers.
   my $lasthop = $self->{relays_untrusted}->[0];
+  my $lasthop_str = 'last untrusted';
   if (!defined $lasthop) {
     # no untrusted headers?  in that case, the message is ALL_TRUSTED.
     # use the first trusted header (ie. the oldest, originating one).
     $lasthop = $self->{relays_trusted}->[-1];
+    $lasthop_str = 'first trusted';
   }
 
   if (defined $lasthop) {
     $envf = $lasthop->{envfrom};
-    # TODO FIXME: Received.pm puts both null senders and absence-of-sender
-    # into the relays array as '', so we can't distinguish them :(
-    if ($envf && ($envf =~ /\@/)) {
+    # ok if it contains an "@" sign, or is "" (ie. "<>" without the < and >)
+    if (defined $envf && (index($envf, '@') > 0 || $envf eq '')) {
+      dbg("message: using $lasthop_str relay envelope-from as EnvelopeFrom: '$envf'");
       goto ok;
     }
   }
@@ -2895,6 +2897,7 @@ sub get_envelope_from {
       dbg("message: X-Envelope-From header found after 1 or more Received lines, cannot trust envelope-from");
       return;
     } else {
+      dbg("message: using X-Envelope-From header as EnvelopeFrom: '$envf'");
       goto ok;
     }
   }
@@ -2906,6 +2909,7 @@ sub get_envelope_from {
     if ($self->get("ALL") =~ /^Received:.*?^Envelope-Sender:/smi) {
       dbg("message: Envelope-Sender header found after 1 or more Received lines, cannot trust envelope-from");
     } else {
+      dbg("message: using Envelope-Sender header as EnvelopeFrom: '$envf'");
       goto ok;
     }
   }
@@ -2923,6 +2927,7 @@ sub get_envelope_from {
     if ($self->get("ALL") =~ /^Received:.*?^Return-Path:/smi) {
       dbg("message: Return-Path header found after 1 or more Received lines, cannot trust envelope-from");
     } else {
+      dbg("message: using Return-Path header as EnvelopeFrom: '$envf'");
       goto ok;
     }
   }