You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2018/10/23 10:07:46 UTC
svn commit: r1844628 - in /spamassassin/trunk/lib/Mail/SpamAssassin:
Message/Metadata/Received.pm PerMsgStatus.pm
Author: hege
Date: Tue Oct 23 10:07:45 2018
New Revision: 1844628
URL: http://svn.apache.org/viewvc?rev=1844628&view=rev
Log:
Improve EnvelopeFrom parsing, differentiate empty and missing
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata/Received.pm
spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata/Received.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata/Received.pm?rev=1844628&r1=1844627&r2=1844628&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata/Received.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata/Received.pm Tue Oct 23 10:07:45 2018
@@ -53,7 +53,7 @@ use Mail::SpamAssassin::Constants qw(:ip
# ---------------------------------------------------------------------------
sub parse_received_headers {
- my ($self, $permsgstatus, $msg) = @_;
+ my ($self, $pms, $msg) = @_;
my $suppl_attrib = $msg->{suppl_attrib}; # out-of-band info from a caller
@@ -87,11 +87,11 @@ sub parse_received_headers {
$self->{allow_mailfetch_markers} = 1; # This needs to be set for the
# first Received: header
# now figure out what relays are trusted...
- my $trusted = $permsgstatus->{main}->{conf}->{trusted_networks};
- my $internal = $permsgstatus->{main}->{conf}->{internal_networks};
- my $msa = $permsgstatus->{main}->{conf}->{msa_networks};
- my $did_user_specify_trust = $permsgstatus->{main}->{conf}->{trusted_networks_configured};
- my $did_user_specify_internal = $permsgstatus->{main}->{conf}->{internal_networks_configured};
+ my $trusted = $pms->{main}->{conf}->{trusted_networks};
+ my $internal = $pms->{main}->{conf}->{internal_networks};
+ my $msa = $pms->{main}->{conf}->{msa_networks};
+ my $did_user_specify_trust = $pms->{main}->{conf}->{trusted_networks_configured};
+ my $did_user_specify_internal = $pms->{main}->{conf}->{internal_networks_configured};
my $in_trusted = 1;
my $in_internal = 1;
my $found_msa = 0;
@@ -125,8 +125,7 @@ sub parse_received_headers {
# Now add the single line headers like X-Originating-IP. (bug 5680)
# we convert them into synthetic "Received" headers so we can share
# code below.
- for my $header (@{$permsgstatus->{main}->{conf}->{originating_ip_headers}})
- {
+ foreach my $header (@{$pms->{main}->{conf}->{originating_ip_headers}}) {
my $str = $msg->get_header($header);
next unless ($str && $str =~ m/($IP_ADDRESS)/);
push @hdrs, "from X-Originating-IP: $1\n";
@@ -334,7 +333,7 @@ sub parse_received_line {
my $by = '';
my $id = '';
my $ident = '';
- my $envfrom = '';
+ my $envfrom = undef;
my $mta_looked_up_dns = 0;
my $IP_ADDRESS = IP_ADDRESS;
my $IP_PRIVATE = IP_PRIVATE;
@@ -1189,8 +1188,11 @@ sub parse_received_line {
# details of the handover described here, it's just qmail-scanner
# logging a little more.
if (/^\S+ by \S+ \(.{0,100}\) with qmail-scanner/) {
- $envfrom =~ s/^\s*<*//gs; $envfrom =~ s/>*\s*$//gs;
- $envfrom =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
+ if (defined $envfrom) {
+ $envfrom =~ s/^\s*<*//gs;
+ $envfrom =~ s/>*\s*$//gs;
+ $envfrom =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
+ }
$self->{qmail_scanner_env_from} = $envfrom; # hack!
return 0;
}
@@ -1318,19 +1320,23 @@ enough:
# (only handles 'alternative form', not 'preferred form' - to be improved)
$ip =~ s/^0*:0*:(?:0*:)*ffff:(\d+\.\d+\.\d+\.\d+)$/$1/i;
- $envfrom =~ s/^\s*<*//gs; $envfrom =~ s/>*\s*$//gs;
$by =~ s/\;$//;
# ensure invalid chars are stripped. Replace with '!' to flag their
# presence, though. NOTE: this means "[1.2.3.4]" IP addr HELO
# strings, which are legit by RFC-2821, look like "!1.2.3.4!".
# still useful though.
- $ip =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
- $rdns =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
- $helo =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
- $by =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
- $ident =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
- $envfrom =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
+ my $strip_chars = qr/[\s\000\#\[\]\(\)\<\>\|]/;
+ $ip =~ s/$strip_chars/!/gs;
+ $rdns =~ s/$strip_chars/!/gs;
+ $helo =~ s/$strip_chars/!/gs;
+ $by =~ s/$strip_chars/!/gs;
+ $ident =~ s/$strip_chars/!/gs;
+ if (defined $envfrom) {
+ $envfrom =~ s/^\s*<*//gs;
+ $envfrom =~ s/>*\s*$//gs;
+ $envfrom =~ s/$strip_chars/!/gs;
+ }
my $relay = {
ip => $ip,
@@ -1377,7 +1383,10 @@ sub make_relay_as_string {
# of entries must be preserved, so that regexps that assume that
# e.g. "ip" comes before "helo" will still work.
#
- my $asstr = "[ ip=$relay->{ip} rdns=$relay->{rdns} helo=$relay->{helo} by=$relay->{by} ident=$relay->{ident} envfrom=$relay->{envfrom} intl=0 id=$relay->{id} auth=$relay->{auth} msa=0 ]";
+
+ # we could mark envfrom as "undef" if missing? dunno if needed?
+ my $envfrom = $relay->{envfrom} || '';
+ my $asstr = "[ ip=$relay->{ip} rdns=$relay->{rdns} helo=$relay->{helo} by=$relay->{by} ident=$relay->{ident} envfrom=$envfrom intl=0 id=$relay->{id} auth=$relay->{auth} msa=0 ]";
dbg("received-header: parsed as $asstr");
$relay->{as_string} = $asstr;
}
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=1844628&r1=1844627&r2=1844628&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Tue Oct 23 10:07:45 2018
@@ -2836,7 +2836,7 @@ sub get_envelope_from {
# make sure we get the most recent copy - there can be only one EnvelopeSender.
$envf = $self->get($self->{conf}->{envelope_sender_header}.":addr",undef);
# ok if it contains an "@" sign, or is "" (ie. "<>" without the < and >)
- goto ok if defined $envf && ($envf =~ /\@/ || $envf eq '');
+ goto ok if defined $envf && (index($envf, '@') > 0 || $envf eq '');
# Warn them if it's configured, but not there or not usable.
if (defined $envf) {
chomp $envf;
@@ -2856,17 +2856,19 @@ sub get_envelope_from {
# if possible... use the last untrusted header, in case there's
# trusted headers.
my $lasthop = $self->{relays_untrusted}->[0];
+ my $lasthop_str = 'last untrusted';
if (!defined $lasthop) {
# no untrusted headers? in that case, the message is ALL_TRUSTED.
# use the first trusted header (ie. the oldest, originating one).
$lasthop = $self->{relays_trusted}->[-1];
+ $lasthop_str = 'first trusted';
}
if (defined $lasthop) {
$envf = $lasthop->{envfrom};
- # TODO FIXME: Received.pm puts both null senders and absence-of-sender
- # into the relays array as '', so we can't distinguish them :(
- if ($envf && ($envf =~ /\@/)) {
+ # ok if it contains an "@" sign, or is "" (ie. "<>" without the < and >)
+ if (defined $envf && (index($envf, '@') > 0 || $envf eq '')) {
+ dbg("message: using $lasthop_str relay envelope-from as EnvelopeFrom: '$envf'");
goto ok;
}
}
@@ -2895,6 +2897,7 @@ sub get_envelope_from {
dbg("message: X-Envelope-From header found after 1 or more Received lines, cannot trust envelope-from");
return;
} else {
+ dbg("message: using X-Envelope-From header as EnvelopeFrom: '$envf'");
goto ok;
}
}
@@ -2906,6 +2909,7 @@ sub get_envelope_from {
if ($self->get("ALL") =~ /^Received:.*?^Envelope-Sender:/smi) {
dbg("message: Envelope-Sender header found after 1 or more Received lines, cannot trust envelope-from");
} else {
+ dbg("message: using Envelope-Sender header as EnvelopeFrom: '$envf'");
goto ok;
}
}
@@ -2923,6 +2927,7 @@ sub get_envelope_from {
if ($self->get("ALL") =~ /^Received:.*?^Return-Path:/smi) {
dbg("message: Return-Path header found after 1 or more Received lines, cannot trust envelope-from");
} else {
+ dbg("message: using Return-Path header as EnvelopeFrom: '$envf'");
goto ok;
}
}