You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2005/03/12 02:22:59 UTC
svn commit: r157200 - in spamassassin/trunk:
lib/Mail/SpamAssassin/Message/Metadata/Received.pm t/rcvd_parser.t
Author: jm
Date: Fri Mar 11 17:22:58 2005
New Revision: 157200
URL: http://svn.apache.org/viewcvs?view=rev&rev=157200
Log:
bug 4099: add support for some previously-unparseable Received header formats, to clear up the known T_UNPARSEABLE_RELAY ham hits. Also, pick up the envelope-sender data that qmail-scanner records for a handover, since it's useful for SPF testing etc.
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata/Received.pm
spamassassin/trunk/t/rcvd_parser.t
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata/Received.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata/Received.pm?view=diff&r1=157199&r2=157200
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata/Received.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata/Received.pm Fri Mar 11 17:22:58 2005
@@ -97,10 +97,29 @@
my $LOCALHOST = LOCALHOST;
foreach my $line ( $msg->get_header('Received') ) {
+
+ # qmail-scanner support hack: an envelope-from value may have been saved
+ # while parsing the previous (read: more recent) Received header. If so,
+ # add it to this header's relay data, since this is the handover it described.
+
+ my $qms_env_from;
+ if ($self->{qmail_scanner_env_from}) {
+ $qms_env_from = $self->{qmail_scanner_env_from};
+ delete $self->{qmail_scanner_env_from};
+ }
+
$line =~ s/\n[ \t]+/ /gs;
my $relay = $self->parse_received_line ($line);
+
next unless $relay;
+ # hack for qmail-scanner, as described above; add in the saved
+ # metadata
+ if ($qms_env_from) {
+ $relay->{envfrom} = $qms_env_from;
+ $self->make_relay_as_string($relay);
+ }
+
# trusted_networks matches?
if ($in_trusted && $did_user_specify_trust && !$relay->{auth} && !$trusted->contains_ip ($relay->{ip}))
{
@@ -233,20 +252,20 @@
if ($in_trusted) {
push (@{$self->{relays_trusted}}, $relay);
- $self->{relays_trusted_str} .= $relay->{as_string}." ";
} else {
push (@{$self->{relays_untrusted}}, $relay);
- $self->{relays_untrusted_str} .= $relay->{as_string}." ";
}
}
+ $self->{relays_trusted_str} = join(' ', map { $_->{as_string} }
+ @{$self->{relays_trusted}});
+ $self->{relays_untrusted_str} = join(' ', map { $_->{as_string} }
+ @{$self->{relays_untrusted}});
+
# drop the temp PerMsgStatus object
$self->{dns_pms}->finish();
delete $self->{dns_pms};
- chop ($self->{relays_trusted_str}); # remove trailing ws
- chop ($self->{relays_untrusted_str}); # remove trailing ws
-
# OK, we've now split the relay list into trusted and untrusted.
# add the stringified representation to the message object, so Bayes
@@ -967,6 +986,32 @@
# Fri Feb 07 10:18:12 2003 -0800
if (/^FROM \S+ BY \S+ \; /) { return; }
+ # Internal Amazon traffic
+ # Received: from dc-mail-3102.iad3.amazon.com by mail-store-2001.amazon.com with ESMTP (peer crosscheck: dc-mail-3102.iad3.amazon.com)
+ if (/^from \S+\.amazon\.com by \S+\.amazon\.com with ESMTP \(peer crosscheck: /) { return; }
+
+ # Received: from GWGC6-MTA by gc6.jefferson.co.us with Novell_GroupWise; Tue, 30 Nov 2004 10:09:15 -0700
+ if (/^from [^\.]+ by \S+ with Novell_GroupWise; /) { return; }
+
+ # Received: from no.name.available by [165.224.43.143] via smtpd (for [165.224.216.89]) with ESMTP; Fri, 28 Jan 2005 13:06:39 -0500
+ # Received: from no.name.available by [165.224.216.88] via smtpd (for lists.sourceforge.net [66.35.250.206]) with ESMTP; Fri, 28 Jan 2005 15:42:30 -0500
+ # These are from an internal host protected by a Raptor firewall, to hosts
+ # outside the firewall. We can only ignore the handover since we don't have
+ # enough info in those headers; however, from googling, it appears that
+ # all samples are cases where the handover is safely ignored.
+ if (/^from no\.name\.available by \S+ via smtpd \(for /) { return; }
+
+ # from 156.56.111.196 by blazing.arsecandle.org (envelope-from <ge...@lists.gentoo.org>, uid 502) with qmail-scanner-1.24 (clamdscan: 0.80/594. f-prot: 4.4.2/3.14.11. Clear:RC:0(156.56.111.196):. Processed in 0.288806 secs); 06 Feb 2005 21:11:38 -0000
+ # these are safe to ignore. the previous handover line has the full
+ # details of the handover described here, it's just qmail-scanner
+ # logging a little more.
+ if (/^from \S+ by \S+ \(.{0,100}\) with qmail-scanner/) {
+ $envfrom =~ s/^\s*<*//gs; $envfrom =~ s/>*\s*$//gs;
+ $envfrom =~ s/[\s\0\#\[\]\(\)\<\>\|]/!/gs;
+ $self->{qmail_scanner_env_from} = $envfrom; # hack!
+ return;
+ }
+
# ------------------------------------------------------------------------
# HANDOVERS WE KNOW WE CAN'T DEAL WITH: TCP transmission, but to MTAs that
# just don't log enough info for us to use (ie. no IP address present).
@@ -978,7 +1023,7 @@
# Received: from MATT_LINUX by hippo.star.co.uk via smtpd (for mail.webnote.net [193.120.211.219]) with SMTP; 3 Jul 2002 15:43:50 UT
# Received: from cp-its-ieg01.mail.saic.com by cpmx.mail.saic.com for me@jmason.org; Tue, 23 Jul 2002 14:09:10 -0700
if (/^from \S+ by \S+ (?:with|via|for|\()/) { goto unparseable; }
-
+
# Received: from virtual-access.org by bolero.conactive.com ; Thu, 20 Feb 2003 23:32:58 +0100
if (/^from (\S+) by (\S+) *\;/) {
goto unparseable; # can't trust this
@@ -1002,6 +1047,7 @@
unparseable:
+ dbg("received-header: unparseable: $_");
$self->{num_relays_unparseable}++;
return;
@@ -1094,21 +1140,27 @@
$relay->{rdns} = $rdns;
$relay->{lc_rdns} = lc $rdns;
+ $self->make_relay_as_string($relay);
+
+ my $is_private = ($ip =~ /${IP_PRIVATE}/o);
+ $relay->{ip_private} = $is_private;
+
+ # add it to an internal array so Eval tests can use it
+ return $relay;
+}
+
+sub make_relay_as_string {
+ my ($self, $relay) = @_;
+
# as-string rep. use spaces so things like Bayes can tokenize them easily.
# NOTE: when tokenizing or matching, be sure to note that new
# entries may be added to this string later. However, the *order*
# of entries must be preserved, so that regexps that assume that
# e.g. "ip" comes before "helo" will still work.
#
- my $asstr = "[ ip=$ip rdns=$rdns helo=$helo by=$by ident=$ident envfrom=$envfrom intl=0 id=$id auth=$auth ]";
+ my $asstr = "[ ip=$relay->{ip} rdns=$relay->{rdns} helo=$relay->{helo} by=$relay->{by} ident=$relay->{ident} envfrom=$relay->{envfrom} intl=0 id=$relay->{id} auth=$relay->{auth} ]";
dbg("received-header: parsed as $asstr");
$relay->{as_string} = $asstr;
-
- my $is_private = ($ip =~ /${IP_PRIVATE}/o);
- $relay->{ip_private} = $is_private;
-
- # add it to an internal array so Eval tests can use it
- return $relay;
}
# restart the parse if we find a fetchmail marker or similar.
Modified: spamassassin/trunk/t/rcvd_parser.t
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/t/rcvd_parser.t?view=diff&r1=157199&r2=157200
==============================================================================
--- spamassassin/trunk/t/rcvd_parser.t (original)
+++ spamassassin/trunk/t/rcvd_parser.t Fri Mar 11 17:22:58 2005
@@ -18,7 +18,7 @@
use lib '.'; use lib 't';
use SATest; sa_t_init("rcvd_parser");
-use Test; BEGIN { plan tests => 42 };
+use Test; BEGIN { plan tests => 43 };
use strict;
@@ -156,7 +156,24 @@
} => q{
- [ ip=65.54.245.95 rdns=bay1-f95.bay1.hotmail.com helo=hotmail.com by=Daffy.timing.com ident= envfrom= id= auth= ] [ ip=24.8.231.233 rdns= helo= by=by1fd.bay1.hotmail.msn.com ident= envfrom= id= auth= ]
+ [ ip=65.54.245.95 rdns=bay1-f95.bay1.hotmail.com helo=hotmail.com by=Daffy.timing.com ident= envfrom= id= auth= ] [ ip=24.8.231.233 rdns= helo= by=by1fd.bay1.hotmail.msn.com ident= envfrom= id= auth=HTTP ]
+
+},
+q{
+
+Received: (qmail 22147 invoked by uid 526); 6 Feb 2005 21:11:38 -0000
+Received: from 156.56.111.196 by blazing.arsecandle.org (envelope-from <ge...@lists.gentoo.org>, uid 502) with qmail-scanner-1.24
+ (clamdscan: 0.80/594. f-prot: 4.4.2/3.14.11.
+ Clear:RC:0(156.56.111.196):.
+ Processed in 0.288806 secs); 06 Feb 2005 21:11:38 -0000
+DomainKey-Status: no signature
+Received: from lists.gentoo.org (HELO parrot.gentoo.org) (156.56.111.196)
+ by blazing.arsecandle.org with (DHE-RSA-AES256-SHA encrypted) SMTP; 6 Feb 2005 21:11:37 -0000
+Received: (qmail 3988 invoked by uid 89); 6 Feb 2005 21:11:12 +0000
+
+} => q{
+
+ [ ip=156.56.111.196 rdns=lists.gentoo.org helo=parrot.gentoo.org by=blazing.arsecandle.org ident= envfrom=gentoo-announce-return-530-rod=arsecandle.org@lists.gentoo.org id= auth= ]
},
q{