You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2004/01/20 08:55:55 UTC

svn commit: rev 6234 - in incubator/spamassassin/trunk: lib/Mail/SpamAssassin rules

Author: jm
Date: Mon Jan 19 23:55:55 2004
New Revision: 6234

Modified:
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Received.pm
   incubator/spamassassin/trunk/rules/70_cvs_rules_under_test.cf
Log:
More SPF work (added SPF checks against the HELO name); added a test for numeric HELOs on Received lines other than the first; added a test to find images/hrefs pointing to terra.es, which seems to have a serious problem with hosting spammers.

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm	Mon Jan 19 23:55:55 2004
@@ -3110,16 +3110,27 @@
 
 ###########################################################################
 
+sub check_all_trusted {
+  my ($self) = @_;
+  if ($self->{num_relays_untrusted} > 0) {
+    return 0;
+  } else {
+    return 1;
+  }
+}
+
+###########################################################################
+
 # SPF support
 sub check_for_spf_pass {
   my ($self) = @_;
-  $self->_check_spf() unless $self->{spf_checked};
+  $self->_check_spf(0) unless $self->{spf_checked};
   $self->{spf_pass};
 }
 
 sub check_for_spf_fail {
   my ($self) = @_;
-  $self->_check_spf() unless $self->{spf_checked};
+  $self->_check_spf(0) unless $self->{spf_checked};
   if ($self->{spf_failure_comment}) {
     $self->test_log ($self->{spf_failure_comment});
   }
@@ -3128,30 +3139,54 @@
 
 sub check_for_spf_softfail {
   my ($self) = @_;
-  $self->_check_spf() unless $self->{spf_checked};
+  $self->_check_spf(0) unless $self->{spf_checked};
   if ($self->{spf_failure_comment}) {
     $self->test_log ($self->{spf_failure_comment});
   }
   $self->{spf_softfail};
 }
 
-sub check_all_trusted {
+sub check_for_spf_helo_pass {
   my ($self) = @_;
-  if ($self->{num_relays_untrusted} > 0) {
-    return 0;
-  } else {
-    return 1;
+  $self->_check_spf(1) unless $self->{spf_helo_checked};
+  $self->{spf_helo_pass};
+}
+
+sub check_for_spf_helo_fail {
+  my ($self) = @_;
+  $self->_check_spf(1) unless $self->{spf_helo_checked};
+  if ($self->{spf_helo_failure_comment}) {
+    $self->test_log ($self->{spf_helo_failure_comment});
   }
+  $self->{spf_helo_fail};
 }
 
-sub _check_spf {
+sub check_for_spf_helo_softfail {
   my ($self) = @_;
+  $self->_check_spf(1) unless $self->{spf_helo_checked};
+  if ($self->{spf_helo_failure_comment}) {
+    $self->test_log ($self->{spf_helo_failure_comment});
+  }
+  $self->{spf_helo_softfail};
+}
+
+sub _check_spf {
+  my ($self, $ishelo) = @_;
+
+  if ($ishelo) {
+    $self->{spf_helo_checked} = 1;
+    $self->{spf_helo_pass} = 0;
+    $self->{spf_helo_fail} = 0;
+    $self->{spf_helo_softfail} = 0;
+    $self->{spf_helo_failure_comment} = undef;
+  } else {
+    $self->{spf_checked} = 1;
+    $self->{spf_pass} = 0;
+    $self->{spf_fail} = 0;
+    $self->{spf_softfail} = 0;
+    $self->{spf_failure_comment} = undef;
+  }
 
-  $self->{spf_checked} = 1;
-  $self->{spf_pass} = 0;
-  $self->{spf_fail} = 0;
-  $self->{spf_softfail} = 0;
-  $self->{spf_failure_comment} = undef;
   return unless $self->is_dns_available();
 
   my $lasthop;
@@ -3198,14 +3233,38 @@
 
   my $ip = $lasthop->{ip};
   my $helo = $lasthop->{helo};
-  my $sender = $lasthop->{envfrom};
+  my $sender = '';
 
-  if (!$use_helo && !$sender) {
-    # we can (apparently) use whatever the current Envelope-From was,
-    # from the Return-Path, X-Envelope-From, or whatever header.
-    # it's better to get it from Received though, as that is updated
-    # hop-by-hop.
-    $sender = $self->get ("EnvelopeFrom");
+  if ($ishelo) {
+    dbg ("SPF: checking HELO");
+
+    # drop any hostname parts, if we can.
+if (1 && $helo) {
+    my @domparts = split (/\./, $helo);
+    my $numparts = scalar @domparts;
+
+    if ($numparts > 0) {
+      my $partsreqd = 2;
+      if (Mail::SpamAssassin::PerMsgStatus::is_in_subdelegated_cctld ($helo)) {
+        $partsreqd = 3;
+      }
+
+      if ($numparts >= $partsreqd) { $helo =~ s/^[^\.]+\.//; }
+    }
+}
+
+  } else {
+    if ($use_helo) { return; }	# we can't use the env-from reliably
+
+    dbg ("SPF: checking EnvelopeFrom");
+    $sender = $lasthop->{envfrom};
+    if (!$sender) {
+      # we can (apparently) use whatever the current Envelope-From was,
+      # from the Return-Path, X-Envelope-From, or whatever header.
+      # it's better to get it from Received though, as that is updated
+      # hop-by-hop.
+      $sender = $self->get ("EnvelopeFrom");
+    }
   }
 
   # if $sender is undef or "", that's OK; Mail::SPF::Query will use
@@ -3235,16 +3294,22 @@
   $comment ||= '';
   $comment =~ s/\s+/ /gs;	# no newlines please
 
-  if ($result eq 'pass') {
-    $self->{spf_pass} = 1;
-  } elsif ($result eq 'fail') {
-    $self->{spf_fail} = 1;
-  } elsif ($result eq 'softfail') {
-    $self->{spf_softfail} = 1;
-  }
+  if ($ishelo) {
+    if ($result eq 'pass') { $self->{spf_helo_pass} = 1; }
+    elsif ($result eq 'fail') { $self->{spf_helo_fail} = 1; }
+    elsif ($result eq 'softfail') { $self->{spf_helo_softfail} = 1; }
 
-  if ($self->{spf_fail} || $self->{spf_softfail}) {
-    $self->{spf_failure_comment} = "SPF failed: $comment";
+    if ($result eq 'fail' || $result eq 'softfail') {
+      $self->{spf_helo_failure_comment} = "SPF failed: $comment";
+    }
+  } else {
+    if ($result eq 'pass') { $self->{spf_pass} = 1; }
+    elsif ($result eq 'fail') { $self->{spf_fail} = 1; }
+    elsif ($result eq 'softfail') { $self->{spf_softfail} = 1; }
+
+    if ($result eq 'fail' || $result eq 'softfail') {
+      $self->{spf_failure_comment} = "SPF failed: $comment";
+    }
   }
 
   dbg ("SPF: query for $sender/$ip/$helo: result: $result, comment: $comment");
@@ -3441,6 +3506,7 @@
 
   # untaint the string for paranoia, making sure not to allow \n \0 \' \"
   $hc =~ /^([-A-Za-z0-9\xA0-\xFF:_\/\%\@\.\,\= \*\+]+)$/; $hc = $1;
+  if (!$hc) { return 0; }
 
   my ($ver, $date, $rsrc, $trial);
   ($ver, $date, $rsrc, $trial) = ($hc =~ /(\S+):(\S+):(\S+):(\S+)/ );

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Received.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Received.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Received.pm	Mon Jan 19 23:55:55 2004
@@ -1154,6 +1154,11 @@
   $self->{relays} = [ ];
 }
 
+sub is_in_subdelegated_cctld {
+  my ($domain) = @_;
+  return ($domain =~ /\.${CCTLDS_WITH_SUBDELEGATION}$/);
+}
+
 # ---------------------------------------------------------------------------
 
 1;

Modified: incubator/spamassassin/trunk/rules/70_cvs_rules_under_test.cf
==============================================================================
--- incubator/spamassassin/trunk/rules/70_cvs_rules_under_test.cf	(original)
+++ incubator/spamassassin/trunk/rules/70_cvs_rules_under_test.cf	Mon Jan 19 23:55:55 2004
@@ -139,6 +139,16 @@
 tflags T_SPF_SOFTFAIL	net
 score T_SPF_SOFTFAIL	0.1
 
+header T_SPF_HELO_PASS	eval:check_for_spf_helo_pass()
+tflags T_SPF_HELO_PASS	net nice
+score T_SPF_HELO_PASS	-0.1
+header T_SPF_HELO_FAIL	eval:check_for_spf_helo_fail()
+tflags T_SPF_HELO_FAIL	net
+score T_SPF_HELO_FAIL	0.2
+header T_SPF_HELO_SOFTFAIL	eval:check_for_spf_helo_softfail()
+tflags T_SPF_HELO_SOFTFAIL	net
+score T_SPF_HELO_SOFTFAIL	0.1
+
 # Not good, esp. considering how *slow* it runs..
 # 17.640   4.1041  29.0741    0.124   0.69   -0.10  T_ALL_RELAYS_NEAR_MXES
 # However, combined with SBL it might work out useful...
@@ -354,6 +364,9 @@
 # Received headers using "helo=" prefix
 header T_RCVD_NUMERIC_HELO	X-Spam-Relays-Untrusted =~ / helo=\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} /
 describe T_RCVD_NUMERIC_HELO	Received: contains an IP address used for HELO
+# same but not for first line
+header T_RCVD_NUMERIC_HELO_NOTFIRST	X-Spam-Relays-Untrusted =~ / helo=\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} .+\[/
+describe T_RCVD_NUMERIC_HELO_NOTFIRST	Received: contains an IP address used for HELO (not first line)
 
 # If the Message-Id claims to be added by one ISP, there should be a Received
 # header which mentions that ISP.
@@ -391,3 +404,8 @@
 body T_MPART_ALT_DIFF_97      eval:multipart_alternative_difference('97', '100')
 body T_MPART_ALT_DIFF_98      eval:multipart_alternative_difference('98', '100')
 body T_MPART_ALT_DIFF_99      eval:multipart_alternative_difference('99', '100')
+
+# 0 nonspam hits, hundreds of spam hits.  Serious problems there
+uri T_TERRA_ES		/terra\.es\//i
+describe T_TERRA_ES	Contains images or links to pages hosted at 'terra.es'
+