You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by qu...@apache.org on 2004/04/20 22:06:05 UTC

svn commit: rev 10131 - in incubator/spamassassin/trunk: lib/Mail/SpamAssassin rules

Author: quinlan
Date: Tue Apr 20 13:06:04 2004
New Revision: 10131

Modified:
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm
   incubator/spamassassin/trunk/rules/20_head_tests.cf
   incubator/spamassassin/trunk/rules/70_testing.cf
Log:
replace MSGID_FROM_MTA_SHORT with MSGID_FROM_MTA_ID
remove MSGID_FROM_MTA_LATER and MSGID_FROM_MTA_BACKUP
remove URL shortening rules
add T_LOTS_OF_STUFF*
add T_TO_HAS_SPACES*


Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm	Tue Apr 20 13:06:04 2004
@@ -212,177 +212,14 @@
 }
 
 ###########################################################################
-# tests to detect when the MTA added the Message-ID
-
-sub mta_added_message_id {
-  my ($self, $test) = @_;
-
-  if (!exists $self->{"mta_added_message_id_$test"}) {
-    $self->_mta_added_message_id();
-  }
-  return $self->{"mta_added_message_id_$test"};
-}
-
-sub backup_mx_host {
-  my ($self, $host, $test) = @_;
-
-  # check that DNS is available, if not do not perform this check
-  return 0 unless $self->is_dns_available();
-
-  $self->load_resolver();
-
-  if ($self->{conf}->{check_mx_attempts} < 1) {
-    return 0;
-  }
-
-  # try check_mx_attempts times to protect against temporary outages.
-  # sleep between checks to give the DNS a chance to recover.
-  for my $i (1..$self->{conf}->{check_mx_attempts}) {
-    my @mx = Net::DNS::mx($self->{res}, $host);
-    return 0 unless (scalar @mx);
-    my $primary;
-    my $preference;
-    foreach my $mx (@mx) {
-      if (!defined($primary) || ($mx->preference =~ /^\d+$/ &&
-				 $mx->preference < $primary))
-      {
-	$primary = $mx->preference;
-      }
-      if (lc($mx->exchange) eq lc($test)) {
-	$preference = $mx->preference;
-      }
-    }
-    if (defined($primary) && defined($preference) && $preference > $primary) {
-      return 1;
-    }
-  }
-
-  return 0;
-}
-
-# Please make sure you understand how this test works before changing
-# it, especially to add exemptions which are very unlikely be needed.
-sub _mta_added_message_id {
-  my ($self) = @_;
-
-  $self->{mta_added_message_id_short} = 0;
-  $self->{mta_added_message_id_later} = 0;
-  $self->{mta_added_message_id_backup} = 0;
-
-  # We may get headers with continuations in them, so deal with it ...
-  my @received = grep(/\S/, map { s/\r?\n\s+/ /g; $_; } $self->get('Received'));
-  my $id = $self->get('Resent-Message-ID') || $self->get('Message-ID');
-  return unless defined($id) && $id;
-  my $local = 1;
-
-  # general method to detect local messages
-  my $from = $self->get('From:addr');
-  $from =~ s/.*\@//;
-  $from = ($from =~ m/(\S+\.\S+)\s*$/) ? lc($1) : '';
-
-  # Postfix adds the Message-ID on the second local hop.  Note: this is not
-  # an exemption, this is a special case to classify these hits correctly.
-  if ($#received > 0 &&
-      $received[$#received] =~ /\[127\.0\.0\.1\].+\(Postfix.*?\)/i &&
-      $received[$#received - 1] =~ /\(Postfix, from userid \d+\)/i)
-  {
-    $local = 2;
-  }
-
-  # Message-ID headers added by qmail generally include the current local
-  # date and time instead of an ID, so no exemption is necessary for qmail.
-
-  # Note: these tests intentionally do not exempt localhost!
-  for (my $i = 0; $i <= $#received; $i++) {
-    if ($received[$i] =~ /\sid ([^\s;]{3,})/) {
-      my $received_id = $1;
-
-      if (index($id, $received_id) != -1) {
-	# if: only 1 or 2 hops
-	if ($local > $#received && !($from && $id =~ /\@.*\Q$from\E>/)) {
-	  $self->{mta_added_message_id_short} = 1;
-	}
-	# else: hops after first 1 or 2 hops
-	elsif ($i + $local <= $#received) {
-	  $self->{mta_added_message_id_later} = 1;
-	}
-	# else: first 1 or 2 hops and through a backup MX
-	else {
-	  my $host;
-	  my $test;
-	  if ($received[$i] =~ /\bfor\s\W*([^\s>;]+)/) {
-	    $host = lc($1);
-	    $host =~ s/.*\@//;
-	  }
-	  if ($host && $received[$i] =~ /\bby\s\W*([^\s>;]+)/) {
-	    $test = lc($1);
-	  }
-	  if ($host && $test && $self->backup_mx_host($host, $test)) {
-	    $self->{mta_added_message_id_backup} = 1;
-	  }
-	}
-      }
-    }
-  }
-}
 
 # Message-ID for untrusted message was added by a trusted relay
-sub message_id_from_mta_1 {
+sub message_id_from_mta {
   my ($self) = @_;
 
   my $id = $self->get('MESSAGEID');
-  return unless defined($id) && $id;
-
-  if ($self->{num_relays_untrusted} > 0) {
-    for my $rcvd (@{$self->{relays_untrusted}}[0], @{$self->{relays_trusted}})
-    {
-      return 1 if $rcvd->{id} && (index($id, $rcvd->{id}) != -1);
-    }
-  }
-  return 0;
-}
-
-# Message-ID for untrusted message was added by a trusted relay
-sub message_id_from_mta_2 {
-  my ($self) = @_;
-
-  my $id = $self->get('Resent-Message-ID') || $self->get('Message-ID');
-  return unless defined($id) && $id;
-
-  if ($self->{num_relays_untrusted} > 0) {
-    for my $rcvd (@{$self->{relays_untrusted}}[0], @{$self->{relays_trusted}})
-    {
-      return 1 if $rcvd->{id} && (index($id, $rcvd->{id}) != -1);
-    }
-  }
-  return 0;
-}
-
-
-# Message-ID for untrusted message was added by a trusted relay
-sub message_id_from_mta_3 {
-  my ($self) = @_;
-
-  my $id = $self->get('MESSAGEID');
-  return unless defined($id) && $id;
-
-  if ($self->{num_relays_untrusted} > 0) {
-    for my $rcvd (@{$self->{relays_untrusted}}[0], @{$self->{relays_trusted}})
-    {
-      return 1 if $rcvd->{id} && (index(lc($id), lc($rcvd->{id})) != -1);
-    }
-  }
-  return 0;
-}
-
-# Message-ID for untrusted message was added by a trusted relay
-sub message_id_from_mta_4 {
-  my ($self) = @_;
-
-  my $id = $self->get('Resent-Message-ID') || $self->get('Message-ID');
-  return unless defined($id) && $id;
 
-  if ($self->{num_relays_untrusted} > 0) {
+  if ($id && $self->{num_relays_untrusted} > 0) {
     for my $rcvd (@{$self->{relays_untrusted}}[0], @{$self->{relays_trusted}})
     {
       return 1 if $rcvd->{id} && (index(lc($id), lc($rcvd->{id})) != -1);

Modified: incubator/spamassassin/trunk/rules/20_head_tests.cf
==============================================================================
--- incubator/spamassassin/trunk/rules/20_head_tests.cf	(original)
+++ incubator/spamassassin/trunk/rules/20_head_tests.cf	Tue Apr 20 13:06:04 2004
@@ -149,20 +149,13 @@
 
 ###########################################################################
 
-header MSGID_FROM_MTA_SHORT	eval:mta_added_message_id('short')
-describe MSGID_FROM_MTA_SHORT	Message-Id was added by a relay
-
-header MSGID_FROM_MTA_LATER	eval:mta_added_message_id('later')
-describe MSGID_FROM_MTA_LATER	Message-Id was added by a relay
-
-header MSGID_FROM_MTA_BACKUP	eval:mta_added_message_id('backup')
-describe MSGID_FROM_MTA_BACKUP	Message-Id was added by a relay
-tflags MSGID_FROM_MTA_BACKUP	net
-
-header __MSGID_BEFORE_RECEIVED		ALL =~ /\nMessage-Id:.*\nReceived:/si
-header __MSGID_BEFORE_OKAY		Message-Id =~ /\@[a-z0-9.-]+\.(?:yahoo|wanadoo)(?:\.[a-z]{2,3}){1,2}>/
-meta MSGID_FROM_MTA_HEADER		(__MSGID_BEFORE_RECEIVED && !__MSGID_BEFORE_OKAY)
+header __MSGID_BEFORE_RECEIVED	ALL =~ /\nMessage-Id:.*\nReceived:/si
+header __MSGID_BEFORE_OKAY	Message-Id =~ /\@[a-z0-9.-]+\.(?:yahoo|wanadoo)(?:\.[a-z]{2,3}){1,2}>/
+meta MSGID_FROM_MTA_HEADER	(__MSGID_BEFORE_RECEIVED && !__MSGID_BEFORE_OKAY)
 describe MSGID_FROM_MTA_HEADER	Message-Id was added by a relay
+
+header MSGID_FROM_MTA_ID	eval:message_id_from_mta()
+describe MSGID_FROM_MTA_ID	Message-Id for external message added locally
 
 header MSGID_FROM_MTA_HOTMAIL	Message-Id =~ /<MC\d{1,2}-F{1,2}\w{21,22}\@\S*hotmail\.com>/
 describe MSGID_FROM_MTA_HOTMAIL	Message-Id was added by a hotmail.com relay

Modified: incubator/spamassassin/trunk/rules/70_testing.cf
==============================================================================
--- incubator/spamassassin/trunk/rules/70_testing.cf	(original)
+++ incubator/spamassassin/trunk/rules/70_testing.cf	Tue Apr 20 13:06:04 2004
@@ -477,22 +477,26 @@
 # bug 3262
 body T_NO_COST                    /\bno (?:(?:extra|hidden) )?(?:cost|charge)\b/i
 
-# URL obfuscation services
-uri T_URL_SHORTEN_1		m{http://makeashorterlink\.com/}i
-uri T_URL_SHORTEN_2		m{http://shorl\.com/}i
-uri T_URL_SHORTEN_3		m{http://tinyurl\.com/}i
-uri T_URL_SHORTEN_4		m{http://xrl\.us/}i
-uri T_URL_SHORTEN_5		m{http://snipurl\.com/}i
-uri T_URL_SHORTEN_6		m{http://lin\.kz/}i
-
-# possible replacement for MTA_FROM_MTA_SHORT
-# maybe delete MSGID_FROM_MTA_LATER and MSGID_FROM_MTA_BACKUP too
-header T_MSGID_FROM_MTA_1	eval:message_id_from_mta_1()
-header T_MSGID_FROM_MTA_2	eval:message_id_from_mta_2()
-header T_MSGID_FROM_MTA_3	eval:message_id_from_mta_3()
-header T_MSGID_FROM_MTA_4	eval:message_id_from_mta_4()
-
 # bug 3268
 body T_REDIRECTOR       eval:check_for_http_redirector()
 describe T_REDIRECTOR	Message has HTTP redirector URI
 
+# obviously a URI rule, replaces LOTS_OF_STUFF
+# rawbody seems to do better than uri which is somewhat troubling
+uri T_LOTS_OF_STUFF_1		/\b(?:\d{1,3}[,\.])+\d{3}.{0,20}\b(?:pics|pictures|images|photos|movies)/i
+uri T_LOTS_OF_STUFF_2		/\b(?:\d{1,3}[,\.])+\d{3}.{0,20}\b(?:pics|pictures|images|photos|movies|imgs?)/i
+uri T_LOTS_OF_STUFF_3	/\b(?:\d{1,3}[,\.])+\d{3}.{0,20}\b(?:pics?|pictures?|images?|photos?|movies?|imgs?)/i
+rawbody T_LOTS_OF_STUFF_4	/\b(?:\d{1,3}[,\.])+\d{3}.{0,20}\b(?:pics|pictures|images|photos|movies)/i
+rawbody T_LOTS_OF_STUFF_5	/\b(?:\d{1,3}[,\.])+\d{3}.{0,20}\b(?:pics|pictures|images|photos|movies|imgs?)/i
+rawbody T_LOTS_OF_STUFF_6	/\b(?:\d{1,3}[,\.])+\d{3}.{0,20}\b(?:pics?|pictures?|images?|photos?|movies?|imgs?)/i
+
+# possible replacement rule for TO_HAS_SPACES
+# the old rule tested To:addr but we changed the :addr code, so this is
+# based on my corpus hits from 2.6x.
+header T_TO_HAS_SPACES_1	To =~ /[ \t]$/
+header T_TO_HAS_SPACES_2	To =~ /[ \t]$/m
+
+# just an experiment
+header __MSGID_BEFORE_2RECEIVED	ALL =~ /\nMessage-Id:.*\nReceived:.*\nReceived:/si
+meta T_MSGID_FROM_MTA_HEADER_1	(__MSGID_BEFORE_2RECEIVED && !__MSGID_BEFORE_OKAY)
+meta T_MSGID_FROM_MTA_HEADER_2	(__MSGID_BEFORE_2RECEIVED)