You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2007/07/06 07:29:26 UTC

svn commit: r553740 - in /spamassassin: rules/trunk/sandbox/felicity/70_other.cf trunk/lib/Mail/SpamAssassin/Message.pm trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm trunk/rules/20_head_tests.cf

Author: felicity
Date: Thu Jul  5 22:29:26 2007
New Revision: 553740

URL: http://svn.apache.org/viewvc?view=rev&rev=553740
Log:
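Promote the new INVALID_DATE rule to trunk; rename the sandbox test rules with a T_ prefix so they are explicitly test rules; make the new MIME epilogue and missing-MIME-headers rules more efficient by switching them to check_msg_parse_flags.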

Modified:
    spamassassin/rules/trunk/sandbox/felicity/70_other.cf
    spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm
    spamassassin/trunk/rules/20_head_tests.cf

Modified: spamassassin/rules/trunk/sandbox/felicity/70_other.cf
URL: http://svn.apache.org/viewvc/spamassassin/rules/trunk/sandbox/felicity/70_other.cf?view=diff&rev=553740&r1=553739&r2=553740
==============================================================================
--- spamassassin/rules/trunk/sandbox/felicity/70_other.cf (original)
+++ spamassassin/rules/trunk/sandbox/felicity/70_other.cf Thu Jul  5 22:29:26 2007
@@ -24,16 +24,17 @@
 # I found a few whitespace issues in the original RE, and I wanted to avoid my
 # two common, but yes invalid, date headers.  specifically / \(GMT\)$/ and
 # / 0000 GMT$/.  dos has / "GMT"$/
-header T_INVALID_DATE		Date !~ /^\s*(?:(?i:Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s)?\s*(?:[12]\d|3[01]|0?[1-9])\s+(?i:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec)\s+(?:19[7-9]\d|2\d{3})\s+(?:[01]?\d|2[0-3])\:[0-5]\d(?::(?:[0-5]\d|60))?(?:\s+[AP]M)?(?:\s+(?:[+-][0-9]{4}|UT|[A-Z]{2,3}T|0000 GMT))?(?:\s*\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
-header T_INVALID_DATE2		Date !~ /^\s*(?:(?i:Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s)?\s*(?:[12]\d|3[01]|0?[1-9])\s+(?i:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec)\s+(?:19[7-9]\d|2\d{3})\s+(?:[01]?\d|2[0-3])\:[0-5]\d(?::(?:[0-5]\d|60))?(?:\s+[AP]M)?(?:\s+(?:[+-][0-9]{4}|UT|[A-Z]{2,3}T|0000 GMT|"GMT"))?(?:\s*\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
+#header INVALID_DATE		Date !~ /^\s*(?:(?i:Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s)?\s*(?:[12]\d|3[01]|0?[1-9])\s+(?i:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec)\s+(?:19[7-9]\d|2\d{3})\s+(?:[01]?\d|2[0-3])\:[0-5]\d(?::(?:[0-5]\d|60))?\s+(?:[AP]M\s+)?(?:[+-][0-9]{4}|UT|[A-Z]{2,3}T)(?:\s+\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
+#header T_INVALID_DATE		Date !~ /^\s*(?:(?i:Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s)?\s*(?:[12]\d|3[01]|0?[1-9])\s+(?i:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec)\s+(?:19[7-9]\d|2\d{3})\s+(?:[01]?\d|2[0-3])\:[0-5]\d(?::(?:[0-5]\d|60))?(?:\s+[AP]M)?(?:\s+(?:[+-][0-9]{4}|UT|[A-Z]{2,3}T|0000 GMT))?(?:\s*\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
+#header T_INVALID_DATE2		Date !~ /^\s*(?:(?i:Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s)?\s*(?:[12]\d|3[01]|0?[1-9])\s+(?i:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec)\s+(?:19[7-9]\d|2\d{3})\s+(?:[01]?\d|2[0-3])\:[0-5]\d(?::(?:[0-5]\d|60))?(?:\s+[AP]M)?(?:\s+(?:[+-][0-9]{4}|UT|[A-Z]{2,3}T|0000 GMT|"GMT"))?(?:\s*\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
 
 #  4.470   5.2627   0.0000    1.000   1.00    0.00  T_TVD_SILLY_URI_OBFU
 # allow the obfuscation around the tld as well
-body TVD_SILLY_URI_OBFU	m!https?://[a-z0-9-]+\.[a-z0-9-]*\.?[^a-z0-9.:/\s"'\@?\)>-]+[a-z0-9.-]*[a-z]{3}(?:\s|$)!i
+body TVD_SILLY_URI_OBFU		m!https?://[a-z0-9-]+\.[a-z0-9-]*\.?[^a-z0-9.:/\s"'\@?\)>-]+[a-z0-9.-]*[a-z]{3}(?:\s|$)!i
 # much more generic -- since the current spammer(s) are using com domains,
 # let's specifically just look for when they do something like "example*com"
 # or "example-com", etc.
-body T_TVD_SILLY_URI_OBFU_COM	m!https?://[^/]+[^.]com(?:[/\s]|$)!i
+#body T_TVD_SILLY_URI_OBFU_COM	m!https?://[^/]+[^.]com(?:[/\s]|$)!i
 
 ifplugin Mail::SpamAssassin::Plugin::ReplaceTags
 
@@ -247,20 +248,20 @@
 ########################################################################
 loadplugin Mail::SpamAssassin::Plugin::Sandbox::felicity sandbox-felicity.pm
 ifplugin Mail::SpamAssassin::Plugin::Sandbox::felicity
-body QP_LENGTH_77_78	eval:check_quotedprintable_length('77','78')
-body QP_LENGTH_78_79	eval:check_quotedprintable_length('78','79')
-body QP_LENGTH_79_80	eval:check_quotedprintable_length('79','80')
-body QP_LENGTH_80_81	eval:check_quotedprintable_length('80','81')
-body QP_LENGTH_81_82	eval:check_quotedprintable_length('81','82')
-body QP_LENGTH_82_83	eval:check_quotedprintable_length('82','83')
-body QP_LENGTH_83_84	eval:check_quotedprintable_length('83','84')
-body QP_LENGTH_84_85	eval:check_quotedprintable_length('84','85')
-body QP_LENGTH_85_86	eval:check_quotedprintable_length('85','86')
-body QP_LENGTH_86_87	eval:check_quotedprintable_length('86','87')
-body QP_LENGTH_87_88	eval:check_quotedprintable_length('87','88')
-body QP_LENGTH_88_89	eval:check_quotedprintable_length('88','89')
-body QP_LENGTH_89_90	eval:check_quotedprintable_length('89','90')
-body QP_LENGTH_90_INF	eval:check_quotedprintable_length('90')
+body T_QP_LENGTH_77_78	eval:check_quotedprintable_length('77','78')
+body T_QP_LENGTH_78_79	eval:check_quotedprintable_length('78','79')
+body T_QP_LENGTH_79_80	eval:check_quotedprintable_length('79','80')
+body T_QP_LENGTH_80_81	eval:check_quotedprintable_length('80','81')
+body T_QP_LENGTH_81_82	eval:check_quotedprintable_length('81','82')
+body T_QP_LENGTH_82_83	eval:check_quotedprintable_length('82','83')
+body T_QP_LENGTH_83_84	eval:check_quotedprintable_length('83','84')
+body T_QP_LENGTH_84_85	eval:check_quotedprintable_length('84','85')
+body T_QP_LENGTH_85_86	eval:check_quotedprintable_length('85','86')
+body T_QP_LENGTH_86_87	eval:check_quotedprintable_length('86','87')
+body T_QP_LENGTH_87_88	eval:check_quotedprintable_length('87','88')
+body T_QP_LENGTH_88_89	eval:check_quotedprintable_length('88','89')
+body T_QP_LENGTH_89_90	eval:check_quotedprintable_length('89','90')
+body T_QP_LENGTH_90_INF	eval:check_quotedprintable_length('90')
 endif
 ########################################################################
 
@@ -275,7 +276,13 @@
 body BASE64_LENGTH_79_INF	eval:check_base64_length('79')
 
 # 0.177   0.2037   0.0000    1.000   0.00    0.00  TVD_MIME_EPI
-body TVD_MIME_EPI	eval:check_mime_epilogue()
+# with one exception, my FPs are all mails through mailing lists that tack on
+# a footer to all mails, outside the MIME boundary.
+# the one exception is a malformed mail which repeated the text/html
+# part content after the closing boundary.
+#
+body T_TVD_MIME_EPI		eval:check_msg_parse_flags('mime_epilogue_exists')
+body T_TVD_MIME_NO_HEADERS	eval:check_msg_parse_flags('missing_mime_headers')
 
 endif
 
@@ -339,26 +346,26 @@
 #  0.919   1.0598   0.0000    1.000   0.61    0.00  TVD_PDF_02
 #  1.597   1.8161   0.1611    0.919   0.86    0.00  TVD_PDF_03
 #  0.919   1.0598   0.0000    1.000   0.61    0.00  TVD_PDF_20
-header TVD_PDF_01 Message-Id =~ /<[0-9A-F]{8}\.\d{7}\@/
-header TVD_PDF_02 Subject =~ /\.pdf/
-header TVD_PDF_03 Content-Type =~ /boundary="-{12}\d{24}"/
-meta TVD_PDF_20 TVD_PDF_01 && TVD_PDF_02 && TVD_PDF_03
+header T_TVD_PDF_01 Message-Id =~ /<[0-9A-F]{8}\.\d{7}\@/
+header T_TVD_PDF_02 Subject =~ /\.pdf/
+header T_TVD_PDF_03 Content-Type =~ /boundary="-{12}\d{24}"/
+meta T_TVD_PDF_20 T_TVD_PDF_01 && T_TVD_PDF_02 && T_TVD_PDF_03
 
 #  0.919   1.0598   0.0000    1.000   0.61    0.00  TVD_PDF_21
 #  1.133   1.2977   0.0537    0.960   0.71    0.00  TVD_PDF_22
 #  0.919   1.0598   0.0000    1.000   0.61    0.00  TVD_PDF_23
-meta TVD_PDF_21 TVD_PDF_01 && TVD_PDF_02
-meta TVD_PDF_22 TVD_PDF_01 && TVD_PDF_03
-meta TVD_PDF_23 TVD_PDF_02 && TVD_PDF_03
+meta T_TVD_PDF_21 T_TVD_PDF_01 && T_TVD_PDF_02
+meta T_TVD_PDF_22 T_TVD_PDF_01 && T_TVD_PDF_03
+meta T_TVD_PDF_23 T_TVD_PDF_02 && T_TVD_PDF_03
 
 #  1.133   1.2977   0.0537    0.960   0.71    0.00  TVD_PDF_22B
 #  0.919   1.0598   0.0000    1.000   0.61    0.00  TVD_PDF_23B
 #  3.518   3.9094   0.9557    0.804   1.00    0.00  TVD_PDF_03B
 #  1.032   1.1894   0.0000    1.000   0.80    0.00  TVD_PDF_25B
-header TVD_PDF_03B Content-Type =~ /boundary="-{2,}\d{8,}"/
-meta TVD_PDF_22B TVD_PDF_01 && TVD_PDF_03B
-meta TVD_PDF_23B TVD_PDF_02 && TVD_PDF_03B
-meta TVD_PDF_25B TVD_PDF_22B && TVD_PDF_24
+header T_TVD_PDF_03B Content-Type =~ /boundary="-{2,}\d{8,}"/
+meta T_TVD_PDF_22B T_TVD_PDF_01 && T_TVD_PDF_03B
+meta T_TVD_PDF_23B T_TVD_PDF_02 && T_TVD_PDF_03B
+meta T_TVD_PDF_25B T_TVD_PDF_22B && T_TVD_PDF_24
 
 #  1.035   1.1910   0.0107    0.991   0.76    0.00  __TVD_PDF_ATT_AP
 # 74.260  77.0441  56.0399    0.579   0.51    0.00  __TVD_PDF_ATT_TP
@@ -369,17 +376,19 @@
 ifplugin Mail::SpamAssassin::Plugin::MIMEEval
 mimeheader __TVD_PDF_ATT_TP	Content-Type =~ /^text\/plain/i
 mimeheader __TVD_PDF_ATT_AP	Content-Type =~ /^application\/pdf/i
+mimeheader __TVD_PDF_ATT_AO	Content-Type =~ /^application\/octet-stream/i
 endif
-meta TVD_PDF_24 __TVD_PDF_CT_MM && __TVD_PDF_ATT_TP && __TVD_PDF_ATT_AP
-meta TVD_PDF_25 TVD_PDF_22 && TVD_PDF_24
+meta T_TVD_PDF_24 __TVD_PDF_CT_MM && __TVD_PDF_ATT_TP && __TVD_PDF_ATT_AP
+meta T_TVD_PDF_25 T_TVD_PDF_22 && T_TVD_PDF_24
 
 #  1.032   1.1894   0.0000    1.000   0.80    0.00  TVD_PDF_26
 #  1.032   1.1894   0.0000    1.000   0.80    0.00  TVD_PDF_26B
-meta TVD_PDF_26 __TVD_PDF_CT_MM && TVD_PDF_03 && __TVD_PDF_ATT_AP
-meta TVD_PDF_26B __TVD_PDF_CT_MM && TVD_PDF_03B && __TVD_PDF_ATT_AP
+meta T_TVD_PDF_26 __TVD_PDF_CT_MM && T_TVD_PDF_03 && __TVD_PDF_ATT_AP
+meta T_TVD_PDF_26B __TVD_PDF_CT_MM && T_TVD_PDF_03B && __TVD_PDF_ATT_AP
+meta T_TVD_PDF_26C __TVD_PDF_CT_MM && T_TVD_PDF_03B && ( __TVD_PDF_ATT_AP || __TVD_PDF_ATT_AO )
 
 # 98.910  98.7434  100.0000    0.497   0.00    0.00  __TVD_PDF_04
 #  1.025   1.1812   0.0000    1.000   0.00    0.00  TVD_PDF_27
 # body fails due to subject ...
 rawbody __TVD_PDF_04	/\S{4}/
-meta TVD_PDF_27 __TVD_PDF_CT_MM && __TVD_PDF_ATT_TP && __TVD_PDF_ATT_AP && !__TVD_PDF_04
+meta T_TVD_PDF_27 __TVD_PDF_CT_MM && __TVD_PDF_ATT_TP && __TVD_PDF_ATT_AP && !__TVD_PDF_04

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm?view=diff&rev=553740&r1=553739&r2=553740
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm Thu Jul  5 22:29:26 2007
@@ -809,6 +809,9 @@
           my ( $key, $value ) = split ( /:\s*/, $header, 2 );
           $part_msg->header( $key, $value );
         }
+	else {
+	  $self->{'missing_mime_headers'} = 1;
+	}
         $in_body = 1;
 
 	# if there's a blank line separator, that's good.  if there isn't,
@@ -841,7 +844,7 @@
   if ($line_count) {
     for(; $line_count > 0; $line_count--) {
       if ($body->[-$line_count] =~ /[^\s.]/) {
-        $self->{mime_epilogue} = 1;
+        $self->{mime_epilogue_exists} = 1;
         last;
       }
     }

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm?view=diff&rev=553740&r1=553739&r2=553740
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm Thu Jul  5 22:29:26 2007
@@ -47,7 +47,6 @@
   $self->register_eval_rule("check_for_uppercase");
   $self->register_eval_rule("check_ma_non_text");
   $self->register_eval_rule("check_base64_length");
-  $self->register_eval_rule("check_mime_epilogue");
 
   return $self;
 }
@@ -506,12 +505,6 @@
   }
   
   return $result;
-}
-
-sub check_mime_epilogue {
-  my $self = shift;
-  my $pms  = shift;
-  return defined $pms->{msg}->{mime_epilogue};
 }
 
 1;

Modified: spamassassin/trunk/rules/20_head_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/20_head_tests.cf?view=diff&rev=553740&r1=553739&r2=553740
==============================================================================
--- spamassassin/trunk/rules/20_head_tests.cf (original)
+++ spamassassin/trunk/rules/20_head_tests.cf Thu Jul  5 22:29:26 2007
@@ -147,9 +147,15 @@
 # as noted on the dev@ list, ":60" is valid for seconds when there's a leap
 # second (12/31/2005 for instance), so let's accept that as valid.  ISO 8601
 # apparently allows for it.
+# there were a few whitespace issues in the original RE, and I wanted to avoid my
+# two common, but yes invalid, date headers.  specifically / \(GMT\)$/ and
+# / 0000 GMT$/.  dos has / "GMT"$/ - tvd
+# 2.229   2.7267   0.0517    0.981   0.86    0.00  INVALID_DATE
+# 2.263   2.7486   0.1368    0.953   0.78    0.00  INVALID_DATE_OLD
+#
 # WRT the tests, remember that ok and fail are reversed -- so valid dates
 # should be "fail" and invalid dates should be "ok".
-header INVALID_DATE		Date !~ /^\s*(?:(?i:Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s)?\s*(?:[12]\d|3[01]|0?[1-9])\s+(?i:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec)\s+(?:19[7-9]\d|2\d{3})\s+(?:[01]?\d|2[0-3])\:[0-5]\d(?::(?:[0-5]\d|60))?\s+(?:[AP]M\s+)?(?:[+-][0-9]{4}|UT|[A-Z]{2,3}T)(?:\s+\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
+header INVALID_DATE		Date !~ /^\s*(?:(?i:Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s)?\s*(?:[12]\d|3[01]|0?[1-9])\s+(?i:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec)\s+(?:19[7-9]\d|2\d{3})\s+(?:[01]?\d|2[0-3])\:[0-5]\d(?::(?:[0-5]\d|60))?(?:\s+[AP]M)?(?:\s+(?:[+-][0-9]{4}|UT|[A-Z]{2,3}T|0000 GMT|"GMT"))?(?:\s*\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
 describe INVALID_DATE		Invalid Date: header (not RFC 2822)
 test INVALID_DATE fail    Sat, 31 Dec 2005 23:59:60 -0500
 test INVALID_DATE fail    Wed, 31 Jul 2002 16:41:57 +0200