You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2007/07/06 07:29:26 UTC
svn commit: r553740 - in /spamassassin:
rules/trunk/sandbox/felicity/70_other.cf
trunk/lib/Mail/SpamAssassin/Message.pm
trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm trunk/rules/20_head_tests.cf
Author: felicity
Date: Thu Jul 5 22:29:26 2007
New Revision: 553740
URL: http://svn.apache.org/viewvc?view=rev&rev=553740
Log:
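Promote the new sandbox INVALID_DATE regex (which also accepts "0000 GMT" and quoted "GMT" zones) into 20_head_tests.cf; rename the sandbox test rules with a T_ prefix so they are explicitly test rules; and make the new MIME epilogue / missing-MIME-headers rules more efficient by using check_msg_parse_flags() instead of a dedicated check_mime_epilogue() eval.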
Modified:
spamassassin/rules/trunk/sandbox/felicity/70_other.cf
spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm
spamassassin/trunk/rules/20_head_tests.cf
Modified: spamassassin/rules/trunk/sandbox/felicity/70_other.cf
URL: http://svn.apache.org/viewvc/spamassassin/rules/trunk/sandbox/felicity/70_other.cf?view=diff&rev=553740&r1=553739&r2=553740
==============================================================================
--- spamassassin/rules/trunk/sandbox/felicity/70_other.cf (original)
+++ spamassassin/rules/trunk/sandbox/felicity/70_other.cf Thu Jul 5 22:29:26 2007
@@ -24,16 +24,17 @@
# I found a few whitespace issues in the original RE, and I wanted to avoid my
# two common, but yes invalid, date headers. specifically / \(GMT\)$/ and
# / 0000 GMT$/. dos has / "GMT"$/
-header T_INVALID_DATE Date !~ /^\s*(?:(?i:Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s)?\s*(?:[12]\d|3[01]|0?[1-9])\s+(?i:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec)\s+(?:19[7-9]\d|2\d{3})\s+(?:[01]?\d|2[0-3])\:[0-5]\d(?::(?:[0-5]\d|60))?(?:\s+[AP]M)?(?:\s+(?:[+-][0-9]{4}|UT|[A-Z]{2,3}T|0000 GMT))?(?:\s*\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
-header T_INVALID_DATE2 Date !~ /^\s*(?:(?i:Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s)?\s*(?:[12]\d|3[01]|0?[1-9])\s+(?i:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec)\s+(?:19[7-9]\d|2\d{3})\s+(?:[01]?\d|2[0-3])\:[0-5]\d(?::(?:[0-5]\d|60))?(?:\s+[AP]M)?(?:\s+(?:[+-][0-9]{4}|UT|[A-Z]{2,3}T|0000 GMT|"GMT"))?(?:\s*\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
+#header INVALID_DATE Date !~ /^\s*(?:(?i:Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s)?\s*(?:[12]\d|3[01]|0?[1-9])\s+(?i:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec)\s+(?:19[7-9]\d|2\d{3})\s+(?:[01]?\d|2[0-3])\:[0-5]\d(?::(?:[0-5]\d|60))?\s+(?:[AP]M\s+)?(?:[+-][0-9]{4}|UT|[A-Z]{2,3}T)(?:\s+\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
+#header T_INVALID_DATE Date !~ /^\s*(?:(?i:Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s)?\s*(?:[12]\d|3[01]|0?[1-9])\s+(?i:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec)\s+(?:19[7-9]\d|2\d{3})\s+(?:[01]?\d|2[0-3])\:[0-5]\d(?::(?:[0-5]\d|60))?(?:\s+[AP]M)?(?:\s+(?:[+-][0-9]{4}|UT|[A-Z]{2,3}T|0000 GMT))?(?:\s*\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
+#header T_INVALID_DATE2 Date !~ /^\s*(?:(?i:Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s)?\s*(?:[12]\d|3[01]|0?[1-9])\s+(?i:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec)\s+(?:19[7-9]\d|2\d{3})\s+(?:[01]?\d|2[0-3])\:[0-5]\d(?::(?:[0-5]\d|60))?(?:\s+[AP]M)?(?:\s+(?:[+-][0-9]{4}|UT|[A-Z]{2,3}T|0000 GMT|"GMT"))?(?:\s*\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
# 4.470 5.2627 0.0000 1.000 1.00 0.00 T_TVD_SILLY_URI_OBFU
# allow the obfuscation around the tld as well
-body TVD_SILLY_URI_OBFU m!https?://[a-z0-9-]+\.[a-z0-9-]*\.?[^a-z0-9.:/\s"'\@?\)>-]+[a-z0-9.-]*[a-z]{3}(?:\s|$)!i
+body TVD_SILLY_URI_OBFU m!https?://[a-z0-9-]+\.[a-z0-9-]*\.?[^a-z0-9.:/\s"'\@?\)>-]+[a-z0-9.-]*[a-z]{3}(?:\s|$)!i
# much more generic -- since the current spammer(s) are using com domains,
# let's specifically just look for when they do something like "example*com"
# or "example-com", etc.
-body T_TVD_SILLY_URI_OBFU_COM m!https?://[^/]+[^.]com(?:[/\s]|$)!i
+#body T_TVD_SILLY_URI_OBFU_COM m!https?://[^/]+[^.]com(?:[/\s]|$)!i
ifplugin Mail::SpamAssassin::Plugin::ReplaceTags
@@ -247,20 +248,20 @@
########################################################################
loadplugin Mail::SpamAssassin::Plugin::Sandbox::felicity sandbox-felicity.pm
ifplugin Mail::SpamAssassin::Plugin::Sandbox::felicity
-body QP_LENGTH_77_78 eval:check_quotedprintable_length('77','78')
-body QP_LENGTH_78_79 eval:check_quotedprintable_length('78','79')
-body QP_LENGTH_79_80 eval:check_quotedprintable_length('79','80')
-body QP_LENGTH_80_81 eval:check_quotedprintable_length('80','81')
-body QP_LENGTH_81_82 eval:check_quotedprintable_length('81','82')
-body QP_LENGTH_82_83 eval:check_quotedprintable_length('82','83')
-body QP_LENGTH_83_84 eval:check_quotedprintable_length('83','84')
-body QP_LENGTH_84_85 eval:check_quotedprintable_length('84','85')
-body QP_LENGTH_85_86 eval:check_quotedprintable_length('85','86')
-body QP_LENGTH_86_87 eval:check_quotedprintable_length('86','87')
-body QP_LENGTH_87_88 eval:check_quotedprintable_length('87','88')
-body QP_LENGTH_88_89 eval:check_quotedprintable_length('88','89')
-body QP_LENGTH_89_90 eval:check_quotedprintable_length('89','90')
-body QP_LENGTH_90_INF eval:check_quotedprintable_length('90')
+body T_QP_LENGTH_77_78 eval:check_quotedprintable_length('77','78')
+body T_QP_LENGTH_78_79 eval:check_quotedprintable_length('78','79')
+body T_QP_LENGTH_79_80 eval:check_quotedprintable_length('79','80')
+body T_QP_LENGTH_80_81 eval:check_quotedprintable_length('80','81')
+body T_QP_LENGTH_81_82 eval:check_quotedprintable_length('81','82')
+body T_QP_LENGTH_82_83 eval:check_quotedprintable_length('82','83')
+body T_QP_LENGTH_83_84 eval:check_quotedprintable_length('83','84')
+body T_QP_LENGTH_84_85 eval:check_quotedprintable_length('84','85')
+body T_QP_LENGTH_85_86 eval:check_quotedprintable_length('85','86')
+body T_QP_LENGTH_86_87 eval:check_quotedprintable_length('86','87')
+body T_QP_LENGTH_87_88 eval:check_quotedprintable_length('87','88')
+body T_QP_LENGTH_88_89 eval:check_quotedprintable_length('88','89')
+body T_QP_LENGTH_89_90 eval:check_quotedprintable_length('89','90')
+body T_QP_LENGTH_90_INF eval:check_quotedprintable_length('90')
endif
########################################################################
@@ -275,7 +276,13 @@
body BASE64_LENGTH_79_INF eval:check_base64_length('79')
# 0.177 0.2037 0.0000 1.000 0.00 0.00 TVD_MIME_EPI
-body TVD_MIME_EPI eval:check_mime_epilogue()
+# with one exception, my FPs are all mails through mailing lists that tack on
+# a footer to all mails, outside the MIME boundary.
+# the one exception is a malformed mail which repeated the text/html
+# part content after the closing boundary.
+#
+body T_TVD_MIME_EPI eval:check_msg_parse_flags('mime_epilogue_exists')
+body T_TVD_MIME_NO_HEADERS eval:check_msg_parse_flags('missing_mime_headers')
endif
@@ -339,26 +346,26 @@
# 0.919 1.0598 0.0000 1.000 0.61 0.00 TVD_PDF_02
# 1.597 1.8161 0.1611 0.919 0.86 0.00 TVD_PDF_03
# 0.919 1.0598 0.0000 1.000 0.61 0.00 TVD_PDF_20
-header TVD_PDF_01 Message-Id =~ /<[0-9A-F]{8}\.\d{7}\@/
-header TVD_PDF_02 Subject =~ /\.pdf/
-header TVD_PDF_03 Content-Type =~ /boundary="-{12}\d{24}"/
-meta TVD_PDF_20 TVD_PDF_01 && TVD_PDF_02 && TVD_PDF_03
+header T_TVD_PDF_01 Message-Id =~ /<[0-9A-F]{8}\.\d{7}\@/
+header T_TVD_PDF_02 Subject =~ /\.pdf/
+header T_TVD_PDF_03 Content-Type =~ /boundary="-{12}\d{24}"/
+meta T_TVD_PDF_20 T_TVD_PDF_01 && T_TVD_PDF_02 && T_TVD_PDF_03
# 0.919 1.0598 0.0000 1.000 0.61 0.00 TVD_PDF_21
# 1.133 1.2977 0.0537 0.960 0.71 0.00 TVD_PDF_22
# 0.919 1.0598 0.0000 1.000 0.61 0.00 TVD_PDF_23
-meta TVD_PDF_21 TVD_PDF_01 && TVD_PDF_02
-meta TVD_PDF_22 TVD_PDF_01 && TVD_PDF_03
-meta TVD_PDF_23 TVD_PDF_02 && TVD_PDF_03
+meta T_TVD_PDF_21 T_TVD_PDF_01 && T_TVD_PDF_02
+meta T_TVD_PDF_22 T_TVD_PDF_01 && T_TVD_PDF_03
+meta T_TVD_PDF_23 T_TVD_PDF_02 && T_TVD_PDF_03
# 1.133 1.2977 0.0537 0.960 0.71 0.00 TVD_PDF_22B
# 0.919 1.0598 0.0000 1.000 0.61 0.00 TVD_PDF_23B
# 3.518 3.9094 0.9557 0.804 1.00 0.00 TVD_PDF_03B
# 1.032 1.1894 0.0000 1.000 0.80 0.00 TVD_PDF_25B
-header TVD_PDF_03B Content-Type =~ /boundary="-{2,}\d{8,}"/
-meta TVD_PDF_22B TVD_PDF_01 && TVD_PDF_03B
-meta TVD_PDF_23B TVD_PDF_02 && TVD_PDF_03B
-meta TVD_PDF_25B TVD_PDF_22B && TVD_PDF_24
+header T_TVD_PDF_03B Content-Type =~ /boundary="-{2,}\d{8,}"/
+meta T_TVD_PDF_22B T_TVD_PDF_01 && T_TVD_PDF_03B
+meta T_TVD_PDF_23B T_TVD_PDF_02 && T_TVD_PDF_03B
+meta T_TVD_PDF_25B T_TVD_PDF_22B && T_TVD_PDF_24
# 1.035 1.1910 0.0107 0.991 0.76 0.00 __TVD_PDF_ATT_AP
# 74.260 77.0441 56.0399 0.579 0.51 0.00 __TVD_PDF_ATT_TP
@@ -369,17 +376,19 @@
ifplugin Mail::SpamAssassin::Plugin::MIMEEval
mimeheader __TVD_PDF_ATT_TP Content-Type =~ /^text\/plain/i
mimeheader __TVD_PDF_ATT_AP Content-Type =~ /^application\/pdf/i
+mimeheader __TVD_PDF_ATT_AO Content-Type =~ /^application\/octet-stream/i
endif
-meta TVD_PDF_24 __TVD_PDF_CT_MM && __TVD_PDF_ATT_TP && __TVD_PDF_ATT_AP
-meta TVD_PDF_25 TVD_PDF_22 && TVD_PDF_24
+meta T_TVD_PDF_24 __TVD_PDF_CT_MM && __TVD_PDF_ATT_TP && __TVD_PDF_ATT_AP
+meta T_TVD_PDF_25 T_TVD_PDF_22 && T_TVD_PDF_24
# 1.032 1.1894 0.0000 1.000 0.80 0.00 TVD_PDF_26
# 1.032 1.1894 0.0000 1.000 0.80 0.00 TVD_PDF_26B
-meta TVD_PDF_26 __TVD_PDF_CT_MM && TVD_PDF_03 && __TVD_PDF_ATT_AP
-meta TVD_PDF_26B __TVD_PDF_CT_MM && TVD_PDF_03B && __TVD_PDF_ATT_AP
+meta T_TVD_PDF_26 __TVD_PDF_CT_MM && T_TVD_PDF_03 && __TVD_PDF_ATT_AP
+meta T_TVD_PDF_26B __TVD_PDF_CT_MM && T_TVD_PDF_03B && __TVD_PDF_ATT_AP
+meta T_TVD_PDF_26C __TVD_PDF_CT_MM && T_TVD_PDF_03B && ( __TVD_PDF_ATT_AP || __TVD_PDF_ATT_AO )
# 98.910 98.7434 100.0000 0.497 0.00 0.00 __TVD_PDF_04
# 1.025 1.1812 0.0000 1.000 0.00 0.00 TVD_PDF_27
# body fails due to subject ...
rawbody __TVD_PDF_04 /\S{4}/
-meta TVD_PDF_27 __TVD_PDF_CT_MM && __TVD_PDF_ATT_TP && __TVD_PDF_ATT_AP && !__TVD_PDF_04
+meta T_TVD_PDF_27 __TVD_PDF_CT_MM && __TVD_PDF_ATT_TP && __TVD_PDF_ATT_AP && !__TVD_PDF_04
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm?view=diff&rev=553740&r1=553739&r2=553740
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm Thu Jul 5 22:29:26 2007
@@ -809,6 +809,9 @@
my ( $key, $value ) = split ( /:\s*/, $header, 2 );
$part_msg->header( $key, $value );
}
+ else {
+ $self->{'missing_mime_headers'} = 1;
+ }
$in_body = 1;
# if there's a blank line separator, that's good. if there isn't,
@@ -841,7 +844,7 @@
if ($line_count) {
for(; $line_count > 0; $line_count--) {
if ($body->[-$line_count] =~ /[^\s.]/) {
- $self->{mime_epilogue} = 1;
+ $self->{mime_epilogue_exists} = 1;
last;
}
}
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm?view=diff&rev=553740&r1=553739&r2=553740
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm Thu Jul 5 22:29:26 2007
@@ -47,7 +47,6 @@
$self->register_eval_rule("check_for_uppercase");
$self->register_eval_rule("check_ma_non_text");
$self->register_eval_rule("check_base64_length");
- $self->register_eval_rule("check_mime_epilogue");
return $self;
}
@@ -506,12 +505,6 @@
}
return $result;
-}
-
-sub check_mime_epilogue {
- my $self = shift;
- my $pms = shift;
- return defined $pms->{msg}->{mime_epilogue};
}
1;
Modified: spamassassin/trunk/rules/20_head_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/20_head_tests.cf?view=diff&rev=553740&r1=553739&r2=553740
==============================================================================
--- spamassassin/trunk/rules/20_head_tests.cf (original)
+++ spamassassin/trunk/rules/20_head_tests.cf Thu Jul 5 22:29:26 2007
@@ -147,9 +147,15 @@
# as noted on the dev@ list, ":60" is valid for seconds when there's a leap
# second (12/31/2005 for instance), so let's accept that as valid. ISO 8601
# apparently allows for it.
+# there were a few whitespace issues in the original RE, and I wanted to avoid my
+# two common, but yes invalid, date headers. specifically / \(GMT\)$/ and
+# / 0000 GMT$/. dos has / "GMT"$/ - tvd
+# 2.229 2.7267 0.0517 0.981 0.86 0.00 INVALID_DATE
+# 2.263 2.7486 0.1368 0.953 0.78 0.00 INVALID_DATE_OLD
+#
# WRT the tests, remember that ok and fail are reversed -- so valid dates
# should be "fail" and invalid dates should be "ok".
-header INVALID_DATE Date !~ /^\s*(?:(?i:Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s)?\s*(?:[12]\d|3[01]|0?[1-9])\s+(?i:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec)\s+(?:19[7-9]\d|2\d{3})\s+(?:[01]?\d|2[0-3])\:[0-5]\d(?::(?:[0-5]\d|60))?\s+(?:[AP]M\s+)?(?:[+-][0-9]{4}|UT|[A-Z]{2,3}T)(?:\s+\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
+header INVALID_DATE Date !~ /^\s*(?:(?i:Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s)?\s*(?:[12]\d|3[01]|0?[1-9])\s+(?i:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec)\s+(?:19[7-9]\d|2\d{3})\s+(?:[01]?\d|2[0-3])\:[0-5]\d(?::(?:[0-5]\d|60))?(?:\s+[AP]M)?(?:\s+(?:[+-][0-9]{4}|UT|[A-Z]{2,3}T|0000 GMT|"GMT"))?(?:\s*\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
describe INVALID_DATE Invalid Date: header (not RFC 2822)
test INVALID_DATE fail Sat, 31 Dec 2005 23:59:60 -0500
test INVALID_DATE fail Wed, 31 Jul 2002 16:41:57 +0200