You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jh...@apache.org on 2011/08/01 04:34:50 UTC
svn commit: r1152676 - in /spamassassin/trunk/rulesrc/sandbox/jhardin:
20_fillform.cf 20_misc_testing.cf
Author: jhardin
Date: Mon Aug 1 02:34:49 2011
New Revision: 1152676
URL: http://svn.apache.org/viewvc?rev=1152676&view=rev
Log:
Add maxhits=N to rules, do some FP avoidance tweaks
Modified:
spamassassin/trunk/rulesrc/sandbox/jhardin/20_fillform.cf
spamassassin/trunk/rulesrc/sandbox/jhardin/20_misc_testing.cf
Modified: spamassassin/trunk/rulesrc/sandbox/jhardin/20_fillform.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rulesrc/sandbox/jhardin/20_fillform.cf?rev=1152676&r1=1152675&r2=1152676&view=diff
==============================================================================
--- spamassassin/trunk/rulesrc/sandbox/jhardin/20_fillform.cf (original)
+++ spamassassin/trunk/rulesrc/sandbox/jhardin/20_fillform.cf Mon Aug 1 02:34:49 2011
@@ -64,10 +64,10 @@ ifplugin Mail::SpamAssassin::Plugin::Rep
# 5+ fields that body paragraph processing didn't paste together
body __FILL_THIS_FORM_PARTIAL /^\s?<FF_LNNO>?<FF_YOUR>(?:<FF_ALL><ANDOR>?){1,3}<FF_SUFFIX>(?:<FF_BLANK1>|(?:[-=_.,:;*\s]|=20){1,4}$)/im
replace_rules __FILL_THIS_FORM_PARTIAL
- tflags __FILL_THIS_FORM_PARTIAL multiple
+ tflags __FILL_THIS_FORM_PARTIAL multiple maxhits=5
rawbody __FILL_THIS_FORM_PARTIAL_RAW /^(?>\s{0,50})<FF_LNNO>?<FF_YOUR>(?:<FF_ALL><ANDOR>?){1,3}<FF_SUFFIX>(?:<FF_BLANK1>|(?:[-=_.,:;*\s]|=20|&nbsp;|<\/\w+>){0,4}$)/im
replace_rules __FILL_THIS_FORM_PARTIAL_RAW
- tflags __FILL_THIS_FORM_PARTIAL_RAW multiple
+ tflags __FILL_THIS_FORM_PARTIAL_RAW multiple maxhits=5
# 5+ fields in either format
# For easy use in metas
Modified: spamassassin/trunk/rulesrc/sandbox/jhardin/20_misc_testing.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rulesrc/sandbox/jhardin/20_misc_testing.cf?rev=1152676&r1=1152675&r2=1152676&view=diff
==============================================================================
--- spamassassin/trunk/rulesrc/sandbox/jhardin/20_misc_testing.cf (original)
+++ spamassassin/trunk/rulesrc/sandbox/jhardin/20_misc_testing.cf Mon Aug 1 02:34:49 2011
@@ -140,10 +140,10 @@ describe MAILER_EQ_ORG X-M
# observed in UCE 9/2009
#header __HDRS_LCASE ALL =~ /\n(?:Reply-to|Message-id|Content-type|X-MSMail-priority|from|subject|to|Disposition-notification-to):/sm
header __HDRS_LCASE ALL =~ /\n(?:Message-id|Content-type|X-MSMail-priority|from|subject|to|cc|Disposition-notification-to):/sm
-tflags __HDRS_LCASE multiple
-meta HDRS_LCASE __HDRS_LCASE && !__VIA_ML && !__freemail_safe && !__THREADED && !__DOS_HAS_LIST_ID && !__UNUSABLE_MSGID
+tflags __HDRS_LCASE multiple maxhits=2
+meta HDRS_LCASE __HDRS_LCASE && !__VIA_ML && !__freemail_safe && !__THREADED && !__UNUSABLE_MSGID
meta __MANY_HDRS_LCASE __HDRS_LCASE > 1
-meta MANY_HDRS_LCASE __MANY_HDRS_LCASE && !__VIA_ML && !__freemail_safe && !__THREADED && !__DOS_HAS_LIST_ID && !__UNUSABLE_MSGID
+meta MANY_HDRS_LCASE __MANY_HDRS_LCASE && !__VIA_ML && !__freemail_safe && !__THREADED && !__UNUSABLE_MSGID
describe MANY_HDRS_LCASE Odd capitalization of multiple message headers
# Some metas that appear to perform well in masscheck
@@ -246,20 +246,20 @@ body CALL_SKYPE /\b
# <SPAN> tags shouldn't appear in the midst of text
rawbody __SPAN_BEG_TEXT /[a-z]{2}<(?i:span)\s/
-tflags __SPAN_BEG_TEXT multiple
+tflags __SPAN_BEG_TEXT multiple maxhits=5
rawbody __SPAN_END_TEXT /[^;>]<\/(?i:span)>[a-z]{3}/
-tflags __SPAN_END_TEXT multiple
+tflags __SPAN_END_TEXT multiple maxhits=5
meta __MANY_SPAN_IN_TEXT (__SPAN_BEG_TEXT > 4) && (__SPAN_END_TEXT > 4)
meta MANY_SPAN_IN_TEXT __MANY_SPAN_IN_TEXT && !__VIA_ML
describe MANY_SPAN_IN_TEXT Many <SPAN> tags embedded within text
tflags MANY_SPAN_IN_TEXT publish
#score MANY_SPAN_IN_TEXT 2.50
-uri __FEEDPROXY_URI m;http://feedproxy\.google\.com/;i
-rawbody __FEEDPROXY m;http://feedproxy\.google\.com/;i
-tflags __FEEDPROXY multiple
-meta MANY_GOOG_PROXY __FEEDPROXY > 4
-describe MANY_GOOG_PROXY Many Google feedproxy URIs
+#uri __FEEDPROXY_URI m;http://feedproxy\.google\.com/;i
+#rawbody __FEEDPROXY m;http://feedproxy\.google\.com/;i
+#tflags __FEEDPROXY multiple maxhits=5
+#meta MANY_GOOG_PROXY __FEEDPROXY > 4
+#describe MANY_GOOG_PROXY Many Google feedproxy URIs
rawbody TINY_FLOAT /\bstyle\s*=\s*"[^"]{0,40}?(?:(?:FONT-SIZE\s*:\s+\dpx|FLOAT\s*:\s+(?:right|left))(?:;\s+)?(?:(?!(?:FONT-SIZE|FLOAT))\w+:\s+\w+;?\s*)*){2}/i
describe TINY_FLOAT Has small-font floating HTML - text obfuscation?
@@ -369,7 +369,7 @@ describe FROM_URI
# observed in spam feb 2010
# Apparently-To per RFC2821 SHOULD NOT be used
header __APPARENTLY_TO Apparently-To =~ /<.*>/
-tflags __APPARENTLY_TO multiple nopublish
+tflags __APPARENTLY_TO multiple maxhits=21 nopublish
meta HAS_APPARENTLY_TO __APPARENTLY_TO > 0
describe HAS_APPARENTLY_TO Has deprecated Apparently-To header
#score HAS_APPARENTLY_TO 0.50
@@ -490,10 +490,10 @@ describe DATE_DOTS
uri IMAGESHACK_URI /\.imageshack\.us\//i
describe IMAGESHACK_URI URI contains imageshack.us
-uri __DYNDNS_URI /\.dyndns\.org(?:\/.*)?/i
-tflags __DYNDNS_URI multiple
-meta DYNDNS_URIS __DYNDNS_URI > 1
-describe DYNDNS_URIS Has multiple dyndns.org URIs
+#uri __DYNDNS_URI /\.dyndns\.org(?:\/.*)?/i
+#tflags __DYNDNS_URI multiple maxhits=2
+#meta DYNDNS_URIS __DYNDNS_URI > 1
+#describe DYNDNS_URIS Has multiple dyndns.org URIs
uri __BITLY_URI /\/\/bit\.ly\//i
#describe __BITLY_URI URI contains bit.ly
@@ -523,18 +523,18 @@ header RPT_SPAM_HDR
#header LONG_FROM From =~ /<[^<@]{40,}\w\@/
-if can(Mail::SpamAssassin::Conf::feature_bug6558_free)
- body __MANY_RECORDS_1 /\s[A-Z][a-z]{1,30}s(?:\sDatabase)?[-:\s]{2,5}(?i:1\smillion\s|\d[\d,.]{1,8}[Kk]?\s(?i:thousand\s|million\s)?)(?i:total\s|full\sdata\s)?(?i:email|record)s/
- tflags __MANY_RECORDS_1 multiple
- body __MANY_RECORDS_2 /\W{1,4}\s(?:[a-z\/]{1,20}\s){0,4}(?:doctor|physician|provider|therapist|counselor|dentist|veterinarian|clinic|hospital|agent|chiropractor|psychologist|companie|supplier)s/i
- tflags __MANY_RECORDS_2 multiple
- body __MANY_RECORDS_3 /\W{1,4}\s(?:(?:[A-Z]{1,2}[a-z\/]{0,20}|and)\s){0,4}[A-Z][a-z]{1,20}s Database/
- tflags __MANY_RECORDS_3 multiple
- #meta BIG_LISTS (__MANY_RECORDS_1 + __MANY_RECORDS_2 + __MANY_RECORDS_3) > 5
- meta __MANY_BIG_LISTS (__MANY_RECORDS_1 + __MANY_RECORDS_2 + __MANY_RECORDS_3) > 15
- meta MANY_BIG_LISTS __MANY_BIG_LISTS && !HTML_MESSAGE && !__CTYPE_MULTIPART_ANY && !__HS_SUBJ_RE_FW && !__HAS_THREAD_INDEX
- describe MANY_BIG_LISTS Lots of mailing lists / databases available!
-endif
+#if can(Mail::SpamAssassin::Conf::feature_bug6558_free)
+# body __MANY_RECORDS_1 /\s[A-Z][a-z]{1,30}s(?:\sDatabase)?[-:\s]{2,5}(?i:1\smillion\s|\d[\d,.]{1,8}[Kk]?\s(?i:thousand\s|million\s)?)(?i:total\s|full\sdata\s)?(?i:email|record)s/
+# tflags __MANY_RECORDS_1 multiple maxhits=16
+# body __MANY_RECORDS_2 /\W{1,4}\s(?:[a-z\/]{1,20}\s){0,4}(?:doctor|physician|provider|therapist|counselor|dentist|veterinarian|clinic|hospital|agent|chiropractor|psychologist|companie|supplier)s/i
+# tflags __MANY_RECORDS_2 multiple maxhits=16
+# body __MANY_RECORDS_3 /\W{1,4}\s(?:(?:[A-Z]{1,2}[a-z\/]{0,20}|and)\s){0,4}[A-Z][a-z]{1,20}s Database/
+# tflags __MANY_RECORDS_3 multiple maxhits=16
+# #meta BIG_LISTS (__MANY_RECORDS_1 + __MANY_RECORDS_2 + __MANY_RECORDS_3) > 5
+# meta __MANY_BIG_LISTS (__MANY_RECORDS_1 + __MANY_RECORDS_2 + __MANY_RECORDS_3) > 15
+# meta MANY_BIG_LISTS __MANY_BIG_LISTS && !HTML_MESSAGE && !__CTYPE_MULTIPART_ANY && !__HS_SUBJ_RE_FW && !__HAS_THREAD_INDEX
+# describe MANY_BIG_LISTS Lots of mailing lists / databases available!
+#endif
# Suggested by Gerard Z 2010-08-15
@@ -603,19 +603,11 @@ header ART_NAMES_ORG Recei
#score ART_NAMES_ORG 4.0
describe ART_NAMES_ORG Arthur Simmons - registrar spammer extraordinaire
-# Causes infinite loops if compiled on some systems (users list 2011-03-20)
-#body __PILL_PRICE_1_EVIL m;\$?[\d .]{3,8}(?:/|per|each) ?(?:pill|tablet|cap(?:sule|let));i
-#body __PILL_PRICE_2_EVIL /(?:pill|tablet|cap(?:sule|let))s \$?[\d .]{3,8}/i
-#body __PILL_PRICE_3_EVIL /free (?:pill|tablet|cap(?:sule|let))s/i
-#tflags __PILL_PRICE_1_EVIL multiple
-#tflags __PILL_PRICE_2_EVIL multiple
-#tflags __PILL_PRICE_3_EVIL multiple
-#meta MANY_PILL_PRICE (__PILL_PRICE_1_EVIL + __PILL_PRICE_2_EVIL + __PILL_PRICE_3_EVIL) > 2
if can(Mail::SpamAssassin::Conf::feature_bug6558_free)
body __PILL_PRICE_01 m;(?=[\d .f])(?:free|[\d .]{3}(?:/|per|each)) ?(?=[ptc])(?:pill|tablet|cap(?:sule|let))s?\b;i
body __PILL_PRICE_02 /(?=[ptc])(?:pill|tablet|cap(?:sule|let))s[ :-]{1,5}\$?[\d .]{3}/i
- tflags __PILL_PRICE_01 multiple
- tflags __PILL_PRICE_02 multiple
+ tflags __PILL_PRICE_01 multiple maxhits=3
+ tflags __PILL_PRICE_02 multiple maxhits=3
meta MANY_PILL_PRICE (__PILL_PRICE_01 + __PILL_PRICE_02) > 2
describe MANY_PILL_PRICE Prices for pills
endif
@@ -662,38 +654,38 @@ endif
# for sale newsletters
if can(Mail::SpamAssassin::Conf::feature_bug6558_free)
body __FOR_SALE_OBO /\bor best offer\b/i
- tflags __FOR_SALE_OBO multiple
+ tflags __FOR_SALE_OBO multiple maxhits=6
meta __FOR_SALE_OBO_MANY __FOR_SALE_OBO > 5
body __FOR_SALE_PRC_1K /\bprice:? \$\d,?\d\d\d[.\s]/i
- tflags __FOR_SALE_PRC_1K multiple
+ tflags __FOR_SALE_PRC_1K multiple maxhits=11
meta __FOR_SALE_PRC_1K_MANY __FOR_SALE_PRC_1K > 10
body __FOR_SALE_PRC_10K /\bprice:? \$\d\d,\d\d\d/i
- tflags __FOR_SALE_PRC_10K multiple
+ tflags __FOR_SALE_PRC_10K multiple maxhits=11
meta __FOR_SALE_PRC_10K_MANY __FOR_SALE_PRC_10K > 10
body __FOR_SALE_PRC_100K /\bprice:? \$\d\d\d,\d\d\d/i
- tflags __FOR_SALE_PRC_100K multiple
+ tflags __FOR_SALE_PRC_100K multiple maxhits=11
meta __FOR_SALE_PRC_100K_MANY __FOR_SALE_PRC_100K > 5
meta __FOR_SALE_PRC_MANY (__FOR_SALE_PRC_1K + __FOR_SALE_PRC_10K + __FOR_SALE_PRC_100K) > 20
body __FOR_SALE_LTP /00\.? (?:less 10%|LTP)/i
- tflags __FOR_SALE_LTP multiple
+ tflags __FOR_SALE_LTP multiple maxhits=11
meta __FOR_SALE_LTP_MANY __FOR_SALE_LTP > 10
body __FOR_SALE_NET /00\.? NET/i
- tflags __FOR_SALE_NET multiple
+ tflags __FOR_SALE_NET multiple maxhits=11
meta __FOR_SALE_NET_MANY __FOR_SALE_NET > 10
rawbody __FOR_SALE_PRC_EOL /\s\$\d{1,3},\d00(?:\.00)?$/m
- tflags __FOR_SALE_PRC_EOL multiple
+ tflags __FOR_SALE_PRC_EOL multiple maxhits=11
meta __FOR_SALE_PRC_EOL_MANY __FOR_SALE_PRC_EOL > 10
endif
uri __URI_MAILTO /^mailto:/
-tflags __URI_MAILTO multiple
+tflags __URI_MAILTO multiple maxhits=16
meta __URI_MAILTO_MANY __URI_MAILTO > 15
@@ -713,11 +705,11 @@ describe GAPPY_PHONE_NA Phone
full __GAPPY_HTML_01 m;</?[a-z]{1,6}(?:\s[^>]{0,40})?>(?:\s|=09){0,80}(?:(?!\d)[\w'()\#,.:!]{1,15}(?:\s|=09){4,80}){7}\S;
full __GAPPY_HTML_02 m;\S(?:(?:\s|=09){4,80}(?!\d)[\w'()\#,.:!]{1,15}){7}(?:\s|=09){0,5}</?[a-z]{1,6}/?>;
full __GAPPY_HTML_03 /^(?:=09){3,20}</m
-tflags __GAPPY_HTML_03 multiple
+tflags __GAPPY_HTML_03 multiple maxhits=11
full __GAPPY_HTML_04 /^(?:=0A){4,20}/m
-tflags __GAPPY_HTML_04 multiple
+tflags __GAPPY_HTML_04 multiple maxhits=11
meta __GAPPY_HTML __MIME_HTML && (__GAPPY_HTML_01 || __GAPPY_HTML_02 || (__GAPPY_HTML_03 > 10) || (__GAPPY_HTML_04 > 10))
-meta GAPPY_HTML __GAPPY_HTML && !__UNSUB_LINK
+meta GAPPY_HTML __GAPPY_HTML && !__UNSUB_LINK && !__RP_MATCHES_RCVD && !__RCD_RDNS_MAIL_MESSY
describe GAPPY_HTML HTML body with much useless whitespace
# Try to improve S/O per bug 6119
@@ -726,7 +718,7 @@ meta TVD_SPACE_RATIO_MINFP __TVD
# sample from users list: Subject: Sta ffWork sFastToSen dTab le tsGood s
header __SUBJ_BROKEN_WORD Subject =~ /\s(?!i[PT])[a-z]{1,3}[A-Z][a-z]{2}/
-tflags __SUBJ_BROKEN_WORD multiple
+tflags __SUBJ_BROKEN_WORD multiple maxhits=2
meta SUBJ_BROKEN_WORDS __SUBJ_BROKEN_WORD > 1
describe SUBJ_BROKEN_WORDS Subject contains odd word breaks