You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by qu...@apache.org on 2004/11/23 20:33:50 UTC
svn commit: r106332 - /spamassassin/trunk/rules/20_head_tests.cf /spamassassin/trunk/rules/20_html_tests.cf /spamassassin/trunk/rules/70_testing.cf
Author: quinlan
Date: Tue Nov 23 11:33:50 2004
New Revision: 106332
Modified:
spamassassin/trunk/rules/20_head_tests.cf
spamassassin/trunk/rules/20_html_tests.cf
spamassassin/trunk/rules/70_testing.cf
Log:
promote T_UNCLOSED_BRACKET to UNCLOSED_BRACKET
promote T_HTML_LINK_OPT_OUT to HTML_LINK_OPT_OUT (rather marginal)
promote best grouping of T_HTML_LINK_IMAGE_* to HTML_SHORT_LINK_IMG_*
delete T_LONGWORDS, too much work for too little spam
Modified: spamassassin/trunk/rules/20_head_tests.cf
Url: http://svn.apache.org/viewcvs/spamassassin/trunk/rules/20_head_tests.cf?view=diff&rev=106332&p1=spamassassin/trunk/rules/20_head_tests.cf&r1=106331&p2=spamassassin/trunk/rules/20_head_tests.cf&r2=106332
==============================================================================
--- spamassassin/trunk/rules/20_head_tests.cf (original)
+++ spamassassin/trunk/rules/20_head_tests.cf Tue Nov 23 11:33:50 2004
@@ -592,3 +592,7 @@
header SUBJ_2_NUM_PARENS Subject =~ /^\(\d+\).*\(\d+\)\s*$/
describe SUBJ_2_NUM_PARENS Subject contains common spam sign (2 numbers)
+
+# thanks to David Ritz for passing this on; ready for post-3.0.0
+header UNCLOSED_BRACKET ALL =~ /\[\d+\r?\n/s
+describe UNCLOSED_BRACKET Headers contain an unclosed bracket
Modified: spamassassin/trunk/rules/20_html_tests.cf
Url: http://svn.apache.org/viewcvs/spamassassin/trunk/rules/20_html_tests.cf?view=diff&rev=106332&p1=spamassassin/trunk/rules/20_html_tests.cf&r1=106331&p2=spamassassin/trunk/rules/20_html_tests.cf&r2=106332
==============================================================================
--- spamassassin/trunk/rules/20_html_tests.cf (original)
+++ spamassassin/trunk/rules/20_html_tests.cf Tue Nov 23 11:33:50 2004
@@ -161,6 +161,9 @@
body HTML_LINK_PUSH_HERE eval:html_text_match('anchor', '(?i)(?:push|go|cl[1l]ck)\s*(?:here|this)')
describe HTML_LINK_PUSH_HERE HTML link text says "push here" or similar
+body HTML_LINK_OPT_OUT eval:html_text_match('anchor', '(?i)opt.?out')
+describe HTML_LINK_OPT_OUT HTML link text says "opt out" or similar
+
# HTML obfuscation
body HTML_OBFUSCATE_05_10 eval:html_range('obfuscation_ratio','.05','.1')
body HTML_OBFUSCATE_10_20 eval:html_range('obfuscation_ratio','.1','.2')
@@ -257,6 +260,17 @@
describe HTML_NONELEMENT_90_100 90% to 100% of HTML elements are non-standard
# short HTML messages with certain attributes
+body __HTML_LINK_IMAGE eval:html_text_match('anchor', '<img>')
+body __HTML_LENGTH_0000_1024 eval:html_range('length', '0', '1024')
+body __HTML_LENGTH_1024_1536 eval:html_range('length', '1024', '1536')
+body __HTML_LENGTH_1536_2048 eval:html_range('length', '1536', '2048')
+meta HTML_SHORT_LINK_IMG_1 __HTML_LENGTH_0000_1024 && __HTML_LINK_IMAGE
+meta HTML_SHORT_LINK_IMG_2 __HTML_LENGTH_1024_1536 && __HTML_LINK_IMAGE
+meta HTML_SHORT_LINK_IMG_3 __HTML_LENGTH_1536_2048 && __HTML_LINK_IMAGE
+describe HTML_SHORT_LINK_IMG_1 HTML is very short with a linked image
+describe HTML_SHORT_LINK_IMG_2 HTML is very short with a linked image
+describe HTML_SHORT_LINK_IMG_3 HTML is very short with a linked image
+
body HTML_SHORT_LENGTH eval:html_eval('length', '< 170')
describe HTML_SHORT_LENGTH HTML is extremely short
Modified: spamassassin/trunk/rules/70_testing.cf
Url: http://svn.apache.org/viewcvs/spamassassin/trunk/rules/70_testing.cf?view=diff&rev=106332&p1=spamassassin/trunk/rules/70_testing.cf&r1=106331&p2=spamassassin/trunk/rules/70_testing.cf&r2=106332
==============================================================================
--- spamassassin/trunk/rules/70_testing.cf (original)
+++ spamassassin/trunk/rules/70_testing.cf Tue Nov 23 11:33:50 2004
@@ -117,9 +117,6 @@
# describe T_FORGED_DEF_WHITELIST Forged From: is in default white-list
# tflags T_FORGED_DEF_WHITELIST userconf
-# thanks to David Ritz for passing this on; ready for post-3.0.0
-header T_UNCLOSED_BRACKET ALL =~ /\[\d+\r?\n/s
-
# testing for Jeff, do not use!!!
ifplugin Mail::SpamAssassin::Plugin::URIDNSBL
urirhsbl T_URIBL_MP fraud.rhs.mailpolice.com. A
@@ -127,57 +124,6 @@
describe T_URIBL_MP URI's domain appears in MailPolice fraud list
tflags T_URIBL_MP net
endif # Mail::SpamAssassin::Plugin::URIDNSBL
-
-# bug 3680: anchor tests
-body T_HTML_LINK_OPT eval:html_text_match('anchor', '(?i)opt.?(?:out|in)')
-body T_HTML_LINK_OPT_IN eval:html_text_match('anchor', '(?i)opt.?in')
-body T_HTML_LINK_OPT_OUT eval:html_text_match('anchor', '(?i)opt.?out')
-
-# linked image in a short document
-body __HTML_LINK_IMAGE eval:html_text_match('anchor', '<img>')
-body __HTML_LENGTH_1024 eval:html_eval('length', '< 1024')
-body __HTML_LENGTH_1536 eval:html_eval('length', '< 1536')
-body __HTML_LENGTH_2048 eval:html_eval('length', '< 2048')
-body __HTML_LENGTH_2650 eval:html_eval('length', '< 2650')
-body __HTML_LENGTH_3072 eval:html_eval('length', '< 3072')
-body __HTML_LENGTH_4096 eval:html_eval('length', '< 4096')
-meta T_HTML_LINK_IMAGE_512 __HTML_LENGTH_512 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_1024 __HTML_LENGTH_1024 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_1536 __HTML_LENGTH_1536 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_2048 __HTML_LENGTH_2048 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_2650 __HTML_LENGTH_2650 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_3072 __HTML_LENGTH_3072 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_4096 __HTML_LENGTH_4096 && __HTML_LINK_IMAGE
-
-# try using ranges
-body __HTML_LENGTH_0000_0512 eval:html_range('length', '0', '512')
-body __HTML_LENGTH_0000_1024 eval:html_range('length', '0', '1024')
-body __HTML_LENGTH_0512_1024 eval:html_range('length', '512', '1024')
-body __HTML_LENGTH_1024_1536 eval:html_range('length', '1024', '1536')
-body __HTML_LENGTH_1024_2048 eval:html_range('length', '1024', '2048')
-body __HTML_LENGTH_1536_2048 eval:html_range('length', '1536', '2048')
-
-# two
-meta T_HTML_LINK_IMAGE_ONE __HTML_LENGTH_0000_1024 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_TWO __HTML_LENGTH_1024_2048 && __HTML_LINK_IMAGE
-
-# three
-meta T_HTML_LINK_IMAGE_A __HTML_LENGTH_0000_0512 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_B __HTML_LENGTH_0512_1024 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_C __HTML_LENGTH_1024_2048 && __HTML_LINK_IMAGE
-
-# four
-meta T_HTML_LINK_IMAGE_0 __HTML_LENGTH_0000_0512 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_1 __HTML_LENGTH_0512_1024 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_2 __HTML_LENGTH_1024_1536 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_3 __HTML_LENGTH_1536_2048 && __HTML_LINK_IMAGE
-
-# try allowing uppercase for the first letter to supplement LONGWORDS
-# (probably not worth it)
-body __T_LONGWORDS_A /\b(?:[A-Za-z][a-z]{7,}\s+){6}/
-body __T_LONGWORDS_B /\b(?:[A-Za-z][a-z]{5,}\s+){9}/
-body __T_LONGWORDS_C /\b(?:[A-Za-z][a-z]{4,}\s+){10}/
-meta T_LONGWORDS (__T_LONGWORDS_A + __T_LONGWORDS_B + __T_LONGWORDS_C > 2 && !LONGWORDS)
##########################################################################
# bug 2843