You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by qu...@apache.org on 2004/11/23 20:33:50 UTC

svn commit: r106332 - /spamassassin/trunk/rules/20_head_tests.cf /spamassassin/trunk/rules/20_html_tests.cf /spamassassin/trunk/rules/70_testing.cf

Author: quinlan
Date: Tue Nov 23 11:33:50 2004
New Revision: 106332

Modified:
   spamassassin/trunk/rules/20_head_tests.cf
   spamassassin/trunk/rules/20_html_tests.cf
   spamassassin/trunk/rules/70_testing.cf
Log:
promote T_UNCLOSED_BRACKET to UNCLOSED_BRACKET
promote T_HTML_LINK_OPT_OUT to HTML_LINK_OPT_OUT (though it is rather marginal)
promote best grouping of T_HTML_LINK_IMAGE_* to HTML_SHORT_LINK_IMG_*
delete T_LONGWORDS: too much work for too little spam


Modified: spamassassin/trunk/rules/20_head_tests.cf
Url: http://svn.apache.org/viewcvs/spamassassin/trunk/rules/20_head_tests.cf?view=diff&rev=106332&p1=spamassassin/trunk/rules/20_head_tests.cf&r1=106331&p2=spamassassin/trunk/rules/20_head_tests.cf&r2=106332
==============================================================================
--- spamassassin/trunk/rules/20_head_tests.cf	(original)
+++ spamassassin/trunk/rules/20_head_tests.cf	Tue Nov 23 11:33:50 2004
@@ -592,3 +592,7 @@
 
 header SUBJ_2_NUM_PARENS        Subject =~ /^\(\d+\).*\(\d+\)\s*$/
 describe SUBJ_2_NUM_PARENS      Subject contains common spam sign (2 numbers)
+
+# thanks to David Ritz for passing this on; ready for post-3.0.0
+header UNCLOSED_BRACKET		ALL =~ /\[\d+\r?\n/s
+describe UNCLOSED_BRACKET	Headers contain an unclosed bracket

Modified: spamassassin/trunk/rules/20_html_tests.cf
Url: http://svn.apache.org/viewcvs/spamassassin/trunk/rules/20_html_tests.cf?view=diff&rev=106332&p1=spamassassin/trunk/rules/20_html_tests.cf&r1=106331&p2=spamassassin/trunk/rules/20_html_tests.cf&r2=106332
==============================================================================
--- spamassassin/trunk/rules/20_html_tests.cf	(original)
+++ spamassassin/trunk/rules/20_html_tests.cf	Tue Nov 23 11:33:50 2004
@@ -161,6 +161,9 @@
 body HTML_LINK_PUSH_HERE	eval:html_text_match('anchor', '(?i)(?:push|go|cl[1l]ck)\s*(?:here|this)')
 describe HTML_LINK_PUSH_HERE	HTML link text says "push here" or similar
 
+body HTML_LINK_OPT_OUT		eval:html_text_match('anchor', '(?i)opt.?out')
+describe HTML_LINK_OPT_OUT	HTML link text says "opt out" or similar
+
 # HTML obfuscation
 body HTML_OBFUSCATE_05_10	eval:html_range('obfuscation_ratio','.05','.1')
 body HTML_OBFUSCATE_10_20	eval:html_range('obfuscation_ratio','.1','.2')
@@ -257,6 +260,17 @@
 describe HTML_NONELEMENT_90_100	90% to 100% of HTML elements are non-standard
 
 # short HTML messages with certain attributes
+body __HTML_LINK_IMAGE		eval:html_text_match('anchor', '<img>')
+body __HTML_LENGTH_0000_1024	eval:html_range('length', '0', '1024')
+body __HTML_LENGTH_1024_1536	eval:html_range('length', '1024', '1536')
+body __HTML_LENGTH_1536_2048	eval:html_range('length', '1536', '2048')
+meta HTML_SHORT_LINK_IMG_1	__HTML_LENGTH_0000_1024 && __HTML_LINK_IMAGE
+meta HTML_SHORT_LINK_IMG_2	__HTML_LENGTH_1024_1536 && __HTML_LINK_IMAGE
+meta HTML_SHORT_LINK_IMG_3	__HTML_LENGTH_1536_2048 && __HTML_LINK_IMAGE
+describe HTML_SHORT_LINK_IMG_1	HTML is very short with a linked image
+describe HTML_SHORT_LINK_IMG_2	HTML is very short with a linked image
+describe HTML_SHORT_LINK_IMG_3	HTML is very short with a linked image
+
 body HTML_SHORT_LENGTH		eval:html_eval('length', '< 170')
 describe HTML_SHORT_LENGTH	HTML is extremely short
 

Modified: spamassassin/trunk/rules/70_testing.cf
Url: http://svn.apache.org/viewcvs/spamassassin/trunk/rules/70_testing.cf?view=diff&rev=106332&p1=spamassassin/trunk/rules/70_testing.cf&r1=106331&p2=spamassassin/trunk/rules/70_testing.cf&r2=106332
==============================================================================
--- spamassassin/trunk/rules/70_testing.cf	(original)
+++ spamassassin/trunk/rules/70_testing.cf	Tue Nov 23 11:33:50 2004
@@ -117,9 +117,6 @@
 # describe T_FORGED_DEF_WHITELIST   Forged From: is in default white-list
 # tflags T_FORGED_DEF_WHITELIST     userconf
 
-# thanks to David Ritz for passing this on; ready for post-3.0.0
-header T_UNCLOSED_BRACKET   ALL =~ /\[\d+\r?\n/s
-
 # testing for Jeff, do not use!!!
 ifplugin Mail::SpamAssassin::Plugin::URIDNSBL
 urirhsbl T_URIBL_MP fraud.rhs.mailpolice.com.   A
@@ -127,57 +124,6 @@
 describe T_URIBL_MP URI's domain appears in MailPolice fraud list
 tflags   T_URIBL_MP net
 endif	# Mail::SpamAssassin::Plugin::URIDNSBL
-
-# bug 3680: anchor tests
-body T_HTML_LINK_OPT		eval:html_text_match('anchor', '(?i)opt.?(?:out|in)')
-body T_HTML_LINK_OPT_IN		eval:html_text_match('anchor', '(?i)opt.?in')
-body T_HTML_LINK_OPT_OUT	eval:html_text_match('anchor', '(?i)opt.?out')
-
-# linked image in a short document
-body __HTML_LINK_IMAGE		eval:html_text_match('anchor', '<img>')
-body __HTML_LENGTH_1024		eval:html_eval('length', '< 1024')
-body __HTML_LENGTH_1536		eval:html_eval('length', '< 1536')
-body __HTML_LENGTH_2048		eval:html_eval('length', '< 2048')
-body __HTML_LENGTH_2650		eval:html_eval('length', '< 2650')
-body __HTML_LENGTH_3072		eval:html_eval('length', '< 3072')
-body __HTML_LENGTH_4096		eval:html_eval('length', '< 4096')
-meta T_HTML_LINK_IMAGE_512	__HTML_LENGTH_512 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_1024	__HTML_LENGTH_1024 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_1536	__HTML_LENGTH_1536 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_2048	__HTML_LENGTH_2048 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_2650	__HTML_LENGTH_2650 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_3072	__HTML_LENGTH_3072 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_4096	__HTML_LENGTH_4096 && __HTML_LINK_IMAGE
-
-# try using ranges
-body __HTML_LENGTH_0000_0512	eval:html_range('length', '0', '512')
-body __HTML_LENGTH_0000_1024	eval:html_range('length', '0', '1024')
-body __HTML_LENGTH_0512_1024	eval:html_range('length', '512', '1024')
-body __HTML_LENGTH_1024_1536	eval:html_range('length', '1024', '1536')
-body __HTML_LENGTH_1024_2048	eval:html_range('length', '1024', '2048')
-body __HTML_LENGTH_1536_2048	eval:html_range('length', '1536', '2048')
-
-# two
-meta T_HTML_LINK_IMAGE_ONE	__HTML_LENGTH_0000_1024 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_TWO	__HTML_LENGTH_1024_2048 && __HTML_LINK_IMAGE
-
-# three
-meta T_HTML_LINK_IMAGE_A	__HTML_LENGTH_0000_0512 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_B	__HTML_LENGTH_0512_1024 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_C	__HTML_LENGTH_1024_2048 && __HTML_LINK_IMAGE
-
-# four
-meta T_HTML_LINK_IMAGE_0	__HTML_LENGTH_0000_0512 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_1	__HTML_LENGTH_0512_1024 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_2	__HTML_LENGTH_1024_1536 && __HTML_LINK_IMAGE
-meta T_HTML_LINK_IMAGE_3	__HTML_LENGTH_1536_2048 && __HTML_LINK_IMAGE
-
-# try allowing uppercase for the first letter to supplement LONGWORDS
-# (probably not worth it)
-body __T_LONGWORDS_A	/\b(?:[A-Za-z][a-z]{7,}\s+){6}/
-body __T_LONGWORDS_B	/\b(?:[A-Za-z][a-z]{5,}\s+){9}/
-body __T_LONGWORDS_C	/\b(?:[A-Za-z][a-z]{4,}\s+){10}/
-meta T_LONGWORDS	(__T_LONGWORDS_A + __T_LONGWORDS_B + __T_LONGWORDS_C > 2 && !LONGWORDS)
 
 ##########################################################################
 # bug 2843