You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by qu...@apache.org on 2004/11/20 20:53:12 UTC

svn commit: r105993 - spamassassin/trunk/rules

Author: quinlan
Date: Sat Nov 20 11:53:12 2004
New Revision: 105993

Modified:
   spamassassin/trunk/rules/70_testing.cf
Log:
add T_HTML_LINK_IMAGE* tests for linked images
add T_HTML_IMAGE_ONLY* and T_HTML_IMAGE_RATIO* to experiment with those
  ranges for the existing tests
add T_HTML_LINK_IMAGE_ONLY* to see if adding links improves those tests
add T_HTML_LINK_IMAGE_RATIO* to see if adding links improves those tests
add T_HTML_LINK_IMAGE_BUG* to test for web bugs inside of linked images
do full test on LONGWORDS, try to get rid of one of them...


Modified: spamassassin/trunk/rules/70_testing.cf
==============================================================================
--- spamassassin/trunk/rules/70_testing.cf	(original)
+++ spamassassin/trunk/rules/70_testing.cf	Sat Nov 20 11:53:12 2004
@@ -161,6 +161,36 @@
 body T_HTML_LINK_OPT		eval:html_text_match('anchor', '(?i)opt.?(?:out|in)')
 body T_HTML_LINK_OPT_OUT	eval:html_text_match('anchor', '(?i)opt.?out')
 
+body T_HTML_LINK_IMAGE		eval:html_text_match('anchor', '<img>')
+body T_HTML_LINK_IMAGE2		eval:html_text_match('anchor', '^<img>$')
+body T_HTML_IMAGE_ONLY_28	eval:html_image_only('2400','2800')
+body T_HTML_IMAGE_ONLY_32	eval:html_image_only('2800','3200')
+body T_HTML_IMAGE_RATIO_10	eval:html_image_ratio('0.008','0.010')
+body T_HTML_IMAGE_RATIO_12	eval:html_image_ratio('0.010','0.012')
+
+meta T_HTML_LINK_IMAGE_ONLY_04	(T_HTML_LINK_IMAGE && HTML_IMAGE_ONLY_04)
+meta T_HTML_LINK_IMAGE_ONLY_08	(T_HTML_LINK_IMAGE && HTML_IMAGE_ONLY_08)
+meta T_HTML_LINK_IMAGE_ONLY_12	(T_HTML_LINK_IMAGE && HTML_IMAGE_ONLY_12)
+meta T_HTML_LINK_IMAGE_ONLY_16	(T_HTML_LINK_IMAGE && HTML_IMAGE_ONLY_16)
+meta T_HTML_LINK_IMAGE_ONLY_20	(T_HTML_LINK_IMAGE && HTML_IMAGE_ONLY_20)
+meta T_HTML_LINK_IMAGE_ONLY_24	(T_HTML_LINK_IMAGE && HTML_IMAGE_ONLY_24)
+meta T_HTML_LINK_IMAGE_ONLY_28	(T_HTML_LINK_IMAGE && T_HTML_IMAGE_ONLY_28)
+meta T_HTML_LINK_IMAGE_ONLY_32	(T_HTML_LINK_IMAGE && T_HTML_IMAGE_ONLY_32)
+
+meta T_HTML_LINK_IMAGE_RATIO_02	(T_HTML_LINK_IMAGE && HTML_IMAGE_RATIO_02)
+meta T_HTML_LINK_IMAGE_RATIO_04	(T_HTML_LINK_IMAGE && HTML_IMAGE_RATIO_04)
+meta T_HTML_LINK_IMAGE_RATIO_06	(T_HTML_LINK_IMAGE && HTML_IMAGE_RATIO_06)
+meta T_HTML_LINK_IMAGE_RATIO_08	(T_HTML_LINK_IMAGE && HTML_IMAGE_RATIO_08)
+meta T_HTML_LINK_IMAGE_RATIO_10	(T_HTML_LINK_IMAGE && T_HTML_IMAGE_RATIO_10)
+meta T_HTML_LINK_IMAGE_RATIO_12	(T_HTML_LINK_IMAGE && T_HTML_IMAGE_RATIO_12)
+
+body T_HTML_LINK_IMAGE_BUG_1	eval:html_test('t_anchor_image_bug_1')
+body T_HTML_LINK_IMAGE_BUG_2	eval:html_test('t_anchor_image_bug_2')
+body T_HTML_LINK_IMAGE_BUG_3	eval:html_test('t_anchor_image_bug_3')
+meta T_HTML_LINK_IMAGE_BUG_4	(T_HTML_LINK_IMAGE_BUG_1 && T_HTML_LINK_IMAGE2)
+meta T_HTML_LINK_IMAGE_BUG_5	(T_HTML_LINK_IMAGE_BUG_2 && T_HTML_LINK_IMAGE2)
+meta T_HTML_LINK_IMAGE_BUG_6	(T_HTML_LINK_IMAGE_BUG_3 && T_HTML_LINK_IMAGE2)
+
 body T_HTML_EXTRA_CLOSE_0	eval:html_test('extra_close')
 body T_HTML_EXTRA_CLOSE_1	eval:html_eval('extra_close', '> 1')
 body T_HTML_EXTRA_CLOSE_2	eval:html_eval('extra_close', '> 2')
@@ -177,11 +207,22 @@
 
 # bug 3529 - LONGWORDS false positives
 # fast version, but probably less accurate
-meta T_LONGWORDS_1	(__LONGWORDS_A || __LONGWORDS_D)
-# maybe more accurate versions
-meta T_LONGWORDS_2	(__LONGWORDS_B || __LONGWORDS_C || __LONGWORDS_D)
-meta T_LONGWORDS_3	(__LONGWORDS_B + __LONGWORDS_C + __LONGWORDS_D > 1)
-meta T_LONGWORDS_4	(__LONGWORDS_A + __LONGWORDS_B + __LONGWORDS_C + __LONGWORDS_D > 1)
+meta T_LONGWORDS_01	(__LONGWORDS_A || __LONGWORDS_B)
+meta T_LONGWORDS_02	(__LONGWORDS_A || __LONGWORDS_C)
+meta T_LONGWORDS_03	(__LONGWORDS_A || __LONGWORDS_D)
+meta T_LONGWORDS_04	(__LONGWORDS_B || __LONGWORDS_C)
+meta T_LONGWORDS_05	(__LONGWORDS_B || __LONGWORDS_D)
+meta T_LONGWORDS_06	(__LONGWORDS_C || __LONGWORDS_D)
+meta T_LONGWORDS_07	(__LONGWORDS_A || __LONGWORDS_B || __LONGWORDS_C)
+meta T_LONGWORDS_08	(__LONGWORDS_A || __LONGWORDS_B || __LONGWORDS_D)
+meta T_LONGWORDS_09	(__LONGWORDS_A || __LONGWORDS_C || __LONGWORDS_D)
+meta T_LONGWORDS_10	(__LONGWORDS_B || __LONGWORDS_C || __LONGWORDS_D)
+meta T_LONGWORDS_11	(__LONGWORDS_A + __LONGWORDS_B + __LONGWORDS_C > 1)
+meta T_LONGWORDS_12	(__LONGWORDS_A + __LONGWORDS_B + __LONGWORDS_D > 1)
+meta T_LONGWORDS_13	(__LONGWORDS_A + __LONGWORDS_C + __LONGWORDS_D > 1)
+meta T_LONGWORDS_14	(__LONGWORDS_B + __LONGWORDS_C + __LONGWORDS_D > 1)
+meta T_LONGWORDS_15	(__LONGWORDS_A + __LONGWORDS_B + __LONGWORDS_C + __LONGWORDS_D > 1)
+meta T_LONGWORDS_16	(__LONGWORDS_A + __LONGWORDS_B + __LONGWORDS_C + __LONGWORDS_D > 2)
 
 header T_INVALID_DATE		Date !~ /^\s*(?:(?i:Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s+)?[0-3\s]?[0-9]\s+(?i:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec)\s+(?:[12][901])?[0-9]{2}\s+[0-2]?[0-9](?:\:[0-5][0-9]){1,2}\s+(?:[AP]M\s+)?(?:[+-][0-9]{4}|UT|[A-Z]{2,3}T)(?:\s+\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
 
@@ -479,4 +520,5 @@
 rawbody __INTERRUPTUS           /(?:[a-zA-Z0-9]<[\/ ]{0,2}?(?!br)(?!p)(?!sup)(?!li)(?!b)(?!i)(?!option)(?!a (?:href|name))(?:\b|!--)[^>]{0,64}?>[a-zA-Z0-9].{0,64}){3}/i
 meta T_INTERRUPTUS              MIME_HTML_ONLY && __INTERRUPTUS
 describe T_INTERRUPTUS          Hypertextus Interruptus - HTML-obfuscated text
+