You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by qu...@apache.org on 2004/11/22 21:48:28 UTC

svn commit: r106220 - /spamassassin/trunk/rules/70_testing.cf

Author: quinlan
Date: Mon Nov 22 12:48:27 2004
New Revision: 106220

Modified:
   spamassassin/trunk/rules/70_testing.cf
Log:
bunch of rule promotions
T_HTML_LINK_IMAGE* tests worked great, try using ranges instead to find
  the best breakdown
another spin on LONGWORDS supplement
remove T_HTML_EMPTY_* (blank_uri) tests; they didn't seem to work well on HTML ham


Modified: spamassassin/trunk/rules/70_testing.cf
Url: http://svn.apache.org/viewcvs/spamassassin/trunk/rules/70_testing.cf?view=diff&rev=106220&p1=spamassassin/trunk/rules/70_testing.cf&r1=106219&p2=spamassassin/trunk/rules/70_testing.cf&r2=106220
==============================================================================
--- spamassassin/trunk/rules/70_testing.cf	(original)
+++ spamassassin/trunk/rules/70_testing.cf	Mon Nov 22 12:48:27 2004
@@ -149,23 +149,35 @@
 meta T_HTML_LINK_IMAGE_3072	__HTML_LENGTH_3072 && __HTML_LINK_IMAGE
 meta T_HTML_LINK_IMAGE_4096	__HTML_LENGTH_4096 && __HTML_LINK_IMAGE
 
-body T_HTML_EXTRA_CLOSE_06	eval:html_range('closed_extra_ratio', '0.06', 'inf')
-body T_HTML_EXTRA_CLOSE_07	eval:html_range('closed_extra_ratio', '0.07', 'inf')
-body T_HTML_EXTRA_CLOSE_08	eval:html_range('closed_extra_ratio', '0.08', 'inf')
-body T_HTML_EXTRA_CLOSE_09	eval:html_range('closed_extra_ratio', '0.09', 'inf')
-body T_HTML_EXTRA_CLOSE_10	eval:html_range('closed_extra_ratio', '0.10', 'inf')
-body T_HTML_EXTRA_CLOSE_11	eval:html_range('closed_extra_ratio', '0.11', 'inf')
-body T_HTML_EXTRA_CLOSE_12	eval:html_range('closed_extra_ratio', '0.12', 'inf')
-body T_HTML_EXTRA_CLOSE_13	eval:html_range('closed_extra_ratio', '0.13', 'inf')
-body T_HTML_EXTRA_CLOSE_14	eval:html_range('closed_extra_ratio', '0.14', 'inf')
-body T_HTML_EXTRA_CLOSE_15	eval:html_range('closed_extra_ratio', '0.15', 'inf')
-body T_HTML_EXTRA_CLOSE_16	eval:html_range('closed_extra_ratio', '0.16', 'inf')
+# try using ranges
+body __HTML_LENGTH_0000_0512	eval:html_range('length', '0', '512')
+body __HTML_LENGTH_0000_1024	eval:html_range('length', '0', '1024')
+body __HTML_LENGTH_0512_1024	eval:html_range('length', '512', '1024')
+body __HTML_LENGTH_1024_1536	eval:html_range('length', '1024', '1536')
+body __HTML_LENGTH_1024_2048	eval:html_range('length', '1024', '2048')
+body __HTML_LENGTH_1536_2048	eval:html_range('length', '1536', '2048')
+
+# two
+meta T_HTML_LINK_IMAGE_ONE	__HTML_LENGTH_0000_1024 && __HTML_LINK_IMAGE
+meta T_HTML_LINK_IMAGE_TWO	__HTML_LENGTH_1024_2048 && __HTML_LINK_IMAGE
+
+# three
+meta T_HTML_LINK_IMAGE_A	__HTML_LENGTH_0000_0512 && __HTML_LINK_IMAGE
+meta T_HTML_LINK_IMAGE_B	__HTML_LENGTH_0512_1024 && __HTML_LINK_IMAGE
+meta T_HTML_LINK_IMAGE_C	__HTML_LENGTH_1024_2048 && __HTML_LINK_IMAGE
+
+# four
+meta T_HTML_LINK_IMAGE_0	__HTML_LENGTH_0000_0512 && __HTML_LINK_IMAGE
+meta T_HTML_LINK_IMAGE_1	__HTML_LENGTH_0512_1024 && __HTML_LINK_IMAGE
+meta T_HTML_LINK_IMAGE_2	__HTML_LENGTH_1024_1536 && __HTML_LINK_IMAGE
+meta T_HTML_LINK_IMAGE_3	__HTML_LENGTH_1536_2048 && __HTML_LINK_IMAGE
 
-# try allowing uppercase for the first letter
+# try allowing uppercase for the first letter to supplement LONGWORDS
+# (probably not worth it)
 body __T_LONGWORDS_A	/\b(?:[A-Za-z][a-z]{7,}\s+){6}/
 body __T_LONGWORDS_B	/\b(?:[A-Za-z][a-z]{5,}\s+){9}/
 body __T_LONGWORDS_C	/\b(?:[A-Za-z][a-z]{4,}\s+){10}/
-meta T_LONGWORDS	(__T_LONGWORDS_A + __T_LONGWORDS_B + __T_LONGWORDS_C > 1)
+meta T_LONGWORDS	(__T_LONGWORDS_A + __T_LONGWORDS_B + __T_LONGWORDS_C > 2 && !LONGWORDS)
 
 ##########################################################################
 # bug 2843
@@ -265,10 +277,6 @@
 
 ##########################################################################
 
-body	T_HTML_EMPTY_URI	eval:html_test('blank_uri')
-body	T_HTML_EMPTY_URI_IMG	eval:html_text_match('blank_uri', '^img$')
-body	T_HTML_EMPTY_URI_IFRAME	eval:html_text_match('blank_uri', '^iframe$')
-
 # bug 3186
 uri T_BIZ_TLD		/^(?:https?:\/\/|mailto:)[^\/]+\.biz(?![-a-z0-9])/i
 
@@ -277,12 +285,7 @@
 # bug 3570
 # anti-phishing rules, will probably have a low hit-rate
 #
-#  1.415   1.7346   0.0000    1.000   0.85    0.01  T_FORGED_MSGID_HOTMAIL
-#  1.351   1.6569   0.0000    1.000   0.84    0.01  T_FORGED_MSGID_YAHOO
-#  0.443   0.5433   0.0000    1.000   0.73    0.01  T_FORGED_MSGID_EXCITE
-#  0.198   0.2423   0.0000    1.000   0.66    0.01  T_FORGED_MSGID_MSN
 #  0.057   0.0698   0.0000    1.000   0.55    0.01  T_FORGED_EBAY
-#  0.052   0.0642   0.0000    1.000   0.55    0.01  T_FORGED_MSGID_AOL
 #  0.047   0.0573   0.0000    1.000   0.54    0.01  T_FORGED_CITI
 #  0.034   0.0412   0.0000    1.000   0.52    0.01  T_FORGED_SUNTRUST
 #  0.016   0.0191   0.0000    1.000   0.49    0.01  T_FORGED_ABOUT
@@ -318,41 +321,6 @@
 meta     T_FORGED_ABOUT		(!__RCVD_ABOUT_COM && __FROM_ABOUT_COM && !__URI_ABOUT_COM)
 describe T_FORGED_ABOUT		Message appears to be forged, (about.com)
 
-header   __AT_YAHOO_MSGID	MESSAGEID =~ /\@yahoo\.com\b/i
-header   __FROM_YAHOO_COM	From =~ /\@yahoo\.com\b/i
-meta     T_FORGED_MSGID_YAHOO	(__AT_YAHOO_MSGID && !__FROM_YAHOO_COM)
-describe T_FORGED_MSGID_YAHOO	Message-ID is forged, (yahoo.com)
-
-header   __AT_MSN_MSGID		MESSAGEID =~ /\@msn\.com\b/i
-header   __FROM_MSN_COM		From =~ /\@msn\.com\b/i
-meta     T_FORGED_MSGID_MSN	(__AT_MSN_MSGID && (!__FROM_MSN_COM && !__FROM_HOTMAIL_COM && !__FROM_YAHOO_COM))
-describe T_FORGED_MSGID_MSN	Message-ID is forged, (msn.com)
-
-header   __AT_HOTMAIL_MSGID	MESSAGEID =~ /\@hotmail\.com\b/i
-header   __FROM_HOTMAIL_COM	From =~ /\@hotmail\.com\b/i
-meta     T_FORGED_MSGID_HOTMAIL	(__AT_HOTMAIL_MSGID && (!__FROM_HOTMAIL_COM && !__FROM_MSN_COM && !__FROM_YAHOO_COM))
-describe T_FORGED_MSGID_HOTMAIL	Message-ID is forged, (hotmail.com)
-
-header   __AT_AOL_MSGID		MESSAGEID =~ /\@aol\.com\b/i
-header   __FROM_AOL_COM		From =~ /\@aol\.com\b/i
-meta     T_FORGED_MSGID_AOL	(__AT_AOL_MSGID && !__FROM_AOL_COM)
-describe T_FORGED_MSGID_AOL	Message-ID is forged, (aol.com)
-
-header   __AT_EXCITE_MSGID	MESSAGEID =~ /\@excite\.com\b/i
-header   __MY_RCVD_EXCITE	Received =~ /\.excite\.com\b/i
-meta     T_FORGED_MSGID_EXCITE	(__AT_EXCITE_MSGID && !__MY_RCVD_EXCITE)
-describe T_FORGED_MSGID_EXCITE	Message-ID is forged, (excite.com)
-
-uri      T_SPOOF_COM2OTH 	m{^https?://(?:\w+\.)+?com\.(?:\w+\.){2,}}i
-describe T_SPOOF_COM2OTH	 a.com.b.c
-
-uri      T_SPOOF_COM2COM	 m{^https?://(?:\w+\.)+?com\.(?:\w+\.)+?com}i
-describe T_SPOOF_COM2COM 	a.com.b.com
-
-# CDNs (Akamai (edgesuite), Speedera, and NYUD, so far) do this, so skip them.
-uri      T_SPOOF_OURI		m{^https?://(?:[a-z0-9_-]+?\.){2,}(?:com|net|org|biz|info|edu|www)(?!\.(?:\w+\.)?(?:edgesuite|nyud|speedera)\.net)(?:\.[a-z0-9_%-]+?){2,}(?:(?::|%3a)\d+)?}i
-describe T_SPOOF_OURI	 	URL has items in odd places
-
 ##########################################################################
 
 # http://bugzilla.spamassassin.org/show_bug.cgi?id=3645
@@ -373,13 +341,3 @@
 header T_DNS_FROM_AHBL_RHSBL	eval:check_rbl_envfrom('t_ahbl', 'rhsbl.ahbl.org.')
 tflags T_DNS_FROM_AHBL_RHSBL	net
 
-# throw a few more on the stack
-uri      T_SPOOF_NET2COM	 m{^https?://(?:\w+\.)+?(?:net|org)\.(?:\w+\.)+?com}i
-describe T_SPOOF_NET2COM 	a.net.b.com
-
-uri      T_SPOOF_COM2NET	 m{^https?://(?:\w+\.)+?com\.(?:\w+\.)+?(?:net|org)}i
-describe T_SPOOF_COM2NET 	a.com.b.net
-
-# might be covered with T_SPOOF_OURI ?
-uri      T_SPOOF_NET2OTH 	m{^https?://(?:\w+\.)+?(?:net|org)\.(?:\w+\.){2,}}i
-describe T_SPOOF_NET2OTH	 a.net.b.c