You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by qu...@apache.org on 2005/01/17 22:50:04 UTC

svn commit: r125445 - /spamassassin/trunk/rules/70_testing.cf

Author: quinlan
Date: Mon Jan 17 13:50:02 2005
New Revision: 125445

URL: http://svn.apache.org/viewcvs?view=rev&rev=125445
Log:
work on T_MSGID_SHORT*, T_HEADER*, and T_URI_UPPER_LOWER*

Modified:
   spamassassin/trunk/rules/70_testing.cf

Modified: spamassassin/trunk/rules/70_testing.cf
Url: http://svn.apache.org/viewcvs/spamassassin/trunk/rules/70_testing.cf?view=diff&rev=125445&p1=spamassassin/trunk/rules/70_testing.cf&r1=125444&p2=spamassassin/trunk/rules/70_testing.cf&r2=125445
==============================================================================
--- spamassassin/trunk/rules/70_testing.cf	(original)
+++ spamassassin/trunk/rules/70_testing.cf	Mon Jan 17 13:50:02 2005
@@ -309,27 +309,11 @@
 ########################################################################
 
 # short message-ids
-# full id #1
-header T_MSGID_SHORT_A15	MESSAGEID =~ /^.{1,15}$/
-header T_MSGID_SHORT_A16	MESSAGEID =~ /^.{1,16}$/
-# local part #1
-header T_MSGID_SHORT_LP_0_1	MESSAGEID =~ /<.{0,1}\@/
-header T_MSGID_SHORT_LP_0_2	MESSAGEID =~ /<.{0,2}\@/
-header T_MSGID_SHORT_LP_0_3	MESSAGEID =~ /<.{0,3}\@/
-header T_MSGID_SHORT_LP_0_4	MESSAGEID =~ /<.{0,4}\@/
-header T_MSGID_SHORT_LP_0_5	MESSAGEID =~ /<.{0,5}\@/
-# combo #1
-meta T_MSGID_SHORT_LP_0_1A	T_MSGID_SHORT_LP_0_1 && T_MSGID_SHORT_A15
-meta T_MSGID_SHORT_LP_0_2A	T_MSGID_SHORT_LP_0_2 && T_MSGID_SHORT_A15
-meta T_MSGID_SHORT_LP_0_3A	T_MSGID_SHORT_LP_0_3 && T_MSGID_SHORT_A15
-meta T_MSGID_SHORT_LP_0_4A	T_MSGID_SHORT_LP_0_4 && T_MSGID_SHORT_A15
-meta T_MSGID_SHORT_LP_0_5A	T_MSGID_SHORT_LP_0_5 && T_MSGID_SHORT_A15
-# combo #2
-meta T_MSGID_SHORT_LP_0_1B	T_MSGID_SHORT_LP_0_1 && T_MSGID_SHORT_A16
-meta T_MSGID_SHORT_LP_0_2B	T_MSGID_SHORT_LP_0_2 && T_MSGID_SHORT_A16
-meta T_MSGID_SHORT_LP_0_3B	T_MSGID_SHORT_LP_0_3 && T_MSGID_SHORT_A16
-meta T_MSGID_SHORT_LP_0_4B	T_MSGID_SHORT_LP_0_4 && T_MSGID_SHORT_A16
-meta T_MSGID_SHORT_LP_0_5B	T_MSGID_SHORT_LP_0_5 && T_MSGID_SHORT_A16
+header T_MSGID_SHORT_A		MESSAGEID =~ /^.{1,15}$/
+header T_MSGID_SHORT_B		MESSAGEID =~ /^.{1,15}$/m
+header T_MSGID_SHORT_C		Message-Id =~ /^.{1,15}$/
+meta T_MSGID_SHORT_D		(T_MSGID_SHORT_A || T_MSGID_SHORT_B)
+meta T_MSGID_SHORT_E		(T_MSGID_SHORT_A || T_MSGID_SHORT_C)
 
 # some testing work on case-sensitive names
 header T_HEADER_01	ALL =~ /\nAlternate-Recipient:/
@@ -403,25 +387,11 @@
 header T_HEADER_71	ALL =~ /\nX-tri:/
 header T_HEADER_72	ALL =~ /\nX-yd:/
 
-meta T_HEADER_01_TO_10	(T_HEADER_33 || T_HEADER_16 || T_HEADER_07 || T_HEADER_58 || T_HEADER_01 || T_HEADER_08 || T_HEADER_05 || T_HEADER_03 || T_HEADER_49 || T_HEADER_12)
-meta T_HEADER_11_TO_20	(T_HEADER_04 || T_HEADER_71 || T_HEADER_11 || T_HEADER_02 || T_HEADER_10 || T_HEADER_29 || T_HEADER_41 || T_HEADER_43 || T_HEADER_24 || T_HEADER_23)
-meta T_HEADER_21_TO_30	(T_HEADER_22 || T_HEADER_21 || T_HEADER_20 || T_HEADER_62 || T_HEADER_69 || T_HEADER_14 || T_HEADER_09 || T_HEADER_15 || T_HEADER_25 || T_HEADER_06)
-meta T_HEADER_31_TO_40	(T_HEADER_46 || T_HEADER_31 || T_HEADER_30 || T_HEADER_13 || T_HEADER_26 || T_HEADER_48 || T_HEADER_68 || T_HEADER_61 || T_HEADER_47 || T_HEADER_18)
-meta T_HEADER_41_TO_46	(T_HEADER_44 || T_HEADER_53 || T_HEADER_45 || T_HEADER_34 || T_HEADER_70 || T_HEADER_54)
-
-# without T_HEADER_40
-meta T_HEADER_TOP_10	T_HEADER_01_TO_10
-meta T_HEADER_TOP_20	T_HEADER_01_TO_10 || T_HEADER_11_TO_20
-meta T_HEADER_TOP_30	T_HEADER_01_TO_10 || T_HEADER_11_TO_20 || T_HEADER_21_TO_30
-meta T_HEADER_TOP_40	T_HEADER_01_TO_10 || T_HEADER_11_TO_20 || T_HEADER_21_TO_30 || T_HEADER_31_TO_40
-meta T_HEADER_TOP_46	T_HEADER_01_TO_10 || T_HEADER_11_TO_20 || T_HEADER_21_TO_30 || T_HEADER_31_TO_40 || T_HEADER_41_TO_46
-
-# add in T_HEADER_40 - very low FP rate, but not perfect
-meta T_HEADER_TOP_10_B	T_HEADER_01_TO_10 || T_HEADER_40
-meta T_HEADER_TOP_20_B	T_HEADER_01_TO_10 || T_HEADER_11_TO_20 || T_HEADER_40
-meta T_HEADER_TOP_30_B	T_HEADER_01_TO_10 || T_HEADER_11_TO_20 || T_HEADER_21_TO_30 || T_HEADER_40
-meta T_HEADER_TOP_40_B	T_HEADER_01_TO_10 || T_HEADER_11_TO_20 || T_HEADER_21_TO_30 || T_HEADER_31_TO_40 || T_HEADER_40
-meta T_HEADER_TOP_46_B	T_HEADER_01_TO_10 || T_HEADER_11_TO_20 || T_HEADER_21_TO_30 || T_HEADER_31_TO_40 || T_HEADER_41_TO_46 || T_HEADER_40
+# case-sensitive rule
+# only significant rules with no FPs, hit recently, on 2+ corpuses
+header T_HEADER_SPAM	ALL =~ /^(Alternate-Recipient|Antivirus|Approved|Delivery-Notification|Disclose-Recipients|Error-path|Language|Location|Mime-Subversion|Newsletter-ID|PID|Rot|UID|X-BounceTrace|X-CS-IP|X-Company-Address|X-Company-City|X-Company-Country|X-Company-State|X-Company-Zip|X-E(?:[Mm]ail)?|X-Encoding|X-Originating-Company|X-RMD-Text|X-SG4|X-SP-Track-ID|X-Webmail-Time|X-bounce-to):/m
+# try case-insensitive, for kicks
+header T_HEADER_SPAM_I	ALL =~ /^(Alternate-Recipient|Antivirus|Approved|Delivery-Notification|Disclose-Recipients|Error-path|Language|Location|Mime-Subversion|Newsletter-ID|PID|Rot|UID|X-BounceTrace|X-CS-IP|X-Company-Address|X-Company-City|X-Company-Country|X-Company-State|X-Company-Zip|X-E(?:[Mm]ail)?|X-Encoding|X-Originating-Company|X-RMD-Text|X-SG4|X-SP-Track-ID|X-Webmail-Time|X-bounce-to):/mi
 
 # bug 2004 - trivial rules
 rawbody T_HTML_FONT_SIZE_0PX	/<font\b[^>]+0px/i
@@ -430,21 +400,5 @@
 ########################################################################
 
 # case matters for this one
-uri T_URI_UPPER_LOWER_01	m{(?i:https?)://([A-Z][a-z]+\.)+\S+\.(?:com|biz|info)}
-uri T_URI_UPPER_LOWER_02	m{(?i:https?)://([A-Z][a-z]+\.){2,}\S+\.(?:com|biz|info)}
-uri T_URI_UPPER_LOWER_03	m{(?i:https?)://([A-Z][a-z]+\.){1,2}\S+\.(?:com|biz|info)}
-uri T_URI_UPPER_LOWER_04	m{(?i:https?)://([A-Z][a-z]+\.)+\S+\.com}
-uri T_URI_UPPER_LOWER_05	m{(?i:https?)://([A-Z][a-z]+\.){2,}\S+\.com}
-uri T_URI_UPPER_LOWER_06	m{(?i:https?)://([A-Z][a-z]+\.){1,2}\S+\.com}
-uri T_URI_UPPER_LOWER_07	m{(?i:https?)://([A-Z][a-z]+\.)+\S+\.[a-z]{2,4}\b}
-uri T_URI_UPPER_LOWER_08	m{(?i:https?)://([A-Z][a-z]+\.){2,}\S+\.[a-z]{2,4}\b}
-uri T_URI_UPPER_LOWER_09	m{(?i:https?)://([A-Z][a-z]+\.){1,2}\S+\.[a-z]{2,4}\b}
-uri T_URI_UPPER_LOWER_10	m{(?i:https?)://([A-Z][a-z]+\.)+[A-Za-z\d-]+\.(?:com|biz|info)}
-uri T_URI_UPPER_LOWER_11	m{(?i:https?)://([A-Z][a-z]+\.){2,}[A-Za-z\d-]+\.(?:com|biz|info)}
-uri T_URI_UPPER_LOWER_12	m{(?i:https?)://([A-Z][a-z]+\.){1,2}[A-Za-z\d-]+\.(?:com|biz|info)}
-uri T_URI_UPPER_LOWER_13	m{(?i:https?)://([A-Z][a-z]+\.)+[A-Za-z\d-]+\.com}
-uri T_URI_UPPER_LOWER_14	m{(?i:https?)://([A-Z][a-z]+\.){2,}[A-Za-z\d-]+\.com}
-uri T_URI_UPPER_LOWER_15	m{(?i:https?)://([A-Z][a-z]+\.){1,2}[A-Za-z\d-]+\.com}
-uri T_URI_UPPER_LOWER_16	m{(?i:https?)://([A-Z][a-z]+\.)+[A-Za-z\d-]+\.[a-z]{2,4}\b}
-uri T_URI_UPPER_LOWER_17	m{(?i:https?)://([A-Z][a-z]+\.){2,}[A-Za-z\d-]+\.[a-z]{2,4}\b}
-uri T_URI_UPPER_LOWER_18	m{(?i:https?)://([A-Z][a-z]+\.){1,2}[A-Za-z\d-]+\.[a-z]{2,4}\b}
+uri T_URI_UPPER_LOWER_A	m{(?i:https?)://([A-Z][a-z]+\.){2,}[A-Za-z\d-]+\.[a-z]{2,4}\b}
+uri T_URI_UPPER_LOWER_B	m{(?i:https?)://([A-Z][a-z]+\.){2,}[A-Za-z\d-]+\.[a-z]{2,4}(?:[?/]|$)}