You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by kh...@apache.org on 2010/01/26 01:05:28 UTC

svn commit: r903019 - in /spamassassin/trunk/rulesrc/sandbox: khopesh/20_khop_bl.cf khopesh/20_khop_experimental.cf wtogami/20_unsafe.cf

Author: khopesh
Date: Tue Jan 26 00:05:28 2010
New Revision: 903019

URL: http://svn.apache.org/viewvc?rev=903019&view=rev
Log:
updated RCVD_VIA_APNIC and friends and pulled them out of warren's sandbox as per his request; added sketchy From-header detectors

Modified:
    spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_bl.cf
    spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf
    spamassassin/trunk/rulesrc/sandbox/wtogami/20_unsafe.cf

Modified: spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_bl.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_bl.cf?rev=903019&r1=903018&r2=903019&view=diff
==============================================================================
--- spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_bl.cf (original)
+++ spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_bl.cf Tue Jan 26 00:05:28 2010
@@ -38,12 +38,34 @@
 tflags	   __NOT_SPOOFED	nice
 
 
-# Some tests:
+# I'm using the RCVD_VIA_ prefix to represent regional internet registries
+# rather than blocklists' RCVD_IN_ prefix.  It is VERY important that people
+# not consider these to be DNS blocklists ... especially given the fact that
+# their mass-check stats at http://ruleqa.spamassassin.org/?rule=/RCVD_VIA are
+# quite competitive with the DNSBLs, which is more a reflection of our lack of
+# foreign ham in the corpora than any real facts.
+
+# old version, does not reflect recent allocation of 1/8 and 27/8.
+#header     __RCVD_VIA_APNIC   Received =~ /(?-xism:[^0-9.](?:2(?:0(?:2(?:\.1(?:2(?:3\.(?:0?(?:[4-9][0-9]|3[2-9])|[12][0-9]{2})\.[012]?[0-9]{1,2}|[^3]\.(?:012]?[0-9]{1,2}){2})|[^2]3\.(?:012]?[0-9]{1,2}){2})|(?:\.[02]?[0-9]{1,2}){3})|3(?:\.[012]?[0-9]{1,2}){3})|(?:1[0189]|2[012])(?:\.[012]?[0-9]{1,2}){3})|1(?:(?:2[0123456]|8[023]|1\d|75)(?:\.[012]?[0-9]{1,2}){3}|69\.2(?:1[0-9]|2[0-3]|0[89])(?:\.[012]?[0-9]{1,2}){2})|(?:5[89]|6[01])(?:\.[012]?[0-9]{1,2}){3})(?:[\]\)\s]))/
+#describe   __RCVD_VIA_APNIC   Received through a relay in Asia/Pacific Network
+
+# from http://www.apnic.net/db/ranges.html at 20091002, updated 20100125
+# updates easily gleaned from http://www.cymru.com/Documents/bogon-list.html
+header __RCVD_VIA_APNIC X-Spam-Relays-External =~ /\[ ip=(?-xism:1|27|5[89]|6[01]|1(?:[12][0-6]|1[7-9]|75|8[0123])|2(?:03|1[0189]|2[012]|02(?!\.123\.0?(?:[012]\d|3[01])))|169\.2(?:0[89]|1\d|2[01]|223)|169\.2(?:1[04]|22))\.\d/
+#tflags __RCVD_VIA_APNIC nopublish
+#header __RCVD_VIA_APNIC_I X-Spam-Relays-Internal =~ /\[ ip=(?-xism:1|27|5[89]|6[01]|1(?:[12][0-6]|1[7-9]|75|8[0123])|2(?:03|1[0189]|2[012]|02(?!\.123\.0?(?:[012]\d|3[01])))|169\.2(?:0[89]|1\d|2[01]|223)|169\.2(?:1[04]|22))\.\d/
+#tflags __RCVD_VIA_APNIC_I nopublish
+
+header __RCVD_VIA_APNIC_LE X-Spam-Relays-External =~ /^\[ ip=(?-xism:1|27|5[89]|6[01]|1(?:[12][0-6]|1[7-9]|75|8[0123])|2(?:03|1[0189]|2[012]|02(?!\.123\.0?(?:[012]\d|3[01])))|169\.2(?:0[89]|1\d|2[01]|223)|169\.2(?:1[04]|22))\.\d/
+#tflags __RCVD_VIA_APNIC_LE nopublish
+
+# from http://lacnic.net/en/registro/ at 20100115
+header	 __RCVD_VIA_LACNIC	X-Spam-Relays-External =~ /\[ ip=(?:1(?:90|8[679]|20(?:[01]\.|6\.223\.1(?:24|30))))\.\d/
+tflags	 __RCVD_VIA_LACNIC	nopublish
+header	 __RCVD_VIA_LACNIC_LE	X-Spam-Relays-External =~ /^\[ ip=(?:1(?:90|8[679]|20(?:[01]\.|6\.223\.1(?:24|30))))\.\d/
+#tflags	 __RCVD_VIA_LACNIC_LE	nopublish
+
 
-# This should be a cleaner representation of __DOS_RELAYED_EXT
-# ... but it is not ... its 10.3/66.1 pales vs DOS's 35.3/83.7 ... why?
-#header	 __MULTIPLE_EXT 	X-Spam-Relays-External =~ /...\[/
-# turns out I can just use !__DOS_SINGLE_EXT_RELAY and not need my own rule
 
 ifplugin Mail::SpamAssassin::Plugin::DNSEval # {
 

Modified: spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf?rev=903019&r1=903018&r2=903019&view=diff
==============================================================================
--- spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf (original)
+++ spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf Tue Jan 26 00:05:28 2010
@@ -4,6 +4,14 @@
 describe MALFORMED_FREEMAIL	Bad headers on message from free email service
 #score	 MALFORMED_FREEMAIL	0.1
 
+header	 FROM_WEBSITE	From =~ /\b(?:f|ht)tps?:\/\/[^\/\@]{3,60}\.\w\w/i
+describe FROM_WEBSITE	Sender name appears to be a link
+header	 FROM_WWW	From:name =~ /\bwww\.[^\/\@]{3,60}\.\w\w/i
+describe FROM_WWW	Sender name appears to be a website
+
+header	 FROM_2_EMAILS	From =~ /([^\@]{2,}\@[^\@]{2,60}\.\w\w).*(?!\1)[^\@]{2,}\@[^\@]/
+describe FROM_2_EMAILS	Sender claims to have a different email
+
 header	 __FROM_THE	From:name =~ /\b(?:THE|[Tt]he)\b/
 meta	 FROM_THE	__FROM_THE && !(__VIA_ML || __SENDER_BOT || __DOS_HAS_LIST_UNSUB || __REPLYTO_EXISTS)
 describe FROM_THE	Non-bulk sender is "The" something
@@ -41,9 +49,6 @@
 # @ 20091214, 5.7617/0.0344 spam/ham, 0.994 s/o.
 header __5_SUBDOM X-Spam-Relays-External =~ /^[^\]]+ rdns=(?:[^. ]*\.){6,}\w+ /
 
-# Probably too similar to __S25R_1
-header __NUM_LTR_3 X-Spam-Relays-External =~ /^[^\]]+ rdns=\S*(?:\d\S*[^0-9. ]\S*\d){3,} /
-
 # IP address in relay's rDNS or HELO
 header __IP_IN_RELAY  X-Spam-Relays-External =~ /^\[ ip=(\d+)\.(\d+)\.(\d+)\.(\d+) (?:[^\]]* )?(?:rdns|helo)=\S*(?:\1\D\2\D\3\D\4|\4\D\3\D\2\D\1)/
 header __IP_PART_IN_RELAY X-Spam-Relays-External =~ /^\[ ip=\d+\.\d+\.(\d+)\.(\d+) (?:[^\]]* )?(?:rdns|helo)=\S*(?:\1\W\2\W|\2\W\1)\b/
@@ -53,28 +58,10 @@
 header __MSGID_JAVAMAIL	Message-ID =~ /\.JavaMail\./
 tflags __MSGID_JAVAMAIL	nice
 
-
 meta	 AOL_ALL_CAPS	__AT_AOL_MSGID && UPPERCASE_75_100
 describe AOL_ALL_CAPS	AOL users sometimes write mail in all uppercase
 tflags	 AOL_ALL_CAPS	nice
 
-# from http://lacnic.net/en/registro/ at 20100115
-header	 __RCVD_VIA_LACNIC	X-Spam-Relays-External =~ /\[ ip=(?:1(?:90|8[679]|20(?:[01]\.|6\.223\.1(?:24|30))))\./
-tflags	 __RCVD_VIA_LACNIC	nopublish
-header	 __RCVD_VIA_LACNIC_LE	X-Spam-Relays-External =~ /^\[ ip=(?:1(?:90|8[679]|20(?:[01]\.|6\.223\.1(?:24|30))))\./
-tflags	 __RCVD_VIA_LACNIC_LE	nopublish
-
-# __RCVD_VIA_APNIC is from my khop-bl and was adopted into warren's sandbox
-
-# just to test ... E + I should equal the original, LE should be the useful one
-header __RCVD_VIA_APNIC_E X-Spam-Relays-External =~ /\[ ip=(?-xism:0?5[89]|6[01]|1(?:[12][0-6]|1[7-9]|75|8[0123])|2(?:03|1[0189]|2[012]|02(?!\.123\.0?(?:[012]\d|3[01])))|169\.2(?:0[89]|1\d|2[01]|223)|169\.2(?:1[04]|22))\./
-tflags __RCVD_VIA_APNIC_E nopublish
-header __RCVD_VIA_APNIC_I X-Spam-Relays-Internal =~ /\[ ip=(?-xism:0?5[89]|6[01]|1(?:[12][0-6]|1[7-9]|75|8[0123])|2(?:03|1[0189]|2[012]|02(?!\.123\.0?(?:[012]\d|3[01])))|169\.2(?:0[89]|1\d|2[01]|223)|169\.2(?:1[04]|22))\./
-tflags __RCVD_VIA_APNIC_I nopublish
-
-header __RCVD_VIA_APNIC_LE X-Spam-Relays-External =~ /^\[ ip=(?-xism:0?5[89]|6[01]|1(?:[12][0-6]|1[7-9]|75|8[0123])|2(?:03|1[0189]|2[012]|02(?!\.123\.0?(?:[012]\d|3[01])))|169\.2(?:0[89]|1\d|2[01]|223)|169\.2(?:1[04]|22))\./
-tflags __RCVD_VIA_APNIC_LE nopublish
-
 # testing warren's theory from "Uppercase E-mail in Latin America" 2009-10-06
 # http://old.nabble.com/Uppercase-E-mail-in-Latin-America-td25748291.html
 # note, masscheck probably doesn't have enough LACNIC ham for this test

Modified: spamassassin/trunk/rulesrc/sandbox/wtogami/20_unsafe.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rulesrc/sandbox/wtogami/20_unsafe.cf?rev=903019&r1=903018&r2=903019&view=diff
==============================================================================
--- spamassassin/trunk/rulesrc/sandbox/wtogami/20_unsafe.cf (original)
+++ spamassassin/trunk/rulesrc/sandbox/wtogami/20_unsafe.cf Tue Jan 26 00:05:28 2010
@@ -1,8 +1,3 @@
-# from http://www.apnic.net/db/ranges.html at 20091002, meta bits added 20090930
-# copied from khop-bl.sa.khopesh.com
-header     __RCVD_VIA_APNIC   Received =~ /(?-xism:[^0-9.](?:2(?:0(?:2(?:\.1(?:2(?:3\.(?:0?(?:[4-9][0-9]|3[2-9])|[12][0-9]{2})\.[012]?[0-9]{1,2}|[^3]\.(?:012]?[0-9]{1,2}){2})|[^2]3\.(?:012]?[0-9]{1,2}){2})|(?:\.[02]?[0-9]{1,2}){3})|3(?:\.[012]?[0-9]{1,2}){3})|(?:1[0189]|2[012])(?:\.[012]?[0-9]{1,2}){3})|1(?:(?:2[0123456]|8[023]|1\d|75)(?:\.[012]?[0-9]{1,2}){3}|69\.2(?:1[0-9]|2[0-3]|0[89])(?:\.[012]?[0-9]{1,2}){2})|(?:5[89]|6[01])(?:\.[012]?[0-9]{1,2}){3})(?:[\]\)\s]))/
-describe   __RCVD_VIA_APNIC   Received through a relay in Asia/Pacific Network
-
 # 20091004: 8-character .cn domain
 uri            CN_EIGHT            m;^https?://(?:[^./]+\.)*[-\w]{8}\.cn(?:$|[/:\#]);
 describe       CN_EIGHT            .cn URI with eight-character domain name