You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by kh...@apache.org on 2010/01/26 01:05:28 UTC
svn commit: r903019 - in /spamassassin/trunk/rulesrc/sandbox:
khopesh/20_khop_bl.cf khopesh/20_khop_experimental.cf wtogami/20_unsafe.cf
Author: khopesh
Date: Tue Jan 26 00:05:28 2010
New Revision: 903019
URL: http://svn.apache.org/viewvc?rev=903019&view=rev
Log:
Updated RCVD_VIA_APNIC and friends and pulled them out of warren's sandbox as per his request; added detectors for sketchy From headers (FROM_WEBSITE, FROM_WWW, FROM_2_EMAILS).
Modified:
spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_bl.cf
spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf
spamassassin/trunk/rulesrc/sandbox/wtogami/20_unsafe.cf
Modified: spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_bl.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_bl.cf?rev=903019&r1=903018&r2=903019&view=diff
==============================================================================
--- spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_bl.cf (original)
+++ spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_bl.cf Tue Jan 26 00:05:28 2010
@@ -38,12 +38,34 @@
tflags __NOT_SPOOFED nice
-# Some tests:
+# I'm using the RCVD_VIA_ prefix to represent regional internet registries
+# rather than blocklists' RCVD_IN_ prefix. It is VERY important that people
+# not consider these to be DNS blocklists ... especially given the fact that
+# their mass-check stats at http://ruleqa.spamassassin.org/?rule=/RCVD_VIA are
+# quite competitive with the DNSBLs, which is more a reflection of our lack of
+# foreign ham in the corpora than any real facts.
+
+# old version, does not reflect recent allocation of 1/8 and 27/8.
+#header __RCVD_VIA_APNIC Received =~ /(?-xism:[^0-9.](?:2(?:0(?:2(?:\.1(?:2(?:3\.(?:0?(?:[4-9][0-9]|3[2-9])|[12][0-9]{2})\.[012]?[0-9]{1,2}|[^3]\.(?:012]?[0-9]{1,2}){2})|[^2]3\.(?:012]?[0-9]{1,2}){2})|(?:\.[02]?[0-9]{1,2}){3})|3(?:\.[012]?[0-9]{1,2}){3})|(?:1[0189]|2[012])(?:\.[012]?[0-9]{1,2}){3})|1(?:(?:2[0123456]|8[023]|1\d|75)(?:\.[012]?[0-9]{1,2}){3}|69\.2(?:1[0-9]|2[0-3]|0[89])(?:\.[012]?[0-9]{1,2}){2})|(?:5[89]|6[01])(?:\.[012]?[0-9]{1,2}){3})(?:[\]\)\s]))/
+#describe __RCVD_VIA_APNIC Received through a relay in Asia/Pacific Network
+
+# from http://www.apnic.net/db/ranges.html at 20091002, updated 20100125
+# updates easily gleaned from http://www.cymru.com/Documents/bogon-list.html
+header __RCVD_VIA_APNIC X-Spam-Relays-External =~ /\[ ip=(?-xism:1|27|5[89]|6[01]|1(?:[12][0-6]|1[7-9]|75|8[0123])|2(?:03|1[0189]|2[012]|02(?!\.123\.0?(?:[012]\d|3[01])))|169\.2(?:0[89]|1\d|2[01]|223)|169\.2(?:1[04]|22))\.\d/
+#tflags __RCVD_VIA_APNIC nopublish
+#header __RCVD_VIA_APNIC_I X-Spam-Relays-Internal =~ /\[ ip=(?-xism:1|27|5[89]|6[01]|1(?:[12][0-6]|1[7-9]|75|8[0123])|2(?:03|1[0189]|2[012]|02(?!\.123\.0?(?:[012]\d|3[01])))|169\.2(?:0[89]|1\d|2[01]|223)|169\.2(?:1[04]|22))\.\d/
+#tflags __RCVD_VIA_APNIC_I nopublish
+
+header __RCVD_VIA_APNIC_LE X-Spam-Relays-External =~ /^\[ ip=(?-xism:1|27|5[89]|6[01]|1(?:[12][0-6]|1[7-9]|75|8[0123])|2(?:03|1[0189]|2[012]|02(?!\.123\.0?(?:[012]\d|3[01])))|169\.2(?:0[89]|1\d|2[01]|223)|169\.2(?:1[04]|22))\.\d/
+#tflags __RCVD_VIA_APNIC_LE nopublish
+
+# from http://lacnic.net/en/registro/ at 20100115
+header __RCVD_VIA_LACNIC X-Spam-Relays-External =~ /\[ ip=(?:1(?:90|8[679]|20(?:[01]\.|6\.223\.1(?:24|30))))\.\d/
+tflags __RCVD_VIA_LACNIC nopublish
+header __RCVD_VIA_LACNIC_LE X-Spam-Relays-External =~ /^\[ ip=(?:1(?:90|8[679]|20(?:[01]\.|6\.223\.1(?:24|30))))\.\d/
+#tflags __RCVD_VIA_LACNIC_LE nopublish
+
-# This should be a cleaner representation of __DOS_RELAYED_EXT
-# ... but it is not ... its 10.3/66.1 pales vs DOS's 35.3/83.7 ... why?
-#header __MULTIPLE_EXT X-Spam-Relays-External =~ /...\[/
-# turns out I can just use !__DOS_SINGLE_EXT_RELAY and not need my own rule
ifplugin Mail::SpamAssassin::Plugin::DNSEval # {
Modified: spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf?rev=903019&r1=903018&r2=903019&view=diff
==============================================================================
--- spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf (original)
+++ spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf Tue Jan 26 00:05:28 2010
@@ -4,6 +4,14 @@
describe MALFORMED_FREEMAIL Bad headers on message from free email service
#score MALFORMED_FREEMAIL 0.1
+header FROM_WEBSITE From =~ /\b(?:f|ht)tps?:\/\/[^\/\@]{3,60}\.\w\w/i
+describe FROM_WEBSITE Sender name appears to be a link
+header FROM_WWW From:name =~ /\bwww\.[^\/\@]{3,60}\.\w\w/i
+describe FROM_WWW Sender name appears to be a website
+
+header FROM_2_EMAILS From =~ /([^\@]{2,}\@[^\@]{2,60}\.\w\w).*(?!\1)[^\@]{2,}\@[^\@]/
+describe FROM_2_EMAILS Sender claims to have a different email
+
header __FROM_THE From:name =~ /\b(?:THE|[Tt]he)\b/
meta FROM_THE __FROM_THE && !(__VIA_ML || __SENDER_BOT || __DOS_HAS_LIST_UNSUB || __REPLYTO_EXISTS)
describe FROM_THE Non-bulk sender is "The" something
@@ -41,9 +49,6 @@
# @ 20091214, 5.7617/0.0344 spam/ham, 0.994 s/o.
header __5_SUBDOM X-Spam-Relays-External =~ /^[^\]]+ rdns=(?:[^. ]*\.){6,}\w+ /
-# Probably too similar to __S25R_1
-header __NUM_LTR_3 X-Spam-Relays-External =~ /^[^\]]+ rdns=\S*(?:\d\S*[^0-9. ]\S*\d){3,} /
-
# IP address in relay's rDNS or HELO
header __IP_IN_RELAY X-Spam-Relays-External =~ /^\[ ip=(\d+)\.(\d+)\.(\d+)\.(\d+) (?:[^\]]* )?(?:rdns|helo)=\S*(?:\1\D\2\D\3\D\4|\4\D\3\D\2\D\1)/
header __IP_PART_IN_RELAY X-Spam-Relays-External =~ /^\[ ip=\d+\.\d+\.(\d+)\.(\d+) (?:[^\]]* )?(?:rdns|helo)=\S*(?:\1\W\2\W|\2\W\1)\b/
@@ -53,28 +58,10 @@
header __MSGID_JAVAMAIL Message-ID =~ /\.JavaMail\./
tflags __MSGID_JAVAMAIL nice
-
meta AOL_ALL_CAPS __AT_AOL_MSGID && UPPERCASE_75_100
describe AOL_ALL_CAPS AOL users sometimes write mail in all uppercase
tflags AOL_ALL_CAPS nice
-# from http://lacnic.net/en/registro/ at 20100115
-header __RCVD_VIA_LACNIC X-Spam-Relays-External =~ /\[ ip=(?:1(?:90|8[679]|20(?:[01]\.|6\.223\.1(?:24|30))))\./
-tflags __RCVD_VIA_LACNIC nopublish
-header __RCVD_VIA_LACNIC_LE X-Spam-Relays-External =~ /^\[ ip=(?:1(?:90|8[679]|20(?:[01]\.|6\.223\.1(?:24|30))))\./
-tflags __RCVD_VIA_LACNIC_LE nopublish
-
-# __RCVD_VIA_APNIC is from my khop-bl and was adopted into warren's sandbox
-
-# just to test ... E + I should equal the original, LE should be the useful one
-header __RCVD_VIA_APNIC_E X-Spam-Relays-External =~ /\[ ip=(?-xism:0?5[89]|6[01]|1(?:[12][0-6]|1[7-9]|75|8[0123])|2(?:03|1[0189]|2[012]|02(?!\.123\.0?(?:[012]\d|3[01])))|169\.2(?:0[89]|1\d|2[01]|223)|169\.2(?:1[04]|22))\./
-tflags __RCVD_VIA_APNIC_E nopublish
-header __RCVD_VIA_APNIC_I X-Spam-Relays-Internal =~ /\[ ip=(?-xism:0?5[89]|6[01]|1(?:[12][0-6]|1[7-9]|75|8[0123])|2(?:03|1[0189]|2[012]|02(?!\.123\.0?(?:[012]\d|3[01])))|169\.2(?:0[89]|1\d|2[01]|223)|169\.2(?:1[04]|22))\./
-tflags __RCVD_VIA_APNIC_I nopublish
-
-header __RCVD_VIA_APNIC_LE X-Spam-Relays-External =~ /^\[ ip=(?-xism:0?5[89]|6[01]|1(?:[12][0-6]|1[7-9]|75|8[0123])|2(?:03|1[0189]|2[012]|02(?!\.123\.0?(?:[012]\d|3[01])))|169\.2(?:0[89]|1\d|2[01]|223)|169\.2(?:1[04]|22))\./
-tflags __RCVD_VIA_APNIC_LE nopublish
-
# testing warren's theory from "Uppercase E-mail in Latin America" 2009-10-06
# http://old.nabble.com/Uppercase-E-mail-in-Latin-America-td25748291.html
# note, masscheck probably doesn't have enough LACNIC ham for this test
Modified: spamassassin/trunk/rulesrc/sandbox/wtogami/20_unsafe.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rulesrc/sandbox/wtogami/20_unsafe.cf?rev=903019&r1=903018&r2=903019&view=diff
==============================================================================
--- spamassassin/trunk/rulesrc/sandbox/wtogami/20_unsafe.cf (original)
+++ spamassassin/trunk/rulesrc/sandbox/wtogami/20_unsafe.cf Tue Jan 26 00:05:28 2010
@@ -1,8 +1,3 @@
-# from http://www.apnic.net/db/ranges.html at 20091002, meta bits added 20090930
-# copied from khop-bl.sa.khopesh.com
-header __RCVD_VIA_APNIC Received =~ /(?-xism:[^0-9.](?:2(?:0(?:2(?:\.1(?:2(?:3\.(?:0?(?:[4-9][0-9]|3[2-9])|[12][0-9]{2})\.[012]?[0-9]{1,2}|[^3]\.(?:012]?[0-9]{1,2}){2})|[^2]3\.(?:012]?[0-9]{1,2}){2})|(?:\.[02]?[0-9]{1,2}){3})|3(?:\.[012]?[0-9]{1,2}){3})|(?:1[0189]|2[012])(?:\.[012]?[0-9]{1,2}){3})|1(?:(?:2[0123456]|8[023]|1\d|75)(?:\.[012]?[0-9]{1,2}){3}|69\.2(?:1[0-9]|2[0-3]|0[89])(?:\.[012]?[0-9]{1,2}){2})|(?:5[89]|6[01])(?:\.[012]?[0-9]{1,2}){3})(?:[\]\)\s]))/
-describe __RCVD_VIA_APNIC Received through a relay in Asia/Pacific Network
-
# 20091004: 8-character .cn domain
uri CN_EIGHT m;^https?://(?:[^./]+\.)*[-\w]{8}\.cn(?:$|[/:\#]);
describe CN_EIGHT .cn URI with eight-character domain name