You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jh...@apache.org on 2014/09/06 22:50:35 UTC
svn commit: r1622928 -
/spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf
Author: jhardin
Date: Sat Sep 6 20:50:35 2014
New Revision: 1622928
URL: http://svn.apache.org/r1622928
Log:
FP avoidance exploration for __TO_EQ_FROM_USR_NN
Modified:
spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf
Modified: spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf?rev=1622928&r1=1622927&r2=1622928&view=diff
==============================================================================
--- spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf (original)
+++ spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf Sat Sep 6 20:50:35 2014
@@ -279,6 +279,17 @@ header __TO_EQ_FROM_USR_NN_2 ALL =~ /\n
meta __TO_EQ_FROM_USR_NN (__TO_EQ_FROM_USR_NN_1 || __TO_EQ_FROM_USR_NN_2) && !(__FROM_DNS || __FROM_INFO || __SENDER_BOT)
describe __TO_EQ_FROM_USR_NN To: username same as From: username sans trailing nums
+# JHardin:
+# __TO_EQ_FROM_USR_NN recent S/O is 0.992 on a large corpus
+# with most hits at <= 5 points
+# let's see if we can get those low-scored spams some more points
+# FP observation: __TO_EQ_FROM overlaps ~60% of _USR_NN ham hits;
+# this suggests the primary spam indicator is having *different* suffixes
+# Most of the rest are fairly reliable ham indicators
+# suggested scored FP avoidance rule:
+meta __TO_EQ_FROM_USR_NN_MINFP __TO_EQ_FROM_USR_NN && !__TO_EQ_FROM && !__LCL__ENV_AND_HDR_FROM_MATCH && !__DKIM_EXISTS && !__NOT_SPOOFED
+
+
header __SUBJ_NOT_SHORT Subject =~ /^.{16}/
header __SUBJ_HAS_WORDS Subject =~ /(?:^|\s)[^\W0-9_]{3,15}(?:\s|$)/
meta SUBJ_LACKS_WORDS __SUBJ_NOT_SHORT && !__SUBJ_HAS_WORDS && !__SUBJECT_ENCODED_B64