You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jh...@apache.org on 2014/09/06 22:50:35 UTC

svn commit: r1622928 - /spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf

Author: jhardin
Date: Sat Sep  6 20:50:35 2014
New Revision: 1622928

URL: http://svn.apache.org/r1622928
Log:
FP avoidance exploration for __TO_EQ_FROM_USR_NN

Modified:
    spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf

Modified: spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf?rev=1622928&r1=1622927&r2=1622928&view=diff
==============================================================================
--- spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf (original)
+++ spamassassin/trunk/rulesrc/sandbox/khopesh/20_khop_experimental.cf Sat Sep  6 20:50:35 2014
@@ -279,6 +279,17 @@ header	 __TO_EQ_FROM_USR_NN_2	ALL =~ /\n
 meta	 __TO_EQ_FROM_USR_NN	(__TO_EQ_FROM_USR_NN_1 || __TO_EQ_FROM_USR_NN_2) && !(__FROM_DNS || __FROM_INFO || __SENDER_BOT)
 describe __TO_EQ_FROM_USR_NN	To: username same as From: username sans trailing nums
 
+# JHardin:
+# __TO_EQ_FROM_USR_NN recent S/O is 0.992 on a large corpus
+# with most hits at <= 5 points
+# let's see if we can get those low-scored spams some more points
+# FP observation: __TO_EQ_FROM overlaps ~60% of __TO_EQ_FROM_USR_NN ham hits;
+#   this suggests the primary spam indicator is having *different* suffixes
+# Most of the rest are fairly reliable ham indicators
+# suggested scored FP avoidance rule:
+meta	 __TO_EQ_FROM_USR_NN_MINFP	__TO_EQ_FROM_USR_NN && !__TO_EQ_FROM && !__LCL__ENV_AND_HDR_FROM_MATCH && !__DKIM_EXISTS && !__NOT_SPOOFED 
+
+
 header	 __SUBJ_NOT_SHORT	Subject =~ /^.{16}/
 header	 __SUBJ_HAS_WORDS	Subject =~ /(?:^|\s)[^\W0-9_]{3,15}(?:\s|$)/
 meta	 SUBJ_LACKS_WORDS	__SUBJ_NOT_SHORT && !__SUBJ_HAS_WORDS && !__SUBJECT_ENCODED_B64