You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jh...@apache.org on 2021/01/04 17:33:46 UTC

svn commit: r1885117 - /spamassassin/trunk/rulesrc/sandbox/jhardin/20_misc_testing.cf

Author: jhardin
Date: Mon Jan  4 17:33:45 2021
New Revision: 1885117

URL: http://svn.apache.org/viewvc?rev=1885117&view=rev
Log:
Improve *_INVIS discrimination to reduce FPs - Bug 7877

Modified:
    spamassassin/trunk/rulesrc/sandbox/jhardin/20_misc_testing.cf

Modified: spamassassin/trunk/rulesrc/sandbox/jhardin/20_misc_testing.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rulesrc/sandbox/jhardin/20_misc_testing.cf?rev=1885117&r1=1885116&r2=1885117&view=diff
==============================================================================
--- spamassassin/trunk/rulesrc/sandbox/jhardin/20_misc_testing.cf (original)
+++ spamassassin/trunk/rulesrc/sandbox/jhardin/20_misc_testing.cf Mon Jan  4 17:33:45 2021
@@ -2334,11 +2334,11 @@ meta      GOOG_REDIR_HTML_ONLY
 describe  GOOG_REDIR_HTML_ONLY          Google redirect to obscure spamvertised website + HTML only
 score     GOOG_REDIR_HTML_ONLY          2.000	# limit
 
-rawbody   __LONG_INVIS_DIV              /<div\s+style\s*=\s*"(?:visibility\s*:\s*hidden|display\s*:\s*none)\s*">[^<\s]{1400}/i
+rawbody   __LONG_INVIS_DIV              /<div\s+style\s*=\s*"(?:(?<!-)visibility\s*:\s*hidden|display\s*:\s*none)\s*">[^<\s]{1400}/i
 
 # low S/O, apparently lots of invisible ham...
 if can(Mail::SpamAssassin::Conf::feature_bug6558_free)
-  rawbody   __STY_INVIS                   /\bstyle\s*=\s*"[^">]{0,80}(?:visibility\s*:\s*hidden\s*|display\s*:\s*none\s*)[;"!]/i
+  rawbody   __STY_INVIS                   /\bstyle\s*=\s*"[^">]{0,80}(?:(?<!-)visibility\s*:\s*hidden\s*|display\s*:\s*none\s*)[;"!]/i
   tflags    __STY_INVIS                   multiple maxhits=6
   meta      __STY_INVIS_1                 __STY_INVIS == 1
   meta      __STY_INVIS_2                 __STY_INVIS > 1
@@ -2347,7 +2347,7 @@ if can(Mail::SpamAssassin::Conf::feature
 
   # Widely used in ham for hiding tracking images? See how it performs on non-IMG tags...
   # S/O the same. :(
-  #rawbody   __STY_INVIS_NONIMG            /<(?!img\s)[a-z]+\s[^>]{0,200}\bstyle\s*=\s*"[^">]{0,80}(?:visibility\s*:\s*hidden\s*|display\s*:\s*none\s*)[;"!]/i
+  #rawbody   __STY_INVIS_NONIMG            /<(?!img\s)[a-z]+\s[^>]{0,200}\bstyle\s*=\s*"[^">]{0,80}(?:(?<!-)visibility\s*:\s*hidden\s*|display\s*:\s*none\s*)[;"!]/i
 
   # *one* invisible style has better S/O than multiple...
   meta      __STY_INVIS_1_MINFP           __STY_INVIS_1 && !__HAS_CAMPAIGNID 
@@ -2370,7 +2370,7 @@ else
 endif
 
 # try it on span tags only...
-rawbody   __SPAN_INVIS                  /<span\s[^>]{0,200}style\s*=\s*"[^">]{0,80}(?:visibility\s*:\s*hidden\s*|display\s*:\s*none\s*)[;"!][^>]{1,200}>/i
+rawbody   __SPAN_INVIS                  /<span\s[^>]{0,200}style\s*=\s*"[^">]{0,80}(?:(?<!-)visibility\s*:\s*hidden\s*|display\s*:\s*none\s*)[;"!][^>]{1,200}>/i
 
 describe  LONG_INVISIBLE_TEXT           Long block of hidden text - bayes poison?
 score     LONG_INVISIBLE_TEXT           3.000	# limit
@@ -2379,7 +2379,7 @@ tflags    LONG_INVISIBLE_TEXT
 
 if can(Mail::SpamAssassin::Conf::feature_bug6558_free)
   # Lots of ham uses invisible fonts - WHY?
-  rawbody   __FONT_INVIS                  /<(?!style)[a-z]+\s[^>]{1,80}(?:font(?:-size)?\s*:\s*(?:0*[01](?:\.\d+)?(?:px|pt|Q|vw|vh|vmin)|0+(?:\.\d+)?(?:cm|mm|pc|ch|rem|lh|vmax|%)|0+(?:\.0\d*)(?:em|ex|in))(?:\s[a-z]|\s*[;'])|color\s*:\s*transparent\s*[;'])[^>]{0,80}>\w/i
+  rawbody   __FONT_INVIS                  /<(?!style)[a-z]+\s[^>]{1,80}(?:font(?:-size)?\s*:\s*(?:0*[01](?:\.\d+)?(?:px|pt|Q|vw|vh|vmin)|0+(?:\.\d+)?(?:cm|mm|pc|ch|rem|lh|vmax|%)|0+(?:\.0\d*)(?:em|ex|in))(?:\s[a-z]|\s*[;'])|['"\s;]color\s*:\s*transparent\s*[;'])[^>]{0,80}>\w/i
   tflags    __FONT_INVIS                  multiple maxhits=11
   meta      __FONT_INVIS_2                __FONT_INVIS > 2
   meta      __FONT_INVIS_5                __FONT_INVIS > 5
@@ -2391,7 +2391,7 @@ if can(Mail::SpamAssassin::Conf::feature
   tflags    HTML_TEXT_INVISIBLE_FONT      publish
 
   # Does this hit less ham while still hitting spam?
-  rawbody   __WORD_INVIS                  /<(?!style)[a-z]+\s[^>]{1,80}(?:font(?:-size)?\s*:\s*(?:0*[01](?:\.\d+)?(?:px|pt|Q|vw|vh|vmin)|0+(?:\.\d+)?(?:cm|mm|in|pc|em|ex|ch|rem|lh|vmax))\s*[;'a-z]|color\s*:\s*transparent\s*[;'])[^>]{0,80}>\w{1,20}</i
+  rawbody   __WORD_INVIS                  /<(?!style)[a-z]+\s[^>]{1,80}(?:font(?:-size)?\s*:\s*(?:0*[01](?:\.\d+)?(?:px|pt|Q|vw|vh|vmin)|0+(?:\.\d+)?(?:cm|mm|in|pc|em|ex|ch|rem|lh|vmax))\s*[;'a-z]|['"\s;]color\s*:\s*transparent\s*[;'])[^>]{0,80}>\w{1,20}</i
   tflags    __WORD_INVIS                  multiple maxhits=6
   meta      __WORD_INVIS_2                __WORD_INVIS > 1
   meta      __WORD_INVIS_5                __WORD_INVIS > 5