You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by mm...@apache.org on 2015/10/08 18:46:02 UTC

svn commit: r1707583 - /spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Bayes.pm

Author: mmartinec
Date: Thu Oct  8 16:46:01 2015
New Revision: 1707583

URL: http://svn.apache.org/viewvc?rev=1707583&view=rev
Log:
Plugin/Bayes.pm: add the missing $tokprefix to u8: and 8: tokens; also shorten the tokens derived from the Content-Disposition and Content-Transfer-Encoding headers

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Bayes.pm

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Bayes.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Bayes.pm?rev=1707583&r1=1707582&r2=1707583&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Bayes.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Bayes.pm Thu Oct  8 16:46:01 2015
@@ -200,7 +200,9 @@ use constant ADD_INVIZ_TOKENS_NO_PREFIX
   'X-Authentication-Warning' => '*a',
   'Organization'	=> '*o',
   'Organisation'        => '*o',
-  'Content-Type'	=> '*c',
+  'Content-Type'	=> '*ct',
+  'Content-Disposition'	=> '*cd',
+  'Content-Transfer-Encoding' => '*ce',
   'x-spam-relays-trusted' => '*RT',
   'x-spam-relays-untrusted' => '*RU',
 );
@@ -1120,7 +1122,7 @@ sub tokenize {
   # generate an SHA1 hash and take the lower 40 bits as our token
   my %tokens;
   foreach my $token (@tokens) {
-    # skip empty tokens
+  # dbg("bayes: token: %s", $token);
     $tokens{substr(sha1($token), -5)} = $token  if $token ne '';
   }
 
@@ -1217,7 +1219,7 @@ sub _tokenize_line {
 	my(@t) = $token =~ /( (?: [\xE0-\xEF] | [\xF0-\xF4][\x80-\xBF] )
                               [\x80-\xBF]{2} )/xsg;
 	if (@t) {
-          push (@rettokens, map('u8:'.$_, @t));
+          push (@rettokens, map($tokprefix.'u8:'.$_, @t));
 	  next;
 	}
       }
@@ -1227,7 +1229,7 @@ sub _tokenize_line {
 	# but I'm doing tuples to keep the dbs small(er)."  Sounds like a plan
 	# to me! (jm)
 	while ($token =~ s/^(..?)//) {
-	  push (@rettokens, "8:$1");
+	  push (@rettokens, $tokprefix.'8:'.$1);
 	}
 	next;
       }