You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2007/11/02 00:22:18 UTC

svn commit: r591167 - /spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/OSBF.pm

Author: jm
Date: Thu Nov  1 16:22:17 2007
New Revision: 591167

URL: http://svn.apache.org/viewvc?rev=591167&view=rev
Log:
New baseline: scale scores up by 10% (capped at 1.0), and set each token's OSBF weight to the shortest OSB distance seen for that token.

Modified:
    spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/OSBF.pm

Modified: spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/OSBF.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/OSBF.pm?rev=591167&r1=591166&r2=591167&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/OSBF.pm (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/OSBF.pm Thu Nov  1 16:22:17 2007
@@ -718,6 +718,9 @@
   # Couldn't come up with a probability?
   goto skip unless defined $score;
 
+  # HACK HACK scale to a better range, since EDDC seems to bias low
+  $score *= 1.1; $score = 1.0 if $score > 1.0;
+
   dbg("osbf: score = $score");
 
   # no need to call tok_touch_all unless there were significant
@@ -925,10 +928,8 @@
   my %tokens;
   my %weights;
   foreach my $token (@tokens) {
-    next unless length($token); # skip 0 length tokens
-
-    my $distance;
-    if ($token =~ s/^([0-5])://) {   # remove token OSB distance
+    my $distance = '0';     # OSB bigram token distance; default to 0
+    if ($token && $token =~ s/^([0-5])://) {
       $distance = $1;
     }
 
@@ -937,12 +938,10 @@
     $tokens{$hash} = $token;
 
     # set the weight to be the lowest distance for that token
-    if (defined $distance) {
-      if (!(defined $weights{$hash})
-            || ($weights{$hash} > $distance)) 
-      {
-        $weights{$hash} = $distance;
-      }
+    if (!(defined $weights{$hash})
+          || ($weights{$hash} > $distance))
+    {
+      $weights{$hash} = $distance;
     }
   }