You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2007/11/02 00:22:18 UTC
svn commit: r591167 -
/spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/OSBF.pm
Author: jm
Date: Thu Nov 1 16:22:17 2007
New Revision: 591167
URL: http://svn.apache.org/viewvc?rev=591167&view=rev
Log:
New baseline: scale scores up by 10% (capped at 1.0), and set each token's OSBF weight to the shortest distance seen for that token.
Modified:
spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/OSBF.pm
Modified: spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/OSBF.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/OSBF.pm?rev=591167&r1=591166&r2=591167&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/OSBF.pm (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/OSBF.pm Thu Nov 1 16:22:17 2007
@@ -718,6 +718,9 @@
# Couldn't come up with a probability?
goto skip unless defined $score;
+ # HACK HACK scale to a better range, since EDDC seems to bias low
+ $score *= 1.1; $score = 1.0 if $score > 1.0;
+
dbg("osbf: score = $score");
# no need to call tok_touch_all unless there were significant
@@ -925,10 +928,8 @@
my %tokens;
my %weights;
foreach my $token (@tokens) {
- next unless length($token); # skip 0 length tokens
-
- my $distance;
- if ($token =~ s/^([0-5])://) { # remove token OSB distance
+ my $distance = '0'; # OSB bigram token distance; default to 0
+ if ($token && $token =~ s/^([0-5])://) {
$distance = $1;
}
@@ -937,12 +938,10 @@
$tokens{$hash} = $token;
# set the weight to be the lowest distance for that token
- if (defined $distance) {
- if (!(defined $weights{$hash})
- || ($weights{$hash} > $distance))
- {
- $weights{$hash} = $distance;
- }
+ if (!(defined $weights{$hash})
+ || ($weights{$hash} > $distance))
+ {
+ $weights{$hash} = $distance;
}
}