You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2007/02/14 22:57:02 UTC
svn commit: r507708 - in /spamassassin/trunk/masses: fp-fn-statistics
logs-to-c rewrite-cf-with-new-scores validate-model
Author: jm
Date: Wed Feb 14 13:57:01 2007
New Revision: 507708
URL: http://svn.apache.org/viewvc?view=rev&rev=507708
Log:
Stop zeroing scores; it seems to kill the accuracy of the GA FP%/FN% computation compared to fp-fn-statistics. Also, don't include T_ rules in the input files generated by logs-to-c. Finally, support the LEARN_RATE variable in validate-model.
Modified:
spamassassin/trunk/masses/fp-fn-statistics
spamassassin/trunk/masses/logs-to-c
spamassassin/trunk/masses/rewrite-cf-with-new-scores
spamassassin/trunk/masses/validate-model
Modified: spamassassin/trunk/masses/fp-fn-statistics
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/fp-fn-statistics?view=diff&rev=507708&r1=507707&r2=507708
==============================================================================
--- spamassassin/trunk/masses/fp-fn-statistics (original)
+++ spamassassin/trunk/masses/fp-fn-statistics Wed Feb 14 13:57:01 2007
@@ -96,7 +96,7 @@
readscores();
-die "wrong scoreset in tmp/rules.pl" unless $rules->{_scoreset} == $opt_scoreset;
+die "wrong scoreset in tmp/rules.pl" unless $allrules{_scoreset} == $opt_scoreset;
print "Reading per-message hit stat logs and scores...\n";
my ($num_spam, $num_ham);
@@ -189,8 +189,8 @@
sub readscores {
print "Reading scores from \"$opt_cffile\"...\n";
- system ("../build/parse-rules-for-masses -d \"$opt_cffile\" -s $opt_scoreset") and die;
- require "./tmp/rules_$opt_scoreset.pl";
+ system ("../build/parse-rules-for-masses -o tmp/rules_$opt_scoreset.pl -d \"$opt_cffile\" -s $opt_scoreset") and die;
+ require "tmp/rules_$opt_scoreset.pl";
%allrules = %rules; # ensure it stays global
}
Modified: spamassassin/trunk/masses/logs-to-c
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/logs-to-c?view=diff&rev=507708&r1=507707&r2=507708
==============================================================================
--- spamassassin/trunk/masses/logs-to-c (original)
+++ spamassassin/trunk/masses/logs-to-c Wed Feb 14 13:57:01 2007
@@ -419,11 +419,23 @@
# catch up on the ones missed; seems to be userconf or 0-hitters mostly.
foreach my $t (sort keys %allrules) {
+ next if ($t eq '_scoreset');
next if (exists($range_lo{$t}));
+
if ($allrules{$t}->{issubrule}) {
if (!$ignored_rule{$t}) {
# warn "ignoring '$t': is sub-rule\n"; # no need to warn here
$ignored_rule{$t} = 1;
+ }
+ $mutable_tests{$t} = 0;
+ next;
+ }
+ if ($t =~ /^T_/) {
+ if (!$ignored_rule{$t}) {
+ # warn "ignoring '$t': is test rule\n"; # no need to warn here
+ $ignored_rule{$t} = 1;
+ $range_lo{$t} = 0.01; # clamp to insignificant range
+ $range_hi{$t} = 0.01;
}
$mutable_tests{$t} = 0;
next;
Modified: spamassassin/trunk/masses/rewrite-cf-with-new-scores
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/rewrite-cf-with-new-scores?view=diff&rev=507708&r1=507707&r2=507708
==============================================================================
--- spamassassin/trunk/masses/rewrite-cf-with-new-scores (original)
+++ spamassassin/trunk/masses/rewrite-cf-with-new-scores Wed Feb 14 13:57:01 2007
@@ -98,7 +98,7 @@
$opt_out ||= "50_scores.cf";
my $NUM_SCORESETS = 4;
-my $ZERO_MINISCULE_SCORES = 1;
+my $ZERO_MINISCULE_SCORES = 0;
my $MINISCULE_THRESHOLD = 0.01; # points
my $UNZERO_META_PREDICATES = 1;
@@ -443,6 +443,8 @@
while (my ($name, $info) = each %rules)
{
+ next if ($name eq '_scoreset');
+
my $type = $info->{type} || "unknown";
# look at meta rules that are not disabled
next unless ($type eq "meta" && ($name =~ /^__/ || $info->{score} != 0));
Modified: spamassassin/trunk/masses/validate-model
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/validate-model?view=diff&rev=507708&r1=507707&r2=507708
==============================================================================
--- spamassassin/trunk/masses/validate-model (original)
+++ spamassassin/trunk/masses/validate-model Wed Feb 14 13:57:01 2007
@@ -3,6 +3,8 @@
# set SCORESET
. config
+LEARN_RATE="${LEARN_RATE:-2.0}"
+
RUNS=10
PASSES="1 2 3 4 5 6 7 8 9 10"
@@ -104,7 +106,7 @@
echo "[pass $PASS start]"
pwd
date
- ./perceptron -p $HAM_PREFERENCE -t $THRESHOLD -e $EPOCHS
+ ./perceptron -p $HAM_PREFERENCE -t $THRESHOLD -e $EPOCHS -l $LEARN_RATE
mv perceptron.scores $LOGDIR/scores.$PASS
echo "[pass $PASS end]"
) | tee -a $LOGDIR/log