You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2007/02/14 22:57:02 UTC

svn commit: r507708 - in /spamassassin/trunk/masses: fp-fn-statistics logs-to-c rewrite-cf-with-new-scores validate-model

Author: jm
Date: Wed Feb 14 13:57:01 2007
New Revision: 507708

URL: http://svn.apache.org/viewvc?view=rev&rev=507708
Log:
Stop zeroing scores; it seems to kill the accuracy of the GA FP%/FN% computation compared to fp-fn-statistics. Also, don't include T_ rules in the input files generated by logs-to-c. Finally, support the LEARN_RATE variable in validate-model.

Modified:
    spamassassin/trunk/masses/fp-fn-statistics
    spamassassin/trunk/masses/logs-to-c
    spamassassin/trunk/masses/rewrite-cf-with-new-scores
    spamassassin/trunk/masses/validate-model

Modified: spamassassin/trunk/masses/fp-fn-statistics
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/fp-fn-statistics?view=diff&rev=507708&r1=507707&r2=507708
==============================================================================
--- spamassassin/trunk/masses/fp-fn-statistics (original)
+++ spamassassin/trunk/masses/fp-fn-statistics Wed Feb 14 13:57:01 2007
@@ -96,7 +96,7 @@
 
 readscores();
 
-die "wrong scoreset in tmp/rules.pl" unless $rules->{_scoreset} == $opt_scoreset;
+die "wrong scoreset in tmp/rules.pl" unless $allrules{_scoreset} == $opt_scoreset;
 
 print "Reading per-message hit stat logs and scores...\n";
 my ($num_spam, $num_ham);
@@ -189,8 +189,8 @@
 
 sub readscores {
   print "Reading scores from \"$opt_cffile\"...\n";
-  system ("../build/parse-rules-for-masses -d \"$opt_cffile\" -s $opt_scoreset") and die;
-  require "./tmp/rules_$opt_scoreset.pl";
+  system ("../build/parse-rules-for-masses -o tmp/rules_$opt_scoreset.pl -d \"$opt_cffile\" -s $opt_scoreset") and die;
+  require "tmp/rules_$opt_scoreset.pl";
   %allrules = %rules;           # ensure it stays global
 }
 

Modified: spamassassin/trunk/masses/logs-to-c
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/logs-to-c?view=diff&rev=507708&r1=507707&r2=507708
==============================================================================
--- spamassassin/trunk/masses/logs-to-c (original)
+++ spamassassin/trunk/masses/logs-to-c Wed Feb 14 13:57:01 2007
@@ -419,11 +419,23 @@
 
   # catch up on the ones missed; seems to be userconf or 0-hitters mostly.
   foreach my $t (sort keys %allrules) {
+    next if ($t eq '_scoreset');
     next if (exists($range_lo{$t}));
+
     if ($allrules{$t}->{issubrule}) {
       if (!$ignored_rule{$t}) {
         # warn "ignoring '$t': is sub-rule\n";  # no need to warn here
         $ignored_rule{$t} = 1;
+      }
+      $mutable_tests{$t} = 0;
+      next;
+    }
+    if ($t =~ /^T_/) {
+      if (!$ignored_rule{$t}) {
+        # warn "ignoring '$t': is test rule\n";  # no need to warn here
+        $ignored_rule{$t} = 1;
+	$range_lo{$t} = 0.01;    # clamp to insignificant range
+	$range_hi{$t} = 0.01;
       }
       $mutable_tests{$t} = 0;
       next;

Modified: spamassassin/trunk/masses/rewrite-cf-with-new-scores
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/rewrite-cf-with-new-scores?view=diff&rev=507708&r1=507707&r2=507708
==============================================================================
--- spamassassin/trunk/masses/rewrite-cf-with-new-scores (original)
+++ spamassassin/trunk/masses/rewrite-cf-with-new-scores Wed Feb 14 13:57:01 2007
@@ -98,7 +98,7 @@
 $opt_out ||= "50_scores.cf";
 
 my $NUM_SCORESETS = 4;
-my $ZERO_MINISCULE_SCORES =     1;
+my $ZERO_MINISCULE_SCORES =     0;
 my $MINISCULE_THRESHOLD =       0.01;      # points
 
 my $UNZERO_META_PREDICATES =    1;
@@ -443,6 +443,8 @@
 
   while (my ($name, $info) = each %rules)
   {
+    next if ($name eq '_scoreset');
+
     my $type = $info->{type} || "unknown";
     # look at meta rules that are not disabled
     next unless ($type eq "meta" && ($name =~ /^__/ || $info->{score} != 0));

Modified: spamassassin/trunk/masses/validate-model
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/validate-model?view=diff&rev=507708&r1=507707&r2=507708
==============================================================================
--- spamassassin/trunk/masses/validate-model (original)
+++ spamassassin/trunk/masses/validate-model Wed Feb 14 13:57:01 2007
@@ -3,6 +3,8 @@
 # set SCORESET
 . config
 
+LEARN_RATE="${LEARN_RATE:-2.0}"
+
 RUNS=10
 PASSES="1 2 3 4 5 6 7 8 9 10"
 
@@ -104,7 +106,7 @@
 	echo "[pass $PASS start]"
 	pwd
 	date
-	./perceptron -p $HAM_PREFERENCE -t $THRESHOLD -e $EPOCHS
+	./perceptron -p $HAM_PREFERENCE -t $THRESHOLD -e $EPOCHS -l $LEARN_RATE
 	mv perceptron.scores $LOGDIR/scores.$PASS
 	echo "[pass $PASS end]"
 	) | tee -a $LOGDIR/log