You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2007/02/12 20:05:35 UTC

svn commit: r506581 - in /spamassassin/trunk/masses: runGA score-ranges-from-freqs tenpass/split-log-into-buckets-cached

Author: jm
Date: Mon Feb 12 11:05:34 2007
New Revision: 506581

URL: http://svn.apache.org/viewvc?view=rev&rev=506581
Log:
More fixes for masses scripts: tenpass/split-log-into-buckets-cached needed to exit if the cached version was up to date; score-ranges-from-freqs should not mark 0-scored rules as immutable unless they also had no hits, since the score could have been reset to 0 after the mass-check; and runGA could save disk space by using ln instead of cp, and should save the freqs file used.

Modified:
    spamassassin/trunk/masses/runGA
    spamassassin/trunk/masses/score-ranges-from-freqs
    spamassassin/trunk/masses/tenpass/split-log-into-buckets-cached

Modified: spamassassin/trunk/masses/runGA
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/runGA?view=diff&rev=506581&r1=506580&r2=506581
==============================================================================
--- spamassassin/trunk/masses/runGA (original)
+++ spamassassin/trunk/masses/runGA Mon Feb 12 11:05:34 2007
@@ -37,16 +37,16 @@
 # Generate 90/10 split logs
 # keep the *-split*.logs in cwd so it's cacheable
 echo "[Generating 90/10 split ham]"
-tenpass/split-log-into-buckets-cached \
+perl tenpass/split-log-into-buckets-cached \
     9:gen-cache/ham-split9.log 1:gen-cache/ham-split1.log ORIG/ham-$NAME.log
-cp -p gen-cache/ham-split9.log $LOGDIR/NSBASE/ham.log
-cp -p gen-cache/ham-split1.log $LOGDIR/NSBASE/ham-test.log
+ln gen-cache/ham-split9.log $LOGDIR/NSBASE/ham.log
+ln gen-cache/ham-split1.log $LOGDIR/NSBASE/ham-test.log
 
 echo "[Generating 90/10 split spam]"
-tenpass/split-log-into-buckets-cached \
+perl tenpass/split-log-into-buckets-cached \
     9:gen-cache/spam-split9.log 1:gen-cache/spam-split1.log ORIG/spam-$NAME.log
-cp -p gen-cache/spam-split9.log $LOGDIR/NSBASE/spam.log
-cp -p gen-cache/spam-split1.log $LOGDIR/NSBASE/spam-test.log
+ln gen-cache/spam-split9.log $LOGDIR/SPBASE/spam.log
+ln gen-cache/spam-split1.log $LOGDIR/SPBASE/spam-test.log
 
 echo "[Setting up for gen run]"
 # Ok, setup for a run
@@ -61,6 +61,7 @@
 echo "[Generating perceptron]"
 # Generate perceptron with full logs
 make -j $numcpus SCORESET=$SCORESET > $LOGDIR/make.output 2>&1
+cp freqs $LOGDIR/freqs
 
 (
 echo "[config]"

Modified: spamassassin/trunk/masses/score-ranges-from-freqs
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/score-ranges-from-freqs?view=diff&rev=506581&r1=506580&r2=506581
==============================================================================
--- spamassassin/trunk/masses/score-ranges-from-freqs (original)
+++ spamassassin/trunk/masses/score-ranges-from-freqs Mon Feb 12 11:05:34 2007
@@ -127,8 +127,12 @@
     $mutable_tests{$test} = 0;
   }
   elsif ($rules{$test}->{score} == 0) {
-    print "rule $test: immutable since score is 0\n";
-    $mutable_tests{$test} = 0;
+    # this causes trouble, since rewrite-with-new-scores has a tendency
+    # to "simplify" scores down to 0.  commented out, since rules that were
+    # really scored zero when the mass-check ran will also have no hits,
+    # and the 'less than 0.01%' case below takes care of those.
+    # print "rule $test: immutable since score is 0\n";
+    # $mutable_tests{$test} = 0;
   }
 
   if ($tflags =~ m/\bnice\b/i) {

Modified: spamassassin/trunk/masses/tenpass/split-log-into-buckets-cached
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/tenpass/split-log-into-buckets-cached?view=diff&rev=506581&r1=506580&r2=506581
==============================================================================
--- spamassassin/trunk/masses/tenpass/split-log-into-buckets-cached (original)
+++ spamassassin/trunk/masses/tenpass/split-log-into-buckets-cached Mon Feb 12 11:05:34 2007
@@ -42,6 +42,7 @@
 
 if ($rebuild == 0) {
   print "Existing outputs are up-to-date\n";
+  exit;
 }
 
 my %buckets = ();