You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2007/02/12 20:05:35 UTC
svn commit: r506581 - in /spamassassin/trunk/masses: runGA
score-ranges-from-freqs tenpass/split-log-into-buckets-cached
Author: jm
Date: Mon Feb 12 11:05:34 2007
New Revision: 506581
URL: http://svn.apache.org/viewvc?view=rev&rev=506581
Log:
More fixes for the masses scripts: tenpass/split-log-into-buckets-cached now exits when the cached outputs are up to date; score-ranges-from-freqs no longer marks 0-scored rules as immutable unless they also had no hits, since the score could have been reset to 0 after the mass-check; and runGA saves disk space by using ln instead of cp, and now also saves the freqs file it used.
Modified:
spamassassin/trunk/masses/runGA
spamassassin/trunk/masses/score-ranges-from-freqs
spamassassin/trunk/masses/tenpass/split-log-into-buckets-cached
Modified: spamassassin/trunk/masses/runGA
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/runGA?view=diff&rev=506581&r1=506580&r2=506581
==============================================================================
--- spamassassin/trunk/masses/runGA (original)
+++ spamassassin/trunk/masses/runGA Mon Feb 12 11:05:34 2007
@@ -37,16 +37,16 @@
# Generate 90/10 split logs
# keep the *-split*.logs in cwd so it's cacheable
echo "[Generating 90/10 split ham]"
-tenpass/split-log-into-buckets-cached \
+perl tenpass/split-log-into-buckets-cached \
9:gen-cache/ham-split9.log 1:gen-cache/ham-split1.log ORIG/ham-$NAME.log
-cp -p gen-cache/ham-split9.log $LOGDIR/NSBASE/ham.log
-cp -p gen-cache/ham-split1.log $LOGDIR/NSBASE/ham-test.log
+ln gen-cache/ham-split9.log $LOGDIR/NSBASE/ham.log
+ln gen-cache/ham-split1.log $LOGDIR/NSBASE/ham-test.log
echo "[Generating 90/10 split spam]"
-tenpass/split-log-into-buckets-cached \
+perl tenpass/split-log-into-buckets-cached \
9:gen-cache/spam-split9.log 1:gen-cache/spam-split1.log ORIG/spam-$NAME.log
-cp -p gen-cache/spam-split9.log $LOGDIR/SPBASE/spam.log
-cp -p gen-cache/spam-split1.log $LOGDIR/SPBASE/spam-test.log
+ln gen-cache/spam-split9.log $LOGDIR/SPBASE/spam.log
+ln gen-cache/spam-split1.log $LOGDIR/SPBASE/spam-test.log
echo "[Setting up for gen run]"
# Ok, setup for a run
@@ -61,6 +61,7 @@
echo "[Generating perceptron]"
# Generate perceptron with full logs
make -j $numcpus SCORESET=$SCORESET > $LOGDIR/make.output 2>&1
+cp freqs $LOGDIR/freqs
(
echo "[config]"
Modified: spamassassin/trunk/masses/score-ranges-from-freqs
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/score-ranges-from-freqs?view=diff&rev=506581&r1=506580&r2=506581
==============================================================================
--- spamassassin/trunk/masses/score-ranges-from-freqs (original)
+++ spamassassin/trunk/masses/score-ranges-from-freqs Mon Feb 12 11:05:34 2007
@@ -127,8 +127,12 @@
$mutable_tests{$test} = 0;
}
elsif ($rules{$test}->{score} == 0) {
- print "rule $test: immutable since score is 0\n";
- $mutable_tests{$test} = 0;
+ # this causes trouble, since rewrite-with-new-scores has a tendency
+ # to "simplify" scores down to 0. commented out, since rules that were
+ # really scored zero when the mass-check ran will also have no hits,
+ # and the 'less than 0.01%' case below takes care of those.
+ # print "rule $test: immutable since score is 0\n";
+ # $mutable_tests{$test} = 0;
}
if ($tflags =~ m/\bnice\b/i) {
Modified: spamassassin/trunk/masses/tenpass/split-log-into-buckets-cached
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/tenpass/split-log-into-buckets-cached?view=diff&rev=506581&r1=506580&r2=506581
==============================================================================
--- spamassassin/trunk/masses/tenpass/split-log-into-buckets-cached (original)
+++ spamassassin/trunk/masses/tenpass/split-log-into-buckets-cached Mon Feb 12 11:05:34 2007
@@ -42,6 +42,7 @@
if ($rebuild == 0) {
print "Existing outputs are up-to-date\n";
+ exit;
}
my %buckets = ();