You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by qu...@apache.org on 2004/05/16 22:25:17 UTC
svn commit: rev 10701 - incubator/spamassassin/trunk/masses/tenpass
Author: quinlan
Date: Sun May 16 13:25:17 2004
New Revision: 10701
Modified:
incubator/spamassassin/trunk/masses/tenpass/10pass-compute-tcr
incubator/spamassassin/trunk/masses/tenpass/10pass-run
incubator/spamassassin/trunk/masses/tenpass/compute-current-tcr
Log:
get most of the tenpass stuff to the point where it runs
Modified: incubator/spamassassin/trunk/masses/tenpass/10pass-compute-tcr
==============================================================================
--- incubator/spamassassin/trunk/masses/tenpass/10pass-compute-tcr (original)
+++ incubator/spamassassin/trunk/masses/tenpass/10pass-compute-tcr Sun May 16 13:25:17 2004
@@ -1,15 +1,17 @@
#!/bin/sh
+SCORESET="0"
+
for run in 1 2 3 4 5 6 7 8 9 10
do
mkdir tmp/10passrules > /dev/null 2>&1
cp ../rules/[0-9]*.cf tmp/10passrules
- ./rewrite-cf-with-new-scores ../rules/50_scores.cf \
+ ./rewrite-cf-with-new-scores $SCORESET ../rules/50_scores.cf \
tenpass_results/scores.$run > tmp/10passrules/50_scores.cf
./fp-fn-statistics --cffile=tmp/10passrules \
--spam=tenpass_results/spam.log.$run \
- --nonspam=tenpass_results/nonspam.log.$run > tmp/stats
+ --nonspam=tenpass_results/ham.log.$run > tmp/stats
grep TCR: tmp/stats
done
Modified: incubator/spamassassin/trunk/masses/tenpass/10pass-run
==============================================================================
--- incubator/spamassassin/trunk/masses/tenpass/10pass-run (original)
+++ incubator/spamassassin/trunk/masses/tenpass/10pass-run Sun May 16 13:25:17 2004
@@ -1,11 +1,13 @@
#!/bin/sh
# change these!
-NSBASE=../../logs/nonspam-jm
-SPBASE=../../logs/spam-jm
+NSBASE=ham-logs
+SPBASE=spam-logs
+SCORESET="0"
passes="1 2 3 4 5 6 7 8 9 10"
-mkdir tenpass_results
+mkdir -p tenpass_results
+mkdir -p ORIG
> make.output
@@ -15,28 +17,28 @@
echo "Training for corpus $id..."
pwd; date
- > nonspam.log
- > spam.log
+ > ORIG/ham-set$SCORESET.log
+ > ORIG/spam-set$SCORESET.log
+
echo -n "(using corpora blocks: "
for notid in $passes ; do
if [ "$notid" != "$id" ] ; then
echo -n "$notid "
- cat $NSBASE/split-$notid.log >> nonspam.log
- cat $SPBASE/split-$notid.log >> spam.log
+ cat $NSBASE/split-$notid.log >> ORIG/ham-set$SCORESET.log
+ cat $SPBASE/split-$notid.log >> ORIG/spam-set$SCORESET.log
fi
done
echo "for training)"
make clean >> make.output
make >> make.output 2>&1
- ./evolve
- pwd; date
+ ./runGA
+ pwd
+ date
echo "Saving test data for corpus $id..."
- cp $NSBASE/split-$id.log tenpass_results/nonspam.log.$id
+ cp $NSBASE/split-$id.log tenpass_results/ham.log.$id
cp $SPBASE/split-$id.log tenpass_results/spam.log.$id
- cp craig-evolve.scores tenpass_results/scores.$id
+ cp gen-set$SCORESET.scores tenpass_results/scores.$id
done
-
-
Modified: incubator/spamassassin/trunk/masses/tenpass/compute-current-tcr
==============================================================================
--- incubator/spamassassin/trunk/masses/tenpass/compute-current-tcr (original)
+++ incubator/spamassassin/trunk/masses/tenpass/compute-current-tcr Sun May 16 13:25:17 2004
@@ -9,7 +9,7 @@
my $sumsp = 0;
open(SPAM, "<spam.log");
-open(NONSPAM, "<nonspam.log");
+open(HAM, "<ham.log");
open(SCORES, "<../rules/50_scores.cf");
while(<SCORES>)
@@ -41,7 +41,7 @@
}
close(SPAM);
-while(<NONSPAM>)
+while(<HAM>)
{
next if /^\#/;
/.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)\s*$/;