You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by do...@apache.org on 2007/05/06 23:47:57 UTC

svn commit: r535659 - /spamassassin/rules/trunk/sandbox/dos/new-rule-score-gen/generate-new-scores

Author: dos
Date: Sun May  6 14:47:57 2007
New Revision: 535659

URL: http://svn.apache.org/viewvc?view=rev&rev=535659
Log:
Sprinkle `date` calls and stage-marker echoes throughout the script to see why the runs over the last few days have taken four times longer than previously.

Modified:
    spamassassin/rules/trunk/sandbox/dos/new-rule-score-gen/generate-new-scores

Modified: spamassassin/rules/trunk/sandbox/dos/new-rule-score-gen/generate-new-scores
URL: http://svn.apache.org/viewvc/spamassassin/rules/trunk/sandbox/dos/new-rule-score-gen/generate-new-scores?view=diff&rev=535659&r1=535658&r2=535659
==============================================================================
--- spamassassin/rules/trunk/sandbox/dos/new-rule-score-gen/generate-new-scores (original)
+++ spamassassin/rules/trunk/sandbox/dos/new-rule-score-gen/generate-new-scores Sun May  6 14:47:57 2007
@@ -35,6 +35,9 @@
   exit
 fi
 
+date
+echo "[ rsyncing logs ]"
+
 # prep current nightly mass-check logs
 if [ ! -e corpus ]; then
   mkdir corpus
@@ -42,6 +45,9 @@
 cd corpus
 rsync -artvz $RSYNC_USERNAME@rsync.spamassassin.org::corpus/*.log .
 
+date
+echo "[ selecting log files to use for scoreset $SCORESET ]"
+
 # select a usable corpus (it'll use all available logs for the wanted score set
 # with the most recent revision found among logs for that score set)
 rm -rf usable-corpus-set$SCORESET
@@ -90,6 +96,9 @@
 
 cd ../..
 
+date
+echo "[ checking out code from svn repository ]"
+
 # make note of what logs we are going to use
 echo "# Using score set $SCORESET logs for revision $REVISION from:" > scores-set$SCORESET
 echo "#" `ls corpus/usable-corpus-set$SCORESET` >> scores-set$SCORESET
@@ -112,6 +121,9 @@
 cp extract-new-scores trunk-new-rules-set$SCORESET/masses/extract-new-scores
 cp add-hitless-active-to-freqs trunk-new-rules-set$SCORESET/masses/add-hitless-active-to-freqs
 
+date
+echo "[ generating active ruleset via make ]"
+
 cd trunk-new-rules-set$SCORESET
 perl Makefile.PL < /dev/null
 make
@@ -120,7 +132,8 @@
 grep -v ^score rules/72_active.cf > rules/72_active.cf-scoreless
 mv -f rules/72_active.cf-scoreless rules/72_active.cf
 
-chmod +x masses/log-grep-recent	# this can go after April 21, 2007
+date
+echo "[ running log-grep-recent ]"
 
 masses/log-grep-recent -m 38 ../corpus/usable-corpus-set$SCORESET/ham-*.log > masses/ham-full.log
 masses/log-grep-recent -m 2 ../corpus/usable-corpus-set$SCORESET/spam-*.log > masses/spam-full.log
@@ -131,6 +144,9 @@
 NAME="set$SCORESET"
 LOGDIR="gen-$NAME-$HAM_PREFERENCE-$THRESHOLD-$EPOCHS-ga"
 
+date
+echo "[ running make freqs ]"
+
 # generate new ruleset
 cd masses
 
@@ -152,9 +168,15 @@
   done
 done
 
+date
+echo "[ starting runGA ]"
+
 # generate the new scores
 ./runGA
 
+date
+echo "[ generating fp-fn-statistics ]"
+
 # generate stats on the old rules to compare against the new rules and their scores
 ./fp-fn-statistics --ham ham-test.log --spam spam-test.log --scoreset $SCORESET \
 	--cffile=../rules-base --fnlog $LOGDIR/false_negatives_original \
@@ -164,6 +186,9 @@
 	--cffile=../rules-base --fnlog $LOGDIR/false_negatives_original \
 	--fplog $LOGDIR/false_positives_original > $LOGDIR/stats-set$SCORESET-original-full
 
+date
+echo "[ extracting new scores ]"
+
 # extract the new scores
 ./extract-new-scores
 cat $LOGDIR/scores-new >> ../../scores-set$SCORESET
@@ -188,4 +213,7 @@
 echo "##### WITHOUT NEW RULES AND SCORES #####" >> stats-set$SCORESET
 cat trunk-new-rules-set$SCORESET/masses/$LOGDIR/stats-set$SCORESET-original-full >> stats-set$SCORESET
 cat trunk-new-rules-set$SCORESET/masses/$LOGDIR/stats-set$SCORESET-original-test >> stats-set$SCORESET
+
+date
+echo "[ completed ]"