You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2004/01/28 01:57:40 UTC

svn commit: rev 6331 - incubator/spamassassin/trunk/contrib

Author: felicity
Date: Tue Jan 27 16:57:39 2004
New Revision: 6331

Modified:
   incubator/spamassassin/trunk/contrib/mbox-to-check
   incubator/spamassassin/trunk/contrib/run-corpora
Log:
updates for the corpora run scripts

Modified: incubator/spamassassin/trunk/contrib/mbox-to-check
==============================================================================
--- incubator/spamassassin/trunk/contrib/mbox-to-check	(original)
+++ incubator/spamassassin/trunk/contrib/mbox-to-check	Tue Jan 27 16:57:39 2004
@@ -17,9 +17,10 @@
 *dir    = *File::Find::dir;
 *prune  = *File::Find::prune;
 
+my @dirs = @ARGV ? @ARGV : ( 'ham', 'spam' );  # "@ARGV || (...)" would yield the arg count, not the args
 
 # Traverse desired filesystems
-File::Find::find({wanted => \&wanted}, 'ham', 'spam');
+File::Find::find({wanted => \&wanted}, @dirs);
 exit;
 
 

Modified: incubator/spamassassin/trunk/contrib/run-corpora
==============================================================================
--- incubator/spamassassin/trunk/contrib/run-corpora	(original)
+++ incubator/spamassassin/trunk/contrib/run-corpora	Tue Jan 27 16:57:39 2004
@@ -8,17 +8,21 @@
 # By default, it'll do a set0 run, but you can change that by adding
 # --net or --bayes to the commandline.
 #
-# --net by itself will automatically try running 6 mass-checks in parallel
+# --net by itself will automatically try running 4 mass-checks in parallel
 #
 
 CORPUS=/home/felicity/SA/corpus
 SA_VER=/home/felicity/SA/spamassassin-corpora
-export RSYNC_PASSWORD="your_rsync_password"
+SVN=/usr/local/bin/svn
+SVNVERS=/usr/local/bin/svnversion
 
 NET=0
 BAYES=0
-OPTS="--progress"
-FILENAME="your_rsync_username"
+OPTS="--progress --after=-2592000"
+RSYNC_USER=your_rsync_username
+RSYNC_PASSWORD="your_rsync_password"; export RSYNC_PASSWORD
+VERS=nightly
+FILENAME=$RSYNC_USER
 
 while [ ! -z "$1" ]; do
   if [ "$1" = "--net" ]; then
@@ -32,10 +36,11 @@
 if [ $NET -eq 1 ]; then
   FILENAME="net-$FILENAME"
   OPTS="$OPTS --net"
+  VERS=weekly
 
   # We want to do this with more parallelization, but not if Bayes is also running ...
   if [ $BAYES -eq 0 ]; then
-    OPTS="$OPTS -j 6"
+    OPTS="$OPTS -j 4 --restart 1000"
   fi
 fi
 if [ $BAYES -eq 1 ]; then
@@ -47,11 +52,29 @@
 echo "[Updating $SA_VER]"
 cd $SA_VER
 COUNT=0
-while ! cvs -q up; do
+while ! wget -q -nd -m http://rsync.spamassassin.org/$VERS-versions.txt ; do
   sleep 60
   COUNT=`expr $COUNT + 1`
   if [ $COUNT -gt 5 ]; then
-    echo "Couldn't do a CVS update, aborting!" >&2
+    echo "Couldn't get the $VERS revision version, aborting!" >&2
+    exit 2
+  fi
+done
+
+CREV=`$SVNVERS`
+NREV=`tail -1 $VERS-versions.txt | awk '{print $2}'`  # read the list wget just fetched (nightly or weekly)
+
+if [ $CREV -ge $NREV ]; then
+  echo "Current rev ($CREV) newer or equal to $VERS rev ($NREV)"
+  exit 0
+fi
+
+COUNT=0
+while ! $SVN update -r $NREV; do
+  sleep 60
+  COUNT=`expr $COUNT + 1`
+  if [ $COUNT -gt 5 ]; then
+    echo "Couldn't do a SVN update, aborting!" >&2
     exit 2
   fi
 done
@@ -74,13 +97,14 @@
 	exit 1
 fi
 
-mv -f ham.log ham-$FILENAME.log
-mv -f spam.log spam-$FILENAME.log
-mv -f results.log results-$FILENAME.log
+mv -f ham.log results/ham-$FILENAME.log
+mv -f spam.log results/spam-$FILENAME.log
+mv -f results.log results/hf/results-$FILENAME.log
 
+cd results
 # now we have our ham.log and spam.log files...
 echo "[Uploading daily corpus logs]"
-rsync -qCPcvuzb *-$FILENAME.log $FILENAME@rsync.spamassassin.org::corpus/
+rsync -qCPcvuzb *-$FILENAME.log $RSYNC_USER@rsync.spamassassin.org::corpus/
 
 echo "[Our results]"
-cat results-$FILENAME.log
+cat hf/results-$FILENAME.log