You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2009/06/30 23:34:19 UTC

svn commit: r789964 - in /spamassassin/trunk/masses/bayes-testing/benchmark: ./ helper/db_file/ helper/mysql/ helper/pgsql/ tests/db_file/site/ tests/mysql/site/ tests/pgsql/site/ tests/sdbm/site/

Author: jm
Date: Tue Jun 30 21:34:19 2009
New Revision: 789964

URL: http://svn.apache.org/viewvc?rev=789964&view=rev
Log:
bug 6045: lots of fixes and cleanup for the Bayes benchmarking scripts, thanks to Michael Alan Dorman

Removed:
    spamassassin/trunk/masses/bayes-testing/benchmark/run-bench.driver
Modified:
    spamassassin/trunk/masses/bayes-testing/benchmark/helper/db_file/dbsize
    spamassassin/trunk/masses/bayes-testing/benchmark/helper/db_file/setup
    spamassassin/trunk/masses/bayes-testing/benchmark/helper/mysql/cleardb
    spamassassin/trunk/masses/bayes-testing/benchmark/helper/mysql/dbsize
    spamassassin/trunk/masses/bayes-testing/benchmark/helper/pgsql/cleardb
    spamassassin/trunk/masses/bayes-testing/benchmark/helper/pgsql/dbsize
    spamassassin/trunk/masses/bayes-testing/benchmark/run-bench
    spamassassin/trunk/masses/bayes-testing/benchmark/runmbox.pl
    spamassassin/trunk/masses/bayes-testing/benchmark/runmulti.pl
    spamassassin/trunk/masses/bayes-testing/benchmark/tests/db_file/site/init.pre
    spamassassin/trunk/masses/bayes-testing/benchmark/tests/mysql/site/init.pre
    spamassassin/trunk/masses/bayes-testing/benchmark/tests/pgsql/site/init.pre
    spamassassin/trunk/masses/bayes-testing/benchmark/tests/sdbm/site/init.pre

Modified: spamassassin/trunk/masses/bayes-testing/benchmark/helper/db_file/dbsize
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/bayes-testing/benchmark/helper/db_file/dbsize?rev=789964&r1=789963&r2=789964&view=diff
==============================================================================
--- spamassassin/trunk/masses/bayes-testing/benchmark/helper/db_file/dbsize (original)
+++ spamassassin/trunk/masses/bayes-testing/benchmark/helper/db_file/dbsize Tue Jun 30 21:34:19 2009
@@ -1,6 +1,5 @@
 #!/bin/bash
 
 DBDIR=$1/dbdir
-
-echo "[Determining DB_File bayes database size - $DBDIR/bayes*]"
-ls -al $DBDIR/bayes*
+echo "[Determining DB_File bayes database size - $DBDIR]"
+du -sk $DBDIR

Modified: spamassassin/trunk/masses/bayes-testing/benchmark/helper/db_file/setup
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/bayes-testing/benchmark/helper/db_file/setup?rev=789964&r1=789963&r2=789964&view=diff
==============================================================================
--- spamassassin/trunk/masses/bayes-testing/benchmark/helper/db_file/setup (original)
+++ spamassassin/trunk/masses/bayes-testing/benchmark/helper/db_file/setup Tue Jun 30 21:34:19 2009
@@ -2,6 +2,8 @@
 
 BAYESPATH=$1/dbdir/bayes
 
+mkdir -p $(dirname $BAYESPATH)
+
 echo "[Replacing BAYESPATH in $1/site/local.cf file]"
 sed -i -e "s:@@BAYESPATH@@:$BAYESPATH:" $1/site/local.cf
 

Modified: spamassassin/trunk/masses/bayes-testing/benchmark/helper/mysql/cleardb
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/bayes-testing/benchmark/helper/mysql/cleardb?rev=789964&r1=789963&r2=789964&view=diff
==============================================================================
--- spamassassin/trunk/masses/bayes-testing/benchmark/helper/mysql/cleardb (original)
+++ spamassassin/trunk/masses/bayes-testing/benchmark/helper/mysql/cleardb Tue Jun 30 21:34:19 2009
@@ -3,4 +3,4 @@
 DIRNAME=`dirname $0`
 
 echo "[Removing bayes database]"
-/usr/local/mysql4.1/bin/mysql -usauser -psapasswd spamassassin < $DIRNAME/delete.sql
+mysql -usauser -psapasswd spamassassin < $DIRNAME/delete.sql

Modified: spamassassin/trunk/masses/bayes-testing/benchmark/helper/mysql/dbsize
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/bayes-testing/benchmark/helper/mysql/dbsize?rev=789964&r1=789963&r2=789964&view=diff
==============================================================================
--- spamassassin/trunk/masses/bayes-testing/benchmark/helper/mysql/dbsize (original)
+++ spamassassin/trunk/masses/bayes-testing/benchmark/helper/mysql/dbsize Tue Jun 30 21:34:19 2009
@@ -3,4 +3,4 @@
 DIRNAME=`dirname $0`
 
 echo "[Determining tables size for MySQL tables]"
-/usr/local/mysql4.1/bin/mysql -usauser -psapasswd spamassassin < $DIRNAME/show_table_status.sql
+mysql -usauser -psapasswd spamassassin < $DIRNAME/show_table_status.sql

Modified: spamassassin/trunk/masses/bayes-testing/benchmark/helper/pgsql/cleardb
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/bayes-testing/benchmark/helper/pgsql/cleardb?rev=789964&r1=789963&r2=789964&view=diff
==============================================================================
--- spamassassin/trunk/masses/bayes-testing/benchmark/helper/pgsql/cleardb (original)
+++ spamassassin/trunk/masses/bayes-testing/benchmark/helper/pgsql/cleardb Tue Jun 30 21:34:19 2009
@@ -3,4 +3,4 @@
 DIRNAME=`dirname $0`
 
 echo "[Removing bayes database]"
-/usr/local/pgsql/bin/psql -U sapostgres -f $DIRNAME/delete.sql spamassassin
+psql -U sapostgres -f $DIRNAME/delete.sql spamassassin

Modified: spamassassin/trunk/masses/bayes-testing/benchmark/helper/pgsql/dbsize
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/bayes-testing/benchmark/helper/pgsql/dbsize?rev=789964&r1=789963&r2=789964&view=diff
==============================================================================
--- spamassassin/trunk/masses/bayes-testing/benchmark/helper/pgsql/dbsize (original)
+++ spamassassin/trunk/masses/bayes-testing/benchmark/helper/pgsql/dbsize Tue Jun 30 21:34:19 2009
@@ -3,4 +3,4 @@
 DIRNAME=`dirname $0`
 
 echo "[Determining tables size for pgsql tables]"
-/usr/local/pgsql/bin/psql -U sapostgres -f $DIRNAME/analyze.sql spamassassin
+psql -U sapostgres -f $DIRNAME/analyze.sql spamassassin

Modified: spamassassin/trunk/masses/bayes-testing/benchmark/run-bench
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/bayes-testing/benchmark/run-bench?rev=789964&r1=789963&r2=789964&view=diff
==============================================================================
--- spamassassin/trunk/masses/bayes-testing/benchmark/run-bench (original)
+++ spamassassin/trunk/masses/bayes-testing/benchmark/run-bench Tue Jun 30 21:34:19 2009
@@ -2,16 +2,99 @@
 
 TESTNAME=$1
 RESULTNAME=$2
-PROGNAME=`which $0`
-DIRNAME=`dirname $PROGNAME`
 
-RESULTSPATH=$DIRNAME/results/$RESULTNAME
-TESTSPATH=$DIRNAME/tests/$TESTNAME
+cd $(dirname $0)
 
-mkdir -p $RESULTSPATH
+RESULTSPATH=$PWD/results/$RESULTNAME
+
+CORPUS=$PWD/corpus
+HELPERPATH=$PWD/helper/$TESTNAME
+TESTSPATH=$PWD/tests/$TESTNAME
+
+USERPREFS=$RESULTSPATH/user_prefs
+SITECONFIG=$RESULTSPATH/site
+CONFIGPATH=$RESULTSPATH/share
+DBPATH=$RESULTSPATH/dbdir/bayes
+
+# Overrides the system wide configpath; comment this variable out to use the system default instead
+CONFIGPATHVALUE=--configpath=$CONFIGPATH
+
+runcmd () {
+    echo "$*"
+    #time env PERL5OPT=-d:NYTProf NYTPROF=trace=2; $*
+    time $*
+
+    echo "** DB STATS **"
+    /usr/bin/sa-learn $CONFIGPATHVALUE -p $USERPREFS --siteconfigpath=$SITECONFIG --dbpath $DBPATH --dump magic
+    $HELPERPATH/dbsize $RESULTSPATH
+}
 
+mkdir -p $RESULTSPATH
 cp -R $TESTSPATH/* $RESULTSPATH
+exec &> $RESULTSPATH/output.txt
+
+$HELPERPATH/setup $RESULTSPATH
+
+$HELPERPATH/cleardb $RESULTSPATH
+
+echo "[Running sa-learn --ham on hambucket1.mbox]"
+runcmd /usr/bin/sa-learn $CONFIGPATHVALUE -p $USERPREFS --siteconfigpath=$SITECONFIG --dbpath $DBPATH --ham --mbox $CORPUS/hambucket1.mbox
+
+echo "[Running sa-learn --spam on spambucket1.mbox]"
+runcmd /usr/bin/sa-learn $CONFIGPATHVALUE -p $USERPREFS --siteconfigpath=$SITECONFIG --dbpath $DBPATH --spam --mbox $CORPUS/spambucket1.mbox
+
+echo "[Backing Up Bayes Tokens]"
+runcmd /usr/bin/sa-learn $CONFIGPATHVALUE -p $USERPREFS --siteconfigpath=$SITECONFIG --dbpath $DBPATH --backup > $RESULTSPATH/backup1.txt
+
+echo "[Running Bayes force-expire]"
+runcmd /usr/bin/sa-learn $CONFIGPATHVALUE -p $USERPREFS --siteconfigpath=$SITECONFIG --dbpath $DBPATH --force-expire
+
+echo "[Removing old spamd.log file]"
+rm $RESULTSPATH/spamd.log
+
+echo "[Starting up spamd]"
+/usr/sbin/spamd -d -L $CONFIGPATHVALUE --siteconfigpath=$SITECONFIG -x --syslog=$RESULTSPATH/spamd.log --socketpath=/tmp/spamd.sock --pidfile=$RESULTSPATH/spamd.pid
+
+echo "[Sleeping a little to make sure spamd starts up]"
+sleep 10
+
+spamdpid=$(cat $RESULTSPATH/spamd.pid)
+
+trap "kill $spamdpid" ERR EXIT INT TERM
+
+echo "[Running Bucket 2]"
+#$PWD/runmbox.pl $CORPUS/hambucket2.mbox $CORPUS/spambucket2.mbox
+time $PWD/runmulti.pl $CORPUS/hambucket2.mbox $CORPUS/spambucket2.mbox $CORPUS/hambucket3.mbox $CORPUS/spambucket3.mbox
+
+echo "[Running Bayes sync]"
+runcmd /usr/bin/sa-learn $CONFIGPATHVALUE -p $USERPREFS --siteconfigpath=$SITECONFIG --dbpath $DBPATH --sync
+
+echo "[Backing Up Bayes Tokens]"
+runcmd /usr/bin/sa-learn $CONFIGPATHVALUE -p $USERPREFS --siteconfigpath=$SITECONFIG --dbpath $DBPATH --backup > $RESULTSPATH/backup2.txt
+
+echo "[Running Bayes force-expire]"
+runcmd /usr/bin/sa-learn $CONFIGPATHVALUE -p $USERPREFS --siteconfigpath=$SITECONFIG --dbpath $DBPATH --force-expire
+
+echo "[Backing Up Bayes Tokens]"
+runcmd /usr/bin/sa-learn $CONFIGPATHVALUE -p $USERPREFS --siteconfigpath=$SITECONFIG --dbpath $DBPATH --backup > $RESULTSPATH/backup3.txt
+
+echo "[Running sa-learn --forget on hamforget1.mbox]"
+runcmd /usr/bin/sa-learn $CONFIGPATHVALUE -p $USERPREFS --siteconfigpath=$SITECONFIG --dbpath $DBPATH --forget --mbox $CORPUS/hamforget1.mbox
+
+echo "[Running sa-learn --forget on spamforget1.mbox]"
+runcmd /usr/bin/sa-learn $CONFIGPATHVALUE -p $USERPREFS --siteconfigpath=$SITECONFIG --dbpath $DBPATH --forget --mbox $CORPUS/spamforget1.mbox
+
+echo "[Backing Up Bayes Tokens]"
+runcmd /usr/bin/sa-learn $CONFIGPATHVALUE -p $USERPREFS --siteconfigpath=$SITECONFIG --dbpath $DBPATH --backup > $RESULTSPATH/backup4.txt
+
+echo "[Running spamassassin on hambucket3.mbox]"
+runcmd /usr/bin/spamassassin -L $CONFIGPATHVALUE -p $USERPREFS --siteconfigpath=$SITECONFIG --mbox $CORPUS/hambucket4.mbox > /dev/null
 
-$DIRNAME/run-bench.driver $TESTNAME $RESULTSPATH > $RESULTSPATH/output.txt 2>&1
+echo "[Running spamassassin on spambucket3.mbox]"
+runcmd /usr/bin/spamassassin -L $CONFIGPATHVALUE -p $USERPREFS --siteconfigpath=$SITECONFIG --mbox $CORPUS/spambucket4.mbox > /dev/null
 
+echo "[Running Bayes sync]"
+runcmd /usr/bin/sa-learn $CONFIGPATHVALUE -p $USERPREFS --siteconfigpath=$SITECONFIG --dbpath $DBPATH --sync
 
+echo "[Backing Up Bayes Tokens]"
+runcmd /usr/bin/sa-learn $CONFIGPATHVALUE -p $USERPREFS --siteconfigpath=$SITECONFIG --dbpath $DBPATH --backup > $RESULTSPATH/backup5.txt

Modified: spamassassin/trunk/masses/bayes-testing/benchmark/runmbox.pl
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/bayes-testing/benchmark/runmbox.pl?rev=789964&r1=789963&r2=789964&view=diff
==============================================================================
--- spamassassin/trunk/masses/bayes-testing/benchmark/runmbox.pl (original)
+++ spamassassin/trunk/masses/bayes-testing/benchmark/runmbox.pl Tue Jun 30 21:34:19 2009
@@ -1,26 +1,22 @@
 #!/usr/bin/perl
 
-use Mail::Box::Manager;
-use File::Basename;
+use strict;
+use warnings;
+use Mail::SpamAssassin::ArchiveIterator;
+
+my $iterator = Mail::SpamAssassin::ArchiveIterator->new ({wanted_sub => \&wanted, result_sub => sub {}});
+my @folders = map {"ham:mbox:$_"} @ARGV;
+eval { $iterator->run(@folders); };
+if ($@) { die $@ unless ($@ =~ /HITLIMIT/); }
+
+sub wanted {
+    my($class, $filename, $recv_date, $msg_array) = @_;
+
+    open MAILOUT, "|/usr/bin/spamc -y -U /tmp/spamd.sock >> /dev/null" or die "Unable to open pipe: $!\n";
+    for (@{$msg_array}) {
+        print MAILOUT;
+    }
+    close MAILOUT;
 
-my $foldername = shift;
-
-my $folderbasename = basename($foldername);
-
-my $mgr = Mail::Box::Manager->new;
-my $folder = $mgr->open(folder => $foldername,
-			access => 'r');
-my $nummsg = $folder->messages;
-
-my $count = 0;
-
-while ($count < $nummsg) {
-  my $msg = $folder->message($count);
-
-  open MAILOUT, "|/usr/bin/spamc -y -U /tmp/spamd.sock >> /dev/null" or
-#  open MAILOUT, "|/usr/bin/spamc -U /tmp/spamd.sock >> $folderbasename.output" or
-    die "Unable to open pipe: $!\n";
-  $msg->print(\*MAILOUT);
-  close MAILOUT;
-  $count++;
+    return 1;
 }

Modified: spamassassin/trunk/masses/bayes-testing/benchmark/runmulti.pl
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/bayes-testing/benchmark/runmulti.pl?rev=789964&r1=789963&r2=789964&view=diff
==============================================================================
--- spamassassin/trunk/masses/bayes-testing/benchmark/runmulti.pl (original)
+++ spamassassin/trunk/masses/bayes-testing/benchmark/runmulti.pl Tue Jun 30 21:34:19 2009
@@ -1,23 +1,17 @@
 #!/usr/bin/perl -w
 
 use strict;
-
 use Proc::Background;
 
-my $hamfoldername = shift;
-my $spamfoldername = shift;
+my %procs;
 
-my $command = "./runmbox.pl";
+for my $folder (@ARGV) {
+    $procs{$folder} = Proc::Background->new("./runmbox.pl", $folder);
+}
 
-my $proc1 = Proc::Background->new($command, $hamfoldername);
-my $proc2 = Proc::Background->new($command, $spamfoldername);
+sleep 1 while (grep {$procs{$_}->alive} keys %procs);
 
-while ($proc1->alive() || $proc2->alive()) {
-    sleep 1;
+for my $folder (keys %procs) {
+    my $time = $procs{$folder}->end_time - $procs{$folder}->start_time;
+    print STDERR "$folder: $time\n";
 }
-my $time1 = $proc1->start_time;
-my $time2 = $proc1->end_time;
-my $time3 = $proc2->start_time;
-my $time4 = $proc2->end_time;
-print STDERR "Proc1: $time1 -- $time2\n";
-print STDERR "Proc2: $time3 -- $time4\n";

Modified: spamassassin/trunk/masses/bayes-testing/benchmark/tests/db_file/site/init.pre
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/bayes-testing/benchmark/tests/db_file/site/init.pre?rev=789964&r1=789963&r2=789964&view=diff
==============================================================================
--- spamassassin/trunk/masses/bayes-testing/benchmark/tests/db_file/site/init.pre (original)
+++ spamassassin/trunk/masses/bayes-testing/benchmark/tests/db_file/site/init.pre Tue Jun 30 21:34:19 2009
@@ -1,3 +1,6 @@
 # AutoLearnThreshold - threshold-based discriminator for Bayes auto-learning
 #
+loadplugin Mail::SpamAssassin::Plugin::Check
+loadplugin Mail::SpamAssassin::Plugin::Bayes
+loadplugin Mail::SpamAssassin::Plugin::AWL
 loadplugin Mail::SpamAssassin::Plugin::AutoLearnThreshold

Modified: spamassassin/trunk/masses/bayes-testing/benchmark/tests/mysql/site/init.pre
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/bayes-testing/benchmark/tests/mysql/site/init.pre?rev=789964&r1=789963&r2=789964&view=diff
==============================================================================
--- spamassassin/trunk/masses/bayes-testing/benchmark/tests/mysql/site/init.pre (original)
+++ spamassassin/trunk/masses/bayes-testing/benchmark/tests/mysql/site/init.pre Tue Jun 30 21:34:19 2009
@@ -1,3 +1,6 @@
 # AutoLearnThreshold - threshold-based discriminator for Bayes auto-learning
 #
+loadplugin Mail::SpamAssassin::Plugin::Check
+loadplugin Mail::SpamAssassin::Plugin::Bayes
+loadplugin Mail::SpamAssassin::Plugin::AWL
 loadplugin Mail::SpamAssassin::Plugin::AutoLearnThreshold

Modified: spamassassin/trunk/masses/bayes-testing/benchmark/tests/pgsql/site/init.pre
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/bayes-testing/benchmark/tests/pgsql/site/init.pre?rev=789964&r1=789963&r2=789964&view=diff
==============================================================================
--- spamassassin/trunk/masses/bayes-testing/benchmark/tests/pgsql/site/init.pre (original)
+++ spamassassin/trunk/masses/bayes-testing/benchmark/tests/pgsql/site/init.pre Tue Jun 30 21:34:19 2009
@@ -1,3 +1,6 @@
 # AutoLearnThreshold - threshold-based discriminator for Bayes auto-learning
 #
+loadplugin Mail::SpamAssassin::Plugin::Check
+loadplugin Mail::SpamAssassin::Plugin::Bayes
+loadplugin Mail::SpamAssassin::Plugin::AWL
 loadplugin Mail::SpamAssassin::Plugin::AutoLearnThreshold

Modified: spamassassin/trunk/masses/bayes-testing/benchmark/tests/sdbm/site/init.pre
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/bayes-testing/benchmark/tests/sdbm/site/init.pre?rev=789964&r1=789963&r2=789964&view=diff
==============================================================================
--- spamassassin/trunk/masses/bayes-testing/benchmark/tests/sdbm/site/init.pre (original)
+++ spamassassin/trunk/masses/bayes-testing/benchmark/tests/sdbm/site/init.pre Tue Jun 30 21:34:19 2009
@@ -1,3 +1,6 @@
 # AutoLearnThreshold - threshold-based discriminator for Bayes auto-learning
 #
+loadplugin Mail::SpamAssassin::Plugin::Check
+loadplugin Mail::SpamAssassin::Plugin::Bayes
+loadplugin Mail::SpamAssassin::Plugin::AWL
 loadplugin Mail::SpamAssassin::Plugin::AutoLearnThreshold