You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2008/01/18 10:58:08 UTC

svn commit: r613117 [1/2] - in /spamassassin/branches/bug-5293-pluginized-bayes: ./ build/ build/automc/ build/nightlymc/ lib/Mail/SpamAssassin/ lib/Mail/SpamAssassin/BayesStore/ lib/Mail/SpamAssassin/Conf/ lib/Mail/SpamAssassin/Message/ lib/Mail/SpamA...

Author: jm
Date: Fri Jan 18 01:57:53 2008
New Revision: 613117

URL: http://svn.apache.org/viewvc?rev=613117&view=rev
Log:
merged latest changes from trunk using: 'svn merge -r 602889:613114 https://svn.apache.org/repos/asf/spamassassin/trunk'

Added:
    spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/nightly-slave-stop
      - copied unchanged from r613114, spamassassin/trunk/masses/rule-qa/nightly-slave-stop
    spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/nightly-slaves-stop
      - copied unchanged from r613114, spamassassin/trunk/masses/rule-qa/nightly-slaves-stop
Modified:
    spamassassin/branches/bug-5293-pluginized-bayes/MANIFEST.SKIP
    spamassassin/branches/bug-5293-pluginized-bayes/Makefile.PL
    spamassassin/branches/bug-5293-pluginized-bayes/build/README
    spamassassin/branches/bug-5293-pluginized-bayes/build/automc/etc-init.d-freqsd
    spamassassin/branches/bug-5293-pluginized-bayes/build/automc/freqsd
    spamassassin/branches/bug-5293-pluginized-bayes/build/automc/freqsd-infrequent
    spamassassin/branches/bug-5293-pluginized-bayes/build/automc/gzip_old_ruleqa_data
    spamassassin/branches/bug-5293-pluginized-bayes/build/nightlymc/corpus.doc
    spamassassin/branches/bug-5293-pluginized-bayes/build/nightlymc/corpus.fredt
    spamassassin/branches/bug-5293-pluginized-bayes/build/nightlymc/corpus.jm
    spamassassin/branches/bug-5293-pluginized-bayes/build/nightlymc/corpus.zmi
    spamassassin/branches/bug-5293-pluginized-bayes/build/update_devel
    spamassassin/branches/bug-5293-pluginized-bayes/build/update_stable
    spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/AsyncLoop.pm
    spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/BayesStore/DBM.pm
    spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Conf/Parser.pm
    spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/DBBasedAddrList.pm
    spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Dns.pm
    spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/HTML.pm
    spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Message/Metadata/Received.pm
    spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Message/Node.pm
    spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
    spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/DCC.pm
    spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/DKIM.pm
    spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm
    spamassassin/branches/bug-5293-pluginized-bayes/masses/hit-frequencies
    spamassassin/branches/bug-5293-pluginized-bayes/masses/mass-check
    spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/automc/ruleqa.cgi
    spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/automc/ruleqa.css
    spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/corpus-hourly
    spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/corpus-nightly
    spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/corpus-nightly.post-svn
    spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/nightly-slave-start
    spamassassin/branches/bug-5293-pluginized-bayes/rules/20_ratware.cf
    spamassassin/branches/bug-5293-pluginized-bayes/rules/50_scores.cf
    spamassassin/branches/bug-5293-pluginized-bayes/rules/active.list
    spamassassin/branches/bug-5293-pluginized-bayes/rules/sa-update-pubkey.txt
    spamassassin/branches/bug-5293-pluginized-bayes/sa-compile.raw
    spamassassin/branches/bug-5293-pluginized-bayes/sa-learn.raw
    spamassassin/branches/bug-5293-pluginized-bayes/sa-update.raw
    spamassassin/branches/bug-5293-pluginized-bayes/t/basic_meta.t
    spamassassin/branches/bug-5293-pluginized-bayes/t/data/spam/gtubedcc.eml
    spamassassin/branches/bug-5293-pluginized-bayes/t/dkim.t
    spamassassin/branches/bug-5293-pluginized-bayes/t/razor2.t

Modified: spamassassin/branches/bug-5293-pluginized-bayes/MANIFEST.SKIP
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/MANIFEST.SKIP?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/MANIFEST.SKIP (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/MANIFEST.SKIP Fri Jan 18 01:57:53 2008
@@ -103,3 +103,4 @@
 ^masses/rule-dev/seek-phrases-in-corpus$
 ^build/announcements/.*.txt$
 t/mass_check.t
+^build/backup

Modified: spamassassin/branches/bug-5293-pluginized-bayes/Makefile.PL
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/Makefile.PL?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/Makefile.PL (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/Makefile.PL Fri Jan 18 01:57:53 2008
@@ -5,7 +5,18 @@
 use warnings;
 use Config;
 
-use ExtUtils::MakeMaker 5.45;
+use ExtUtils::MakeMaker;
+
+# Store the version for later use
+my $mm_version = $ExtUtils::MakeMaker::VERSION;
+
+# avoid stupid 'Argument "6.30_01" isn't numeric in numeric ge (>=)' warnings;
+# strip off the beta subversion noise that causes the trouble.
+$mm_version =~ s/_\S+$//;      # "6.30_01" => "6.30"
+
+if ($mm_version+0 < 5.45) {
+  die "SpamAssassin Makefile.PL requires at least ExtUtils::MakeMaker v5.45";
+}
 
 use constant RUNNING_ON_WINDOWS => ($^O =~ /^(mswin|dos|os2)/oi);
 use constant HAS_DBI => eval { require DBI; };
@@ -120,15 +131,11 @@
 
 # Gather some information about what EU::MM offers and/or needs
 my(
-  $mm_version,
   $mm_knows_destdir,
   $mm_has_destdir,
   $mm_has_good_destdir,
   $mm_needs_destdir,
 );
-
-# Store the version for later use
-$mm_version          = $ExtUtils::MakeMaker::VERSION;
 
 # MakeMaker prior to 6.11 doesn't support DESTDIR which is needed for
 # packaging with builddir!=destdir. See bug 2388.

Modified: spamassassin/branches/bug-5293-pluginized-bayes/build/README
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/build/README?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/build/README (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/build/README Fri Jan 18 01:57:53 2008
@@ -123,7 +123,7 @@
     svn commit -m "preparing to release X.Y.Z"
 
   (If you are privately preparing a security release, and don't wish to
-  perform commits to public SVN repos, you can omit this step until
+  perform commits to public SVN repos, you can defer this step until
   later.)
 
 - SVN tag the release files.  This is done using "svn copy".
@@ -156,7 +156,7 @@
   base of the tag release.
 
   (If you are privately preparing a security release, and don't wish to
-  perform commits to public SVN repos, you can omit this step until
+  perform commits to public SVN repos, you can defer this step until
   later.)
 
 
@@ -183,24 +183,12 @@
 
 - take a copy of the MD5sum line output.
 
-- by default, they're written to ~/site/released/ .
-  Copy them to wherever you want, yourself:
-
-    mkdir -p ~/public_html/devel
-    mv ~/site/released/Mail-SpamAssassin-3.2.0-rc2.* ~/public_html/devel
-
-  (For me, "~/public_html/devel" is http://people.apache.org/~jm/devel/ .)
-
 - test the tar.gz and zip files!  redo until they work!! ;)
 
 - Write the release announcement mail!  This is a simple matter of copying
   the previous release's announcement, updating the version numbers and
   links, fixing the MD5 and SHA1 checksums in this mail, and summarising
-  the important changes from the Changes file.  
-  
-  Warning: you must send this using a "From:" address @apache.org,
-  otherwise it will be silently discarded at the ASF instead of being
-  delivered to the announce list.
+  the important changes from the Changes file.
 
     cp build/announcements/3.1.7.txt build/announcements/3.1.8.txt
     svn add !$
@@ -211,7 +199,14 @@
   of the website, but included in the vote mail) and request a vote on the
   development mailing list to make the release.  Post the URL,
   md5sums/sha1sums, and proposed release announcement mail to the dev
-  list. 
+  list.  The default location -- ~/public_html/devel/ , or
+  http://people.apache.org/~jm/devel/ -- qualifies as "discreet".
+  
+  While doing this, also upload a copy of the proposed release
+  announcement to the website:
+
+    version=3.2.4
+    cp build/announcements/$version.txt ~/public_html/devel/PROPOSED-$version.txt
 
   Pre-releases and RCs require just lazy consensus -- ie. no objections.
 
@@ -274,12 +269,18 @@
   make it clear that this is an unofficial "test build" by placing it
   in your public_html dir:
 
-        http://people.apache.org/~jm/devel/
+        http://people.apache.org/~jm/rc/
+
+  this command will do it:
+
+        version=X.Y.Z
+        cp -p ~/public_html/devel/Mail-SpamAssassin-$version.* \
+            ~/public_html/rc/
 
   (for full release builds) copy the tarballs to www.apache.org/dist:
 
         version=X.Y.Z
-        cp -p ~/site/released/Mail-SpamAssassin-$version.* \
+        cp -p ~/public_html/devel/Mail-SpamAssassin-$version.* \
             /www/www.apache.org/dist/spamassassin/source
 
         cd /www/www.apache.org/dist/spamassassin
@@ -295,7 +296,7 @@
 
         cd /www/www.apache.org/dist/spamassassin
         prev=X.Y.notZ
-        rm -f source/Mail-SpamAssassin-$prev.*
+        rm -f source/Mail-SpamAssassin-$prev.* Mail-SpamAssassin-$prev.*
         rm -f binaries/*/Mail-SpamAssassin-$prev.*
 
   (Archive copies are automatically kept on archive.apache.org/dist/ .)
@@ -312,6 +313,7 @@
 
 - rebuild the SpamAssassin website with webmake:
 
+        PATH=$HOME/sabuildtools/perl584/bin:$HOME/sabuildtools/bin:$PATH
         cd /www/spamassassin.apache.org
         webmake -F
 
@@ -341,10 +343,8 @@
 - Release a new rules update matching the released code:
 
     ssh spamassassin.zones.apache.org
-    sudo -H -u updatesd \
-        /home/updatesd/svn/spamassassin/build/mkupdates/run_nightly
-    sudo -H -u updatesd \
-        /home/updatesd/svn/spamassassin/build/mkupdates/run_part2
+    cd /home/updatesd/svn/spamassassin/build/mkupdates
+    sudo -H -u updatesd ./update-rules 3.2
 
 - update the tag used to point to "current release":
 
@@ -394,5 +394,6 @@
 - Add the new version to the Bugzilla versions list:
 
         http://issues.apache.org/SpamAssassin/editversions.cgi?product=Spamassassin&action=add
+
 
 // vim:tw=74:

Modified: spamassassin/branches/bug-5293-pluginized-bayes/build/automc/etc-init.d-freqsd
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/build/automc/etc-init.d-freqsd?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/build/automc/etc-init.d-freqsd (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/build/automc/etc-init.d-freqsd Fri Jan 18 01:57:53 2008
@@ -1,22 +1,23 @@
 #!/bin/sh
+# 
+# symlinked from 
+# lrwxrwxrwx   1 root     root          60 Dec  2 10:03 /etc/init.d/freqsd -> /home/automc/svn/spamassassin/build/automc/etc-init.d-freqsd*
 
 . /etc/profile
 prog=freqsd
 
-HOME=/home/automc
+HOME=/export/home/automc
 export HOME
 
 start() {
     echo "Starting $prog: " 
 
-    cd /export/home/automc/svn/spamassassin
-    rm /export/home/automc/freqsd/log
+    cd $HOME/svn/spamassassin
+    rm $HOME/freqsd/log.1
+    mv $HOME/freqsd/log $HOME/freqsd/log.1
 
-    HOME=/export/home/automc
-    export HOME
-
-    su automc -c "./build/automc/freqsd -pidfile /export/home/automc/freqsd/pid" \
-	> /export/home/automc/freqsd/log 2>&1 \
+    su automc -c "./build/automc/freqsd -pidfile $HOME/freqsd/pid" \
+	> $HOME/freqsd/log 2>&1 \
 	< /dev/null &
 
     echo
@@ -24,11 +25,9 @@
 }
 
 stop() {
-    cd /export/home/automc/svn/spamassassin
-    su automc -c "./build/automc/freqsd -pidfile /export/home/automc/freqsd/pid -kill"
+    cd $HOME/svn/spamassassin
+    su automc -c "./build/automc/freqsd -pidfile $HOME/freqsd/pid -kill"
 
-    # sleep 1
-    # pkill -15 -f automc/freqsd            # just in case
     sleep 1
     pkill -15 -u automc
 }

Modified: spamassassin/branches/bug-5293-pluginized-bayes/build/automc/freqsd
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/build/automc/freqsd?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/build/automc/freqsd (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/build/automc/freqsd Fri Jan 18 01:57:53 2008
@@ -196,7 +196,7 @@
   system ($cmd);
 
   if (!$ignoreexit) {
-    die "command '$cmd' failed with status $?" if (($? >> 8) != 0);
+    warn "command '$cmd' failed with status $?" if (($? >> 8) != 0);
   }
 }
 

Modified: spamassassin/branches/bug-5293-pluginized-bayes/build/automc/freqsd-infrequent
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/build/automc/freqsd-infrequent?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/build/automc/freqsd-infrequent (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/build/automc/freqsd-infrequent Fri Jan 18 01:57:53 2008
@@ -1,14 +1,7 @@
 #!/bin/sh
 
-BBMHOME=/export/home/bbmass
-
-# clean up our temporary dir
-cd $BBMHOME/tmp
-find . -type d -mtime +7 -print | xargs rm -rf
-
 # clean out old copies of mass-check logs
 # 180 days = ~6 months
 cd /home/automc/corpus/html
-find . -mtime +180 -name '*.log.gz' > $BBMHOME/tmp/o
-xargs rm -f < $BBMHOME/tmp/o
+find . -mtime +180 -name '*.log.gz' | xargs rm -f
 

Modified: spamassassin/branches/bug-5293-pluginized-bayes/build/automc/gzip_old_ruleqa_data
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/build/automc/gzip_old_ruleqa_data?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/build/automc/gzip_old_ruleqa_data (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/build/automc/gzip_old_ruleqa_data Fri Jan 18 01:57:53 2008
@@ -1,7 +1,13 @@
 #!/bin/sh
 
+# gzip old files
 cd /export/home/automc/corpus/html
-exec find . -mtime +200 -name '*.???' -print | \
+find . -mtime +200 -name '*.???' -print | \
         egrep '(age|all|new)$' | \
         xargs gzip -9
+
+# clean up a temporary dir; these dirs are used for interchange
+# of log results between preflight mass-check mc-fast and freqsd
+cd /home/bbmass/tmp
+find . -type d -mtime +7 -print | xargs rm -rf
 

Modified: spamassassin/branches/bug-5293-pluginized-bayes/build/nightlymc/corpus.doc
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/build/nightlymc/corpus.doc?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/build/nightlymc/corpus.doc (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/build/nightlymc/corpus.doc Fri Jan 18 01:57:53 2008
@@ -1,11 +1,11 @@
-opts_weekly="--net --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 --net -j 8 -f /home/bbmass/mc-nightly/targets.doc"
-opts_nightly=" --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 -f /home/bbmass/mc-nightly/targets.doc"
+opts_weekly="--net -j 8 --reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 ham:detect:/export/home/bbmass/rawcor/doc/ham/* --after="15552000" --tail=25000 spam:detect:/export/home/bbmass/rawcor/doc/spam/*"
+opts_nightly=" --reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 ham:detect:/export/home/bbmass/rawcor/doc/ham/* --after="15552000" --tail=25000 spam:detect:/export/home/bbmass/rawcor/doc/spam/*"
 tmp=$HOME/tmp
 tree=$HOME/svn
 prefs_weekly=$HOME/user_prefs.weekly
 prefs_nightly=$HOME/user_prefs.nightly
 username=bb-doc
 password=__RSYNC_PASSWORD__
-serverhost=spamassassin.zones.apache.org:38892
+serverhost=spamassassin.zones.apache.org.:38892
 clienthosts=__CLIENTHOSTS__
 clienttree=nightlymc_doc

Modified: spamassassin/branches/bug-5293-pluginized-bayes/build/nightlymc/corpus.fredt
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/build/nightlymc/corpus.fredt?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/build/nightlymc/corpus.fredt (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/build/nightlymc/corpus.fredt Fri Jan 18 01:57:53 2008
@@ -1,11 +1,11 @@
-opts_weekly="--net --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 --net -j 8 -f /home/bbmass/mc-nightly/targets.fredt"
-opts_nightly="--cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 -f /home/bbmass/mc-nightly/targets.fredt"
+opts_weekly="--net -j 8 --reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 ham:detect:/export/home/bbmass/rawcor/fredt/ham/* --after="15552000" --tail=25000 spam:detect:/export/home/bbmass/rawcor/fredt/spam/*"
+opts_nightly=" --reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 ham:detect:/export/home/bbmass/rawcor/fredt/ham/* --after="15552000" --tail=25000 spam:detect:/export/home/bbmass/rawcor/fredt/spam/*"
 tmp=$HOME/tmp
 tree=$HOME/svn
 prefs_weekly=$HOME/user_prefs.weekly
 prefs_nightly=$HOME/user_prefs.nightly
 username=bb-fredt
 password=__RSYNC_PASSWORD__
-serverhost=spamassassin.zones.apache.org:38893
+serverhost=spamassassin.zones.apache.org.:38893
 clienthosts=__CLIENTHOSTS__
 clienttree=nightlymc_fredt

Modified: spamassassin/branches/bug-5293-pluginized-bayes/build/nightlymc/corpus.jm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/build/nightlymc/corpus.jm?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/build/nightlymc/corpus.jm (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/build/nightlymc/corpus.jm Fri Jan 18 01:57:53 2008
@@ -1,11 +1,11 @@
-opts_weekly="--net --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 ham:detect:/home/bbmass/rawcor/jm/ham/* --after="15552000" --tail=25000 --scanprob=0.3 spam:detect:/home/bbmass/rawcor/jm/spam/*"
-opts_nightly="--cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 ham:detect:/home/bbmass/rawcor/jm/ham/* --after="15552000" --tail=25000 --scanprob=0.3 spam:detect:/home/bbmass/rawcor/jm/spam/*"
+opts_weekly="--net -j 8 --reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 ham:detect:/home/bbmass/rawcor/jm/ham/* --after="15552000" --tail=40000 --scanprob=0.3 spam:detect:/home/bbmass/rawcor/jm/spam/*"
+opts_nightly="--reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 ham:detect:/home/bbmass/rawcor/jm/ham/* --after="15552000" --tail=40000 --scanprob=0.3 spam:detect:/home/bbmass/rawcor/jm/spam/*"
 tmp=$HOME/tmp
 tree=$HOME/svn
 prefs_weekly=$HOME/user_prefs.weekly
 prefs_nightly=$HOME/user_prefs.nightly
 username=bb-jm
 password=__RSYNC_PASSWORD__
-serverhost=spamassassin.zones.apache.org:38891
+serverhost=spamassassin.zones.apache.org.:38891
 clienthosts=__CLIENTHOSTS__
 clienttree=nightlymc_jm

Modified: spamassassin/branches/bug-5293-pluginized-bayes/build/nightlymc/corpus.zmi
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/build/nightlymc/corpus.zmi?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/build/nightlymc/corpus.zmi (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/build/nightlymc/corpus.zmi Fri Jan 18 01:57:53 2008
@@ -1,11 +1,11 @@
-opts_weekly="--net --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 --net -j 8 -f /home/bbmass/mc-nightly/targets.zmi"
-opts_nightly="--cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 -f /home/bbmass/mc-nightly/targets.zmi"
+opts_weekly="--net -j 8 --reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 ham:detect:/export/home/bbmass/rawcor/zmi/ham/* --after="15552000" --tail=25000 spam:detect:/export/home/bbmass/rawcor/zmi/spam/*"
+opts_nightly=" --reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 ham:detect:/export/home/bbmass/rawcor/zmi/ham/* --after="15552000" --tail=25000 spam:detect:/export/home/bbmass/rawcor/zmi/spam/*"
 tmp=$HOME/tmp
 tree=$HOME/svn
 prefs_weekly=$HOME/user_prefs.weekly
 prefs_nightly=$HOME/user_prefs.nightly
 username=bb-zmi
 password=__RSYNC_PASSWORD__
-serverhost=spamassassin.zones.apache.org:38894
+serverhost=spamassassin.zones.apache.org.:38894
 clienthosts=__CLIENTHOSTS__
 clienttree=nightlymc_zmi

Modified: spamassassin/branches/bug-5293-pluginized-bayes/build/update_devel
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/build/update_devel?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/build/update_devel (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/build/update_devel Fri Jan 18 01:57:53 2008
@@ -4,17 +4,15 @@
 
 umask 002
 
-WEBDIR=$SA_WEBDIR
-test -n "$WEBDIR"  || WEBDIR=$HOME/site
+WEBDIR=$HOME/public_html
 
 if [ ! -d $WEBDIR ] ; then
   echo "WARNING: $WEBDIR does not exist" 1>&2
   echo 1>&2
 fi
 
-DEVDIR=$WEBDIR/devel
-RELDIR=$WEBDIR/released
-mkdir -p $DEVDIR $RELDIR
+RELDIR=$WEBDIR/devel
+mkdir -p $RELDIR
 
 DISTNAME=Mail-SpamAssassin
 
@@ -32,9 +30,6 @@
 DISTDNAME=$DISTNAME-$DIST_VERSION
 DISTVNAME=$DISTNAME-$VERSION
 
-# delete old CVS files
-find . -name '.#*' -print | xargs rm -f
-
 make || exit $?
 
 make text_html_doc
@@ -52,15 +47,15 @@
 zip   -rv - $DISTDNAME     > $DISTVNAME.zip     || exit $?
 
 for ext in tar.bz2 tar.gz zip ; do
-  cp $DISTVNAME.$ext $DEVDIR
+  cp $DISTVNAME.$ext $RELDIR
 
-  perl build/md5sum.pl $DISTVNAME.$ext > $DEVDIR/$DISTVNAME.$ext.md5  || exit $?
-  perl build/sha1sum.pl $DISTVNAME.$ext > $DEVDIR/$DISTVNAME.$ext.sha1 || exit $?
+  perl build/md5sum.pl $DISTVNAME.$ext > $RELDIR/$DISTVNAME.$ext.md5  || exit $?
+  perl build/sha1sum.pl $DISTVNAME.$ext > $RELDIR/$DISTVNAME.$ext.sha1 || exit $?
 
   rm -f $DISTVNAME.$ext.asc*
   if [ -d $HOME/sabuildtools/sasigningkey ]; then
     $HOME/sabuildtools/bin/gpg --homedir $HOME/sabuildtools/sasigningkey -bsa $DISTVNAME.$ext || exit $?
-    mv $DISTVNAME.$ext.asc $DEVDIR/$DISTVNAME.$ext.asc || exit $?
+    mv $DISTVNAME.$ext.asc $RELDIR/$DISTVNAME.$ext.asc || exit $?
   fi
 
   rm -f $DISTVNAME.$ext
@@ -69,5 +64,5 @@
 test -f Makefile && make distclean
 rm -f $DISTVNAME.*
 
-chgrp -R spamassassin $DEVDIR $RELDIR
-ls -l $DEVDIR
+chgrp -R spamassassin $RELDIR
+ls -l $RELDIR

Modified: spamassassin/branches/bug-5293-pluginized-bayes/build/update_stable
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/build/update_stable?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/build/update_stable (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/build/update_stable Fri Jan 18 01:57:53 2008
@@ -9,15 +9,6 @@
 rm -rf $WEBDIR/doc
 tar cf - --exclude=CVS --exclude='.#*' doc | ( cd $WEBDIR ; tar xf - )
 
-for ext in tar.bz2 tar.gz zip ; do
-  for fxt in '' .asc .md5 .sha1 ; do
-    rm -f $RELDIR/$DISTVNAME.$ext$fxt
-    cp -p $DEVDIR/$DISTVNAME.$ext$fxt $RELDIR || exit $?
-    rm -f $DEVDIR/$DISTVNAME.$ext$fxt
-  done
-done
-
-
 set +x
 echo
 

Modified: spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/AsyncLoop.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/AsyncLoop.pm?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/AsyncLoop.pm (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/AsyncLoop.pm Fri Jan 18 01:57:53 2008
@@ -48,8 +48,10 @@
   use vars qw($timer_resolution);
   eval {
     require Time::HiRes or die "Error loading Time::HiRes: $@, $!";
-    Time::HiRes->import( qw(time CLOCK_REALTIME) );
-    $timer_resolution = Time::HiRes::clock_getres(CLOCK_REALTIME());
+    Time::HiRes->import( qw(time) );
+    $timer_resolution = Time::HiRes->can('clock_getres')
+      ? Time::HiRes::clock_getres(Time::HiRes::CLOCK_REALTIME())
+      : 0.001;  # no clock_getres available: wild guess of 1 ms, still finer than 1s
     1;
   } or do {
     $timer_resolution = 1;  # Perl's builtin timer ticks at one second

Modified: spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/BayesStore/DBM.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/BayesStore/DBM.pm?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/BayesStore/DBM.pm (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/BayesStore/DBM.pm Fri Jan 18 01:57:53 2008
@@ -27,6 +27,7 @@
 use Mail::SpamAssassin::Util qw(untaint_var);
 use Mail::SpamAssassin::BayesStore;
 use Mail::SpamAssassin::Logger;
+use Mail::SpamAssassin::Timeout;
 use Digest::SHA1 qw(sha1);
 use File::Basename;
 use File::Spec;
@@ -169,23 +170,41 @@
     my $db_var = 'db_'.$dbname;
     dbg("bayes: tie-ing to DB file R/O $name");
 
-    # untie %{$self->{$db_var}} if (tied %{$self->{$db_var}});
-    if (!tie %{$self->{$db_var}},$self->DBM_MODULE, $name, O_RDONLY,
-		 (oct($main->{conf}->{bayes_file_mode}) & 0666))
-    {
-      # bug 2975: it's acceptable for the db_seen to not be present,
-      # to allow it to be recycled.  if that's the case, just create
-      # a new, empty one. we don't need to lock it, since we won't
-      # be writing to it; let the R/W api deal with that case.
-
-      if ($dbname eq 'seen') {
-        tie %{$self->{$db_var}},$self->DBM_MODULE, $name, O_RDWR|O_CREAT,
-                    (oct($main->{conf}->{bayes_file_mode}) & 0666)
-          or goto failed_to_tie;
-      }
-      else {
-        goto failed_to_tie;
+    # bug 5731: something in DB_File appears to hang on tie() on gutsy
+    my $err;
+    dbg("starting tie timeout at ".(scalar localtime time));
+    my $timer = Mail::SpamAssassin::Timeout->new({ secs => 30 });
+    $timer->run_and_catch(sub {
+
+      # untie %{$self->{$db_var}} if (tied %{$self->{$db_var}});
+      if (!tie %{$self->{$db_var}},$self->DBM_MODULE, $name, O_RDONLY,
+                  (oct($main->{conf}->{bayes_file_mode}) & 0666))
+      {
+        # bug 2975: it's acceptable for the db_seen to not be present,
+        # to allow it to be recycled.  if that's the case, just create
+        # a new, empty one. we don't need to lock it, since we won't
+        # be writing to it; let the R/W api deal with that case.
+
+        if ($dbname eq 'seen') {
+          tie %{$self->{$db_var}},$self->DBM_MODULE, $name, O_RDWR|O_CREAT,
+                      (oct($main->{conf}->{bayes_file_mode}) & 0666)
+            or $err++;
+        }
+        else {
+          $err++;
+        }
       }
+
+      dbg("tie returned: ".(scalar localtime time));
+    });
+    dbg("timer returned: ".(scalar localtime time));
+
+    if ($timer->timed_out()) {
+      warn "bayes: DB_File tie() call timed out after 30 seconds";
+      goto failed_to_tie;
+    }
+    if ($err) {
+      goto failed_to_tie;
     }
   }
 
@@ -281,12 +300,30 @@
     my $db_var = 'db_'.$dbname;
     dbg("bayes: tie-ing to DB file R/W $name");
 
-    ($self->DBM_MODULE eq 'DB_File') and
-         Mail::SpamAssassin::Util::avoid_db_file_locking_bug ($name);
-
-    tie %{$self->{$db_var}},$self->DBM_MODULE, $name, O_RDWR|O_CREAT,
-		 (oct($main->{conf}->{bayes_file_mode}) & 0666)
-       or goto failed_to_tie;
+    # bug 5731: something in DB_File appears to hang on tie() on gutsy
+    my $err;
+    dbg("starting tie timeout at ".(scalar localtime time));
+    my $timer = Mail::SpamAssassin::Timeout->new({ secs => 30 });
+    $timer->run_and_catch(sub {
+
+      ($self->DBM_MODULE eq 'DB_File') and
+          Mail::SpamAssassin::Util::avoid_db_file_locking_bug ($name);
+
+      tie %{$self->{$db_var}},$self->DBM_MODULE, $name, O_RDWR|O_CREAT,
+                  (oct($main->{conf}->{bayes_file_mode}) & 0666)
+        or $err++;
+
+      dbg("tie returned: ".(scalar localtime time));
+    });
+    dbg("timer returned: ".(scalar localtime time));
+
+    if ($timer->timed_out()) {
+      warn "bayes: DB_File tie() call timed out after 30 seconds";
+      goto failed_to_tie;
+    }
+    if ($err) {
+      goto failed_to_tie;
+    }
   }
   umask $umask;
 

Modified: spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Conf/Parser.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Conf/Parser.pm?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Conf/Parser.pm (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Conf/Parser.pm Fri Jan 18 01:57:53 2008
@@ -546,7 +546,8 @@
 
     while ( my($sk) = each %{$conf->{scores}} ) {
       if (!exists $conf->{tests}->{$sk}) {
-        $self->lint_warn("config: warning: score set for non-existent rule $sk\n", $sk);
+        # bug 5514: not a lint warning any more
+        dbg("config: warning: score set for non-existent rule $sk");
       }
     }
   }

Modified: spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/DBBasedAddrList.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/DBBasedAddrList.pm?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/DBBasedAddrList.pm (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/DBBasedAddrList.pm Fri Jan 18 01:57:53 2008
@@ -26,12 +26,9 @@
 use Mail::SpamAssassin::PersistentAddrList;
 use Mail::SpamAssassin::Util;
 use Mail::SpamAssassin::Logger;
+use Mail::SpamAssassin::Timeout;
 
-use vars qw{
-  @ISA
-};
-
-@ISA = qw(Mail::SpamAssassin::PersistentAddrList);
+our @ISA = qw(Mail::SpamAssassin::PersistentAddrList);
 
 ###########################################################################
 
@@ -83,13 +80,30 @@
 
     dbg("auto-whitelist: tie-ing to DB file of type $dbm_module $mod1 in $path");
 
-    ($self->{is_locked} && $dbm_module eq 'DB_File') and 
-            Mail::SpamAssassin::Util::avoid_db_file_locking_bug ($path);
+    # bug 5731: something in DB_File appears to hang on tie() on gutsy
+    my $err;
+    my $tied;
+    dbg("starting tie timeout at ".(scalar localtime time));
+    my $timer = Mail::SpamAssassin::Timeout->new({ secs => 30 });
+    $timer->run_and_catch(sub {
+
+      ($self->{is_locked} && $dbm_module eq 'DB_File') and 
+              Mail::SpamAssassin::Util::avoid_db_file_locking_bug ($path);
+
+      $tied = tie %{ $self->{accum} }, $dbm_module, $path, $mod2,
+              oct($main->{conf}->{auto_whitelist_file_mode});
+      $err = $!;
+
+      dbg("tie returned: ".(scalar localtime time));
+    });
+    dbg("timer returned: ".(scalar localtime time));
+
+    if ($timer->timed_out() || !$tied) {
+      if ($timer->timed_out()) {
+        warn "auto-whitelist: DB_File tie() call timed out after 30 seconds";
+        $err = "timed out";
+      }
 
-    if (! tie %{ $self->{accum} }, $dbm_module, $path, $mod2,
-            oct($main->{conf}->{auto_whitelist_file_mode}) )
-    {
-      my $err = $!;   # might get overwritten later
       if ($self->{is_locked}) {
         $self->{main}->{locker}->safe_unlock($self->{locked_file});
         $self->{is_locked} = 0;

Modified: spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Dns.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Dns.pm?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Dns.pm (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Dns.pm Fri Jan 18 01:57:53 2008
@@ -632,12 +632,6 @@
   # DNS if we're only supposed to be looking at local tests.
   goto done if ($self->{main}->{local_tests_only});
 
-  if ($dnsopt eq "yes") {
-    $IS_DNS_AVAILABLE = 1;
-    dbg("dns: dns_available set to yes in config file, skipping test");
-    return $IS_DNS_AVAILABLE;
-  }
-
   # Check version numbers - runtime check only
   if (defined $Net::DNS::VERSION) {
     if (Mail::SpamAssassin::Util::am_running_on_windows()) {
@@ -657,6 +651,22 @@
   $self->clear_resolver();
   goto done unless $self->load_resolver();
 
+  my @nameservers = $self->{resolver}->nameservers();
+
+  # optionally shuffle the list of nameservers to distribute the load
+  if ($self->{conf}->{dns_options}->{rotate}) {
+    Mail::SpamAssassin::Util::fisher_yates_shuffle(\@nameservers);
+    dbg("dns: shuffled NS list: ".join(", ", @nameservers));
+    $self->{resolver}->nameservers(@nameservers);
+    $self->{resolver}->connect_sock();
+  }
+
+  if ($dnsopt eq "yes") {
+    $IS_DNS_AVAILABLE = 1;
+    dbg("dns: dns_available set to yes in config file, skipping test");
+    return $IS_DNS_AVAILABLE;
+  }
+
   if ($dnsopt =~ /test:\s+(.+)$/) {
     my $servers=$1;
     dbg("dns: servers: $servers");
@@ -667,15 +677,10 @@
     @domains = @EXISTING_DOMAINS;
   }
 
-  # TODO: retry every now and again if we get this far, but the
-  # next test fails?  could be because the ethernet cable has
-  # simply fallen out ;)
-
   # Net::DNS::Resolver scans a list of nameservers when it does a foreground
   # query but only uses the first in a background query like we use.
   # Try the different nameservers here in case the first one is not working
-  
-  my @nameservers = $self->{resolver}->nameservers();
+
   my @good_nameservers = ();
   dbg("dns: testing resolver nameservers: " . join(", ", @nameservers));
   my $ns;
@@ -710,9 +715,6 @@
 
   if ($IS_DNS_AVAILABLE == 1)
   {
-    if ($self->{conf}->{dns_options}->{rotate}) {
-      Mail::SpamAssassin::Util::fisher_yates_shuffle(\@good_nameservers);
-    }
     dbg("dns: NS list: ".join(", ", @good_nameservers));
     $self->{resolver}->nameservers(@good_nameservers);
     $self->{resolver}->connect_sock();

Modified: spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/HTML.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/HTML.pm?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/HTML.pm (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/HTML.pm Fri Jan 18 01:57:53 2008
@@ -668,17 +668,21 @@
 
   if ($display{whitespace}) {
     # trim trailing whitespace from previous element if it was not whitespace
+    # and it was not invisible
     if (@{ $self->{text} } &&
 	(!defined $self->{text_whitespace} ||
-	 !vec($self->{text_whitespace}, $#{$self->{text}}, 1)))
+	 !vec($self->{text_whitespace}, $#{$self->{text}}, 1)) &&
+	(!defined $self->{text_invisible} ||
+	 !vec($self->{text_invisible}, $#{$self->{text}}, 1)))
     {
       $self->{text}->[-1] =~ s/ $//;
     }
   }
   else {
     $text =~ s/[ \t\n\r\f\x0b\xa0]+/ /g;
-    # trim leading whitespace if previous element was whitespace
-    if (@{ $self->{text} } &&
+    # trim leading whitespace if previous element was whitespace 
+    # and current element is not invisible
+    if (@{ $self->{text} } && !$display{invisible} &&
 	defined $self->{text_whitespace} &&
 	vec($self->{text_whitespace}, $#{$self->{text}}, 1))
     {

Modified: spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Message/Metadata/Received.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Message/Metadata/Received.pm?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Message/Metadata/Received.pm (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Message/Metadata/Received.pm Fri Jan 18 01:57:53 2008
@@ -373,8 +373,12 @@
     $id = $1;
   }
 
-  if (/\bhelo=([-A-Za-z0-9\.]+)(?:[^-A-Za-z0-9\.]|$)/) { $helo = $1; }
-  elsif (/\b(?:HELO|EHLO) ([-A-Za-z0-9\.]+)(?:[^-A-Za-z0-9\.]|$)/) { $helo = $1; }
+  if (/\bhelo=([-A-Za-z0-9\.\^+_&:=?!@%*\$\\\/]+)(?:[^-A-Za-z0-9\.\^+_&:=?!@%*\$\\\/]|$)/) {
+      $helo = $1;
+  }
+  elsif (/\b(?:HELO|EHLO) ([-A-Za-z0-9\.\^+_&:=?!@%*\$\\\/]+)(?:[^-A-Za-z0-9\.\^+_&:=?!@%*\$\\\/]|$)/) {
+      $helo = $1;
+  }
   if (/ by (\S+)(?:[^-A-Za-z0-9\;\.]|$)/) { $by = $1; }
 
 # ---------------------------------------------------------------------------

Modified: spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Message/Node.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Message/Node.pm?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Message/Node.pm (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Message/Node.pm Fri Jan 18 01:57:53 2008
@@ -306,6 +306,7 @@
       seek $fd, 0, 0;
       local $/ = undef;
       $raw = <$fd>;
+      $raw = ''  if !defined $raw;
     }
     else {
       # create a new scalar from the raw array in memory

Modified: spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm Fri Jan 18 01:57:53 2008
@@ -610,12 +610,13 @@
       # we can do both, since we canonicalize to lc.
       if (!$spcs && $item =~ /^EXACT/ && $args =~ /<(.*)>/)
       {
-        $buf .= $1;
+        my $str = $1;
+        $buf .= $str;
         if ($buf =~ s/\\x\{[0-9a-fA-F]{4,}\}.*$//) {
           # a high Unicode codepoint, interpreted by perl 5.8.x.  cut and stop
           $add_candidate->();
         }
-        if ($1 && length $1 >= 55 && $buf =~ s/\.\.\.$//) {
+        if (length $str >= 55 && $buf =~ s/\.\.\.$//) {
           # perl 5.8.x truncates with a "..." here!  cut and stop
           $add_candidate->();
         }

Modified: spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/DCC.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/DCC.pm?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/DCC.pm (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/DCC.pm Fri Jan 18 01:57:53 2008
@@ -414,7 +414,8 @@
   my $right;
   my $timeout = $self->{main}->{conf}->{dcc_timeout};
   my $sockpath = $self->{main}->{conf}->{dcc_dccifd_path};
-  my @opts = split(' ',$self->{main}->{conf}->{dcc_options});
+  my $opts = $self->{main}->{conf}->{dcc_options};
+  my @opts = !defined $opts ? () : split(' ',$opts);
 
   $count{body} = 0;
   $count{fuz1} = 0;
@@ -481,6 +482,7 @@
     dbg("dcc: dccifd check failed - no X-DCC returned: $response");
     return 0;
   }
+  $response =~ s/[ \t]\z//;  # strip trailing whitespace
 
   if ($response =~ /^X-DCC-(.*)-Metrics: (.*)$/) {
     $permsgstatus->{tag_data}->{DCCB} = $1;
@@ -538,7 +540,8 @@
     # note: not really tainted, this came from system configuration file
     my $path = Mail::SpamAssassin::Util::untaint_file_path($self->{main}->{conf}->{dcc_path});
 
-    my @opts = split(' ',$self->{main}->{conf}->{dcc_options}||'');
+    my $opts = $self->{main}->{conf}->{dcc_options};
+    my @opts = !defined $opts ? () : split(' ',$opts);
     untaint_var(\@opts);
 
     unshift(@opts, "-a",
@@ -693,7 +696,8 @@
   my $timeout = $self->{main}->{conf}->{dcc_timeout};
   my $sockpath = $self->{main}->{conf}->{dcc_dccifd_path};
   # instead of header use whatever the report option is
-  my @opts = split(' ',$self->{main}->{conf}->{dcc_options});
+  my $opts = $self->{main}->{conf}->{dcc_options};
+  my @opts = !defined $opts ? () : split(' ',$opts);
 
   $options->{report}->enter_helper_run_mode();
   my $timer = Mail::SpamAssassin::Timeout->new({ secs => $timeout });
@@ -749,7 +753,8 @@
 
   # note: not really tainted, this came from system configuration file
   my $path = Mail::SpamAssassin::Util::untaint_file_path($options->{report}->{conf}->{dcc_path});
-  my @opts = split(' ',$self->{main}->{conf}->{dcc_options});
+  my $opts = $self->{main}->{conf}->{dcc_options};
+  my @opts = !defined $opts ? () : split(' ',$opts);
   untaint_var(\@opts);
 
   # get the metadata from the message so we can pass the external relay info

Modified: spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/DKIM.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/DKIM.pm?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/DKIM.pm (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/DKIM.pm Fri Jan 18 01:57:53 2008
@@ -23,7 +23,10 @@
 
  loadplugin Mail::SpamAssassin::Plugin::DKIM [/path/to/DKIM.pm]
 
- full DOMAINKEY_DOMAIN eval:check_dkim_verified()
+ full DKIM_VALID     eval:check_dkim_valid()
+ full DKIM_VALID_AU  eval:check_dkim_valid_author_sig()
+
+(for compatibility, check_dkim_verified is a synonym for check_dkim_valid)
 
 =head1 DESCRIPTION
 
@@ -38,7 +41,7 @@
 =head1 TAGS
 
 The following tags are added to the set, available for use in reports,
-headers, other plugins, etc.:
+header fields, other plugins, etc.:
 
   _DKIMIDENTITY_  signing identities (the 'i' tag) from valid signatures;
   _DKIMDOMAIN_    signing domains (the 'd' tag) from valid signatures;
@@ -87,7 +90,9 @@
   bless ($self, $class);
 
   $self->register_eval_rule ("check_dkim_signed");
-  $self->register_eval_rule ("check_dkim_verified");
+  $self->register_eval_rule ("check_dkim_verified");  # old synonym for _valid
+  $self->register_eval_rule ("check_dkim_valid");
+  $self->register_eval_rule ("check_dkim_valid_author_sig");
   $self->register_eval_rule ("check_dkim_signsome");
   $self->register_eval_rule ("check_dkim_testing");
   $self->register_eval_rule ("check_dkim_signall");
@@ -109,27 +114,27 @@
 
 =over 4
 
-=item whitelist_from_dkim originator@example.com [signing-identity]
+=item whitelist_from_dkim author@example.com [signing-identity]
 
-Use this to supplement the whitelist_from addresses with a check to make sure
-the message with a given From: author's address (originator address) carries
-a valid Domain Keys Identified Mail (DKIM) signature by a verifier-acceptable
+Use this to supplement the whitelist_from addresses with a check to make
+sure the message with a given From address (the author's address) carries a
+valid Domain Keys Identified Mail (DKIM) signature by a verifier-acceptable
 signing-identity (the i= tag).
 
 Only one whitelist entry is allowed per line, as in C<whitelist_from_rcvd>.
-Multiple C<whitelist_from_dkim> lines are allowed. File-glob style
-meta characters are allowed for the From: address (the first parameter),
-just like with C<whitelist_from_rcvd>.
+Multiple C<whitelist_from_dkim> lines are allowed. File-glob style characters
+are allowed for the From address (the first parameter), just like with
+C<whitelist_from_rcvd>. The second parameter does not accept wildcards.
 
 If no signing identity parameter is specified, the only acceptable signature
-will be an originator signature (not a third-party signature). An originator
-signature is a signature where the signing identity of a signature matches
-the originator address (i.e. the address in a From header field).
+will be a first-party signature, i.e. the so called author signature, which
+is a signature where the signing identity of a signature matches the author
+address (i.e. the address in a From header field).
 
 Since this whitelist requires a DKIM check to be made, network tests must
 be enabled.
 
-Examples of whitelisting based on an originator signature:
+Examples of whitelisting based on an author signature (first-party):
 
   whitelist_from_dkim joe@example.com
   whitelist_from_dkim *@corp.example.com
@@ -143,7 +148,7 @@
   whitelist_from_dkim *@info.example.com   example.com
   whitelist_from_dkim *@*                  remailer.example.com
 
-=item def_whitelist_from_dkim originator@example.com [signing-identity]
+=item def_whitelist_from_dkim author@example.com [signing-identity]
 
 Same as C<whitelist_from_dkim>, but used for the default whitelist entries
 in the SpamAssassin distribution.  The whitelist score is lower, because
@@ -163,7 +168,7 @@
         return $Mail::SpamAssassin::Conf::INVALID_VALUE;
       }
       my $address = $1;
-      my $identity = defined $2 ? $2 : ''; # empty implies originator signature
+      my $identity = defined $2 ? $2 : '';  # empty implies author signature
       $self->{parser}->add_to_addrlist_rcvd('whitelist_from_dkim',
                                             $address, $identity);
     }
@@ -181,7 +186,7 @@
         return $Mail::SpamAssassin::Conf::INVALID_VALUE;
       }
       my $address = $1;
-      my $identity = defined $2 ? $2 : ''; # empty implies originator signature
+      my $identity = defined $2 ? $2 : '';  # empty implies author signature
       $self->{parser}->add_to_addrlist_rcvd('def_whitelist_from_dkim',
                                             $address, $identity);
     }
@@ -218,17 +223,33 @@
   return $scan->{dkim_signed};
 }
 
-# mosnomer, should be check_dkim_valid, keep for compatibility
+
+sub check_dkim_valid_author_sig {
+  my ($self, $scan) = @_;
+  $self->_check_dkim_signature($scan) unless $scan->{dkim_checked_signature};
+  return $scan->{dkim_valid_author_sig};
+}
+
+sub check_dkim_valid {
+  my ($self, $scan) = @_;
+  $self->_check_dkim_signature($scan) unless $scan->{dkim_checked_signature};
+  return $scan->{dkim_valid};
+}
+
+# misnomer, old synonym for check_dkim_valid, kept for compatibility
 sub check_dkim_verified {
   my ($self, $scan) = @_;
   $self->_check_dkim_signature($scan) unless $scan->{dkim_checked_signature};
   return $scan->{dkim_valid};
 }
 
+# useless, semantically always true according to the current SSP draft
 sub check_dkim_signsome {
   my ($self, $scan) = @_;
-  $self->_check_dkim_policy($scan) unless $scan->{dkim_checked_policy};
-  return $scan->{dkim_signsome};
+# $self->_check_dkim_policy($scan) unless $scan->{dkim_checked_policy};
+# return $scan->{dkim_signsome};
+  # just return false to avoid rule DKIM_POLICY_SIGNSOME always firing
+  return 0;
 }
 
 sub check_dkim_signall {
@@ -254,15 +275,15 @@
 sub check_for_dkim_whitelist_from {
   my ($self, $scan) = @_;
   $self->_check_dkim_whitelist($scan) unless $scan->{whitelist_checked};
-  return $scan->{match_in_whitelist_from_dkim} || 
-         $scan->{match_in_whitelist_auth};
+  return $scan->{dkim_match_in_whitelist_from_dkim} || 
+         $scan->{dkim_match_in_whitelist_auth};
 }
 
 sub check_for_def_dkim_whitelist_from {
   my ($self, $scan) = @_;
   $self->_check_dkim_whitelist($scan) unless $scan->{whitelist_checked};
-  return $scan->{match_in_def_whitelist_from_dkim} || 
-         $scan->{match_in_def_whitelist_auth};
+  return $scan->{dkim_match_in_def_whitelist_from_dkim} || 
+         $scan->{dkim_match_in_def_whitelist_auth};
 }
 
 # ---------------------------------------------------------------------------
@@ -273,26 +294,30 @@
   $scan->{dkim_checked_signature} = 1;
   $scan->{dkim_signed} = 0;
   $scan->{dkim_valid} = 0;
+  $scan->{dkim_valid_author_sig} = 0;
   $scan->{dkim_key_testing} = 0;
+  $scan->{dkim_author_address} =
+    $scan->get('from:addr')  if !defined $scan->{dkim_author_address};
 
   my $timemethod = $self->{main}->time_method("check_dkim_signature");
 
-  my $message = Mail::DKIM::Verifier->new_object();
-  if (!$message) {
+# my $verifier = Mail::DKIM::Verifier->new();         # per new docs
+  my $verifier = Mail::DKIM::Verifier->new_object();  # old style???
+  if (!$verifier) {
     dbg("dkim: cannot create Mail::DKIM::Verifier");
     return;
   }
-  $scan->{dkim_object} = $message;
+  $scan->{dkim_object} = $verifier;
 
   # feed content of message into verifier, using \r\n endings,
   # required by Mail::DKIM API (see bug 5300)
   # note: bug 5179 comment 28: perl does silly things on non-Unix platforms
   # unless we use \015\012 instead of \r\n
   eval {
-    foreach my $line (split(/\n/s, $scan->{msg}->get_pristine)) {
-      $line =~ s/\r?$/\015\012/s;       # ensure \015\012 ending
-      $message->PRINT($line);
-    }
+    my $str = $scan->{msg}->get_pristine;
+    $str =~ s/\r?\n/\015\012/sg;  # ensure \015\012 ending
+    # feeding large chunks to Mail::DKIM is much faster than line-by-line feed
+    $verifier->PRINT($str);
     1;
   } or do {  # intercept die() exceptions and render safe
     my $eval_stat = $@ ne '' ? $@ : "errno=$!";  chomp $eval_stat;
@@ -306,20 +331,28 @@
   my $err = $timer->run_and_catch(sub {
 
     dbg("dkim: performing public key lookup and signature verification");
-    $message->CLOSE();      # the action happens here
+    $verifier->CLOSE();      # the action happens here
 
-    $scan->{dkim_address} = !$message->message_originator ? ''
-                              : $message->message_originator->address();
-    dbg("dkim: originator: ".
-        ($scan->{dkim_address} ? $scan->{dkim_address} : 'none'));
+    my $author = $verifier->message_originator;
+    $author = $author->address()  if $author;
+    $author = '' if !defined $author;  # when a From header field is missing
+    # Mail::DKIM sometimes leaves leading or trailing whitespace in address
+    $author =~ s/^[ \t]+//s;  $author =~ s/[ \t]+\z//s;  # trim
+    if ($author ne $scan->{dkim_author_address}) {
+      dbg("dkim: author parsing inconsistency, SA: <%s>, DKIM: <%s>",
+           $author, $scan->{dkim_author_address});
+    # currently SpamAssassin's parsing is better than Mail::Address parsing
+    # $scan->{dkim_author_address} = $author;
+    }
 
     $scan->{dkim_signatures} = [];
 
     # versions before 0.29 only provided a public interface to fetch one
     # signature, new versions allow access to all signatures of a message
-    my @signatures = Mail::DKIM->VERSION >= 0.29 ? $message->signatures
-                                                 : $message->signature;
+    my @signatures = Mail::DKIM->VERSION >= 0.29 ? $verifier->signatures
+                                                 : $verifier->signature;
     @signatures = grep { defined } @signatures;  # just in case
+    my $has_author_sig = 0;
     foreach my $signature (@signatures) {
       # i=  Identity of the user or agent (e.g., a mailing list manager) on
       #     behalf of which this message is signed (dkim-quoted-printable;
@@ -336,45 +369,58 @@
         $identity = '@' . $identity;
         $signature->identity($identity);
       }
+      if ($signature->result eq 'pass') {
+        local ($1);  # check if we have a valid first-party signature
+        if ($identity =~ /.\@[^@]*\z/s) {  # identity has a localpart
+          $has_author_sig = 1  if lc($author) eq lc($identity);
+        } elsif ($author =~ /^.*?(\@[^\@]*)?\z/s && lc($1) eq lc($identity)) {
+          # ignoring localpart if identity doesn't have a localpart
+          $has_author_sig = 1;
+        }
+      }
     }
     $scan->{dkim_signatures} = \@signatures;
     { my (%seen1,%seen2);
-      my @v_sign = grep { $_->result eq 'pass' } @signatures;
+      my @valid_s = grep { $_->result eq 'pass' } @signatures;
       $scan->set_tag('DKIMIDENTITY',
-              join(" ", grep { !$seen1{$_}++ } map { $_->identity } @v_sign));
+              join(" ", grep { !$seen1{$_}++ } map { $_->identity } @valid_s));
       $scan->set_tag('DKIMDOMAIN',
-              join(" ", grep { !$seen2{$_}++ } map { $_->domain } @v_sign));
+              join(" ", grep { !$seen2{$_}++ } map { $_->domain } @valid_s));
     }
-    my $result = $message->result();
-    my $detail = $message->result_detail();
+    # corresponds to 'best' result in case of multiple signatures
+    my $result = $verifier->result();
+    my $detail = $verifier->result_detail();
     # let the result stand out more clearly in the log, use uppercase
     dbg("dkim: signature verification result: ".
         ($detail eq 'none' ? $detail : uc $detail));
 
-    # extract the actual lookup results
+    # check and remember verification results
     if ($result eq 'pass') {
       $scan->{dkim_signed} = 1;
       $scan->{dkim_valid} = 1;
+      $scan->{dkim_valid_author_sig} = $has_author_sig;
     }
     elsif ($result eq 'fail') {
       $scan->{dkim_signed} = 1;
-    }
-    elsif ($result eq 'none') {
-      # no-op, this is the default state
+      # Returned if a valid DKIM-Signature header was found, but the
+      # signature does not contain a correct value for the message.
     }
     elsif ($result eq 'invalid') {
+      $scan->{dkim_signed} = 1;
       # Returned if no valid DKIM-Signature headers were found,
       # but there is at least one invalid DKIM-Signature header.
-      dbg("dkim: invalid DKIM-Signature: $detail");
+    }
+    elsif ($result eq 'none') {
+      # no signatures, this is a default state
     }
 
   });
 
   if ($timer->timed_out()) {
-    dbg("dkim: public key lookup timed out after $timeout seconds");
+    dbg("dkim: public key lookup or verification timed out after $timeout s");
   } elsif ($err) {
     chomp $err;
-    dbg("dkim: public key lookup failed: $err");
+    dbg("dkim: public key lookup or verification failed: $err");
   }
 }
 
@@ -385,24 +431,26 @@
   $scan->{dkim_signsome} = 0;
   $scan->{dkim_signall} = 0;
   $scan->{dkim_policy_testing} = 0;
+  $scan->{dkim_author_address} =
+    $scan->get('from:addr')  if !defined $scan->{dkim_author_address};
 
   # must check the message first to obtain signer, domain, and verif. status
   $self->_check_dkim_signature($scan) unless $scan->{dkim_checked_signature};
-  my $message = $scan->{dkim_object};
+  my $verifier = $scan->{dkim_object};
 
   my $timemethod = $self->{main}->time_method("check_dkim_policy");
 
-  if (!$message) {
+  if (!$verifier) {
     dbg("dkim: policy: dkim object not available (programming error?)");
   } elsif (!$scan->is_dns_available()) {
     dbg("dkim: policy: not retrieved, no DNS resolving available");
-  } elsif ($scan->{dkim_valid}) {  # no need to fetch policy when valid
-    # draft-allman-dkim-ssp-02: If the message contains a valid Originator
+  } elsif ($scan->{dkim_valid_author_sig}) {  # don't fetch policy when valid
+    # draft-allman-dkim-ssp: If the message contains a valid Author
     # Signature, no Sender Signing Practices check need be performed:
     # the Verifier SHOULD NOT look up the Sender Signing Practices
     # and the message SHOULD be considered non-Suspicious.
 
-    dbg("dkim: policy: not retrieved, signature is valid");
+    dbg("dkim: policy: not retrieved, author signature is valid");
 
   } else {
     my $timeout = $scan->{conf}->{dkim_timeout};
@@ -413,7 +461,7 @@
 
       my $policy;
       eval {
-        $policy = $message->fetch_author_policy;  1;
+        $policy = $verifier->fetch_author_policy;  1;
       } or do {
         # fetching or parsing a policy may throw an error, ignore such policy
         my $eval_stat = $@ ne '' ? $@ : "errno=$!";  chomp $eval_stat;
@@ -423,7 +471,7 @@
       if (!$policy) {
         dbg("dkim: policy: none");
       } else {
-        my $policy_result = $policy->apply($message);
+        my $policy_result = $policy->apply($verifier);
         dbg("dkim: policy result $policy_result: ".$policy->as_string());
 
         # extract the flags we expose, from the policy
@@ -455,28 +503,36 @@
   $scan->{whitelist_checked} = 1;
   return unless $scan->is_dns_available();
 
-  # if the message doesn't pass DKIM validation, it can't pass DKIM whitelist
-
-  # trigger a DKIM check so we can get address/identity info
-  # continue if verification succeeded or we want the debug info
-  return unless $self->check_dkim_verified($scan) || would_log("dbg","dkim");
-
-  my $originator = $scan->{dkim_address};
-  unless ($originator) {
-    dbg("dkim: check_dkim_whitelist: could not find originator address");
+  my $author = $scan->{dkim_author_address};
+  if (!defined $author) {
+    $scan->{dkim_author_address} = $author = $scan->get('from:addr');
+  }
+  if (!defined $author || $author eq '') {
+    dbg("dkim: check_dkim_whitelist: could not find author address");
     return;
   }
 
+  # collect whitelist entries matching the author from all lists
   my @acceptable_identity_tuples;
-  # collect whitelist entries matching the originator from all lists
   $self->_wlcheck_acceptable_signature($scan, \@acceptable_identity_tuples,
                                        'def_whitelist_from_dkim');
-  $self->_wlcheck_originator_signature($scan, \@acceptable_identity_tuples,
+  $self->_wlcheck_author_signature($scan, \@acceptable_identity_tuples,
                                        'def_whitelist_auth');
   $self->_wlcheck_acceptable_signature($scan, \@acceptable_identity_tuples,
                                        'whitelist_from_dkim');
-  $self->_wlcheck_originator_signature($scan, \@acceptable_identity_tuples,
+  $self->_wlcheck_author_signature($scan, \@acceptable_identity_tuples,
                                        'whitelist_auth');
+  if (!@acceptable_identity_tuples) {
+    dbg("dkim: no wl entries match author $author, no need to verify sigs");
+    return;
+  }
+
+  # if the message doesn't pass DKIM validation, it can't pass DKIM whitelist
+
+  # trigger a DKIM check so we can get address/identity info;
+  # continue if one or more signatures are valid or we want the debug info
+  return unless $self->check_dkim_valid($scan) || would_log("dbg","dkim");
+
   # now do all the matching in one go, against all signatures in a message
   my($any_match_at_all, $any_match_by_wl_ref) =
     _wlcheck_list($self, $scan, \@acceptable_identity_tuples);
@@ -485,30 +541,28 @@
   foreach my $wl (keys %$any_match_by_wl_ref) {
     my $match = $any_match_by_wl_ref->{$wl};
     if (defined $match) {
-      $scan->{"match_in_$wl"} = 1  if $match;
+      $scan->{"dkim_match_in_$wl"} = 1  if $match;
       if ($match) { push(@valid,$wl) } else { push(@fail,$wl) }
     }
   }
   if (@valid) {
-    dbg("dkim: originator %s, WHITELISTED by %s",
-         $originator, join(", ",@valid));
+    dbg("dkim: author %s, WHITELISTED by %s", $author, join(", ",@valid));
   } elsif (@fail) {
-    dbg("dkim: originator %s, found in %s BUT IGNORED",
-         $originator, join(", ",@fail));
+    dbg("dkim: author %s, found in %s BUT IGNORED", $author, join(", ",@fail));
   } else {
-    dbg("dkim: originator %s, not in any dkim whitelist", $originator);
+    dbg("dkim: author %s, not in any dkim whitelist", $author);
   }
 }
 
 # check for verifier-acceptable signatures; an empty (or undefined) signing
-# identity in a whitelist implies checking for an originator signature
+# identity in a whitelist implies checking for an author signature
 #
 sub _wlcheck_acceptable_signature {
   my ($self, $scan, $acceptable_identity_tuples_ref, $wl) = @_;
-  my $originator = $scan->{dkim_address};
+  my $author = $scan->{dkim_author_address};
   foreach my $white_addr (keys %{$scan->{conf}->{$wl}}) {
     my $re = qr/$scan->{conf}->{$wl}->{$white_addr}{re}/i;
-    if ($originator =~ $re) {
+    if ($author =~ $re) {
       foreach my $acc_id (@{$scan->{conf}->{$wl}->{$white_addr}{domain}}) {
         push(@$acceptable_identity_tuples_ref, [$acc_id,$wl,$re] );
       }
@@ -517,15 +571,15 @@
 }
 
 # use a traditional whitelist_from -style addrlist, the only acceptable DKIM
-# signature is an Originator Signature.  Note: don't pre-parse and store the
+# signature is an Author Signature.  Note: don't pre-parse and store the
 # domains; that's inefficient memory-wise and only saves one m//
 #
-sub _wlcheck_originator_signature {
+sub _wlcheck_author_signature {
   my ($self, $scan, $acceptable_identity_tuples_ref, $wl) = @_;
-  my $originator = $scan->{dkim_address};
+  my $author = $scan->{dkim_author_address};
   foreach my $white_addr (keys %{$scan->{conf}->{$wl}}) {
     my $re = $scan->{conf}->{$wl}->{$white_addr};
-    if ($originator =~ $re) {
+    if ($author =~ $re) {
       push(@$acceptable_identity_tuples_ref, [undef,$wl,$re] );
     }
   }
@@ -537,7 +591,7 @@
   my %any_match_by_wl;
   my $any_match_at_all = 0;
   my $expiration_supported = Mail::DKIM->VERSION >= 0.29 ? 1 : 0;
-  my $originator = $scan->{dkim_address};  # address in a 'From' header field
+  my $author = $scan->{dkim_author_address};  # address in a From header field
 
   # walk through all signatures present in a message
   foreach my $signature (@{$scan->{dkim_signatures}}) {
@@ -555,15 +609,15 @@
     $identity =~ /^ (.*?) \@ ([^\@]*) $/xs;
     my($identity_mbx, $identity_dom) = ($1,$2);
 
-    my $originator_matching_part = $originator;
+    my $author_matching_part = $author;
     if ($identity =~ /^\@/) {  # empty localpart in signing identity
-      $originator_matching_part =~ s/^.*?(\@[^\@]*)?$/$1/s; # strip localpart
+      $author_matching_part =~ s/^.*?(\@[^\@]*)?$/$1/s; # strip localpart
     }
 
     my $info = '';  # summary info string to be used for logging
     $info .= ($valid ? 'VALID' : 'FAILED') . ($expired ? ' EXPIRED' : '');
-    $info .= lc $identity eq lc $originator_matching_part ? ' originator'
-                                                          : ' third-party';
+    $info .= lc $identity eq lc $author_matching_part ? ' author'
+                                                      : ' third-party';
     $info .= " signature by id " . $identity;
 
     foreach my $entry (@$acceptable_identity_tuples_ref) {
@@ -571,18 +625,19 @@
       # $re and $wl are here for logging purposes only, $re already checked.
       # The $acceptable_identity is a verifier-acceptable signing identity.
       # When $acceptable_identity is undef or an empty string it implies an
-      # originator signature check.
+      # author signature check.
 
       my $matches = 0;
       if (!defined $acceptable_identity || $acceptable_identity eq '') {
 
-        # An "Originator Signature" is any Valid Signature where the signing
-        # identity matches the Originator Address. If the signing identity
-        # does not include a localpart, then only the domains must match;
-        # otherwise, the two addresses must be identical.
+        # An "Author Signature" (also called a first-party signature) is
+        # any Valid Signature where the signing identity matches the Author
+        # Address. If the signing identity does not include a localpart,
+        # then only the domains must match; otherwise, the two addresses
+        # must be identical.
 
-        # checking for originator signature
-        $matches = 1  if lc $identity eq lc $originator_matching_part;
+        # checking for author signature
+        $matches = 1  if lc $identity eq lc $author_matching_part;
       }
       else {  # checking for verifier-acceptable signature
         if ($acceptable_identity !~ /\@/) {
@@ -601,7 +656,7 @@
         }
       }
       if ($matches) {
-        dbg("dkim: $info, originator $originator, MATCHES $wl $re");
+        dbg("dkim: $info, author $author, MATCHES $wl $re");
         # a defined value indicates at least a match, not necessarily valid
         $any_match_by_wl{$wl} = 0  if !exists $any_match_by_wl{$wl};
       }
@@ -610,8 +665,7 @@
 
       $any_match_by_wl{$wl} = $any_match_at_all = 1  if $matches;
     }
-    dbg("dkim: $info, originator $originator, no valid matches")
-      if !$any_match_at_all;
+    dbg("dkim: $info, author $author, no valid matches") if !$any_match_at_all;
   }
   return ($any_match_at_all, \%any_match_by_wl);
 }

Modified: spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm Fri Jan 18 01:57:53 2008
@@ -67,6 +67,7 @@
   $self->register_eval_rule("check_unresolved_template");
   $self->register_eval_rule("check_ratware_name_id");
   $self->register_eval_rule("check_ratware_envelope_from");
+  $self->register_eval_rule("gated_through_received_hdr_remover");
 
   return $self;
 }

Modified: spamassassin/branches/bug-5293-pluginized-bayes/masses/hit-frequencies
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/masses/hit-frequencies?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/masses/hit-frequencies (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/masses/hit-frequencies Fri Jan 18 01:57:53 2008
@@ -192,6 +192,7 @@
 my $num_ham = 0;
 my %ranking = ();
 my $ok_lang = '';
+my %meta_subrule_pairs = ();
 
 my %rule_times = ();
 
@@ -200,8 +201,8 @@
 $ok_lang = lc ($opt_l || $opt_L || '');
 if ($ok_lang eq 'all') { $ok_lang = '.'; }
 
-if ($opt_t && $rules_pl_unparseable) {
-  die "-t requires rules.pl to be parseable";
+if (($opt_t || $opt_o) && $rules_pl_unparseable) {
+  die "-t/-o require rules.pl to be parseable";
 }
 
 foreach my $key (keys %rules) {
@@ -843,6 +844,9 @@
       my $reverse_ratio = $1 || 0;
       next unless defined $r2;
 
+      my $is_subrule = ($meta_subrule_pairs{"$r1.$r2"}
+                      || $meta_subrule_pairs{"$r2.$r1"});
+
       if ($opt_d) {
         print qq{
           <overlappair>
@@ -852,9 +856,10 @@
         };
 
       } else {
-        printf "  overlap %4s: %3d%% of %s hits also hit %s; %3d%% of %s hits also hit %s\n",
-                    $type, $ratio, $r2, $r1,
-                    $reverse_ratio, $r1, $r2;
+        printf "  overlap %4s: %3d%% of %s hits also hit %s; %3d%% of %s hits also hit %s%s\n",
+                    $type, $ratio, $r1, $r2,
+                    $reverse_ratio, $r2, $r1,
+                    ($is_subrule ? ' (meta rule and subrule)' : '');
       }
     }
   }
@@ -865,8 +870,7 @@
 }
 
 sub _prettify_overlap_rules {
-  my $rule = shift;
-  my $str = shift;
+  my ($rule, $str) = @_;
 
   my @rules = sort split(' ', $str);
   if ($rules{$rule} && $rules{$rule}->{type} eq 'meta') {
@@ -879,12 +883,15 @@
     @rules = grep {
       my $tmp = $_;
       $tmp =~ s/\[.*\]$//;
-      $code !~ /\b\Q${tmp}\E\b/;
+      if ($code =~ /\b\Q${tmp}\E\b/) {
+        $meta_subrule_pairs{"$rule.$tmp"} = 1;
+        0;
+      } else {
+        1;
+      }
     } @rules;
   }
-
-  my $s = join (' ', @rules);
-  return $s;
+  return join (' ', @rules);
 }
 
 sub _hmap_to_overlap_ratio {

Modified: spamassassin/branches/bug-5293-pluginized-bayes/masses/mass-check
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/masses/mass-check?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/masses/mass-check (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/masses/mass-check Fri Jan 18 01:57:53 2008
@@ -127,6 +127,7 @@
   options used during score generation process
   --learn=N     learn N% of messages as spam or ham
   --reuse       reuse network checks if X-Spam-Status: is present in messages
+                (note: both clients and servers in c/s mode need this)
 
   non-option arguments are used as target names (mail files and folders),
   the target format is: <class>:<format>:<location>
@@ -285,40 +286,23 @@
   exit;
 }
 
-my $user_prefs = "$opt_p/user_prefs";
-
 # either --net or --reuse means we should use set1/set3
 my $use_net_rules = $opt_net || $opt_reuse;
 
 # --lint
-# In theory we could probably use the same spamtest object as below,
-# but since it's probably not expecting that, and we don't want
-# strange things happening, create a local object.
 if ($opt_lint) {
-  my $spamlint = new Mail::SpamAssassin ({
-    'debug'              			=> $opt_debug,
-    'rules_filename'     			=> $opt_c,
-    'userprefs_filename' 			=> $user_prefs,
-    'site_rules_filename'			=> "$opt_p/local.cf",
-    'userstate_dir'     			=> "$opt_p",
-    'save_pattern_hits'  			=> $opt_loghits,
-    'dont_copy_prefs'   			=> 1,
-    'local_tests_only'   			=> $use_net_rules ? 0 : 1,
-    'only_these_rules'   			=> $opt_rules,
-    'ignore_safety_expire_timeout'		=> 1,
-    'post_config_text'                          => join("\n", @{$opt_cf})."\n",
-    PREFIX					=> '',
-    DEF_RULES_DIR        			=> $opt_c,
-    # TODO: it would be nicer for mass-check to not have to specify
-    # this, or to use the same compiler as spamassassin, sa-update etc.
-    LOCAL_RULES_DIR      			=> '',
-  });
-
+  # In theory we could probably use the same spamtest object as below,
+  # but since it's probably not expecting that, and we don't want
+  # strange things happening, create a local object.
+  my $spamlint = create_spamtest();
   $spamlint->debug_diagnostics();
   my $res = $spamlint->lint_rules();
   $spamlint->finish();
-  warn "lint: $res issues detected, please rerun with debug enabled for more information\n" if ($res);
-  exit 1 if $res;
+  if ($res) {
+    warn "lint: $res issues detected, ".
+        "please rerun with debug enabled for more information\n";
+    exit 1;
+  }
 }
 
 # test messages for the mass-check
@@ -356,25 +340,31 @@
   }
 }
 
-my $spamtest = new Mail::SpamAssassin ({
-  'debug'              			=> $opt_debug,
-  'rules_filename'     			=> $opt_c,
-  'userprefs_filename' 			=> $user_prefs,
-  'site_rules_filename'			=> "$opt_p/local.cf",
-  'userstate_dir'     			=> "$opt_p",
-  'save_pattern_hits'  			=> $opt_loghits,
-  'dont_copy_prefs'   			=> 1,
-  'local_tests_only'   			=> $use_net_rules ? 0 : 1,
-  'only_these_rules'   			=> $opt_rules,
-  'ignore_safety_expire_timeout'	=> 1,
-  'post_config_text'                    => join("\n", @{$opt_cf})."\n",
-  PREFIX				=> '',
-  DEF_RULES_DIR        			=> $opt_c,
-  LOCAL_RULES_DIR      			=> '',
-});
+my $user_prefs = "$opt_p/user_prefs";
+
+# Build a Mail::SpamAssassin object from the $opt_* settings.  The prefs path
+# is derived here because --lint calls this before $user_prefs is assigned.
+sub create_spamtest {
+  return Mail::SpamAssassin->new({
+    'debug'                        => $opt_debug,
+    'rules_filename'               => $opt_c,
+    'site_rules_filename'          => "$opt_p/local.cf",
+    'userprefs_filename'           => "$opt_p/user_prefs",
+    'userstate_dir'                => $opt_p,
+    'save_pattern_hits'            => $opt_loghits,
+    'dont_copy_prefs'              => 1,
+    'local_tests_only'             => $use_net_rules ? 0 : 1,
+    'only_these_rules'             => $opt_rules,
+    'ignore_safety_expire_timeout' => 1,
+    'post_config_text'             => join("\n", @{$opt_cf})."\n",
+    PREFIX => '', DEF_RULES_DIR => $opt_c, LOCAL_RULES_DIR => '',
+  });
+}
 
+my $spamtest = create_spamtest();
 $spamtest->compile_now(0);      # 0 since we will be reading more configs
 $spamtest->read_scoreonly_config("$FindBin::Bin/mass-check.cf");
+$spamtest->read_scoreonly_config($user_prefs);
 
 # generated user_prefs
 if ($opt_reuse) {

Modified: spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/automc/ruleqa.cgi
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/automc/ruleqa.cgi?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/automc/ruleqa.cgi (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/automc/ruleqa.cgi Fri Jan 18 01:57:53 2008
@@ -1253,20 +1253,22 @@
   }
 
   my %uniq=();
+  my $max_x = 0;
   my $max_y = 0;
   for my $i (keys %{$chart{'spam'}}, keys %{$chart{'ham'}}) {
     next if exists $uniq{$i}; undef $uniq{$i};
     if (($chart{'spam'}{$i}||0) > $max_y) { $max_y = $chart{'spam'}{$i}; }
     if (($chart{'ham'}{$i}||0)  > $max_y) { $max_y = $chart{'ham'}{$i}; }
+    if ($i > $max_x) { $max_x = $i; }
   }
   $max_y ||= 0.001;
 
+  # ensure 0 .. $max_x are always set
+  foreach my $i (0 .. $max_x) { $uniq{$i} = undef; }
+
   my @idxes = sort { $a <=> $b } keys %uniq;
-  my $max_x;
   if (!scalar @idxes) {
     $max_x = 1; @idxes = ( 0 );
-  } else {
-    $max_x = $idxes[scalar(@idxes) - 1];
   }
   my $min_x = $idxes[0];
   
@@ -1275,10 +1277,8 @@
   my @ycoords_h = map { sprintf "%.2f", (100/$max_y) * ($chart{'ham'}{$_}||0) } @idxes;
   my @xcoords   = map { sprintf "%.2f", (100/$max_x) * $_ } @idxes;
 
-  my $thresh_x;
-  foreach my $i (@xcoords) {
-    if ($i >= 5) { $thresh_x = $i; last; }
-  }
+  my $xgrid = (100/$max_x) * 5;
+  my $ygrid = (100/$max_y) * 10;
 
   # http://code.google.com/apis/chart/ , woo
   my $chartsetup = 
@@ -1289,13 +1289,14 @@
       ."\&amp;chts=ff0000,18"
       ."\&amp;chdl=Ham|Spam"
       ."\&amp;chco=ff0000,0000ff,00ff00"
-      ."\&amp;chls=3,1,0"
-      ."\&amp;chm=V,00ff00,0,$thresh_x,1"
+      ."\&amp;chg=$xgrid,$ygrid"
       ."\&amp;chxl=0:|$min_x+points|$max_x+points|1:|0\%|$max_y\%"
       ."\&amp;chxt=x,y";
 
-  $$outref .= "<p><img src='http://chart.apis.google.com/chart?$chartsetup' 
-		width='400' height='200' align='right' /></p>\n";
+  $$outref .= "<div style='scoremap_chart'>
+       <img src='http://chart.apis.google.com/chart?$chartsetup'
+         style='scoremap_chart' width='400' height='200' align='right'
+       /></div>\n";
 }
 
 sub format_overlap {
@@ -1306,7 +1307,8 @@
   my $out_subrules = '';
 
   foreach my $line (split(/^/m, $ovl)) {
-    my $issubrule = ($line =~ /\d+\%\s+of __/);
+    my $issubrule = ($line =~ /\d+\%\s+of __/
+                    || $line =~ /\(meta rule and subrule\)/);
 
     $line =~ s{^(\s+overlap\s+(?:ham|spam):\s+\d+% )(\S.+?)$}{
         my $str = "$1";
@@ -1472,7 +1474,7 @@
     $self->{cgi_params}{$k} = "$k=$v";
     push (@{$self->{cgi_param_order}}, $k);
   }
-  $self->{q}->param(-name=>$k, -value=>$v);
+  $self->{q}->param(-name=>$k, -value=>uri_unescape($v));
 }
 
 sub get_params_except {

Modified: spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/automc/ruleqa.css
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/automc/ruleqa.css?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/automc/ruleqa.css (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/automc/ruleqa.css Fri Jan 18 01:57:53 2008
@@ -1,147 +1,155 @@
+/* in SVN at
 
-    body {
-      padding: 1em 1em 1em 1em;
-    }
-    pre.freqs {
-      font-family: monospace;
-      font-size: 14px;
-      border: 1px dashed #ddb;
-      margin: 0em -0.5em 0em -0.5em;
-      padding: 10px 20px 10px 20px;
-    }
-    div.updateform {
-      border: 3px solid #aaa;
-      background: #eec;
-      margin: 0em 0em 1em 0em;
-      padding: 0em 1em 0em 2em;
-    }
-
-    p.showfreqslink {
-      color: #999;
-      font-size: 50%;
-      text-align: right;
-      margin: 0px 0px 0px 0px;
-      border: 0px 0px 0px 0px;
-    }
-    p.showfreqslink a { color: #999; }
-
-    div.headdiv {
-      border: 1px solid;
-      background: #f0f8c0;
-      margin: 0px 0px 0px 20px;
-    }
-    p.headclosep {
-      margin: 0px 0px 0px 0px;
-      border: 0px 0px 0px 0px;
-    }
-    pre.head {
-      margin-left: 10px;
-    }
-    
-    table.freqs {
-      border: 1px dashed #ddb;
-      background: #fff;
-      padding: 10px 5px 10px 5px;
-    }
-
-    tr.freqsline_promo1 td {
-      text-align: right;
-      padding: 0.1em 0.2em 0.1em 0.2em;
-    }
-    tr.freqsline_promo0 td {
-      text-align: right;
-      padding: 0.1em 0.2em 0.1em 0.2em;
-      color: #999;
-    }
-    tr.freqsline_promo0 td a { color: #999; }
-
-    a.mcloghref {
-      color: #999;
-      font-size: 50%;
-    }
-
-    h3 {
-      border: 1px solid;
-      padding: 10px 20px 10px 20px;
-      margin: 20px -20px -10px -20px;
-      background: #fe8;
-    }
-
-    td.daterevtd {
-      font-size: 75%;
-      padding: 1px 3px 1px 5px;
-    }
-
-    td.daterevcommittd {
-      font-size: 75%;
-      padding: 1px 3px 1px 5px;
-      background: #ffc;
-    }
-
-    td.mcviewing {
-      background: #7f9;
-    }
-
-    div.commitmsgdiv {
-      font-size: 75%;
-      overflow: auto;
-    }
-
-    td.daterevtdempty {
-      background: #eec;
-    }
-
-    tr.daterevtr {
-      background: #fff;
-    }
-
-    tr.daterevdesc {
-      background: #fea;
-    }
-
-    div.ui_label {
-      font-size: 75%;
-      color: #676;
-    }
-    label.ui_label {
-      font-size: 75%;
-      color: #676;
-    }
-
-
-    /* Sortable tables, see http://www.kryogenix.org/code/browser/sorttable/ */
-    table.sortable a.sortheader {
-       background: #ddd;
-       color:#666;
-       font-weight: bold;
-       text-decoration: none;
-       display: block;
-    }
-    tr.freqsheader {
-       background: #ddd;
-    }
-    table.sortable span.sortarrow {
-       color: black;
-       text-decoration: none;
-    }
-
-
-    /* mouseover data for the freqs spam% and ham% figures using CSS2.
-     * see: http://www.meyerweb.com/eric/css/edge/popups/demo.html
-     */
-    table tr td a.ftd {
-      position: relative;
-      /* relative positioning so that the span will be
-       * "absolute" positioned relative to this block */
-    }
-    table tr td a.ftd span {
-      display: none;
-    }
-    table tr td a.ftd:hover span {
-      display: block;
-      position: absolute; top: 1em; left: 0.5em;
-      padding: 5px 20px 5px 20px; margin: 10px; z-index: 100;
-      border: 1px dashed;
-      background: #ffc;
-    }
+https://svn.apache.org/repos/asf/spamassassin/trunk/masses/rule-qa/automc/ruleqa.css
+
+*/
+
+body {
+  padding: 1em 1em 1em 1em;
+}
+pre.freqs {
+  font-family: monospace;
+  font-size: 14px;
+  border: 1px dashed #ddb;
+  margin: 0em -0.5em 0em -0.5em;
+  padding: 10px 20px 10px 20px;
+}
+div.updateform {
+  border: 3px solid #aaa;
+  background: #eec;
+  margin: 0em 0em 1em 0em;
+  padding: 0em 1em 0em 2em;
+}
+
+p.showfreqslink {
+  color: #999;
+  font-size: 50%;
+  text-align: right;
+  margin: 0px 0px 0px 0px;
+  border: 0;  /* 'border' takes a single width; four values are invalid */
+}
+p.showfreqslink a { color: #999; }
+
+div.headdiv {
+  border: 1px solid;
+  background: #f0f8c0;
+  margin: 0px 0px 0px 20px;
+}
+p.headclosep {
+  margin: 0px 0px 0px 0px;
+  border: 0;  /* 'border' takes a single width; four values are invalid */
+}
+pre.head {
+  margin-left: 10px;
+}
+
+table.freqs {
+  border: 1px dashed #ddb;
+  background: #fff;
+  padding: 10px 5px 10px 5px;
+}
+
+tr.freqsline_promo1 td {
+  text-align: right;
+  padding: 0.1em 0.2em 0.1em 0.2em;
+}
+tr.freqsline_promo0 td {
+  text-align: right;
+  padding: 0.1em 0.2em 0.1em 0.2em;
+  color: #999;
+}
+tr.freqsline_promo0 td a { color: #999; }
+
+tr.freqsline_promo0:hover { background: #e0e6f6; }
+tr.freqsline_promo1:hover { background: #e0e6f6; }
+
+a.mcloghref {
+  color: #999;
+  font-size: 50%;
+}
+
+h3 {
+  border: 1px solid;
+  padding: 10px 20px 10px 20px;
+  margin: 20px -20px -10px -20px;
+  background: #fe8;
+}
+
+td.daterevtd {
+  font-size: 75%;
+  padding: 1px 3px 1px 5px;
+}
+
+td.daterevcommittd {
+  font-size: 75%;
+  padding: 1px 3px 1px 5px;
+  background: #ffc;
+}
+
+td.mcviewing {
+  background: #7f9;
+}
+
+div.commitmsgdiv {
+  font-size: 75%;
+  overflow: auto;
+}
+
+td.daterevtdempty {
+  background: #eec;
+}
+
+tr.daterevtr {
+  background: #fff;
+}
+
+tr.daterevdesc {
+  background: #fea;
+}
+
+div.ui_label {
+  font-size: 75%;
+  color: #676;
+}
+label.ui_label {
+  font-size: 75%;
+  color: #676;
+}
+
+
+/* Sortable tables, see http://www.kryogenix.org/code/browser/sorttable/ */
+table.sortable a.sortheader {
+    background: #ddd;
+    color:#666;
+    font-weight: bold;
+    text-decoration: none;
+    display: block;
+}
+tr.freqsheader {
+    background: #ddd;
+}
+table.sortable span.sortarrow {
+    color: black;
+    text-decoration: none;
+}
+
+
+/* mouseover data for the freqs spam% and ham% figures using CSS2.
+  * see: http://www.meyerweb.com/eric/css/edge/popups/demo.html
+  */
+table tr td a.ftd {
+  position: relative;
+  /* relative positioning so that the span will be
+    * "absolute" positioned relative to this block */
+}
+table tr td a.ftd span {
+  display: none;
+}
+table tr td a.ftd:hover span {
+  display: block;
+  position: absolute; top: 1em; left: 0.5em;
+  padding: 5px 20px 5px 20px; margin: 10px; z-index: 100;
+  border: 1px dashed;
+  background: #ffc;
+}
 

Modified: spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/corpus-hourly
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/corpus-hourly?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/corpus-hourly (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/corpus-hourly Fri Jan 18 01:57:53 2008
@@ -22,7 +22,7 @@
 use File::Path;
 use File::Copy;
 use Time::ParseDate;
-use Cwd 'abs_path';
+use Cwd qw(abs_path);
 use POSIX qw(nice strftime);
 
 use constant WEEK => 7*60*60*24;
@@ -422,7 +422,6 @@
         return;
       }
 
-      chdir "$opt{tree}/masses" or die "cannot chdir $opt{tree}/masses";
       for my $user (sort keys %spam) {
         next unless $ham{$user};
 
@@ -431,12 +430,13 @@
         time_filter_fileset([ "$corpusdir/$spam{$user}" ],
                 "$opt{tmp}/spam.log.$$", $OLDEST_SPAM_WEEKS, undef);
 
-        open(IN, "./hit-frequencies -TxpaP $flags $opt{tmp}/spam.log.$$ $opt{tmp}/ham.log.$$ |");
-        while(<IN>) {
+        start_freqs($rev, "$flags $opt{tmp}/spam.log.$$ $opt{tmp}/ham.log.$$");
+
+        while(<FREQS>) {
           chomp;
           push @output, "$_:$user\n";
         }
-        close(IN);
+        close(FREQS);
 
         system("cat $opt{tmp}/ham.log.$$ >> $opt{tmp}/hamall.log.$$");
         system("cat $opt{tmp}/spam.log.$$ >> $opt{tmp}/spamall.log.$$");
@@ -447,12 +447,13 @@
         return;     # we'll try again later
       }
 
-      open(IN, "./hit-frequencies -TxpaP $flags $opt{tmp}/spamall.log.$$ $opt{tmp}/hamall.log.$$ |");
-      while(<IN>) {
+      start_freqs($rev, "$flags $opt{tmp}/spamall.log.$$ $opt{tmp}/hamall.log.$$");
+
+      while(<FREQS>) {
         /\s0\s+0\s+0.500\s+0.00\s+0.00\s+\(all messages\)/ and $no_msgs = 1;
         push @output, $_;
       }
-      close(IN);
+      close(FREQS);
 
       for (sort sort_all @output) { print OUT; }
     }
@@ -467,13 +468,13 @@
         time_filter_fileset(\@spam, "$opt{tmp}/spam.log.$$", $after, $before);
 
         # print out by age
-        chdir "$opt{tree}/masses" or die "cannot chdir $opt{tree}/masses";
-        open(IN, "./hit-frequencies -TxpaP $flags $opt{tmp}/spam.log.$$ $opt{tmp}/ham.log.$$ |");
-        while(<IN>) {
+        start_freqs($rev, "$flags $opt{tmp}/spam.log.$$ $opt{tmp}/ham.log.$$");
+        
+        while(<FREQS>) {
           chomp;
           push @output, "$_:$which\n";
         }
-        close(IN);
+        close(FREQS);
       }
       for (sort sort_all @output) { print OUT; }
     }
@@ -487,13 +488,13 @@
         return;     # we'll try again later
       }
 
-      chdir "$opt{tree}/masses" or die "cannot chdir $opt{tree}/masses";
-      open(IN, "./hit-frequencies -TxpaP $flags $opt{tmp}/spam.log.$$ $opt{tmp}/ham.log.$$ |");
-      while(<IN>) {
+      start_freqs($rev, "$flags $opt{tmp}/spam.log.$$ $opt{tmp}/ham.log.$$");
+
+      while(<FREQS>) {
         /\s0\s+0\s+0.500\s+0.00\s+0.00\s+\(all messages\)/ and $no_msgs = 1;
         print(OUT);
       }
-      close(IN);
+      close(FREQS);
     }
 
     $bytes = (-s OUT);
@@ -597,3 +598,32 @@
     warn "'$cmd' failed";
   }
 }
+
+# Set up a per-revision checkout under $opt{tmp}/hfdir/r$rev (copied from
+# $opt{tree} on first use), update it to $rev (default 'HEAD'), rebuild the
+# rules, and open the global FREQS pipe on "./hit-frequencies -TxpaP $args".
+# Changes the cwd; the caller reads from and closes FREQS.
+sub start_freqs {
+  my ($rev, $args) = @_;
+  $rev ||= 'HEAD';
+  my $hfdir = "$opt{tmp}/hfdir/r$rev";
+  print "setting up hit-frequencies for r$rev in $hfdir\n";
+
+  if (!-d $hfdir) {
+    (-d "$opt{tmp}/hfdir") or mkdir("$opt{tmp}/hfdir")
+            or die "cannot mkdir $opt{tmp}/hfdir: $!";
+    # list form avoids the shell; any non-zero status (incl. signals) is fatal
+    system("cp", "-pr", $opt{tree}, $hfdir) == 0
+            or die "cp -pr $opt{tree} $hfdir failed";
+  }
+  chdir $hfdir or die "cannot chdir $hfdir";
+  system("svn up -r$rev; svn up -r$rev rulesrc");
+  # ensure these are rebuilt
+  system("rm -f rules/70_sandbox.cf rules/72_active.cf");
+  # do this twice in case Makefile.PL is rebuilt
+  system ("( make build_rules; make build_rules ) < /dev/null");
+  chdir "$hfdir/masses" or die "cannot chdir $hfdir/masses";
+  open (FREQS, "./hit-frequencies -TxpaP $args |")
+            or die "cannot run ./hit-frequencies $args |";
+}
+

Modified: spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/corpus-nightly
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/corpus-nightly?rev=613117&r1=613116&r2=613117&view=diff
==============================================================================
--- spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/corpus-nightly (original)
+++ spamassassin/branches/bug-5293-pluginized-bayes/masses/rule-qa/corpus-nightly Fri Jan 18 01:57:53 2008
@@ -2,7 +2,8 @@
 
 # settings are located in $HOME/.corpus
 
-echo "Using corpus-nightly settings from $HOME/.corpus"
+echo "Using corpus-nightly settings from $HOME/.corpus at"
+date
 . $HOME/.corpus
 
 # use $PERL from env if unset