You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/06/30 21:36:34 UTC

svn commit: r418365 [1/3] - in /spamassassin/branches/bug-3109-shortcircuiting: ./ build/ build/automc/ build/mkupdates/ ldap/ lib/ lib/Mail/ lib/Mail/SpamAssassin/ lib/Mail/SpamAssassin/Conf/ lib/Mail/SpamAssassin/Message/ lib/Mail/SpamAssassin/Messag...

Author: jm
Date: Fri Jun 30 12:36:31 2006
New Revision: 418365

URL: http://svn.apache.org/viewvc?rev=418365&view=rev
Log:
merge forward SVN trunk changes from r394351 to 418362; now up to date with head

Added:
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/URIDetail.pm
    spamassassin/branches/bug-3109-shortcircuiting/rules/v312.pre
    spamassassin/branches/bug-3109-shortcircuiting/spamc/getopt.c
    spamassassin/branches/bug-3109-shortcircuiting/spamc/getopt.h
    spamassassin/branches/bug-3109-shortcircuiting/t/data/spam/gtubedcc.eml
    spamassassin/branches/bug-3109-shortcircuiting/t/dcc.t
    spamassassin/branches/bug-3109-shortcircuiting/t/uribl.t   (with props)
Modified:
    spamassassin/branches/bug-3109-shortcircuiting/BUGS
    spamassassin/branches/bug-3109-shortcircuiting/INSTALL
    spamassassin/branches/bug-3109-shortcircuiting/INSTALL.VMS
    spamassassin/branches/bug-3109-shortcircuiting/MANIFEST
    spamassassin/branches/bug-3109-shortcircuiting/Makefile.PL
    spamassassin/branches/bug-3109-shortcircuiting/PACKAGING
    spamassassin/branches/bug-3109-shortcircuiting/README
    spamassassin/branches/bug-3109-shortcircuiting/build/automc/freqsd
    spamassassin/branches/bug-3109-shortcircuiting/build/automc/freqsd-infrequent
    spamassassin/branches/bug-3109-shortcircuiting/build/automc/populate_cor
    spamassassin/branches/bug-3109-shortcircuiting/build/automc/populate_cor_nightly
    spamassassin/branches/bug-3109-shortcircuiting/build/automc/run_nightly
    spamassassin/branches/bug-3109-shortcircuiting/build/automc/run_preflight
    spamassassin/branches/bug-3109-shortcircuiting/build/mkrules
    spamassassin/branches/bug-3109-shortcircuiting/build/mkupdates/listpromotable
    spamassassin/branches/bug-3109-shortcircuiting/build/mkupdates/run_part2
    spamassassin/branches/bug-3109-shortcircuiting/build/update_devel
    spamassassin/branches/bug-3109-shortcircuiting/ldap/README
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/AICache.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/ArchiveIterator.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Bayes.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Client.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/LDAP.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/Parser.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/SQL.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Dns.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/DnsResolver.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/EvalTests.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message/Metadata/Received.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message/Node.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/DCC.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/Pyzor.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/TextCat.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PluginHandler.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Reporter.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/spamassassin-run.pod
    spamassassin/branches/bug-3109-shortcircuiting/masses/mass-check
    spamassassin/branches/bug-3109-shortcircuiting/masses/rule-qa/automc/RUNME.after
    spamassassin/branches/bug-3109-shortcircuiting/masses/rule-qa/automc/RUNME.before
    spamassassin/branches/bug-3109-shortcircuiting/masses/rule-qa/automc/config.example
    spamassassin/branches/bug-3109-shortcircuiting/masses/rule-qa/automc/gen_info_xml
    spamassassin/branches/bug-3109-shortcircuiting/masses/rule-qa/automc/post-comments
    spamassassin/branches/bug-3109-shortcircuiting/masses/rule-qa/automc/ruleqa.cgi
    spamassassin/branches/bug-3109-shortcircuiting/masses/rule-qa/automc/scrape-bugzilla
    spamassassin/branches/bug-3109-shortcircuiting/masses/rule-qa/corpus-hourly
    spamassassin/branches/bug-3109-shortcircuiting/masses/rule-qa/post-bugs-for-retired-tests
    spamassassin/branches/bug-3109-shortcircuiting/rules/20_body_tests.cf
    spamassassin/branches/bug-3109-shortcircuiting/rules/active.list
    spamassassin/branches/bug-3109-shortcircuiting/rules/v320.pre
    spamassassin/branches/bug-3109-shortcircuiting/sa-update.raw
    spamassassin/branches/bug-3109-shortcircuiting/spamassassin.raw
    spamassassin/branches/bug-3109-shortcircuiting/spamc/Makefile.in
    spamassassin/branches/bug-3109-shortcircuiting/spamc/config.h.in
    spamassassin/branches/bug-3109-shortcircuiting/spamc/configure
    spamassassin/branches/bug-3109-shortcircuiting/spamc/configure.in
    spamassassin/branches/bug-3109-shortcircuiting/spamc/libspamc.c
    spamassassin/branches/bug-3109-shortcircuiting/spamc/libspamc.h
    spamassassin/branches/bug-3109-shortcircuiting/spamc/qmail-spamc.c
    spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.c
    spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.pod
    spamassassin/branches/bug-3109-shortcircuiting/spamd/README
    spamassassin/branches/bug-3109-shortcircuiting/spamd/netbsd-rc-script.sh
    spamassassin/branches/bug-3109-shortcircuiting/spamd/redhat-rc-script.sh
    spamassassin/branches/bug-3109-shortcircuiting/spamd/spamd.raw
    spamassassin/branches/bug-3109-shortcircuiting/t/config.dist
    spamassassin/branches/bug-3109-shortcircuiting/t/dnsbl.t
    spamassassin/branches/bug-3109-shortcircuiting/t/gtube.t
    spamassassin/branches/bug-3109-shortcircuiting/t/mkrules.t
    spamassassin/branches/bug-3109-shortcircuiting/t/rcvd_parser.t
    spamassassin/branches/bug-3109-shortcircuiting/t/recreate.t
    spamassassin/branches/bug-3109-shortcircuiting/t/spamc_l.t
    spamassassin/branches/bug-3109-shortcircuiting/t/spf.t

Modified: spamassassin/branches/bug-3109-shortcircuiting/BUGS
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/BUGS?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/BUGS (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/BUGS Fri Jun 30 12:36:31 2006
@@ -4,6 +4,6 @@
 
 - NONE!  ;)
 
-(See http://bugzilla.spamassassin.org/ for the SpamAssassin bug tracking
+(See http://issues.apache.org/SpamAssassin/ for the SpamAssassin bug tracking
 database, and to report a bug.)
 

Modified: spamassassin/branches/bug-3109-shortcircuiting/INSTALL
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/INSTALL?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/INSTALL (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/INSTALL Fri Jun 30 12:36:31 2006
@@ -298,6 +298,13 @@
     more precisely.
 
 
+  - Mail::DKIM (from CPAN)
+
+    If this module is installed, and you enable the DKIM plugin,
+    SpamAssassin will perform DomainKeys Identified Mail lookups when
+    DKIM information is present in the message headers.
+
+
   - Mail::DomainKeys (from CPAN)
 
     If this module is installed, and you enable the DomainKeys plugin,

Modified: spamassassin/branches/bug-3109-shortcircuiting/INSTALL.VMS
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/INSTALL.VMS?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/INSTALL.VMS (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/INSTALL.VMS Fri Jun 30 12:36:31 2006
@@ -30,6 +30,6 @@
 
   - bug 1099 in the SA Bugzilla is being used to track progress.
 
-        http://bugzilla.spamassassin.org/show_bug.cgi?id=1099
+        http://issues.apache.org/SpamAssassin/show_bug.cgi?id=1099
 
 

Modified: spamassassin/branches/bug-3109-shortcircuiting/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/MANIFEST?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/MANIFEST (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/MANIFEST Fri Jun 30 12:36:31 2006
@@ -86,6 +86,7 @@
 lib/Mail/SpamAssassin/Plugin/SpamCop.pm
 lib/Mail/SpamAssassin/Plugin/Test.pm
 lib/Mail/SpamAssassin/Plugin/TextCat.pm
+lib/Mail/SpamAssassin/Plugin/URIDetail.pm
 lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
 lib/Mail/SpamAssassin/Plugin/WhiteListSubject.pm
 lib/Mail/SpamAssassin/PluginHandler.pm
@@ -187,9 +188,8 @@
 spamc/libspamc.c
 spamc/libspamc.h
 spamc/qmail-spamc.c
-spamc/replace/README.getopt
-spamc/replace/getopt.c
-spamc/replace/getopt.h
+spamc/getopt.c
+spamc/getopt.h
 spamc/spamc.c
 spamc/spamc.h.in
 spamc/spamc.h.win
@@ -296,6 +296,7 @@
 t/data/spam/bsmtpnull
 t/data/spam/dnsbl.eml
 t/data/spam/gtube.eml
+t/data/spam/gtubedcc.eml
 t/data/spam/spf1
 t/data/spam/spf2
 t/data/spam/spf3
@@ -326,6 +327,7 @@
 t/db_awl_path.t
 t/db_based_whitelist.t
 t/db_based_whitelist_ips.t
+t/dcc.t
 t/debug.t
 t/desc_wrap.t
 t/dnsbl.t
@@ -361,6 +363,7 @@
 t/reportheader_8bit.t
 t/rule_multiple.t
 t/rule_names.t
+t/rule_tests.t
 t/rule_types.t
 t/sha1.t
 t/spam.t
@@ -461,6 +464,8 @@
 rules/sa-update-pubkey.txt
 rules/user_prefs.template
 rules/v310.pre
+rules/v312.pre
+rules/v320.pre
 rules/active.list
 rules/70_inactive.cf
 t/mkrules.t
@@ -476,5 +481,5 @@
 t/data/nice/dkim/Nowsp_03
 t/data/nice/dkim/Simple_02
 t/dkim.t
-rules/v320.pre
+t/uribl.t
 t/shortcircuit.t

Modified: spamassassin/branches/bug-3109-shortcircuiting/Makefile.PL
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/Makefile.PL?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/Makefile.PL (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/Makefile.PL Fri Jun 30 12:36:31 2006
@@ -145,9 +145,13 @@
 }
 
 
-# Gather the rules files in the range 00-69; we do this in perl because it's more portable
-my @datafiles = map { s,^rules/,,; $_ } (<rules/*.cf>);
-my $datafiles = join(' ', (grep { /^(?:[0-6][0-9]|72)_/ } @datafiles), qw(user_prefs.template languages sa-update-pubkey.txt));
+# Gather the rules files in the range 00-69 (plus 72); we do this in perl
+# because it's more portable.  Also gather the plugin .pm files.
+my @datafiles = map { s,^rules/,,; $_ }
+                grep { -f $_ } (<rules/*.cf>, <rules/*.pm>);
+my $datafiles = join(' ', (grep 
+                { /^(?:(?:[0-6][0-9]|72)_\S+\.cf|\S+\.pm)/ } @datafiles),
+                qw(user_prefs.template languages sa-update-pubkey.txt));
 
 
 # See lib/ExtUtils/MakeMaker.pm for details of how to influence
@@ -501,7 +505,7 @@
 # If it is omitted, the value set in the current EU::MM instance is used.
 sub macro_def {
   my($name, $val) = (@_, undef);
-  my $MUST_NOT_HAPPEN = "THIS MUST NOT HAPPEN. PLEASE REPORT A BUG VIA <http://bugzilla.spamassassin.org>";
+  my $MUST_NOT_HAPPEN = "THIS MUST NOT HAPPEN. PLEASE REPORT A BUG VIA <http://issues.apache.org/SpamAssassin/>";
   die $MUST_NOT_HAPPEN  unless defined $name;
   die $MUST_NOT_HAPPEN  unless defined $EQ;
   $val = $SELF->{$name} unless defined $val;
@@ -1185,6 +1189,7 @@
 	$(PERL) -MFile::Copy -e "copy(q{rules/local.cf}, q{$(B_CONFDIR)/local.cf}) unless -f q{$(B_CONFDIR)/local.cf}"
 	$(PERL) -MFile::Copy -e "copy(q{rules/init.pre}, q{$(B_CONFDIR)/init.pre}) unless -f q{$(B_CONFDIR)/init.pre}"
 	$(PERL) -MFile::Copy -e "copy(q{rules/v310.pre}, q{$(B_CONFDIR)/v310.pre}) unless -f q{$(B_CONFDIR)/v310.pre}"
+	$(PERL) -MFile::Copy -e "copy(q{rules/v312.pre}, q{$(B_CONFDIR)/v312.pre}) unless -f q{$(B_CONFDIR)/v312.pre}"
 	$(PERL) -MFile::Copy -e "copy(q{rules/v320.pre}, q{$(B_CONFDIR)/v320.pre}) unless -f q{$(B_CONFDIR)/v320.pre}"
 
 data__install:

Modified: spamassassin/branches/bug-3109-shortcircuiting/PACKAGING
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/PACKAGING?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/PACKAGING (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/PACKAGING Fri Jun 30 12:36:31 2006
@@ -348,7 +348,7 @@
 ---------
 
 [BUGZILLA] SpamAssassin bug database:
-  <http://bugzilla.spamassassin.org>
+  <http://issues.apache.org/SpamAssassin/>
 
 [DEBPERL] Debian Perl Policy, Chapter 3: Packaged Modules:
   <http://www.debian.org/doc/packaging-manuals/perl-policy/ch-module_packages.html>

Modified: spamassassin/branches/bug-3109-shortcircuiting/README
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/README?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/README (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/README Fri Jun 30 12:36:31 2006
@@ -70,7 +70,7 @@
 
 	[1]: http://wiki.apache.org/spamassassin/
 	[2]: http://wiki.apache.org/spamassassin/MailingLists
-	[3]: http://bugzilla.spamassassin.org/
+	[3]: http://issues.apache.org/SpamAssassin/
 
 Please also be sure to read the man pages.
 
@@ -272,7 +272,7 @@
 distribution. Please file a bug in our Bugzilla[4], and attach your
 translations. You will, of course, be credited for this work!
 
-	[4]: http://bugzilla.spamassassin.org/
+	[4]: http://issues.apache.org/SpamAssassin/
 
 
 Disabled code

Modified: spamassassin/branches/bug-3109-shortcircuiting/build/automc/freqsd
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/build/automc/freqsd?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/build/automc/freqsd (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/build/automc/freqsd Fri Jun 30 12:36:31 2006
@@ -1,4 +1,7 @@
 #!/local/perl586/bin/perl
+#
+# freqsd - perform "background" operations required by the automc
+# infrastructure, such as building indices and "hit-frequencies" reports
 
 use strict;
 use warnings;
@@ -91,6 +94,7 @@
 
 $| = 1;
 logit "freqsd starting";
+my $is_first_time = 1;
 
 if ($am_parent) {
   while (1) {
@@ -107,7 +111,18 @@
 # ---------------------------------------------------------------------------
 
 sub parent_loop {
-  my $job = $dq_fast->wait_for_queued_job($idle_sleep);
+  # I'm impatient.  Many times when I have to restart this script, I want to
+  # see "faster" report results built immediately, without the 10-minute wait.
+  # So first time around, just sleep for 5 secs, so that we get started almost
+  # immediately.
+
+  my $this_sleep = $idle_sleep;
+  if ($is_first_time) {
+    $is_first_time = 0;
+    $this_sleep = 5;
+  }
+
+  my $job = $dq_fast->wait_for_queued_job($this_sleep);
 
   if ($job && $job->{metadata}->{dir}) {
     # if a dir was specified, it's always a "b" (buildbot) mass-check;
@@ -126,7 +141,6 @@
   run ("cd masses ; ./rule-qa/automc/gen_info_xml");
 
   logit "completed faster reports";
-
   if ($job) { $job->finish(); }
 }
 

Modified: spamassassin/branches/bug-3109-shortcircuiting/build/automc/freqsd-infrequent
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/build/automc/freqsd-infrequent?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/build/automc/freqsd-infrequent (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/build/automc/freqsd-infrequent Fri Jun 30 12:36:31 2006
@@ -2,22 +2,22 @@
 
 # recreate the corpus link-farm; log results to a web-visible file
 
-BBMHOME=/home/bbmass
-
-(
-
-  # create links to the user-submitted corpora in the right
-  # volumes etc.
-  date
-  ./build/automc/populate_cor
-  date
-
-  # permit mass-check processes to create files in these dirs,
-  # so they can use the ArchiveIterator cache code
-  cd $BBMHOME/tmpfs/cor
-  find . -type d | xargs chmod 1777
-
-) 2>&1 | tee \
-    /var/www/buildbot.spamassassin.org/bbmass/corpus_makeup.txt
-    # note: needs to be customised for each site
+# BBMHOME=/home/bbmass
+# 
+# (
+# 
+  # # create links to the user-submitted corpora in the right
+  # # volumes etc.
+  # date
+  # ./build/automc/populate_cor
+  # date
+# 
+  # # permit mass-check processes to create files in these dirs,
+  # # so they can use the ArchiveIterator cache code
+  # cd $BBMHOME/tmpfs/cor
+  # find . -type d | xargs chmod 1777
+# 
+# ) 2>&1 | tee \
+    # /var/www/buildbot.spamassassin.org/bbmass/corpus_makeup.txt
+    # # note: needs to be customised for each site
 

Modified: spamassassin/branches/bug-3109-shortcircuiting/build/automc/populate_cor
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/build/automc/populate_cor?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/build/automc/populate_cor (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/build/automc/populate_cor Fri Jun 30 12:36:31 2006
@@ -1,43 +0,0 @@
-#!/bin/sh -x
-#
-# as seen on the zone's "bbmass" mass-checking buildbot
-
-SADIR=/home/automc/svn/spamassassin
-BBMHOME=/home/bbmass
-
-[ -d $BBMHOME/tmpfs/cor ] || \
-    mkdir $BBMHOME/tmpfs/tmp $BBMHOME/tmpfs/cor
-
-cd $BBMHOME/rawcor
-date
-
-mkdir $BBMHOME/tmpfs/newcor
-
-TMPDIR=$BBMHOME/tmpfs/tmp \
-  /local/perl586/bin/perl \
-  $SADIR/masses/corpora/mk-corpus-link-farm \
-          -after="6 months ago" \
-          -dest $BBMHOME/tmpfs/newcor/mc-fast -num 1000 \
-          -dest $BBMHOME/tmpfs/newcor/mc-med -num 5000 \
-          -dest $BBMHOME/tmpfs/newcor/mc-slow -num 10000 \
-          -dest $BBMHOME/tmpfs/newcor/mc-slower -num 20000 \
-        *
-
-# remove this "out of the way" name, just in case.   for some reason
-# this seems to occasionally be un-rm'able; Solaris zones bug?
-rm -rf $BBMHOME/tmpfs/cor.old.$$
-
-# and put the new corpus linkfarm in place...
-mv $BBMHOME/tmpfs/cor $BBMHOME/tmpfs/cor.old.$$
-mv $BBMHOME/tmpfs/newcor $BBMHOME/tmpfs/cor
-
-# remove the old one (or try, at least)
-rm -rf $BBMHOME/tmpfs/cor.old.$$
-
-for dir in mc-fast mc-med mc-slow mc-slower
-do
-echo "ham:dir:$BBMHOME/tmpfs/cor/$dir/ham
-spam:dir:$BBMHOME/tmpfs/cor/$dir/spam" > $BBMHOME/tmpfs/cor/$dir/targets
-done
-
-date

Modified: spamassassin/branches/bug-3109-shortcircuiting/build/automc/populate_cor_nightly
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/build/automc/populate_cor_nightly?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/build/automc/populate_cor_nightly (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/build/automc/populate_cor_nightly Fri Jun 30 12:36:31 2006
@@ -16,8 +16,7 @@
   mkdir ($TGTS_DIR) or die "cannot create $TGTS_DIR";
 }
 
-chdir ($RAWCOR_DIR) or die "cd $RAWCOR_DIR failed";
-
+chdir $RAWCOR_DIR or die "cd $RAWCOR_DIR failed";
 foreach my $submitter (<*>) {
   next unless (-d $submitter);
 
@@ -26,17 +25,8 @@
   open (OUT, ">$TGTS_DIR/targets.$submitter")
     or warn "cannot write to $TGTS_DIR/targets.$submitter";
 
-  foreach my $type (qw(ham spam))
-  {
-    foreach my $dir (<$submitter/$type/*>) {
-      next unless (-d $dir);
-      print OUT "$type:dir:$RAWCOR_DIR/$dir\n";
-    }
-    foreach my $mbox (<$submitter/$type/*.[mM][bB][oO][xX]>) {
-      next unless (-f $mbox);
-      print OUT "$type:mbox:$RAWCOR_DIR/$mbox\n";
-    }
-  }
+  print OUT "ham:detect:$RAWCOR_DIR/$submitter/ham/*\n",
+            "spam:detect:$RAWCOR_DIR/$submitter/spam/*\n";
 
   close OUT or die "write failed";
 }

Modified: spamassassin/branches/bug-3109-shortcircuiting/build/automc/run_nightly
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/build/automc/run_nightly?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/build/automc/run_nightly (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/build/automc/run_nightly Fri Jun 30 12:36:31 2006
@@ -2,11 +2,14 @@
 #
 # driver for nightly mass-checks on the zone. run from cron as:
 # 10 9,10 * * * /home/automc/svn/spamassassin/build/automc/run_nightly > /var/www/buildbot.spamassassin.org/bbmass/nightly_masschecks.txt 2>&1
+#
+# details: http://wiki.apache.org/spamassassin/NightlyMassCheck
 
 # add usernames who you want to do nightlies for here:
 nightly_users="
   doc
   zmi
+  jm
 "
 
 # and where the $HOMEs are:

Modified: spamassassin/branches/bug-3109-shortcircuiting/build/automc/run_preflight
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/build/automc/run_preflight?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/build/automc/run_preflight (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/build/automc/run_preflight Fri Jun 30 12:36:31 2006
@@ -11,14 +11,24 @@
   die "no perl path found in ARGV!";
 }
 
-my $slavename = "generic";
+my $slavename;
 
 $|=1;
 my $pwd = `pwd`;
-$pwd =~ /slaves\/([-_A-Za-z0-9]+)\//; if ($1) { $slavename = $1; }
+$pwd =~ /slaves\/([-_A-Za-z0-9]+)\//; if ($1) {
+  $slavename = $1;
+} else {
+  die "cannot work out slavename!  $pwd";
+}
+
+my %mass_check_args = (
 
-my $targets = "/tmpfs/cor/$1/targets";
-print "[using corpus targets file: $targets]\n";
+  'mc-fast' =>      '--tail=1000',
+  'mc-med' =>       '--tail=6000  --head=5000',
+  'mc-slow' =>      '--tail=16000 --head=10000',
+  'mc-slower' =>    '--tail=36000 --head=20000',
+
+);
 
 # super-nice please!
 #
@@ -30,13 +40,34 @@
 
 unlink ("ham.log", "spam.log");
 
-# just the sandbox rules and the timing plugin
+# just the sandbox rules, sandbox plugins, and the timing plugin
 #
 system ("rm -rf tstrules");
 run "mkdir tstrules";
-run "cp ../rules/70_sandbox.cf tstrules";
+run "cp ../rules/*.pm tstrules";
 run "cp plugins/*.* tstrules";
 
+# don't just copy; instead, transcribe while dropping score and describe lines
+# (to avoid '[26260] warn: config: warning: score set for non-existent rule
+# HTML_SHORT_LENGTH').
+## run "cp ../rules/70_sandbox.cf tstrules";
+open IN, "<../rules/70_sandbox.cf" or die "cannot read ../rules/sandbox.cf";
+open OUT, ">tstrules/70_sandbox.cf" or die "cannot write tstrules/70_sandbox.cf";
+
+while (<IN>) {
+  s/^\s+//;
+  next if /^(?:
+    score|
+    describe|
+    lang
+    )/x;
+
+  print OUT;
+}
+
+close IN or die "close failed!";
+close OUT or die "close failed!";
+
 # well, ok just those, and anything that's been mailed-in
 # 
 if (-f 'mailed.cf') {
@@ -60,16 +91,21 @@
 # notes on this mass-check command:
 #
 # this is run in a chroot jail, just in case there's hostile rule code in
-# there. limit to the most recent 30k messages of each type, as the corpora are
-# getting big.  de-encapsulate 'report_safe' messages from
-# petuniapress.com.  produce lots of noisy output to stop the buildbot from
-# timing out on mass-checks of large corpora.
+# there. 
+# de-encapsulate 'report_safe' messages from petuniapress.com.
+# produce lots of noisy output to stop the buildbot from timing out on
+# mass-checks of large corpora.
+# store AICache data in /tmpfs/aicache.
 
 run "/local/bbmasstools/masschroot $perl ".
-    "mass-check -c=tstrules --tail=30000 --cache -j=1 ".
-    "--noisy ".
-    "--deencap='petuniapress.com' ".
-    "-f $targets";
+    "mass-check -c=tstrules --cache -j=1 ".
+    "--noisy --deencap='petuniapress.com' ".
+    "--cachedir=/tmpfs/aicache ".
+    $mass_check_args{$slavename}." ".
+    "ham:detect:/home/bbmass/rawcor/*/ham/* ".
+    "spam:detect:/home/bbmass/rawcor/*/spam/*";
+
+    # TODO: add --after="6 months ago"?
 
 exit;
 

Modified: spamassassin/branches/bug-3109-shortcircuiting/build/mkrules
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/build/mkrules?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/build/mkrules (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/build/mkrules Fri Jun 30 12:36:31 2006
@@ -162,6 +162,7 @@
 
 sub lint_rule_text {
   my ($text) = @_;
+
   my $mailsa = Mail::SpamAssassin->new({
       rules_filename => "./rules",
       # debug => 1,
@@ -185,6 +186,7 @@
   };
 
   $mailsa->lint_rules();
+  $mailsa->finish();
   return $errors;       # 0 means good
 }
 
@@ -348,6 +350,11 @@
       {
         $rules->{$name}->{found_definition} = 1;
       }
+      # userconf rules are always published in "active"
+      elsif (($type eq 'tflags') && ($val =~ /\buserconf\b/))
+      {
+        $rules->{$name}->{forceactive} = 1;
+      }
 
       $current_comments = '';
 
@@ -745,6 +752,7 @@
   my ($rule, $path) = @_;
   my $new;
   my $newreason;
+  my $dowarn = 0;
 
   return $rule if $opt_listpromotable;
   return $rule if $active_rules->{$rule};
@@ -764,11 +772,14 @@
     $new =~ s/_$//;
     $new = $rule.'_'.$new;
     $newreason = "collision with existing rule";
+    $dowarn = 1;
   }
 
   if (!$renamed_rules->{$new}) {
     $renamed_rules->{$new} = $rule;
-    warn "WARNING: $rule: renamed as $new due to $newreason\n";
+    if ($dowarn) {
+      warn "WARNING: $rule: renamed as $new due to $newreason\n";
+    }
   }
 
   return $new;

Modified: spamassassin/branches/bug-3109-shortcircuiting/build/mkupdates/listpromotable
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/build/mkupdates/listpromotable?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/build/mkupdates/listpromotable (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/build/mkupdates/listpromotable Fri Jun 30 12:36:31 2006
@@ -21,12 +21,12 @@
 
 ###########################################################################
 
-my $cgi_url = "http://buildbot.spamassassin.org/";
+my $cgi_url = "http://ruleqa.spamassassin.org/";
 my $doc;
 my $cache = 'ruleqa.cache';
 
 if (!$FROM_CACHE || !-f $cache) {
-  my $url = $cgi_url."ruleqa?daterev=last-night";
+  my $url = $cgi_url."last-night?xml=1";
   $doc = get ($url);
   if (!$doc) {
     die "HTTP get failed: $doc\n";
@@ -42,6 +42,15 @@
 
 ###########################################################################
 
+my $submitters = '';
+if ($doc =~ m{ <td\sclass=daterevtd><b>Viewing</b></td>.*?
+                <mcsubmitters>(.*?)</mcsubmitters> }sx)
+{
+  $submitters = $1;
+}
+
+###########################################################################
+
 # <rule><test>__HIGHBITS</test><promo>0</promo>
 # <spc>8.7654</spc><hpc>0.2056</hpc><so>0.977</so>
 # <detailhref>ruleqa%3Fdaterev%3Dlast-night%26rule%3D__HIGHBITS%26s_detail%3D1</detailhref></rule>
@@ -114,7 +123,13 @@
 
 $mailsa->lint_rules();
 
-print "# active ruleset list (automatically generated from $cgi_url)\n";
+print "# active ruleset list, automatically generated from $cgi_url\n";
+print "# with results from: $submitters\n";
+
+my @spcs = ($submitters =~ /\s+/g);
+if (scalar @spcs < 1) {
+  die "not generating results; less than 2 submitter results available!\n";
+}
 
 foreach my $plistkey (sort keys %$plist) {
   my $name = $plistkey;
@@ -135,7 +150,7 @@
   # now that it's ok to have sandbox rules without a T_ prefix,
   # "T_" prefix implies "tflags nopublish"
   next if ($name =~ /^T_/);
-
+  
   # ignore rules that don't exist (if they have a desc or score,
   # they exist according to the Conf parser)
   next unless ($mailsa->{conf}->{descriptions}->{$name}
@@ -173,6 +188,7 @@
     my $pkg = $1;
     # grep out the ones we *do* have, and do use in "ifplugin"
     # lines in "rulesrc", here...
+    next;   #JMD:
     next if ($pkg =~ /${PROMOTABLE_PLUGINS_RE}/o);
     print "\n# not publishing $name: needs $ifs\n";
     $skip++;
@@ -187,18 +203,4 @@
 
   print "\n# $notes\n$name\n";
 }
-
-
-## # now write that to a tmp file so 'mkrules' can use it
-## my $tmp = new File::Temp( UNLINK => 1, SUFFIX => '.pl' );
-## print $tmp $dump;
-## 
-## my $perl = $^X;
-## if (!$perl) {
-## die "no perl path found in ARGV!";
-## }
-## 
-## # and exec that script
-## exec $perl, "build/mkrules", "--listpromotable=$tmp";
-## die "exec failed";
 

Modified: spamassassin/branches/bug-3109-shortcircuiting/build/mkupdates/run_part2
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/build/mkupdates/run_part2?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/build/mkupdates/run_part2 (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/build/mkupdates/run_part2 Fri Jun 30 12:36:31 2006
@@ -54,7 +54,7 @@
 # to be honest, right now this is unused.
 version="$1"
 
-tmpdir=/home/updatesd/tmp/stage
+tmpdir=/home/updatesd/tmp/stage/$version
 rm -rf $tmpdir; mkdir -p $tmpdir         || exit $?
 
 
@@ -73,7 +73,7 @@
 
 # remove new features, unsupported in existing code in the field
 # (TODO: need a better way to exclude files that require new features
-# like this)
+# like this; judicious use of "ifplugin" may help)
 rm rules/60_shortcircuit.cf
 
 # and ensure the ruleset lints!
@@ -85,7 +85,7 @@
 
 (
   cd $rulesdir 
-  tar cf - *.cf                          || exit $?
+  tar cvf - *.cf *.pm                    || exit $?
 
 ) | gzip -9 > $tmpdir/update.tgz         || exit $?
 

Modified: spamassassin/branches/bug-3109-shortcircuiting/build/update_devel
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/build/update_devel?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/build/update_devel (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/build/update_devel Fri Jun 30 12:36:31 2006
@@ -12,9 +12,6 @@
   echo 1>&2
 fi
 
-GPGSIGN=$SA_GPGSIGN
-test -x "$GPGSIGN" || GPGSIGN=$HOME/sabuildtools/bin/gpgsign
-
 DEVDIR=$WEBDIR/devel
 RELDIR=$WEBDIR/released
 mkdir -p $DEVDIR $RELDIR
@@ -61,8 +58,8 @@
   build/sha1sum.pl $DISTVNAME.$ext > $DEVDIR/$DISTVNAME.$ext.sha1 || exit $?
 
   rm -f $DISTVNAME.$ext.asc*
-  if [ -x "$GPGSIGN" ]; then
-    $GPGSIGN $DISTVNAME.$ext                           || exit $?
+  if [ -d $HOME/sabuildtools/sasigningkey ]; then
+    gpg --homedir $HOME/sabuildtools/sasigningkey -bsa $DISTVNAME.$ext || exit $?
     mv $DISTVNAME.$ext.asc $DEVDIR/$DISTVNAME.$ext.asc || exit $?
   fi
 
@@ -71,8 +68,6 @@
 
 test -f Makefile && make distclean
 rm -f $DISTVNAME.*
-
-# ( cd $DEVDIR && svn add *SpamAssassin* )
 
 chgrp -R spamassassin $DEVDIR $RELDIR
 ls -l $DEVDIR

Modified: spamassassin/branches/bug-3109-shortcircuiting/ldap/README
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/ldap/README?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/ldap/README (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/ldap/README Fri Jun 30 12:36:31 2006
@@ -111,6 +111,6 @@
 ******
 
 Please send any comments to <kris at koehntopp.de> and file bugs via
-<http://bugzilla.spamassassin.org/>.
+<http://issues.apache.org/SpamAssassin/>.
 
 Kristian Köhntopp

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin.pm?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin.pm Fri Jun 30 12:36:31 2006
@@ -561,7 +561,7 @@
 
 sub finish_learner {
   my $self = shift;
-  $self->{bayes_scanner}->finish() if $self->{bayes_scanner};
+  $self->{bayes_scanner}->sanity_check_is_untied(1) if $self->{bayes_scanner};
   1;
 }
 
@@ -1365,14 +1365,15 @@
       $fname = $self->{userprefs_filename};
       $fname ||= $self->first_existing_path (@default_userprefs_path);
 
-      if (defined $fname) {
-        if (!-f $fname && !$self->{dont_copy_prefs} && !$self->create_default_prefs($fname)) {
+      if (!$self->{dont_copy_prefs}) {
+        # bug 4932: if the userprefs path doesn't exist, we need to make it, so
+        # just use the last entry in the array as the default path.
+        $fname ||= $self->sed_path($default_userprefs_path[-1]);
+
+	if (!-f $fname && !$self->create_default_prefs($fname)) {
           warn "config: failed to create default user preference file $fname\n";
         }
       }
-      else {
-	warn "config: could not find userprefs file\n";
-      }
 
       $self->{config_text} .= $self->read_cf ($fname, 'user prefs file');
     }
@@ -1480,18 +1481,17 @@
 
   $fname ||= $self->first_existing_path (@default_userstate_dir);
 
-  if (defined $fname) {
-    if (!$self->{dont_copy_prefs}) {
-      dbg("config: using \"$fname\" for user state dir");
-    }
+  # bug 4932: use the last default_userstate_dir entry if none of the others
+  # already exist
+  $fname ||= $self->sed_path($default_userstate_dir[-1]);
 
-    if (!-d $fname) {
-      # not being able to create the *dir* is not worth a warning at all times
-      eval { mkpath($fname, 0, 0700) } or dbg("config: mkdir $fname failed: $@ $!\n");
-    }
+  if (!$self->{dont_copy_prefs}) {
+    dbg("config: using \"$fname\" for user state dir");
   }
-  else {
-    warn "config: can not determine userstate dir\n";
+
+  if (!-d $fname) {
+    # not being able to create the *dir* is not worth a warning at all times
+    eval { mkpath($fname, 0, 0700) } or dbg("config: mkdir $fname failed: $@ $!\n");
   }
 
   $fname;
@@ -1840,7 +1840,7 @@
 
 =head1 BUGS
 
-See E<lt>http://bugzilla.spamassassin.org/E<gt>
+See E<lt>http://issues.apache.org/SpamAssassin/E<gt>
 
 =head1 AUTHORS
 

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/AICache.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/AICache.pm?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/AICache.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/AICache.pm Fri Jun 30 12:36:31 2006
@@ -35,6 +35,8 @@
 package Mail::SpamAssassin::AICache;
 
 use File::Spec;
+use File::Path;
+use File::Basename;
 
 use strict;
 use warnings;
@@ -54,17 +56,24 @@
 
   $self->{cache} = {};
   $self->{dirty} = 0;
+  $self->{prefix} ||= '/';
 
   my $use_cache = 1;
 
   if ($self->{type} eq 'dir') {
-    $self->{cache_file} = File::Spec->catdir($self->{path}, '.spamassassin_cache');
+    $self->{cache_file} = File::Spec->catdir(
+                $self->{prefix},
+                $self->{path}, '.spamassassin_cache');
+
     $self->{cache_mtime} = (stat($self->{cache_file}))[9] || 0;
   }
   else {
     my @split = File::Spec->splitpath($self->{path});
-    $self->{cache_file} = File::Spec->catdir($split[1], join('_',
-	'.spamassassin_cache', $self->{type}, $split[2]));
+    $self->{cache_file} = File::Spec->catdir(
+                $self->{prefix},
+                $split[1],
+                join('_', '.spamassassin_cache', $self->{type}, $split[2]));
+
     $self->{cache_mtime} = (stat($self->{cache_file}))[9] || 0;
 
     # for mbox and mbx, verify whether mtime on cache file is >= mtime of
@@ -122,7 +131,16 @@
   my ($self) = @_;
 
   # Cache is dirty, so write out new file
-  if ($self->{dirty}) {
+  if ($self->{dirty})
+  {
+    # create enclosing dir tree, if required
+    eval {
+      mkpath(dirname($self->{cache_file}));
+    };
+    if ($@) {
+      warn "Can't mkpath for AI cache file (".$self->{cache_file}."): $@ $!";
+    }
+
     if (open(CACHE, ">" . $self->{cache_file})) {
       while(my($k,$v) = each %{$self->{cache}}) {
 	print CACHE "$k\t$v\n";

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/ArchiveIterator.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/ArchiveIterator.pm?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/ArchiveIterator.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/ArchiveIterator.pm Fri Jun 30 12:36:31 2006
@@ -143,6 +143,10 @@
 Only use the last N ham and N spam (or if the value is -N, only use the last
 N total messages regardless of class).
 
+If both C<opt_head> and C<opt_tail> are specified, then the C<opt_head> value
+specifies a subset of the C<opt_tail> selection to use; in other words, the
+C<opt_tail> splice is applied first.
+
 =item opt_before
 
 Only use messages which are received after the given time_t value.
@@ -166,6 +170,12 @@
 Set to 0 (default) if you don't want to use cached information to help speed
 up ArchiveIterator.  Set to 1 to enable.
 
+=item opt_cachedir
+
+Set to the path of a directory where you wish to store cached information for
+C<opt_cache>, if you don't want to mix it with the input files (as is the
+default).  The directory must be both readable and writable.
+
 =item wanted_sub
 
 Reference to a subroutine which will process message data.  Usually
@@ -267,8 +277,9 @@
 files are individual messages, C<file> a file with a single message,
 C<mbox> an mbox formatted file, or C<mbx> for an mbx formatted directory.
 
-C<detect> can also be used; assumes C<file> for STDIN and anything that is not
-a directory, or C<directory> otherwise.
+C<detect> can also be used.  This assumes C<mbox> for any file whose path
+contains the pattern C</\.mbox/i>, C<file> for STDIN and anything that is
+not a directory, or C<directory> otherwise.
 
 =item raw_location
 
@@ -292,7 +303,7 @@
   }
 
   # non-forking model (generally sa-learn), everything in a single process
-  if ($self->{opt_j} == 0) {
+  if ($self->{opt_j} < 2) {
     my $messages;
 
     # message-array
@@ -306,8 +317,9 @@
   # forking model (generally mass-check), avoid extended memory usage
   else {
     my $tmpf;
-    ($tmpf, $self->{messageh}) = Mail::SpamAssassin::Util::secure_tmpfile();
-    unlink $tmpf;
+    ($tmpf, $self->{messageh}) = Mail::SpamAssassin::Util::secure_tmpfile()
+      or die 'archive-iterator: failed to create temp file';
+    unlink $tmpf or die "archive-iterator: unlink '$tmpf': $!";
     undef $tmpf;
 
     # forked child process scans messages
@@ -362,66 +374,53 @@
       # feed childen, make them work for it, repeat
       while ($select->count()) {
         foreach my $socket ($select->can_read()) {
-	  my $result = '';
-	  my $line;
-	  while ($line = readline $socket) {
-	    if ($line =~ /^RESULT (.+)$/) {
-	      my ($date,$class,$type) = run_index_unpack($1);
-	      #warn ">> RESULT: $class, $type, $date\n";
-
-	      if (defined $self->{opt_restart} &&
-		  ($total_count % $self->{opt_restart}) == 0)
-	      {
-	        $needs_restart = 1;
-	      }
-
-	      # if messages remain, and we don't need to restart, send message
-	      if (($MESSAGES > $total_count) && !$needs_restart) {
-	        print { $socket } $self->next_message() . "\n";
-	        $total_count++;
-	        #warn ">> recv: $MESSAGES $total_count\n";
-	      }
-	      else {
-	        # stop listening on this child since we're done with it
-	        #warn ">> removeresult: $needs_restart $MESSAGES $total_count\n";
-	        $select->remove($socket);
-	      }
-
-	      # deal with the result we received
-	      if ($result) {
-	        chop $result;	# need to chop the \n before RESULT
-	        &{$self->{result_sub}}($class, $result, $date);
-	      }
+	  my $line = $self->read_line($socket);
 
-	      last;	# this will avoid the read for this client
+          # some error happened during the read!
+          if (!defined $line) {
+            $needs_restart = 1;
+            warn "archive-iterator: readline failed, attempting to recover\n";
+            $select->remove($socket);
+          }
+	  elsif ($line =~ /^([^\0]+)\0RESULT (.+)$/s) {
+	    my $result = $1;
+	    my ($date,$class,$type) = index_unpack($2);
+	    #warn ">> RESULT: $class, $type, $date\n";
+
+	    if (defined $self->{opt_restart} && ($total_count % $self->{opt_restart}) == 0) {
+	      $needs_restart = 1;
 	    }
-	    elsif ($line eq "START\n") {
-	      if ($MESSAGES > $total_count) {
-	        # we still have messages, send one to child
-	        print { $socket } $self->next_message() . "\n";
-	        $total_count++;
-	        #warn ">> new: $MESSAGES $total_count\n";
-	      }
-	      else {
-	        # no more messages, so stop listening on this child
-	        #warn ">> removestart: $needs_restart $MESSAGES $total_count\n";
-	        $select->remove($socket);
-	      }
 
-	      last;	# this will avoid the read for this client
+	    # if messages remain, and we don't need to restart, send message
+	    if (($MESSAGES > $total_count) && !$needs_restart) {
+	      $self->send_line($socket, $self->next_message());
+	      $total_count++;
+	      #warn ">> recv: $MESSAGES $total_count\n";
 	    }
 	    else {
-	      # result line, remember it
-	      $result .= $line;
+	      # stop listening on this child since we're done with it
+	      #warn ">> removeresult: $needs_restart $MESSAGES $total_count\n";
+	      $select->remove($socket);
 	    }
-	  }
 
-          # some error happened during the read!
-          if (!defined $line || !$line) {
-            $needs_restart = 1;
-            warn "archive-iterator: readline failed, attempting to recover\n";
-            $select->remove($socket);
-          }
+	    # deal with the result we received
+	    if ($result) {
+	      &{$self->{result_sub}}($class, $result, $date);
+	    }
+	  }
+	  elsif ($line eq "START") {
+	    if ($MESSAGES > $total_count) {
+	      # we still have messages, send one to child
+	      $self->send_line($socket, $self->next_message());
+	      $total_count++;
+	      #warn ">> new: $MESSAGES $total_count\n";
+	    }
+	    else {
+	      # no more messages, so stop listening on this child
+	      #warn ">> removestart: $needs_restart $MESSAGES $total_count\n";
+	      $select->remove($socket);
+	    }
+	  }
         }
 
         #warn ">> out of loop, $MESSAGES $total_count $needs_restart ".$select->count()."\n";
@@ -458,7 +457,7 @@
 sub run_message {
   my ($self, $msg) = @_;
 
-  my ($date, $class, $format, $mail) = run_index_unpack($msg);
+  my ($date, $class, $format, $mail) = index_unpack($msg);
 
   if ($format eq 'f') {
     return $self->run_file($class, $format, $mail, $date);
@@ -581,8 +580,7 @@
 
 sub next_message {
   my ($self) = @_;
-  my $line = readline $self->{messageh};
-  chomp $line if defined $line;
+  my $line = $self->read_line($self->{messageh});
   return $line;
 }
 
@@ -621,11 +619,9 @@
       close $child->[$i];
       select($parent);
       $| = 1;	# print to parent by default, turn off buffering
-      print "START\n";
-      while ($line = readline $parent) {
-	chomp $line;
+      $self->send_line($parent,"START");
+      while ($line = $self->read_line($parent)) {
 	if ($line eq "exit") {
-	  print "END\n";
 	  close $parent;
 	  exit;
 	}
@@ -638,10 +634,10 @@
 	# the packed version if possible ...  use defined for date since
 	# it could == 0.
         if (!$self->{determine_receive_date} && $class && $format && defined $date && $where) {
-	  $line = run_index_pack($date, $class, $format, $where);
+	  $line = index_pack($date, $class, $format, $where);
         }
 
-	print "$result\nRESULT $line\n";
+	$self->send_line($parent,"$result\0RESULT $line");
       }
       exit;
     }
@@ -664,8 +660,7 @@
 
   for (my $i = 0; $i < $count; $i++) {
     #warn "debug: killing child $i (pid ",$pid->[$i],")\n";
-    print { $socket->[$i] } "exit\n"; # tell the child to die.
-    my $line = readline $socket->[$i]; # read its END statement.
+    $self->send_line($socket->[$i],"exit"); # tell the child to die.
     close $socket->[$i];
     waitpid($pid->[$i], 0); # wait for the signal ...
   }
@@ -673,17 +668,34 @@
 
 ############################################################################
 
-# 0 850852128			atime
-# 1 h				class
-# 2 m				format
-# 3 ./ham/goodmsgs.0		path
+# four bytes in VAX (little-endian) byte order, i.e. pack("V"), as length of
+# message; the rest is the actual message
 
-sub run_index_pack {
-  return join("\000", @_);
+sub read_line {
+  my($self, $fd) = @_;
+  my($length,$msg);
+
+  # read in the 4 byte length and unpack
+  sysread($fd, $length, 4);
+  $length = unpack("V", $length);
+#  warn "<< $$ $length\n";
+  return unless $length;
+
+  # read in the rest of the single message
+  sysread($fd, $msg, $length);
+#  warn "<< $$ $msg\n";
+  return $msg;
 }
 
-sub run_index_unpack {
-  return split(/\000/, $_[0]);
+sub send_line {
+  my $self = shift;
+  my $fd = shift;
+
+  foreach ( @_ ) {
+    my $length = pack("V", length $_);
+#    warn ">> $$ ".length($_)." $_\n";
+    syswrite($fd, $length . $_);
+  }
 }
 
 ############################################################################
@@ -724,9 +736,13 @@
 
       if ($format eq 'detect') {
 	# detect the format
-	if ($location eq '-' || !(-d $location)) {
+        if (!-d $location && $location =~ /\.mbox/i) {
+          # filename indicates mbox
+          $method = \&scan_mailbox;
+        } 
+	elsif ($location eq '-' || !(-d $location)) {
 	  # stdin is considered a file if not passed as mbox
-	  $method = \&scan_file;
+          $method = \&scan_file;
 	}
 	else {
 	  # it's a directory
@@ -762,14 +778,14 @@
     # OPT_N == 1 means don't bother sorting on message receive date
 
     # head or tail > 0 means crop each list
-    if ($self->{opt_head} > 0) {
-      splice(@{$self->{s}}, $self->{opt_head});
-      splice(@{$self->{h}}, $self->{opt_head});
-    }
     if ($self->{opt_tail} > 0) {
       splice(@{$self->{s}}, 0, -$self->{opt_tail});
       splice(@{$self->{h}}, 0, -$self->{opt_tail});
     }
+    if ($self->{opt_head} > 0) {
+      splice(@{$self->{s}}, min ($self->{opt_head}, scalar @{$self->{s}}));
+      splice(@{$self->{h}}, min ($self->{opt_head}, scalar @{$self->{h}}));
+    }
 
     @messages = ( @{$self->{s}}, @{$self->{h}} );
     undef $self->{s};
@@ -785,14 +801,14 @@
     undef $self->{h};
 
     # head or tail > 0 means crop each list
-    if ($self->{opt_head} > 0) {
-      splice(@s, $self->{opt_head});
-      splice(@h, $self->{opt_head});
-    }
     if ($self->{opt_tail} > 0) {
       splice(@s, 0, -$self->{opt_tail});
       splice(@h, 0, -$self->{opt_tail});
     }
+    if ($self->{opt_head} > 0) {
+      splice(@s, min ($self->{opt_head}, scalar @s));
+      splice(@h, min ($self->{opt_head}, scalar @h));
+    }
 
     # interleave ordered spam and ham
     if (@s && @h) {
@@ -806,23 +822,16 @@
   }
 
   # head or tail < 0 means crop the total list, negate the value appropriately
-  if ($self->{opt_head} < 0) {
-    splice(@messages, -$self->{opt_head});
-  }
   if ($self->{opt_tail} < 0) {
     splice(@messages, 0, $self->{opt_tail});
   }
-
-  # Convert scan index format to run index format
-  # TODO: figure out a better scan index format which doesn't include newlines
-  # so readline() works (or replace readline with something else ...?)
-  foreach (@messages) {
-    $_ = run_index_pack(scan_index_unpack($_));
+  if ($self->{opt_head} < 0) {
+    splice(@messages, -$self->{opt_head});
   }
 
   # Dump out the messages to the temp file if we're using one
   if (defined $fh) {
-    print { $fh } map { "$_\n" } scalar(@messages), @messages;
+    $self->send_line($fh, scalar(@messages), @messages);
     return;
   }
 
@@ -878,11 +887,11 @@
 
 # put the date in first, big-endian packed format
 # this format lets cmp easily sort by date, then class, format, and path.
-sub scan_index_pack {
+sub index_pack {
   return pack("NAAA*", @_);
 }
 
-sub scan_index_unpack {
+sub index_unpack {
   return unpack("NAAA*", $_[0]);
 }
 
@@ -912,11 +921,7 @@
     return;
   }
 
-  if ($self->{opt_cache}) {
-    $AICache = Mail::SpamAssassin::AICache->new({	'type' => 'dir',
-      							'path' => $folder,
-					        });
-  }
+  $self->create_cache('dir', $folder);
 
   foreach my $mail (@files) {
     $self->scan_file($class, $mail);
@@ -932,7 +937,7 @@
 
   $self->bump_scan_progress();
   if (!$self->{determine_receive_date}) {
-    push(@{$self->{$class}}, scan_index_pack(AI_TIME_UNKNOWN, $class, "f", $mail));
+    push(@{$self->{$class}}, index_pack(AI_TIME_UNKNOWN, $class, "f", $mail));
     return;
   }
 
@@ -956,7 +961,7 @@
   }
 
   return if !$self->message_is_useful_by_date($date);
-  push(@{$self->{$class}}, scan_index_pack($date, $class, "f", $mail));
+  push(@{$self->{$class}}, index_pack($date, $class, "f", $mail));
 }
 
 sub scan_mailbox {
@@ -994,10 +999,9 @@
     my $info = {};
     my $count;
 
+    $self->create_cache('mbox', $file);
+
     if ($self->{opt_cache}) {
-      $AICache = Mail::SpamAssassin::AICache->new({	'type' => 'mbox',
-      							'path' => $file,
-					          });
       if ($count = $AICache->count()) {
         $info = $AICache->check();
       }
@@ -1019,7 +1023,7 @@
         my $header = $first;	# remember first line
         while (<INPUT>) {
 	  if ($in_header) {
-	    if (/^\s*$/) {
+	    if (/^$/) {
 	      $in_header = 0;
 	    }
 	    else {
@@ -1052,7 +1056,7 @@
         next if !$self->message_is_useful_by_date($v);
       }
 
-      push(@{$self->{$class}}, scan_index_pack($v, $class, "m", "$file.$k"));
+      push(@{$self->{$class}}, index_pack($v, $class, "m", "$file.$k"));
     }
 
     if (defined $AICache) {
@@ -1097,10 +1101,9 @@
     my $info = {};
     my $count;
 
+    $self->create_cache('mbx', $file);
+
     if ($self->{opt_cache}) {
-      $AICache = Mail::SpamAssassin::AICache->new({	'type' => 'mbx',
-      							'path' => $file,
-					          });
       if ($count = $AICache->count()) {
         $info = $AICache->check();
       }
@@ -1157,7 +1160,7 @@
         next if !$self->message_is_useful_by_date($v);
       }
 
-      push(@{$self->{$class}}, scan_index_pack($v, $class, "b", "$file.$k"));
+      push(@{$self->{$class}}, index_pack($v, $class, "b", "$file.$k"));
     }
 
     if (defined $AICache) {
@@ -1211,6 +1214,22 @@
   }
 }
 
+sub min {
+  return ($_[0] < $_[1] ? $_[0] : $_[1]);
+}
+
+sub create_cache {
+  my ($self, $type, $path) = @_;
+
+  if ($self->{opt_cache}) {
+    $AICache = Mail::SpamAssassin::AICache->new({
+                                    'type' => $type,
+                                    'prefix' => $self->{opt_cachedir},
+                                    'path' => $path,
+                              });
+  }
+}
+
 ############################################################################
 
 1;
@@ -1224,3 +1243,7 @@
 C<Mail::SpamAssassin>
 C<spamassassin>
 C<mass-check>
+
+=cut
+
+# vim: ts=8 sw=2 et

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Bayes.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Bayes.pm?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Bayes.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Bayes.pm Fri Jun 30 12:36:31 2006
@@ -167,7 +167,7 @@
 use constant MAP_HEADERS_FROMTOCC => 1;
 use constant MAP_HEADERS_USERAGENT => 1;
 
-# tweaks, see http://bugzilla.spamassassin.org/show_bug.cgi?id=3173#c26
+# tweaks, see http://issues.apache.org/SpamAssassin/show_bug.cgi?id=3173#c26
 use constant ADD_INVIZ_TOKENS_I_PREFIX => 1;
 use constant ADD_INVIZ_TOKENS_NO_PREFIX => 0;
 
@@ -262,6 +262,7 @@
   # use Carp qw(cluck); cluck "stack trace at untie";
 
   $self->{store}->untie_db();
+  delete $self->{store};
 }
 
 sub sa_die { Mail::SpamAssassin::sa_die(@_); }
@@ -270,12 +271,13 @@
 
 sub sanity_check_is_untied {
   my $self = shift;
+  my $quiet = shift;
 
   # do a sanity check here.  Wierd things happen if we remain tied
   # after compiling; for example, spamd will never see that the
   # number of messages has reached the bayes-scanning threshold.
   if ($self->{store}->db_readable()) {
-    warn "bayes: oops! still tied to bayes DBs, untying\n";
+    warn "bayes: oops! still tied to bayes DBs, untying\n" unless $quiet;
     $self->{store}->untie_db();
   }
 }

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Client.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Client.pm?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Client.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Client.pm Fri Jun 30 12:36:31 2006
@@ -423,7 +423,7 @@
 
   my ($version, $resp_code, $resp_msg) = $self->_parse_response_line($line);
 
-  return 0 unless ($resp_msg eq 'PONG');
+  return 0 unless ($resp_msg =~ /^PONG/);
 
   return 1;
 }

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf.pm?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf.pm Fri Jun 30 12:36:31 2006
@@ -2885,6 +2885,8 @@
                    "failed", "unavailable")
  _TESTS(,)_        tests hit separated by "," (or other separator)
  _TESTSSCORES(,)_  as above, except with scores appended (eg. AWL=-3.0,...)
+ _SUBTESTS(,)_     subtests (start with "__") hit separated by ","
+                   (or other separator)
  _DCCB_            DCC's "Brand"
  _DCCR_            DCC's results
  _PYZOR_           Pyzor results
@@ -2897,6 +2899,8 @@
  _REPORT_          terse report of tests hit (for header reports)
  _SUMMARY_         summary of tests hit for standard report (for body reports)
  _CONTACTADDRESS_  contents of the 'report_contact' setting
+ _HEADER(NAME)_    includes the value of a message header.  value is the same
+                   as is found for header rules (see elsewhere in this doc)
 
 If a tag reference uses the name of a tag which is not in this list or defined
 by a loaded plugin, the reference will be left intact and not replaced by any

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/LDAP.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/LDAP.pm?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/LDAP.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/LDAP.pm Fri Jun 30 12:36:31 2006
@@ -45,7 +45,6 @@
 use strict;
 use warnings;
 use bytes;
-use Carp;
 
 use vars qw{
   @ISA

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/Parser.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/Parser.pm?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/Parser.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/Parser.pm Fri Jun 30 12:36:31 2006
@@ -127,7 +127,6 @@
 use strict;
 use warnings;
 use bytes;
-use Carp;
 
 use vars qw{
   @ISA
@@ -652,6 +651,8 @@
 
   $self->trace_meta_dependencies();
   $self->fix_priorities();
+
+  dbg("conf: finish parsing");
 
   while (my ($name, $text) = each %{$conf->{tests}}) {
     my $type = $conf->{test_types}->{$name};

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/SQL.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/SQL.pm?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/SQL.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/SQL.pm Fri Jun 30 12:36:31 2006
@@ -45,7 +45,6 @@
 use strict;
 use warnings;
 use bytes;
-use Carp;
 
 use vars qw{
   @ISA

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Dns.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Dns.pm?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Dns.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Dns.pm Fri Jun 30 12:36:31 2006
@@ -31,7 +31,6 @@
 use strict;
 use warnings;
 use bytes;
-use Carp;
 
 use vars qw{
   $KNOWN_BAD_DIALUP_RANGES @EXISTING_DOMAINS $IS_DNS_AVAILABLE $LAST_DNS_CHECK $VERSION

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/DnsResolver.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/DnsResolver.pm?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/DnsResolver.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/DnsResolver.pm Fri Jun 30 12:36:31 2006
@@ -358,10 +358,9 @@
 
   my $pkt = $self->new_dns_packet($host, $type, $class);
 
-  my $data = $pkt->data;
   $self->connect_sock_if_reqd();
-  if (!$self->{sock}->send ($pkt->data, 0)) {
-    warn "dns: sendto() failed: $@";
+  if (!defined($self->{sock}->send($pkt->data, 0))) {
+    warn "dns: sendto() failed: $!";
     return;
   }
   my $id = $self->_packet_id($pkt);

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/EvalTests.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/EvalTests.pm?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/EvalTests.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/EvalTests.pm Fri Jun 30 12:36:31 2006
@@ -3081,4 +3081,18 @@
   return 1 if ($self->{tvd_vertical_words} >= $min && $self->{tvd_vertical_words} < $max);
 }
 
+# came up on the users@ list, look for multipart/alternative parts which
+# include non-text parts -- skip multipart/related parts, which occur in ham
+sub check_ma_non_text {
+  my $self = shift;
+
+  foreach my $map ($self->{msg}->find_parts(qr@^multipart/alternative$@i)) {
+    foreach my $p ($map->find_parts(qr/./, 1, 0)) {
+      return 1 if ($p->{'type'} !~ m@^text/@i && $p->{'type'} !~ m@^multipart/related$@i);
+    }
+  }
+  
+  return 0;
+}
+
 1;

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message.pm?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message.pm Fri Jun 30 12:36:31 2006
@@ -135,14 +135,18 @@
   elsif (ref $message) {
     dbg("message: Input is a reference of unknown type!");
   }
-  else {
+  elsif (defined $message) {
     @message = split ( /^/m, $message );
   }
 
-  return $self unless @message;
-
   # Pull off mbox and mbx separators
-  if ( $message[0] =~ /^From\s/ ) {
+  # also deal with null messages
+  if (!@message) {
+    # bug 4884:
+    # if we get here, it means that the input was null, so fake the message
+    # content as a single newline...
+    @message = ("\n");
+  } elsif ($message[0] =~ /^From\s/) {
     # mbox formated mailbox
     $self->{'mbox_sep'} = shift @message;
   } elsif ($message[0] =~ MBX_SEPARATOR) {
@@ -349,13 +353,13 @@
 # objects which match.
 #
 sub find_parts {
-  my ($self, $re, $onlyleaves, $recursive) = @_;
+  my $self = shift;
 
   # ok, we need to do the parsing now...
   $self->_do_parse() if (exists $self->{'toparse'});
 
   # and pass through to the Message::Node version of the method
-  return $self->SUPER::find_parts($re, $onlyleaves, $recursive);
+  return $self->SUPER::find_parts(@_);
 }
 
 # ---------------------------------------------------------------------------

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message/Metadata/Received.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message/Metadata/Received.pm?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message/Metadata/Received.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message/Metadata/Received.pm Fri Jun 30 12:36:31 2006
@@ -381,7 +381,8 @@
   # Received: (qmail 84907 invoked from network); 13 Feb 2003 20:59:28 -0000
   # Received: (ofmipd 208.31.42.38); 17 Mar 2003 04:09:01 -0000
   # we don't care about this kind of gateway noise
-  if (/^\(/) { return; }
+  # Bug 4943: give /^(from/ a chance to be parsed
+  if (/^\((?!from)/) { return; }
 
   # OK -- given knowledge of most Received header formats,
   # break them down.  We have to do something like this, because
@@ -771,7 +772,7 @@
     }
 
     # Let's try to support a few qmailish formats in one;
-    # http://bugzilla.spamassassin.org/show_bug.cgi?id=2744#c14 :
+    # http://issues.apache.org/SpamAssassin/show_bug.cgi?id=2744#c14 :
     # Received: from unknown (HELO feux01a-isp) (213.199.4.210) by totor.bouissou.net with SMTP; 1 Nov 2003 07:05:19 -0000 
     # Received: from adsl-207-213-27-129.dsl.lsan03.pacbell.net (HELO merlin.net.au) (Owner50@207.213.27.129) by totor.bouissou.net with SMTP; 10 Nov 2003 06:30:34 -0000 
     if (/^from (\S+) \((?:HELO|EHLO) ([^\)]*)\) \((\S*@)?\[?(${IP_ADDRESS})\]?\).* by (\S+) /)
@@ -787,8 +788,7 @@
     if (/^from (\S+) \((\S*@)?\[?(${IP_ADDRESS})\]?\).* by (\S+) /)
     {
       $mta_looked_up_dns = 1;
-      # http://bugzilla.spamassassin.org/show_bug.cgi?id=2744 notes that
-      # if HELO == rDNS, qmail drops it.
+      # bug 2744 notes that if HELO == rDNS, qmail drops it.
       $rdns = $1; $helo = $rdns; $ident = (defined $2) ? $2 : '';
       $ip = $3; $by = $4;
       if ($ident) { $ident =~ s/\@$//; }
@@ -1004,6 +1004,15 @@
       $helo = $1; $rdns = $2; $ip = $3; $by = $4;
       $id = $5 if (defined $5);
       goto enough;
+  }
+
+  # Norton AntiVirus Gateway
+  # Received: (from localhost [24.180.47.240])
+  #  by host.name (NAVGW 2.5.2.12) with SMTP id M2006060503484615455
+  #  for <us...@domain.co.uk>; Mon, 05 Jun 2006 03:48:47 +0100
+  if (/^\(from (\S*) \[(${IP_ADDRESS})\]\) by (\S+) \(NAVGW .*?\) with /) {
+    $helo = $1; $ip = $2; $by = $3;
+    goto enough;
   }
 
   # ------------------------------------------------------------------------

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message/Node.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message/Node.pm?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message/Node.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message/Node.pm Fri Jun 30 12:36:31 2006
@@ -86,7 +86,8 @@
 tree (ie: parts that aren't multipart), set this to true (1).
 
 Recursive - By default, when find_parts() finds a multipart which has
-parts underneath it, it will recurse.
+parts underneath it, it will recurse through all sub-children.  If set to 0,
+only look at the part and any direct children of the part.
 
 =cut
 
@@ -101,16 +102,20 @@
   return () unless $re;
 
   $onlyleaves = 0 unless defined $onlyleaves;
-  $recursive = 1 unless defined $recursive;
+
+  my $depth;
+  if (defined $recursive && $recursive == 0) {
+    $depth = 1;
+  }
   
-  return $self->_find_parts($re, $onlyleaves, $recursive);
+  return $self->_find_parts($re, $onlyleaves, $depth);
 }
 
 # We have 2 functions in find_parts() to optimize out the penalty of
 # $onlyleaves, $re, and $recursive over and over again.
 #
 sub _find_parts {
-  my ($self, $re, $onlyleaves, $recursive) = @_;
+  my ($self, $re, $onlyleaves, $depth) = @_;
   my @ret = ();
 
   # If this object matches, mark it for return.
@@ -120,11 +125,13 @@
     push(@ret, $self);
   }
   
-  if ( $recursive && !$amialeaf ) {
+  if ( !$amialeaf && (!defined $depth || $depth > 0)) {
+    $depth-- if defined $depth;
+
     # This object is a subtree root.  Search all children.
     foreach my $parts ( @{$self->{'body_parts'}} ) {
       # Add the recursive results to our results
-      push(@ret, $parts->_find_parts($re, $onlyleaves, 1));
+      push(@ret, $parts->_find_parts($re, $onlyleaves, $depth));
     }
   }
 

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm Fri Jun 30 12:36:31 2006
@@ -50,7 +50,6 @@
 
 use strict;
 use warnings;
-use Carp;
 
 use Mail::SpamAssassin::Constants qw(:sa);
 use Mail::SpamAssassin::EvalTests;
@@ -1028,10 +1027,11 @@
   # a tag for it (bug 4793)
   my $t;
   my $v;
-  $text =~ s{_(\w+?)(?:\((.*?)\))?_}{
-        $t = $1;
-        $v = $self->_get_tag($t,$2);
-        (defined $v) ? $v : "_".$t."_"
+  $text =~ s{(_(\w+?)(?:\((.*?)\))?_)}{
+	my $full = $1;
+        my $tag = $2;
+        my $result = $self->_get_tag($tag,$3);
+        (defined $result) ? $result : $full;
       }ge;
 
   return $text;
@@ -1108,6 +1108,8 @@
 See C<Mail::SpamAssassin::Conf>'s C<TEMPLATE TAGS> section for more details on
 how template tags are used.
 
+C<undef> will be returned if a tag by that name has not been defined.
+
 =cut
 
 sub set_tag {
@@ -1156,8 +1158,7 @@
     $score = (substr($pad, 0, $count) . $score) if $count > 0;
   }
 
-  # Do some rounding tricks to avoid the 5.0!=5.0-phenomenon,
-  # see <http://bugzilla.spamassassin.org/show_bug.cgi?id=2607>
+  # bug 2607: Do some rounding tricks to avoid the 5.0!=5.0-phenomenon.
   return $score if $self->{is_spam} or $score < $rscore;
   return $rscore - 0.1;
 }
@@ -1264,6 +1265,11 @@
               return (join($arg, sort(@{$self->{test_names_hit}})) || "none");
             },
 
+            SUBTESTS => sub {
+              my $arg = (shift || ',');
+              return (join($arg, sort(@{$self->{subtest_names_hit}})) || "none");
+            },
+
             TESTSSCORES => sub {
               my $arg = (shift || ",");
               my $line = '';
@@ -1283,6 +1289,11 @@
               return "\n" . ($self->{tag_data}->{REPORT} || "");
             },
 
+	    HEADER => sub {
+	      my $hdr = shift || return;
+	      return $self->get($hdr);
+	    },
+
           );
 
   my $data = "";
@@ -1295,7 +1306,8 @@
       $data = $data->(@_);
     }
   }
-  else {
+  # known valid tags that might not get defined in some circumstances
+  elsif ($tag !~ /^(?:BAYESTC(?:|LEARNED|SPAMMY|HAMMY)|RBL)$/) {
     return undef;
   }
   $data = "" unless defined $data;
@@ -1322,6 +1334,9 @@
 	});
 
   foreach(keys %{$self}) {
+    # TODO: we should not be explicitly deleting every key here,
+    # just the ones that need it.  This is surprisingly slow
+    # (in the top 10 measured with Devel::SmallProf)
     delete $self->{$_};
   }
 }
@@ -1626,12 +1641,15 @@
 # $_[1] is request
 # $_[2] is defval
 sub get {
-  # fill in cache entry if it is empty
-  $_[0]->{c}->{$_[1]} = _get(@_) unless exists $_[0]->{c}->{$_[1]};
-
   # return cache entry if it is defined
   return $_[0]->{c}->{$_[1]} if defined $_[0]->{c}->{$_[1]};
 
+  # fill in cache entry if it is empty
+  if (!exists $_[0]->{c}->{$_[1]}) {
+    $_[0]->{c}->{$_[1]} = _get(@_);
+    return $_[0]->{c}->{$_[1]} if defined $_[0]->{c}->{$_[1]};
+  }
+
   # if the requested header wasn't found, we should return either
   # a default value as specified by the caller, or the blank string ''
   return $_[2] || '';
@@ -2561,11 +2579,11 @@
   }
 
   my (%rule_deps, %setup_rules, %meta, $rulename);
-  my $evalstr = '
+  my $evalstr = q{
 
-    my $hit = $self->{tests_already_hit};
-  
-  ';
+    my $h = $self->{tests_already_hit};
+
+  };
 
   # Get the list of meta tests
   my @metas = keys %{ $conf->{meta_tests}->{$priority} };
@@ -2593,7 +2611,7 @@
         $meta{$rulename} .= "$token ";
       }
       else {
-        $meta{$rulename} .= "\$hit->{'$token'} ";
+        $meta{$rulename} .= "\$h->{'$token'} ";
         $setup_rules{$token}=1;
 
         # If the token is another meta rule, add it as a dependency
@@ -2604,7 +2622,7 @@
   }
 
   # avoid "undefined" warnings by providing a default value for needed rules
-  $evalstr .= join("\n", (map { "\$hit->{'$_'} ||= 0;" } keys %setup_rules), "");
+  $evalstr .= join("\n", (map { "\$h->{'$_'} ||= 0;" } keys %setup_rules), "");
 
   # Sort by length of dependencies list.  It's more likely we'll get
   # the dependencies worked out this way.
@@ -2726,28 +2744,32 @@
 
   return if (exists $self->{shortcircuit_type});
   
+  # look these up once in advance to save repeated lookups in loop below
   my $debugenabled = would_log('dbg');
+  my $scoresref = $self->{conf}->{scores};
+  my $tflagsref = $self->{conf}->{tflags};
+  my $have_start_rules = $self->{main}->have_plugin("start_rules");
+  my $have_ran_rule = $self->{main}->have_plugin("ran_rule");
 
   my $scoreset = $self->{conf}->get_score_set();
   while (my ($rulename, $test) = each %{$evalhash}) {
     last if (exists $self->{shortcircuit_type});
 
     # Score of 0, skip it.
-    next unless ($self->{conf}->{scores}->{$rulename});
+    my $score = $scoresref->{$rulename};
+    next unless $score;
 
     # If the rule is a net rule, and we're in a non-net scoreset, skip it.
-    next if (exists $self->{conf}->{tflags}->{$rulename} &&
-             (($scoreset & 1) == 0) &&
-             $self->{conf}->{tflags}->{$rulename} =~ /\bnet\b/);
+    next if ((($scoreset & 1) == 0) &&
+             $tflagsref->{$rulename} &&
+             $tflagsref->{$rulename} =~ /\bnet\b/);
 
     # If the rule is a bayes rule, and we're in a non-bayes scoreset, skip it.
-    next if (exists $self->{conf}->{tflags}->{$rulename} &&
-             (($scoreset & 2) == 0) &&
-             $self->{conf}->{tflags}->{$rulename} =~ /\bbayes\b/);
+    next if ((($scoreset & 2) == 0) &&
+             $tflagsref->{$rulename} &&
+             $tflagsref->{$rulename} =~ /\bbayes\b/);
 
-    my $score = $self->{conf}{scores}{$rulename};
     my $result;
-
     $self->{test_log_msgs} = ();        # clear test state
 
     my ($function, @args) = @{$test};
@@ -2768,11 +2790,13 @@
     # run
     $self->{current_rule_name} = $rulename;
 
-    $self->{main}->call_plugins("start_rules", { permsgstatus => $self, ruletype => "eval" });
+    if ($have_start_rules) {
+      $self->{main}->call_plugins("start_rules", { permsgstatus => $self, ruletype => "eval" });
+    }
+
     eval {
       $result = $self->$function(@args);
     };
-    $self->{main}->call_plugins("ran_rule", { permsgstatus => $self, ruletype => "eval", rulename => $rulename });
 
     if ($@) {
       warn "rules: failed to run $rulename test, skipping:\n" . "\t($@)\n";
@@ -2780,14 +2804,15 @@
       next;
     }
 
+    if ($have_ran_rule) {
+      $self->{main}->call_plugins("ran_rule", { permsgstatus => $self, ruletype => "eval", rulename => $rulename });
+    }
+
     if ($result) {
       $self->got_hit ($rulename, $prepend2desc, $result);
       dbg("rules: ran eval rule $rulename ======> got hit ($result)") if $debugenabled;
       $self->{main}->call_plugins("hit_rule", { permsgstatus => $self, ruletype => "eval", rulename => $rulename });
     }
-    #else {
-    #  dbg("rules: ran eval rule $rulename ======> no hit") if $debugenabled;
-    #}
   }
 }
 
@@ -2895,6 +2920,12 @@
     # ignore meta-match sub-rules.
     if ($rule =~ /^__/) { push(@{$self->{subtest_names_hit}}, $rule); return; }
 
+    # this should not happen; warn about it
+    if (!defined $score) {
+      warn "rules: score undef for rule '$rule' in '$area' '$desc'";
+      return;
+    }
+
     # Add the rule hit to the score
     $self->{score} += $score;
 
@@ -2999,11 +3030,11 @@
 sub get_envelope_from {
   my ($self) = @_;
   
+  # bug 2142:
   # Get the SMTP MAIL FROM:, aka. the "envelope sender", if our
   # calling app has helpfully marked up the source message
   # with it.  Various MTAs and calling apps each have their
   # own idea of what header to use for this!   see
-  # http://bugzilla.spamassassin.org/show_bug.cgi?id=2142 .
 
   my $envf;
 

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin.pm?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin.pm Fri Jun 30 12:36:31 2006
@@ -885,6 +885,6 @@
 
 http://wiki.apache.org/spamassassin/PluginWritingTips
 
-http://bugzilla.spamassassin.org/show_bug.cgi?id=2163
+http://issues.apache.org/SpamAssassin/show_bug.cgi?id=2163
 
 =cut

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/DCC.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/DCC.pm?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/DCC.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/DCC.pm Fri Jun 30 12:36:31 2006
@@ -97,7 +97,7 @@
     type => $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL,
   });
 
-=item dcc_timeout n		(default: 5)
+=item dcc_timeout n		(default: 8)
 
 How many seconds you wait for DCC to complete, before scanning continues
 without the DCC results.
@@ -106,7 +106,7 @@
 
   push (@cmds, {
     setting => 'dcc_timeout',
-    default => 5,
+    default => 8,
     type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
   });
 
@@ -238,14 +238,14 @@
 Specify additional options to the dccproc(8) command. Please note that only
 characters in the range [0-9A-Za-z ,._/-] are allowed for security reasons.
 
-The default is C<-R>.
+The default is C<undef>.
 
 =cut
 
   push (@cmds, {
     setting => 'dcc_options',
     is_admin => 1,
-    default => '-R',
+    default => undef,
     code => sub {
       my ($self, $key, $value, $line) = @_;
       if ($value !~ m{^([0-9A-Za-z ,._/-]+)$}) {
@@ -255,6 +255,28 @@
     }
   });
 
+=item dccifd_options options
+
+Specify additional options to send to the dccifd(8) daemon. Please note that only
+characters in the range [0-9A-Za-z ,._/-] are allowed for security reasons.
+
+The default is C<undef>.
+
+=cut
+
+  push (@cmds, {
+    setting => 'dccifd_options',
+    is_admin => 1,
+    default => undef,
+    code => sub {
+      my ($self, $key, $value, $line) = @_;
+      if ($value !~ m{^([0-9A-Za-z ,._/-]+)$}) {
+	return $Mail::SpamAssassin::Conf::INVALID_VALUE;
+      }
+      $self->{dccifd_options} = $1;
+    }
+  });
+
   $conf->{parser}->register_commands(\@cmds);
 }
 
@@ -340,10 +362,16 @@
 
   # short-circuit if there's already a X-DCC header with value of
   # "bulk" from an upstream DCC check
-  if ($permsgstatus->get('ALL') =~ /^X-DCC-(?:[^:]{1,80}-)?Metrics:.*bulk/m) {
+  if ($permsgstatus->get('ALL') =~ /^X-DCC-([^:]{1,80})?-?Metrics:.*bulk/m) {
+    $permsgstatus->{tag_data}->{DCCB} = $1;
+    $permsgstatus->{tag_data}->{DCCR} = "bulk";
     return 1;
   }
 
+  # initialize valid tags
+  $permsgstatus->{tag_data}->{DCCB} = "";
+  $permsgstatus->{tag_data}->{DCCR} = "";
+
   $self->get_dcc_interface();
   return 0 if $self->{dcc_disabled};
 
@@ -352,23 +380,34 @@
     return 0;
   }
 
+  my $client = $permsgstatus->{relays_external}->[0]->{ip};
   if ($self->{dccifd_available}) {
-    return $self->dccifd_lookup($permsgstatus, $full);
+    my $clientname = $permsgstatus->{relays_external}->[0]->{rdns};
+    my $helo = $permsgstatus->{relays_external}->[0]->{helo} || "";
+    if ($client) {
+      if ($clientname) {
+        $client = $client . "\r" . $clientname;
+      }
+    } else {
+      $client = "0.0.0.0";
+    }
+    return $self->dccifd_lookup($permsgstatus, $full, $client, $clientname, $helo);
   }
   else {
-    return $self->dccproc_lookup($permsgstatus, $full);
+    return $self->dccproc_lookup($permsgstatus, $full, $client);
   }
   return 0;
 }
 
 sub dccifd_lookup {
-  my ($self, $permsgstatus, $fulltext) = @_;
+  my ($self, $permsgstatus, $fulltext, $client, $clientname, $helo) = @_;
   my $response = "";
   my %count;
   my $left;
   my $right;
   my $timeout = $self->{main}->{conf}->{dcc_timeout};
   my $sockpath = $self->{main}->{conf}->{dcc_dccifd_path};
+  my $opts = $self->{main}->{conf}->{dccifd_options} || '';
 
   $count{body} = 0;
   $count{fuz1} = 0;
@@ -385,17 +424,7 @@
       Peer => $sockpath) || dbg("dcc: failed to open socket") && die;
 
     # send the options and other parameters to the daemon
-    my $client = $permsgstatus->{relays_external}->[0]->{ip};
-    my $clientname = $permsgstatus->{relays_external}->[0]->{rdns};
-    my $helo = $permsgstatus->{relays_external}->[0]->{helo} || "";
-    if ($client) {
-      if ($clientname) {
-        $client = $client . "\r" . $clientname;
-      }
-    } else {
-      $client = "0.0.0.0";
-    }
-    $sock->print("header\n") || dbg("dcc: failed write") && die; # options
+    $sock->print("header " . $opts . "\n") || dbg("dcc: failed write") && die; # options
     $sock->print($client . "\n") || dbg("dcc: failed write") && die; # client
     $sock->print($helo . "\n") || dbg("dcc: failed write") && die; # HELO value
     $sock->print("\n") || dbg("dcc: failed write") && die; # sender
@@ -479,7 +508,7 @@
 }
 
 sub dccproc_lookup {
-  my ($self, $permsgstatus, $fulltext) = @_;
+  my ($self, $permsgstatus, $fulltext, $client) = @_;
   my $response = undef;
   my %count;
   my $timeout = $self->{main}->{conf}->{dcc_timeout};
@@ -503,11 +532,12 @@
     my $path = Mail::SpamAssassin::Util::untaint_file_path($self->{main}->{conf}->{dcc_path});
 
     my $opts = $self->{main}->{conf}->{dcc_options} || '';
+    $opts = "-a " . $client . " " . $opts if $client;
 
-    dbg("dcc: opening pipe: " . join(' ', $path, "-H", $opts, "< $tmpf"));
+    dbg("dcc: opening pipe: " . join(' ', $path, "-H", "-x", "0", $opts, "< $tmpf"));
 
     $pid = Mail::SpamAssassin::Util::helper_app_pipe_open(*DCC,
-	$tmpf, 1, $path, "-H", split(' ', $opts));
+	$tmpf, 1, $path, "-H", "-x", "0", split(' ', $opts));
     $pid or die "$!\n";
 
     my @null = <DCC>;
@@ -606,29 +636,103 @@
 sub plugin_report {
   my ($self, $options) = @_;
 
+  return if $self->{options}->{dont_report_to_dcc};
+  $self->get_dcc_interface();
   return if $self->{dcc_disabled};
 
-  if (!defined $self->{dccproc_available}) {
-    $self->is_dccproc_available();
-  }
-
-  if ($self->{dccproc_available} && !$self->{options}->{dont_report_to_dcc}) {
+  # get the metadata from the message so we can pass the external relay information
+  $options->{msg}->extract_message_metadata($options->{report}->{main});
+  my $client = $options->{msg}->{metadata}->{relays_external}->[0]->{ip};
+  if ($self->{dccifd_available}) {
+    my $clientname = $options->{msg}->{metadata}->{relays_external}->[0]->{rdns};
+    my $helo = $options->{msg}->{metadata}->{relays_external}->[0]->{helo} || "";
+    if ($client) {
+      if ($clientname) {
+        $client = $client . "\r" . $clientname;
+      }
+    } else {
+      $client = "0.0.0.0";
+    }
+    if ($self->dccifd_report($options, $options->{text}, $client, $helo)) {
+      $options->{report}->{report_available} = 1;
+      info("reporter: spam reported to DCC");
+      $options->{report}->{report_return} = 1;
+    }
+    else {
+      info("reporter: could not report spam to DCC via dccifd");
+    }
+  } else {
     # use temporary file: open2() is unreliable due to buffering under spamd
     my $tmpf = $options->{report}->create_fulltext_tmpfile($options->{text});
-    if ($self->dcc_report($options, $tmpf)) {
+    
+    if ($self->dcc_report($options, $tmpf, $client)) {
       $options->{report}->{report_available} = 1;
       info("reporter: spam reported to DCC");
       $options->{report}->{report_return} = 1;
     }
     else {
-      info("reporter: could not report spam to DCC");
+      info("reporter: could not report spam to DCC via dccproc");
     }
     $options->{report}->delete_fulltext_tmpfile();
   }
 }
 
+sub dccifd_report {
+  my ($self, $options, $fulltext, $client, $helo) = @_;
+  my $timeout = $self->{main}->{conf}->{dcc_timeout};
+  my $sockpath = $self->{main}->{conf}->{dcc_dccifd_path};
+  my $opts = $self->{main}->{conf}->{dccifd_options} || ''; # instead of header use whatever the report option is
+
+  $options->{report}->enter_helper_run_mode();
+  my $timer = Mail::SpamAssassin::Timeout->new({ secs => $timeout });
+
+  my $err = $timer->run_and_catch(sub {
+
+    local $SIG{PIPE} = sub { die "__brokenpipe__ignore__\n" };
+
+    my $sock = IO::Socket::UNIX->new(Type => SOCK_STREAM,
+                                     Peer => $sockpath) || dbg("report: dccifd failed to open socket") && die;
+
+    # send the options and other parameters to the daemon
+    $sock->print("spam " . $opts . "\n") || dbg("report: dccifd failed write") && die; # options
+    $sock->print($client . "\n") || dbg("report: dccifd failed write") && die; # client
+    $sock->print($helo . "\n") || dbg("report: dccifd failed write") && die; # HELO value
+    $sock->print("\n") || dbg("report: dccifd failed write") && die; # sender
+    $sock->print("unknown\r\n") || dbg("report: dccifd failed write") && die; # recipients
+    $sock->print("\n") || dbg("report: dccifd failed write") && die; # recipients
+
+    $sock->print($$fulltext);
+
+    $sock->shutdown(1) || dbg("report: dccifd failed socket shutdown: $!") && die;
+
+    $sock->getline() || dbg("report: dccifd failed read status") && die;
+    $sock->getline() || dbg("report: dccifd failed read multistatus") && die;
+
+    my @ignored = $sock->getlines();
+  });
+
+  $options->{report}->leave_helper_run_mode();
+  
+  if ($timer->timed_out()) {
+    dbg("reporter: DCC report via dccifd timed out after $timeout secs.");
+    return 0;
+  }
+  
+  if ($err) {
+    chomp $err;
+    if ($err eq "__brokenpipe__ignore__") {
+      dbg("reporter: DCC report via dccifd failed: broken pipe");
+    } else {
+      warn("reporter: DCC report via dccifd failed: $err\n");
+    }
+    return 0;
+  }
+  
+  return 1;
+}
+  
 sub dcc_report {
-  my ($self, $options, $tmpf) = @_;
+  my ($self, $options, $tmpf, $client) = @_;
   my $timeout = $options->{report}->{conf}->{dcc_timeout};
 
   # note: not really tainted, this came from system configuration file
@@ -636,6 +740,9 @@
 
   my $opts = $options->{report}->{conf}->{dcc_options} || '';
 
+  # if the caller supplied the external relay's IP, pass it to dccproc by prepending "-a <ip>" to the options
+  $opts = "-a " . $client . " " . $opts if $client;
+
   my $timer = Mail::SpamAssassin::Timeout->new({ secs => $timeout });
 
   $options->{report}->enter_helper_run_mode();
@@ -643,8 +750,10 @@
 
     local $SIG{PIPE} = sub { die "__brokenpipe__ignore__\n" };
 
+    dbg("report: opening pipe: " . join(' ', $path, "-H", "-t", "many", "-x", "0", $opts, "< $tmpf"));
+
     my $pid = Mail::SpamAssassin::Util::helper_app_pipe_open(*DCC,
-        $tmpf, 1, $path, "-t", "many", split(' ', $opts));
+        $tmpf, 1, $path, "-H", "-t", "many", "-x", "0", split(' ', $opts));
     $pid or die "$!\n";
 
     my @ignored = <DCC>;
@@ -655,16 +764,16 @@
   $options->{report}->leave_helper_run_mode();
 
   if ($timer->timed_out()) {
-    dbg("reporter: DCC report timed out after $timeout seconds");
+    dbg("reporter: DCC report via dccproc timed out after $timeout seconds");
     return 0;
   }
 
   if ($err) {
     chomp $err;
     if ($err eq "__brokenpipe__ignore__") {
-      dbg("reporter: DCC report failed: broken pipe");
+      dbg("reporter: DCC report via dccproc failed: broken pipe");
     } else {
-      warn("reporter: DCC report failed: $err\n");
+      warn("reporter: DCC report via dccproc failed: $err\n");
     }
     return 0;
   }

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/Pyzor.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/Pyzor.pm?rev=418365&r1=418364&r2=418365&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/Pyzor.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/Pyzor.pm Fri Jun 30 12:36:31 2006
@@ -215,6 +215,9 @@
 sub check_pyzor {
   my ($self, $permsgstatus, $full) = @_;
 
+  # initialize valid tags
+  $permsgstatus->{tag_data}->{PYZOR} = "";
+
   $self->get_pyzor_interface();
   return 0 unless $self->{pyzor_available};