You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/07/21 21:06:11 UTC

svn commit: r424421 - in /spamassassin/branches/bug-3109-shortcircuiting: ./ build/automc/ build/buildbot/ lib/Mail/ lib/Mail/SpamAssassin/ lib/Mail/SpamAssassin/Locker/ lib/Mail/SpamAssassin/Message/Metadata/ lib/Mail/SpamAssassin/Plugin/ lib/Mail/Spa...

Author: jm
Date: Fri Jul 21 12:06:09 2006
New Revision: 424421

URL: http://svn.apache.org/viewvc?rev=424421&view=rev
Log:
latest code from bug 3109, and merge up to r424414

Removed:
    spamassassin/branches/bug-3109-shortcircuiting/build/automc/populate_cor
    spamassassin/branches/bug-3109-shortcircuiting/masses/rule-qa/automc/post-comments
    spamassassin/branches/bug-3109-shortcircuiting/masses/rule-qa/automc/scrape-bugzilla
Modified:
    spamassassin/branches/bug-3109-shortcircuiting/INSTALL
    spamassassin/branches/bug-3109-shortcircuiting/MANIFEST
    spamassassin/branches/bug-3109-shortcircuiting/build/buildbot/bbmass_master.cfg
    spamassassin/branches/bug-3109-shortcircuiting/build/buildbot/master.cfg
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/AICache.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/DBBasedAddrList.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Dns.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/EvalTests.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Locker/Flock.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message/Metadata/Received.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/AWL.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util/DependencyInfo.pm
    spamassassin/branches/bug-3109-shortcircuiting/procmailrc.example
    spamassassin/branches/bug-3109-shortcircuiting/rules/10_default_prefs.cf
    spamassassin/branches/bug-3109-shortcircuiting/rules/60_shortcircuit.cf
    spamassassin/branches/bug-3109-shortcircuiting/rules/active.list
    spamassassin/branches/bug-3109-shortcircuiting/rules/v320.pre
    spamassassin/branches/bug-3109-shortcircuiting/sa-learn.raw
    spamassassin/branches/bug-3109-shortcircuiting/sa-update.raw
    spamassassin/branches/bug-3109-shortcircuiting/spamc/libspamc.c
    spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.c
    spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.pod
    spamassassin/branches/bug-3109-shortcircuiting/spamd/spamd.raw
    spamassassin/branches/bug-3109-shortcircuiting/t/gtube.t
    spamassassin/branches/bug-3109-shortcircuiting/t/rcvd_parser.t
    spamassassin/branches/bug-3109-shortcircuiting/t/shortcircuit.t

Modified: spamassassin/branches/bug-3109-shortcircuiting/INSTALL
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/INSTALL?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/INSTALL (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/INSTALL Fri Jul 21 12:06:09 2006
@@ -339,11 +339,13 @@
   - IO::Zlib (from CPAN)
 
     Used by sa-update to uncompress update archives.
+    Version 1.04 or later is required.
 
 
   - Archive::Tar (from CPAN)
 
     Used by sa-update to expand update archives.
+    Version 1.23 or later is required.
 
 
   - Encode::Detect (from CPAN)

Modified: spamassassin/branches/bug-3109-shortcircuiting/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/MANIFEST?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/MANIFEST (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/MANIFEST Fri Jul 21 12:06:09 2006
@@ -82,6 +82,7 @@
 lib/Mail/SpamAssassin/Plugin/Razor2.pm
 lib/Mail/SpamAssassin/Plugin/RelayCountry.pm
 lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm
+lib/Mail/SpamAssassin/Plugin/Shortcircuit.pm
 lib/Mail/SpamAssassin/Plugin/SPF.pm
 lib/Mail/SpamAssassin/Plugin/SpamCop.pm
 lib/Mail/SpamAssassin/Plugin/Test.pm
@@ -331,6 +332,7 @@
 t/debug.t
 t/desc_wrap.t
 t/dnsbl.t
+t/get_headers.t
 t/gtube.t
 t/hashcash.t
 t/html_colors.t
@@ -340,6 +342,7 @@
 t/ip_addrs.t
 t/lang_lint.t
 t/lang_pl_tests.t
+t/lint_nocreate_prefs.t
 t/memory_cycles.t
 t/meta.t
 t/metadata.t
@@ -483,3 +486,4 @@
 t/dkim.t
 t/uribl.t
 t/shortcircuit.t
+t/spamc_y.t

Modified: spamassassin/branches/bug-3109-shortcircuiting/build/buildbot/bbmass_master.cfg
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/build/buildbot/bbmass_master.cfg?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/build/buildbot/bbmass_master.cfg (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/build/buildbot/bbmass_master.cfg Fri Jul 21 12:06:09 2006
@@ -150,7 +150,7 @@
 # the Waterfall 'status' entry, but at an externally-visible host name which
 # the buildbot cannot on its own.
 
-c['buildbotURL'] = "http://spamassassin.zones.apache.org/preflight/"
+c['buildbotURL'] = "http://bbmass.spamassassin.org:8011/"
 
 # finally we define the name that the buildmaster has been waiting for.
 

Modified: spamassassin/branches/bug-3109-shortcircuiting/build/buildbot/master.cfg
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/build/buildbot/master.cfg?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/build/buildbot/master.cfg (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/build/buildbot/master.cfg Fri Jul 21 12:06:09 2006
@@ -314,7 +314,7 @@
 # the Waterfall 'status' entry, but at an externally-visible host name which
 # the buildbot cannot on its own.
 
-c['buildbotURL'] = "http://spamassassin.zones.apache.org:8010/"
+c['buildbotURL'] = "http://buildbot.spamassassin.org:8010/"
 
 # finally we define the name that the buildmaster has been waiting for.
 

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin.pm Fri Jul 21 12:06:09 2006
@@ -1248,8 +1248,13 @@
   $self->{lint_rules} = $self->{conf}->{lint_rules} = 1;
   $self->{syntax_errors} = 0;
 
+  my $olddcp = $self->{dont_copy_prefs};
+  $self->{dont_copy_prefs} = 1;
+
   $self->init(1);
   $self->{syntax_errors} += $self->{conf}->{errors};
+
+  $self->{dont_copy_prefs} = $olddcp;       # revert back to previous
 
   my $mail = $self->parse(\@testmsg, 1);
   my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/AICache.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/AICache.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/AICache.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/AICache.pm Fri Jul 21 12:06:09 2006
@@ -60,10 +60,13 @@
 
   my $use_cache = 1;
 
+  # be sure to use rel2abs() here, since otherwise relative paths
+  # are broken by the prefix stuff
   if ($self->{type} eq 'dir') {
     $self->{cache_file} = File::Spec->catdir(
                 $self->{prefix},
-                $self->{path}, '.spamassassin_cache');
+                File::Spec->rel2abs($self->{path}),
+                '.spamassassin_cache');
 
     $self->{cache_mtime} = (stat($self->{cache_file}))[9] || 0;
   }
@@ -71,7 +74,7 @@
     my @split = File::Spec->splitpath($self->{path});
     $self->{cache_file} = File::Spec->catdir(
                 $self->{prefix},
-                $split[1],
+                File::Spec->rel2abs($split[1]),
                 join('_', '.spamassassin_cache', $self->{type}, $split[2]));
 
     $self->{cache_mtime} = (stat($self->{cache_file}))[9] || 0;

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf.pm Fri Jul 21 12:06:09 2006
@@ -43,8 +43,8 @@
 directories.
 
 The C<#> character starts a comment, which continues until end of line.
-B<NOTE:> using the C<#> character in the regular expression rules requires
-escaping.  i.e.: C<\#>
+B<NOTE:> if the C<#> character is to be used as part of a rule or
+configuration option, it must be escaped with a backslash.  i.e.: C<\#>
 
 Whitespace in the files is not significant, but please note that starting a
 line with whitespace is deprecated, as we reserve its use for multi-line rule
@@ -2218,146 +2218,6 @@
     type => $CONF_TYPE_HASH_KEY_VALUE
   });
 
-=item shortcircuit SYMBOLIC_TEST_NAME {ham|spam|on|off}
-
-Shortcircuiting a test will force all other pending rules to be skipped, if
-that test is hit.
-
-Recomended usage is to use C<priority> to set rules with strong S/O values (ie.
-1.0) to be run first, and make instant spam or ham classification based on
-that.
-
-To override a test that uses shortcircuiting, you can set the classification
-type to C<off>.
-
-=over 4
-
-=item on
-
-Shortcircuits the rest of the tests, but does not make a strict classification
-of spam or ham.  Rather, it uses the default score for the rule being
-shortcircuited.  This would allow you, for example, to define a rule such as 
-  
-=over 4
-
-  body TEST /test/
-  describe TEST test rule that scores barely over spam threshold
-  score TEST 5.5
-  priority TEST -100
-  shortcircuit TEST on
-
-=back
-
-The result of a message hitting the above rule would be a final score of 5.5,
-as opposed to 100 (default) if it were classified as spam.
-
-=item off
-
-Disables shortcircuiting on said rule.
-
-=item spam
-
-Shortcircuit the rule using a set of defaults; override the default score of
-this rule with the score from C<shortcircuit_spam_score>, set the
-C<noautolearn> tflag, and set priority to C<-100>.  In other words,
-equivalent to:
-
-=over 4
-
-  shortcircuit TEST on
-  priority TEST -100
-  score TEST 100
-  tflags TEST noautolearn
-
-=back
-
-=item ham
-
-Shortcircuit the rule using a set of defaults; override the default score of
-this rule with the score from C<shortcircuit_ham_score>, set the C<noautolearn>
-and C<nice> tflags, and set priority to C<-100>.   In other words, equivalent
-to:
-
-=over 4
-
-  shortcircuit TEST on
-  priority TEST -100
-  score TEST -100
-  tflags TEST noautolearn nice
-
-=back
-
-=back
-
-=cut
-
-  push (@cmds, {
-    setting => 'shortcircuit',
-    code => sub {
-      my ($self, $key, $value, $line) = @_;
-      my ($rule,$type);
-      unless (defined $value && $value !~ /^$/) {
-        return $MISSING_REQUIRED_VALUE;
-      }
-      if ($value =~ /^(\S+)\s+(\S+)$/) {
-        $rule=$1;
-        $type=$2;
-      } else {
-        return $INVALID_VALUE;
-      }
-
-      if ($type =~ m/^(?:spam|ham)$/) {
-        dbg("shortcircuit: adding $rule using abbreviation $type");
-
-        # set the defaults:
-        $self->{shortcircuit}->{$rule} = $type;
-        $self->{priority}->{$rule} = -100;
-
-        my $tf = $self->{tflags}->{$rule};
-        $self->{tflags}->{$rule} = ($tf ? $tf." " : "") .
-                ($type eq 'ham' ? "nice " : "") .
-                "noautolearn";
-      }
-      elsif ($type eq "on") {
-        $self->{shortcircuit}->{$rule} = "on";
-      }
-      elsif ($type eq "off") {
-        delete $self->{shortcircuit}->{$rule};
-      }
-      else {
-        return $INVALID_VALUE;
-      }
-    }
-  });
-
-=item shortcircuit_spam_score n.nn (default: 100)
-
-When shortcircuit is used on a rule, and the shortcircuit classification type
-is set to C<spam>, this value should be applied in place of the default score
-for that rule.
-
-=cut
-
-  push (@cmds, {
-    setting => 'shortcircuit_spam_score',
-    default => 100,
-    type => $CONF_TYPE_NUMERIC
-  });
-
-=item shortcircuit_ham_score n.nn (default: -100)
-
-When shortcircuit is used on a rule, and the shortcircuit classification type
-is set to C<ham>, this value should be applied in place of the default score
-for that rule.
-
-=cut
-
-  push (@cmds, {
-    setting => 'shortcircuit_ham_score',
-    default => -100,
-    type => $CONF_TYPE_NUMERIC
-  });
-
 =back
 
 =head1 ADMINISTRATOR SETTINGS
@@ -2894,9 +2754,6 @@
  _DCCR_            DCC's results
  _PYZOR_           Pyzor results
  _RBL_             full results for positive RBL queries in DNS URI format
- _SC_              shortcircuit status (classification and rule name)
- _SCRULE_          rulename that caused the shortcircuit 
- _SCTYPE_          shortcircuit classification ("spam", "ham", "default", "none")
  _LANGUAGES_       possible languages of mail
  _PREVIEW_         content preview
  _REPORT_          terse report of tests hit (for header reports)

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/DBBasedAddrList.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/DBBasedAddrList.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/DBBasedAddrList.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/DBBasedAddrList.pm Fri Jul 21 12:06:09 2006
@@ -55,8 +55,6 @@
     'locked_file'	=> ''
   };
 
-  my $path;
-
   my @order = split (' ', $main->{conf}->{auto_whitelist_db_modules});
   my $dbm_module = Mail::SpamAssassin::Util::first_available_module (@order);
   if (!$dbm_module) {
@@ -64,43 +62,38 @@
 	$main->{conf}->{auto_whitelist_db_modules}."\n";
   }
 
-  my $umask = umask 0;
-  if(defined($main->{conf}->{auto_whitelist_path})) # if undef then don't worry -- empty hash!
-  {
-    $path = $main->sed_path ($main->{conf}->{auto_whitelist_path});
+  # if undef then don't worry -- empty hash!
+  if (defined($main->{conf}->{auto_whitelist_path})) {
+    my $path = $main->sed_path($main->{conf}->{auto_whitelist_path});
+    my ($mod1, $mod2);
 
     if ($main->{locker}->safe_lock
-			($path, 30, $main->{conf}->{auto_whitelist_file_mode}))
+                       ($path, 30, $main->{conf}->{auto_whitelist_file_mode}))
     {
       $self->{locked_file} = $path;
-      $self->{is_locked} = 1;
-      dbg("auto-whitelist: tie-ing to DB file of type $dbm_module R/W in $path");
-      tie %{$self->{accum}},$dbm_module,$path,
-		  O_RDWR|O_CREAT,   #open rw w/lock
-		  (oct ($main->{conf}->{auto_whitelist_file_mode}) & 0666)
-	 or goto failed_to_tie;
-
-    } else {
+      $self->{is_locked}   = 1;
+      ($mod1, $mod2) = ('R/W', O_RDWR | O_CREAT);
+    }
+    else {
       $self->{is_locked} = 0;
-      dbg("auto-whitelist: tie-ing to DB file of type $dbm_module R/O in $path");
-      tie %{$self->{accum}},$dbm_module,$path,
-		  O_RDONLY,         #open ro w/o lock
-		  (oct ($main->{conf}->{auto_whitelist_file_mode}) & 0666)
-	 or goto failed_to_tie;
+      ($mod1, $mod2) = ('R/O', O_RDONLY);
     }
+
+    dbg("auto-whitelist: tie-ing to DB file of type $dbm_module $mod1 in $path");
+
+    if (! tie %{ $self->{accum} }, $dbm_module, $path, $mod2,
+      oct($main->{conf}->{auto_whitelist_file_mode}) ) {
+        my $err = $!;   # might get overwritten later
+        if ($self->{is_locked}) {
+          $self->{main}->{locker}->safe_unlock($self->{locked_file});
+          $self->{is_locked} = 0;
+        }
+        die "auto-whitelist: cannot open auto_whitelist_path $path: $err\n";
+      }
   }
-  umask $umask;
 
   bless ($self, $class);
   return $self;
-
-failed_to_tie:
-  umask $umask;
-  if ($self->{is_locked}) {
-    $self->{main}->{locker}->safe_unlock ($self->{locked_file});
-    $self->{is_locked} = 0;
-  }
-  die "auto-whitelist: cannot open auto_whitelist_path $path: $!\n";
 }
 
 ###########################################################################

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Dns.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Dns.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Dns.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Dns.pm Fri Jul 21 12:06:09 2006
@@ -206,7 +206,7 @@
   }
 
   if (!defined $self->{tests_already_hit}->{$rule}) {
-    $self->got_hit($rule, "RBL: ");
+    $self->got_hit($rule, "RBL: ", ruletype => "dnsbl");
   }
 }
 
@@ -247,7 +247,7 @@
       ++$self->{sender_host_fail} == 2)
   {
     for my $rule (@{$rules}) {
-      $self->got_hit($rule, "DNS: ");
+      $self->got_hit($rule, "DNS: ", ruletype => "dns");
     }
   }
 
@@ -308,7 +308,7 @@
       my $untainted = $1;
       $subtest = $untainted;
 
-      $self->got_hit($rule, "SenderBase: ") if !$undef && eval $subtest;
+      $self->got_hit($rule, "SenderBase: ", ruletype => "dnsbl") if !$undef && eval $subtest;
     }
     # bitmask
     elsif ($subtest =~ /^\d+$/) {

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/EvalTests.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/EvalTests.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/EvalTests.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/EvalTests.pm Fri Jul 21 12:06:09 2006
@@ -3082,13 +3082,16 @@
 }
 
 # came up on the users@ list, look for multipart/alternative parts which
-# include non-text parts -- skip multipart/related parts which occurs in ham
+# include non-text parts -- skip certain types which occur normally in ham
 sub check_ma_non_text {
   my $self = shift;
 
   foreach my $map ($self->{msg}->find_parts(qr@^multipart/alternative$@i)) {
     foreach my $p ($map->find_parts(qr/./, 1, 0)) {
-      return 1 if ($p->{'type'} !~ m@^text/@i && $p->{'type'} !~ m@^multipart/related$@i);
+      next if (lc $p->{'type'} eq 'multipart/related');
+      next if (lc $p->{'type'} eq 'application/rtf');
+      next if ($p->{'type'} =~ m@^text/@i);
+      return 1;
     }
   }
   

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Locker/Flock.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Locker/Flock.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Locker/Flock.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Locker/Flock.pm Fri Jul 21 12:06:09 2006
@@ -51,13 +51,14 @@
   my @stat;
 
   $max_retries ||= 30;
-  $mode ||= 0700;
+  $mode ||= 0600;
+  $mode = oct $mode if $mode =~ /^0/;   # accept number or string
 
   my $lock_file = "$path.mutex";
-  my $umask = umask (oct($mode) ^ 0700);
+  my $umask = umask(~$mode);
   my $fh = new IO::File();
 
-  if (!$fh->open ("$lock_file", O_RDWR|O_CREAT)) {
+  if (!$fh->open ($lock_file, O_RDWR|O_CREAT)) {
       umask $umask; # just in case
       die "locker: safe_lock: cannot create lockfile $lock_file: $!\n";
   }

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm Fri Jul 21 12:06:09 2006
@@ -60,7 +60,8 @@
   my @stat;
 
   $max_retries ||= 30;
-  $mode ||= 0700;
+  $mode ||= 0600;
+  $mode = oct $mode if $mode =~ /^0/;   # accept number or string
 
   my $lock_file = "$path.lock";
   my $hname = Mail::SpamAssassin::Util::fq_hostname();
@@ -70,7 +71,7 @@
   # keep this for unlocking
   $self->{lock_tmp} = $lock_tmp;
 
-  my $umask = umask (oct($mode) ^ 0700);
+  my $umask = umask(~$mode);
   if (!open(LTMP, ">$lock_tmp")) {
       umask $umask; # just in case
       die "locker: safe_lock: cannot create tmp lockfile $lock_tmp for $lock_file: $!\n";

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message/Metadata/Received.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message/Metadata/Received.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message/Metadata/Received.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message/Metadata/Received.pm Fri Jul 21 12:06:09 2006
@@ -414,6 +414,10 @@
   elsif (/\) by .+ \(\d{1,2}\.\d\.\d{3}(?:\.\d{1,3})?\) \(authenticated as .+\) id /) {
     $auth = 'CriticalPath';
   }
+  # Postfix 2.3 and later with "smtpd_sasl_authenticated_header yes"
+  elsif (/\)\s+\(Authenticated sender:\s+\S+\)\s+by\s+\S+\s+\(Postfix\)\s+with\s+/) {
+    $auth = 'Postfix';
+  }
 
   if (/^from /) {
     # try to catch enveloper senders

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm Fri Jul 21 12:06:09 2006
@@ -59,11 +59,14 @@
 use Mail::SpamAssassin::Logger;
 
 use vars qw{
-  @ISA
+  @ISA @TEMPORARY_METHODS
 };
 
 @ISA = qw();
 
+# methods defined by the compiled ruleset; deleted in finish_tests()
+@TEMPORARY_METHODS = ();
+
 ###########################################################################
 
 sub new {
@@ -78,12 +81,13 @@
     'test_logs'         => '',
     'test_names_hit'    => [ ],
     'subtest_names_hit' => [ ],
+    'spamd_result_log_items' => [ ],
     'tests_already_hit' => { },
     'hdr_cache'         => { },
     'rule_errors'       => 0,
     'disable_auto_learning' => 0,
     'auto_learn_status' => undef,
-    'conf'                => $main->{conf},
+    'conf'              => $main->{conf},
     'async'             => Mail::SpamAssassin::AsyncLoop->new($main)
   };
 
@@ -171,13 +175,14 @@
       next unless ($self->{conf}->{priorities}->{$priority} > 0);
 
       # if shortcircuiting is hit, we skip all other priorities...
-      last if (exists $self->{shortcircuit_type});
+      last if $self->have_shortcircuited();
 
       dbg("check: running tests for priority: $priority");
 
       # only harvest the dnsbl queries once priority HARVEST_DNSBL_PRIORITY
       # has been reached and then only run once
-      if ($priority >= HARVEST_DNSBL_PRIORITY && $needs_dnsbl_harvest_p && !exists $self->{shortcircuit_type})
+      if ($priority >= HARVEST_DNSBL_PRIORITY && $needs_dnsbl_harvest_p
+            && !$self->have_shortcircuited())
       {
 	# harvest the DNS results
 	$self->harvest_dnsbl_queries();
@@ -213,20 +218,15 @@
     # sanity check, it is possible that no rules >= HARVEST_DNSBL_PRIORITY ran so the harvest
     # may not have run yet.  Check, and if so, go ahead and harvest here.
     if ($needs_dnsbl_harvest_p) {
-      if (!exists $self->{shortcircuit_type}) {
+      if (!$self->have_shortcircuited()) {
         # harvest the DNS results
         $self->harvest_dnsbl_queries();
       }
 
       # finish the DNS results
+      # TODO: this should be consolidated with the identical code above
       $self->rbl_finish();
-
-      if (!exists $self->{shortcircuit_type}) {
-        # TODO: should we call this even if we're short-circuiting?
-        # in URIDNSBL, it used to be a time-consuming operation.
-        $self->{main}->call_plugins("check_post_dnsbl", { permsgstatus => $self });
-      }
-
+      $self->{main}->call_plugins ("check_post_dnsbl", { permsgstatus => $self });
       $self->{resolver}->finish_socket() if $self->{resolver};
     }
 
@@ -1142,6 +1142,46 @@
 
 ###########################################################################
 
+# public API for plugins
+
+=item $status->set_spamd_result_item($subref)
+
+Set an entry for the spamd result log line.  C<$subref> should be a code
+reference for a subroutine which will return a string in C<'name=VALUE'>
+format, similar to the other entries in the spamd result line:
+
+  Jul 17 14:10:47 radish spamd[16670]: spamd: result: Y 22 - ALL_NATURAL,
+  DATE_IN_FUTURE_03_06,DIET_1,DRUGS_ERECTILE,DRUGS_PAIN,
+  TEST_FORGED_YAHOO_RCVD,TEST_INVALID_DATE,TEST_NOREALNAME,
+  TEST_NORMAL_HTTP_TO_IP,UNDISC_RECIPS scantime=0.4,size=3138,user=jm,
+  uid=1000,required_score=5.0,rhost=localhost,raddr=127.0.0.1,
+  rport=33153,mid=<9PS291LhupY>,autolearn=spam
+
+C<name> and C<VALUE> must not contain C<=> or C<,> characters, as it
+is important that these log lines are easy to parse.
+
+The code reference will be called by spamd after the message has been scanned,
+and the C<PerMsgStatus::check()> method has returned.
+
+=cut
+
+sub set_spamd_result_item {
+  my ($self, $ref) = @_;
+  push @{$self->{spamd_result_log_items}}, $ref;
+}
+
+# called by spamd
+sub get_spamd_result_log_items {
+  my ($self) = @_;
+  my @ret = ();
+  foreach my $ref (@{$self->{spamd_result_log_items}}) {
+    push @ret, &$ref;
+  }
+  return @ret;
+}
+
+###########################################################################
+
 sub _get_tag_value_for_yesno {
   my $self   = shift;
   
@@ -1251,17 +1291,6 @@
 
             AUTOLEARN => sub { return $self->get_autolearn_status(); },
 
-            SC => sub {
-              my $rule = $self->{shortcircuit_rule};
-              my $type = $self->{shortcircuit_type};
-              return "$rule ($type)" if ($rule);
-              return "no";
-            },
-
-            SCRULE => sub { return ($self->{shortcircuit_rule} || "none") ; },
-
-            SCTYPE => sub { return ($self->{shortcircuit_type} || "no") ; },
-
             TESTS => sub {
               my $arg = (shift || ',');
               return (join($arg, sort(@{$self->{test_names_hit}})) || "none");
@@ -1382,6 +1411,13 @@
       undef &{'_meta_tests_'.$clean_priority};
     }
   }
+
+  foreach my $method (@TEMPORARY_METHODS) {
+    if (defined &{$method}) {
+      undef &{$method};
+    }
+  }
+  @TEMPORARY_METHODS = ();      # clear for next time
 }
 
 
@@ -1670,9 +1706,7 @@
 }
 
 sub hit_rule_plugin_code {
-  my ($self, $rulename, $ruletype) = @_;
-
-  return '' unless exists($self->{should_log_rule_hits}) || $self->{main}->have_plugin("hit_rule");
+  my ($self, $rulename, $ruletype, $loop_break_directive) = @_;
 
   # note: keep this in 'single quotes' to avoid the $ & performance hit,
   # unless specifically requested by the caller.   Also split the
@@ -1685,7 +1719,6 @@
     $debug_code = '
         dbg("rules: ran '.$ruletype.' rule '.$rulename.' ======> got hit: \"" . '.
             $match.' . "\"");
-        
     ';
   }
 
@@ -1696,15 +1729,14 @@
     ';
   }
 
-  my $plugin_code = '';
-  if ($self->{main}->have_plugin("hit_rule")) {
-    $plugin_code = '
-        $self->{main}->call_plugins ("hit_rule", { permsgstatus => $self, rulename => \''.$rulename.'\', ruletype => \''.$ruletype.'\' });
-    ';
+  # if we're not running "tflags multiple", break out of the matching
+  # loop this way
+  my $multiple_code = '';
+  if ($self->{conf}->{tflags}->{$rulename} !~ /\bmultiple\b/) {
+    $multiple_code = $loop_break_directive.';';
   }
 
-  return $debug_code.$save_hits_code.$plugin_code.'
-  ';
+  return $debug_code.$save_hits_code.$multiple_code;
 }
 
 sub ran_rule_plugin_code {
@@ -1730,7 +1762,7 @@
   my ($self, $priority) = @_;
   local ($_);
 
-  return if (exists $self->{shortcircuit_type});
+  return if $self->have_shortcircuited();
 
   # note: we do this only once for all head pattern tests.  Only
   # eval tests need to use stuff in here.
@@ -1787,10 +1819,8 @@
         my($self,$text) = @_;
         '.$self->hash_line_for_rule($rulename).'
         while ($text '.$testtype.'~ '.$pat.'g) {
-          $self->got_hit (q#'.$rulename.'#, q{});
-          '. $self->hit_rule_plugin_code($rulename, "header") . '
-          # Ok, we hit, stop now.
-          last unless $self->{conf}->{tflags}->{q{'.$rulename.'}} =~ /\bmultiple\b/;
+          $self->got_hit(q#'.$rulename.'#, "", ruletype => "header");
+          '. $self->hit_rule_plugin_code($rulename, "header", "last") . '
         }
       }';
 
@@ -1851,7 +1881,7 @@
   my ($self, $priority, $textary) = @_;
   local ($_);
     
-  return if (exists $self->{shortcircuit_type});
+  return if $self->have_shortcircuited();
 
   dbg("rules: running body-text per-line regexp tests; score so far=".$self->{score});
 
@@ -1895,10 +1925,8 @@
              '.$self->hash_line_for_rule($rulename).'
              pos = 0;
              while ('.$pat.'g) { 
-                $self->got_pattern_hit(q{'.$rulename.'}, "BODY: "); 
-                '. $self->hit_rule_plugin_code($rulename, "body") . '
-		# Ok, we hit, stop now.
-		return unless $self->{conf}->{tflags}->{q{'.$rulename.'}} =~ /\bmultiple\b/;
+                $self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body"); 
+                '. $self->hit_rule_plugin_code($rulename, "body", "return") . '
              }
            }
     }
@@ -2267,7 +2295,7 @@
   my ($self, $priority, @uris) = @_;
   local ($_);
 
-  return if (exists $self->{shortcircuit_type});
+  return if $self->have_shortcircuited();
   
   dbg("uri: running uri tests; score so far=".$self->{score});
 
@@ -2311,10 +2339,8 @@
          '.$self->hash_line_for_rule($rulename).'
          pos = 0;
          while ('.$pat.'g) { 
-            $self->got_pattern_hit(q{'.$rulename.'}, "URI: ");
-            '. $self->hit_rule_plugin_code($rulename, "uri") . '
-            # Ok, we hit, stop now.
-	    return unless $self->{conf}->{tflags}->{q{'.$rulename.'}} =~ /\bmultiple\b/;
+            $self->got_hit(q{'.$rulename.'}, "URI: ", ruletype => "uri");
+            '. $self->hit_rule_plugin_code($rulename, "uri", "return") .'
          }
        }
     }
@@ -2361,7 +2387,7 @@
   my ($self, $priority, $textary) = @_;
   local ($_);
 
-  return if (exists $self->{shortcircuit_type});
+  return if $self->have_shortcircuited();
 
   dbg("rules: running raw-body-text per-line regexp tests; score so far=".$self->{score});
 
@@ -2404,10 +2430,8 @@
          '.$self->hash_line_for_rule($rulename).'
          pos = 0;
          while ('.$pat.'g) { 
-            $self->got_pattern_hit(q{'.$rulename.'}, "RAW: ");
-            '. $self->hit_rule_plugin_code($rulename, "rawbody") . '
-            # Ok, we hit, stop now.
-	    return unless $self->{conf}->{tflags}->{q{'.$rulename.'}} =~ /\bmultiple\b/;
+            $self->got_hit(q{'.$rulename.'}, "RAW: ", ruletype => "rawbody");
+            '. $self->hit_rule_plugin_code($rulename, "rawbody", "return") . '
          }
        }
     }
@@ -2454,7 +2478,7 @@
   my ($self, $priority, $fullmsgref) = @_;
   local ($_);
     
-  return if (exists $self->{shortcircuit_type});
+  return if $self->have_shortcircuited();
   
   dbg("rules: running full-text regexp tests; score so far=".$self->{score});
 
@@ -2484,10 +2508,8 @@
         '.$self->hash_line_for_rule($rulename).'
         pos $$fullmsgref = 0;
         while ($$fullmsgref =~ '.$pat.'g) {
-          $self->got_pattern_hit(q{'.$rulename.'}, "FULL: ");
-          '. $self->hit_rule_plugin_code($rulename, "full") . '
-	  # Ok, we hit, stop now.
-	  last unless $self->{conf}->{tflags}->{q{'.$rulename.'}} =~ /\bmultiple\b/;
+          $self->got_hit(q{'.$rulename.'}, "FULL: ", ruletype => "full");
+          '. $self->hit_rule_plugin_code($rulename, "full", "last") . '
         }
         '.$self->ran_rule_plugin_code($rulename, "full").'
       }
@@ -2531,25 +2553,32 @@
 sub do_head_eval_tests {
   my ($self, $priority) = @_;
   return unless (defined($self->{conf}->{head_evals}->{$priority}));
-  $self->run_eval_tests ($self->{conf}->{head_evals}->{$priority}, '');
+  $self->run_eval_tests ($Mail::SpamAssassin::Conf::TYPE_HEAD_EVALS,
+                         $self->{conf}->{head_evals}->{$priority}, '', $priority);
 }
 
 sub do_body_eval_tests {
   my ($self, $priority, $bodystring) = @_;
   return unless (defined($self->{conf}->{body_evals}->{$priority}));
-  $self->run_eval_tests ($self->{conf}->{body_evals}->{$priority}, 'BODY: ', $bodystring);
+  $self->run_eval_tests ($Mail::SpamAssassin::Conf::TYPE_BODY_EVALS,
+                         $self->{conf}->{body_evals}->{$priority}, 'BODY: ',
+                         $priority, $bodystring);
 }
 
 sub do_rawbody_eval_tests {
   my ($self, $priority, $bodystring) = @_;
   return unless (defined($self->{conf}->{rawbody_evals}->{$priority}));
-  $self->run_eval_tests ($self->{conf}->{rawbody_evals}->{$priority}, 'RAW: ', $bodystring);
+  $self->run_eval_tests ($Mail::SpamAssassin::Conf::TYPE_RAWBODY_EVALS,
+                         $self->{conf}->{rawbody_evals}->{$priority}, 'RAW: ',
+                         $priority, $bodystring);
 }
 
 sub do_full_eval_tests {
   my ($self, $priority, $fullmsgref) = @_;
   return unless (defined($self->{conf}->{full_evals}->{$priority}));
-  $self->run_eval_tests ($self->{conf}->{full_evals}->{$priority}, '', $fullmsgref);
+  $self->run_eval_tests ($Mail::SpamAssassin::Conf::TYPE_FULL_EVALS,
+                         $self->{conf}->{full_evals}->{$priority}, '',
+                         $priority, $fullmsgref);
 }
 
 ###########################################################################
@@ -2558,7 +2587,7 @@
   my ($self, $priority) = @_;
   local ($_);
     
-  return if (exists $self->{shortcircuit_type});
+  return if $self->have_shortcircuited();
 
   dbg("rules: running meta tests; score so far=" . $self->{score} );
   my $conf = $self->{conf};
@@ -2615,6 +2644,13 @@
         $meta{$rulename} .= "\$h->{'$token'} ";
         $setup_rules{$token}=1;
 
+        if (!exists $conf->{scores}->{$token}) {
+          info("rules: meta test $rulename has undefined dependency '$token'");
+        }
+        elsif ($conf->{scores}->{$token} == 0) {
+          info("rules: meta test $rulename has dependency '$token' with a zero score");
+        }
+
         # If the token is another meta rule, add it as a dependency
         push (@{ $rule_deps{$rulename} }, $token)
           if (exists $conf->{meta_tests}->{$priority}->{$token});
@@ -2654,8 +2690,10 @@
       }
 
       # Add this meta rule to the eval line
-      $evalstr .= '  $r = '.$meta{$metas[$i]}.";\n";
-      $evalstr .= '  if ($r) { $self->got_hit (q#'.$metas[$i].'#, "", $r); }'."\n";
+      $evalstr .= '
+        $r = '.$meta{$metas[$i]}.';
+        if ($r) { $self->got_hit(q#'.$metas[$i].'#, "", ruletype => "meta", value => $r); }
+      ';
 
       splice @metas, $i--, 1;    # remove this rule from our list
     }
@@ -2740,88 +2778,172 @@
 ###########################################################################
 
 sub run_eval_tests {
-  my ($self, $evalhash, $prepend2desc, @extraevalargs) = @_;
+  my ($self, $testtype, $evalhash, $prepend2desc, $priority, @extraevalargs) = @_;
   local ($_);
 
-  return if (exists $self->{shortcircuit_type});
-  
+  return if $self->have_shortcircuited();
+
+  my $doing_user_rules = $self->{conf}->{user_rules_to_compile}->{$testtype};
+
+  # clean up priority value so it can be used in a subroutine name
+  my $clean_priority;
+  ($clean_priority = $priority) =~ s/-/neg/;
+
+  my $scoreset = $self->{conf}->get_score_set();
+
+  my $methodname = '_eval_tests'.
+                        '_type'.$testtype .
+                        '_pri'.$clean_priority .
+                        '_set'.$scoreset;
+
+  # Some of the rules are scoreset specific, so we need additional 
+  # subroutines to handle those
+  if (defined &{'Mail::SpamAssassin::PerMsgStatus::'.$methodname}
+        && !$doing_user_rules)
+  {
+    no strict "refs";
+    &{'Mail::SpamAssassin::PerMsgStatus::'.$methodname}($self,@extraevalargs);
+    use strict "refs";
+    return;
+  }
+
   # look these up once in advance to save repeated lookups in loop below
-  my $debugenabled = would_log('dbg');
-  my $scoresref = $self->{conf}->{scores};
   my $tflagsref = $self->{conf}->{tflags};
   my $have_start_rules = $self->{main}->have_plugin("start_rules");
   my $have_ran_rule = $self->{main}->have_plugin("ran_rule");
 
-  my $scoreset = $self->{conf}->get_score_set();
-  while (my ($rulename, $test) = each %{$evalhash}) {
-    last if (exists $self->{shortcircuit_type});
-
-    # Score of 0, skip it.
-    my $score = $scoresref->{$rulename};
-    next unless $score;
-
-    # If the rule is a net rule, and we're in a non-net scoreset, skip it.
-    next if ((($scoreset & 1) == 0) &&
-             $tflagsref->{$rulename} &&
-             $tflagsref->{$rulename} =~ /\bnet\b/);
-
-    # If the rule is a bayes rule, and we're in a non-bayes scoreset, skip it.
-    next if ((($scoreset & 2) == 0) &&
-             $tflagsref->{$rulename} &&
-             $tflagsref->{$rulename} =~ /\bbayes\b/);
+  # the buffer for the evaluated code
+  my $evalstr = q{ };
+$evalstr .= q{ my $function; };
+
+  # conditionally include the dbg in the eval str
+  my $dbgstr = q{ };
+  if (would_log('dbg')) {
+    $dbgstr = q{ 
+      dbg("rules: ran eval rule $rulename ======> got hit ($result)");
+    };
+  }
 
-    my $result;
-    $self->{test_log_msgs} = ();        # clear test state
+  while (my ($rulename, $test) = each %{$evalhash})
+  {
+    if ($tflagsref->{$rulename}) {
+      # If the rule is a net rule, and we are in a non-net scoreset, skip it.
+      if ($tflagsref->{$rulename} =~ /\bnet\b/) {
+        next if (($scoreset & 1) == 0);
+      }
+      # If the rule is a bayes rule, and we are in a non-bayes scoreset, skip it.
+      if ($tflagsref->{$rulename} =~ /\bbayes\b/) {
+        next if (($scoreset & 2) == 0);
+      }
+    }
 
     my ($function, @args) = @{$test};
-    unshift(@args, @extraevalargs);
 
-    # check to make sure the function is defined
-    if (!$self->can ($function)) {
-      my $pluginobj = $self->{conf}->{eval_plugins}->{$function};
-      if ($pluginobj) {
-	# we have a plugin for this.  eval its function
-	$self->register_plugin_eval_glue ($pluginobj, $function);
-      } else {
-	dbg("rules: no method found for eval test $function");
-      }
-    }
+    $evalstr .= '
+      $rulename = q#'.$rulename.'#;
+      $self->{test_log_msgs} = ();
+    ';
 
-    # let plugins get the name of the rule that's currently being
-    # run
-    $self->{current_rule_name} = $rulename;
+    # only need to set current_rule_name for plugin evals
+    if ($self->{conf}->{eval_plugins}->{$function}) {
+      # let plugins get the name of the rule that is currently being run,
+      # and ensure their eval functions exist
+      $evalstr .= '
+        $self->{current_rule_name} = $rulename;
+        $self->register_plugin_eval_glue(q#'.$function.'#);
+      ';
+    }
 
+    # this stuff is quite slow, and totally superfluous if
+    # no plugin is loaded for those hooks
     if ($have_start_rules) {
-      $self->{main}->call_plugins("start_rules", { permsgstatus => $self, ruletype => "eval" });
+      $evalstr .= '
+        $self->{main}->call_plugins("start_rules", {
+                permsgstatus => $self, ruletype => "eval"
+              });
+      ';
     }
 
-    eval {
-      $result = $self->$function(@args);
-    };
-
-    if ($@) {
-      warn "rules: failed to run $rulename test, skipping:\n" . "\t($@)\n";
-      $self->{rule_errors}++;
-      next;
+    my $argstr = '';
+    if (scalar @args > 0) {
+      $argstr = ',' . join (', ', map { "q#".$_."#" } @args);
     }
 
+    $evalstr .= '
+      eval {
+        $result = $self->' . $function . ' (@extraevalargs '. $argstr .' );
+      };
+      if ($@) { $self->handle_eval_rule_errors($rulename); }
+    ';
+
     if ($have_ran_rule) {
-      $self->{main}->call_plugins("ran_rule", { permsgstatus => $self, ruletype => "eval", rulename => $rulename });
+      $evalstr .= '
+        $self->{main}->call_plugins("ran_rule", {
+            permsgstatus => $self, ruletype => "eval", rulename => $rulename
+          });
+      ';
     }
 
-    if ($result) {
-      $self->got_hit ($rulename, $prepend2desc, $result);
-      dbg("rules: ran eval rule $rulename ======> got hit ($result)") if $debugenabled;
-      $self->{main}->call_plugins("hit_rule", { permsgstatus => $self, ruletype => "eval", rulename => $rulename });
+    $evalstr .= '
+      if ($result) {
+        $self->got_hit($rulename, $prepend2desc, ruletype => "eval", value => $result);
+        '.$dbgstr.'
+      }
+    ';
+  }
+
+  # nothing done in the loop, that means no rules
+  return unless ($evalstr);
+
+  $evalstr = <<"EOT";
+{
+  package Mail::SpamAssassin::PerMsgStatus;
+
+    sub ${methodname} {
+      my (\$self, \@extraevalargs) = \@_;
+
+      my \$prepend2desc = q#$prepend2desc#;
+      my \$rulename;
+      my \$result;
+
+      $evalstr
     }
+
+  1;
+}
+EOT
+
+  eval $evalstr;
+
+  push (@TEMPORARY_METHODS, $methodname);
+
+  if ($@) {
+    warn "rules: failed to compile eval tests, skipping some: $@\n";
+    $self->{rule_errors}++;
   }
+  else {
+    no strict "refs";
+    &{'Mail::SpamAssassin::PerMsgStatus::'.$methodname}($self,@extraevalargs);
+    use strict "refs";
+  }
+}
+
+# use a separate sub here, for brevity
+sub handle_eval_rule_errors {
+  my ($self, $rulename) = @_;
+  warn "rules: failed to run $rulename test, skipping:\n\t($@)\n";
+  $self->{rule_errors}++;
 }
 
 sub register_plugin_eval_glue {
-  my ($self, $pluginobj, $function) = @_;
+  my ($self, $function) = @_;
+
+  # return if it's not an eval_plugin function
+  return if (!exists $self->{conf}->{eval_plugins}->{$function});
 
-  # stop reporting this -- it's too noisy!
-  # dbg("plugin: registering glue method for $function ($pluginobj)");
+  # return if it's been registered already
+  return if ($self->can ($function) &&
+        defined &{'Mail::SpamAssassin::PerMsgStatus::'.$function});
 
   my $evalstr = <<"ENDOFEVAL";
 {
@@ -2842,13 +2964,15 @@
     warn "rules: failed to run header tests, skipping some: $@\n";
     $self->{rule_errors}++;
   }
+
+  # ensure this method is deleted if finish_tests() is called
+  push (@TEMPORARY_METHODS, $function);
 }
 
 ###########################################################################
 
 sub run_rbl_eval_tests {
   my ($self, $evalhash) = @_;
-  my ($rulename, $pat, @args);
   local ($_);
 
   if ($self->{main}->{local_tests_only}) {
@@ -2879,10 +3003,12 @@
 
 ###########################################################################
 
-sub got_pattern_hit {
-  my ($self, $rulename, $prefix) = @_;
-
-  $self->got_hit ($rulename, $prefix);
+sub have_shortcircuited
+{
+  my ($self) = @_;
+  return 1 if $self->{main}->call_plugins ("have_shortcircuited", {
+        permsgstatus => $self
+      });
 }
 
 ###########################################################################
@@ -2907,16 +3033,17 @@
     $self->{test_log_msgs} = ();
 }
 
+# internal API, called only by got_hit()
+# TODO: refactor and merge this into that function
 sub _handle_hit {
-    my ($self, $rule, $score, $area, $desc, $scrule) = @_;
+    my ($self, $rule, $score, $area, $ruletype, $desc) = @_;
 
-    # if this was a shortcircuited rule hit, lets do some cleanup first  
-    if ($scrule) {
-       undef $self->{test_names_hit};       # reset rule hits
-       $self->{score}                = 0;   # reset score
-       $self->{tag_data}->{REPORT}   = '';  # reset tag data
-       $self->{tag_data}->{SUMMARY}  = '';  # reset tag data
-    }
+    $self->{main}->call_plugins ("hit_rule", {
+        permsgstatus => $self,
+        rulename => $rule,
+        ruletype => $ruletype,
+        score => $score
+      });
 
     # ignore meta-match sub-rules.
     if ($rule =~ /^__/) { push(@{$self->{subtest_names_hit}}, $rule); return; }
@@ -2967,44 +3094,77 @@
   $wrapped;
 }
 
+###########################################################################
+
+=item $status->got_hit ($rulename, $desc_prepend [, name => value, ...])
+
+Register a hit against a rule in the ruleset.
+
+There are two mandatory arguments. These are C<$rulename>, the name of the rule
+that fired, and C<$desc_prepend>, which is a short string that will be
+prepended to the rule's C<describe> string in output reports.
+
+In addition, callers can supplement that with the following optional
+data:
+
+=over 4
+
+=item score => $num
+
+Optional: the score to use for the rule hit.  If unspecified,
+the value from the C<Mail::SpamAssassin::Conf> object's C<{scores}>
+hash will be used.
+
+=item value => $num
+
+Optional: the value to assign to the rule; the default value is C<1>.
+I<tflags multiple> rules use values of greater than 1 to indicate
+multiple hits.  This value is accessible to meta rules.
+
+=item ruletype => $type
+
+Optional, but recommended: the rule type string.  This is used in the
+C<hit_rule> plugin call, called by this method.  If unset, I<'unknown'> is
+used.
+
+=back
+
+Backwards compatibility: the two mandatory arguments have been part of this API
+since SpamAssassin 2.x.  The optional I<name=E<gt>value> pairs, however, are a
+new addition in SpamAssassin 3.2.0.
+
+=cut
+
 sub got_hit {
-  my ($self, $rule, $area, $value) = @_;
-  $value ||= 1;
+  my ($self, $rule, $area, %params) = @_;
+
+  return if $self->have_shortcircuited();
 
-  return if (exists $self->{shortcircuit_type});
+  # ensure that rule values always result in an *increase* of
+  # $self->{tests_already_hit}->{$rule}:
+  my $value = $params{value}; if (!$value || $value <= 0) { $value = 1; }
+
+  # default ruletype, if not specified:
+  $params{ruletype} ||= 'unknown';
 
   my $already_hit = $self->{tests_already_hit}->{$rule} || 0;
   $self->{tests_already_hit}->{$rule} = $already_hit + $value;
 
-  # only allow each test to be scored once per mail
+  # only allow each test to be scored once per mail, once we
+  # get into this method ('tflags multiple' rules must be dealt
+  # with in callers to this method)
   return if ($already_hit);
 
-  my $desc = $self->{conf}->{descriptions}->{$rule};
-  $desc ||= $rule;
-
-  my $score = $self->{conf}->{scores}->{$rule};
-
-  my $sctype = $self->{conf}->{shortcircuit}->{$rule};
-  if ($sctype) {
-    $self->{shortcircuit_rule} = $rule;
-    if ($sctype eq 'on') {  # guess by rule score
-      $self->{shortcircuit_type} = ($score < 0 ? 'ham' : 'spam');
-      dbg("shortcircuit: s/c due to $rule, using score of $score");
-    }
-    else {
-      $self->{shortcircuit_type} = $sctype;
-      if ($sctype eq 'ham') {
-        $score = $self->{conf}->{shortcircuit_ham_score};
-      } else {
-        $score = $self->{conf}->{shortcircuit_spam_score};
-      }
-      dbg("shortcircuit: s/c $sctype due to $rule, using score of $score");
-    }
-  }
-
-  $self->_handle_hit($rule, $score, $area, $desc, $self->{shortcircuit_rule});
+  $self->_handle_hit($rule,
+            $params{score} || $self->{conf}->{scores}->{$rule},
+            $area,
+            $params{ruletype},
+            ($self->{conf}->{descriptions}->{$rule} || $rule));
 }
 
+###########################################################################
+
+# TODO: this needs API doc
 sub test_log {
   my ($self, $msg) = @_;
   while ($msg =~ s/^(.{30,48})\s//) {

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin.pm Fri Jul 21 12:06:09 2006
@@ -429,6 +429,10 @@
 
 The name of the rule that fired.
 
+=item score
+
+The rule's score in the active scoreset.
+
 =back
 
 =item $plugin->ran_rule ( { options ... } )

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/AWL.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/AWL.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/AWL.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/AWL.pm Fri Jul 21 12:06:09 2006
@@ -235,7 +235,7 @@
 		type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING
 	       });
 
-=item auto_whitelist_file_mode		(default: 0700)
+=item auto_whitelist_file_mode		(default: 0600)
 
 The file mode bits used for the automatic-whitelist directory or file.
 
@@ -248,7 +248,7 @@
   push (@cmds, {
 		setting => 'auto_whitelist_file_mode',
 		is_admin => 1,
-		default => '0700',
+		default => '0600',
 		type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
 	       });
 
@@ -380,11 +380,7 @@
       }
 
       if ($delta != 0) {
-	# We have to use the private _handle_hit method here because we want
-	# to pass in a dynamically generated score.  Perhaps we should extend
-	# handle_hit or add a handle_dynamic_hit method to help here.
-	$pms->_handle_hit("AWL", $delta, "AWL: ",
-			  $pms->{conf}->{descriptions}->{AWL});
+	$pms->got_hit("AWL", "AWL: ", ruletype => 'eval', score => $delta);
       }
 
       $whitelist->finish();

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util.pm Fri Jul 21 12:06:09 2006
@@ -421,14 +421,40 @@
   # Time::Local (v1.10 at least) throws warnings when the dates cause
   # a 32-bit overflow.  So force a min/max for year.
   if ($yyyy > 2037) {
-    dbg("util: date after supported range, forcing year to 2037: $date");
+    dbg("util: year after supported range, forcing year to 2037: $date");
     $yyyy = 2037;
   }
   elsif ($yyyy < 1970) {
-    dbg("util: date before supported range, forcing year to 1970: $date");
+    dbg("util: year before supported range, forcing year to 1970: $date");
     $yyyy = 1971;
   }
 
+  # Fudge invalid times so that we get a usable date.
+  if ($ss > 59) { 
+    dbg("util: second after supported range, forcing second to 59: $date");  
+    $ss = 59;
+  } 
+  elsif ($ss < 0) {
+    dbg("util: second before supported range, forcing second to 00: $date");
+    $ss = "00";
+  }
+  if ($mm > 59) { 
+    dbg("util: minute after supported range, forcing minute to 59: $date");
+    $mm = 59;
+  }
+  elsif ($mm < 0) {   
+    dbg("util: minute before supported range, forcing minute to 00: $date");
+    $mm = "00";
+  }
+  if ($hh > 23) { 
+    dbg("util: hour after supported range, forcing hour to 23: $date"); 
+    $hh = 23;
+  }
+  elsif ($hh < 0) {
+    dbg("util: hour before supported range, forcing hour to 00: $date"); 
+    $hh = "00";
+  }
+
   my $time;
   eval {		# could croak
     $time = timegm($ss, $mm, $hh, $dd, $mmm-1, $yyyy);
@@ -1206,7 +1232,7 @@
 
   # use a traditional open(FOO, "cmd |")
   my $cmd = join(' ', @cmdline);
-  if ($stdinfile) { $cmd .= " < '$stdinfile'"; }
+  if ($stdinfile) { $cmd .= qq/ < "$stdinfile"/; }
   if ($duperr2out) { $cmd .= " 2>&1"; }
   return open ($fh, $cmd.'|');
 }

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util/DependencyInfo.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util/DependencyInfo.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util/DependencyInfo.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util/DependencyInfo.pm Fri Jul 21 12:06:09 2006
@@ -156,13 +156,13 @@
 },
 {
   module => 'Archive::Tar',
-  version => '0.00',
+  version => '1.23',
   desc => 'The "sa-update" script requires this module to access tar update
   archive files.',
 },
 {
   module => 'IO::Zlib',
-  version => '0.00',
+  version => '1.04',
   desc => 'The "sa-update" script requires this module to access compressed
   update archive files.',
 },

Modified: spamassassin/branches/bug-3109-shortcircuiting/procmailrc.example
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/procmailrc.example?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/procmailrc.example (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/procmailrc.example Fri Jul 21 12:06:09 2006
@@ -11,8 +11,8 @@
 # Pipe the mail through spamassassin (replace 'spamassassin' with 'spamc'
 # if you use the spamc/spamd combination)
 #
-# The condition line ensures that only messages smaller than 250 kB
-# (250 * 1024 = 256000 bytes) are processed by SpamAssassin. Most spam
+# The condition line ensures that only messages smaller than 500 kB
+# (500 * 1024 = 512000 bytes) are processed by SpamAssassin. Most spam
 # isn't bigger than a few k and working with big messages can bring
 # SpamAssassin to its knees.
 #
@@ -20,7 +20,7 @@
 # at 1 time, to keep the load down.
 #
 :0fw: spamassassin.lock
-* < 256000
+* < 512000
 | spamassassin
 
 # Mails with a score of 15 or higher are almost certainly spam (with 0.05%

Modified: spamassassin/branches/bug-3109-shortcircuiting/rules/10_default_prefs.cf
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/rules/10_default_prefs.cf?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/rules/10_default_prefs.cf (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/rules/10_default_prefs.cf Fri Jul 21 12:06:09 2006
@@ -33,7 +33,7 @@
 report 
 report Content preview:  _PREVIEW_
 report 
-report Content analysis details:   (_SCORE_ points, _REQD_ required, s/c _SCTYPE_)
+report Content analysis details:   (_SCORE_ points, _REQD_ required)
 report
 report " pts rule name              description"
 report  ---- ---------------------- --------------------------------------------------
@@ -92,7 +92,7 @@
 #        FROM_HAS_MIXED_NUMS3,HOME_EMPLOYMENT,INVALID_DATE,INVALID_MSGID
 #        LINES_OF_YELLING,MSGID_HAS_NO_AT,NO_REAL_NAME,ONCE_IN_LIFETIME
 #        UNDISC_RECIPS autolearn=spam version=2.60-cvs
-add_header all Status "_YESNO_, score=_SCORE_ required=_REQD_ tests=_TESTS_ shortcircuit=_SCTYPE_ autolearn=_AUTOLEARN_ version=_VERSION_"
+add_header all Status "_YESNO_, score=_SCORE_ required=_REQD_ tests=_TESTS_ autolearn=_AUTOLEARN_ version=_VERSION_"
 
 ###########################################################################
 # Default prefs values: users can override these in their 

Modified: spamassassin/branches/bug-3109-shortcircuiting/rules/60_shortcircuit.cf
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/rules/60_shortcircuit.cf?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/rules/60_shortcircuit.cf (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/rules/60_shortcircuit.cf Fri Jul 21 12:06:09 2006
@@ -23,6 +23,8 @@
 ###########################################################################
 # HIGH PRIORITY RULES
 
+ifplugin Mail::SpamAssassin::Plugin::Shortcircuit
+
 priority USER_IN_WHITELIST     -1000
 priority USER_IN_DEF_WHITELIST -1000
 priority USER_IN_ALL_SPAM_TO   -1000
@@ -52,4 +54,6 @@
 
 # shortcircuit BAYES_99              spam
 # shortcircuit BAYES_00              ham
+
+endif # Mail::SpamAssassin::Plugin::Shortcircuit
 

Modified: spamassassin/branches/bug-3109-shortcircuiting/rules/active.list
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/rules/active.list?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/rules/active.list (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/rules/active.list Fri Jul 21 12:06:09 2006
@@ -1,8 +1,5 @@
 # active ruleset list, automatically generated from http://ruleqa.spamassassin.org/
-# with results from: net-bb-doc net-bb-zmi net-daf net-parkerm net-theo
-
-# good enough
-ADDR_NUMS_AT_BIGSITE
+# with results from: bb-doc bb-jm bb-zmi cthielen daf parkerm theo wtogami zmi
 
 # good enough
 ADVANCE_FEE_3
@@ -14,6 +11,9 @@
 AWL
 
 # good enough
+AXB_FAKETZ
+
+# good enough
 BAD_ENC_HEADER
 
 # good enough
@@ -74,9 +74,6 @@
 DATE_SPAMWARE_Y2K
 
 # good enough
-DAV_NON_HOTMAIL
-
-# good enough
 DEAR_WINNER
 
 # tflags net
@@ -119,6 +116,9 @@
 DRUGS_DIET
 
 # good enough
+DRUGS_ERECTILE
+
+# good enough
 DRUGS_ERECTILE_OBFU
 
 # good enough
@@ -131,18 +131,15 @@
 DRUGS_SLEEP_EREC
 
 # good enough
-DRUG_ED_GENERIC
+DRUG_DOSAGE
 
 # good enough
-DRUG_ED_ONLINE
+DRUG_ED_GENERIC
 
 # good enough
 DRUG_ED_SILD
 
 # good enough
-EARN_PER_WEEK
-
-# good enough
 EMPTY_MESSAGE
 
 # good enough
@@ -158,12 +155,6 @@
 EXCUSE_24
 
 # good enough
-FAKE_HELO_EMAIL_COM
-
-# good enough
-FAKE_HELO_EXCITE
-
-# good enough
 FAKE_HELO_LYCOS
 
 # good enough
@@ -221,10 +212,10 @@
 FH_RCVD_WITHSMTPFOR
 
 # good enough
-FORGED_AOL_TAGS
+FM_CLAIM_IPOD
 
 # good enough
-FORGED_EUDORAMAIL_RCVD
+FORGED_AOL_TAGS
 
 # good enough
 FORGED_HOTMAIL_RCVD
@@ -302,28 +293,28 @@
 FROM_BLANK_NAME
 
 # good enough
-FROM_DOMAIN_NOVOWEL
-
-# good enough
 FROM_ENDS_IN_NUMS
 
 # good enough
 FROM_HAS_MIXED_NUMS
 
 # good enough
-FROM_HAS_ULINE_NUMS
-
-# good enough
 FROM_ILLEGAL_CHARS
 
 # good enough
 FROM_LOCAL_NOVOWEL
 
 # good enough
-FROM_NO_LOWER
+FROM_NO_USER
 
 # good enough
-FROM_NO_USER
+FROM_OFFERS
+
+# good enough
+FR_WWW_DOMAIN_23SUBDIR
+
+# good enough
+FS_START_DOYOU2
 
 # good enough
 FUZZY_MERIDIA
@@ -332,11 +323,20 @@
 FUZZY_SPRM
 
 # good enough
+FUZZY_STOCK
+
+# good enough
 FU_HOODIA
 
 # good enough
 GAPPY_SUBJECT
 
+# good enough
+GEO_QUERY_STRING
+
+# good enough
+GMD_FAKETZ
+
 # tflags net
 HABEAS_ACCREDITED_COI
 
@@ -371,6 +371,9 @@
 HASHCASH_HIGH
 
 # good enough
+HEADER_COUNT_CTYPE
+
+# good enough
 HEADER_SPAM
 
 # good enough
@@ -410,14 +413,17 @@
 HS_GETMEOFF
 
 # good enough
-HS_MEETUP_FOR_SEX
+HS_INDEX_PARAM
 
 # good enough
-HS_PHARMA_1
+HS_MEETUP_FOR_SEX
 
 # good enough
 HS_SUBJ_ONLINE_PHARMACEUTICAL
 
+# good enough
+HS_SYNDICATE_P2
+
 # tflags userconf
 HTML_CHARSET_FARAWAY
 
@@ -437,9 +443,6 @@
 HTTP_EXCESSIVE_ESCAPES
 
 # good enough
-INFO_TLD
-
-# good enough
 INTERRUPTUS
 
 # good enough
@@ -488,6 +491,9 @@
 KAM_STOCKTIP24
 
 # good enough
+KAM_STOCKTIP3
+
+# good enough
 KAM_STOCKTIP6
 
 # good enough
@@ -509,19 +515,19 @@
 MID_DEGREES
 
 # good enough
+MID_MJW_STOX
+
+# good enough
 MILLION_USD
 
 # good enough
 MIME_BAD_ISO_CHARSET
 
 # good enough
-MIME_BASE64_BLANKS
+MIME_BASE64_TEXT
 
 # good enough
-MIME_BASE64_NO_NAME
-
-# good enough
-MIME_BASE64_TEXT
+MIME_BOUND_ALLHEX_17
 
 # good enough
 MIME_BOUND_DD_DIGITS
@@ -545,13 +551,10 @@
 MISSING_MIMEOLE
 
 # good enough
-MISSING_MIME_HB_SEP
-
-# good enough
-MISSING_SUBJECT
+MORE_SEX
 
 # good enough
-ML_MARKETING
+MPART_ALT_DIFF
 
 # good enough
 MSGID_DOLLARS_RANDOM
@@ -577,9 +580,6 @@
 # good enough
 MSGID_YAHOO_CAPS
 
-# good enough
-NOT_ADVISOR
-
 # tflags net
 NO_DNS_FOR_FROM
 
@@ -602,27 +602,18 @@
 NUMERIC_HTTP_ADDR
 
 # good enough
-OBSCURED_EMAIL
-
-# good enough
-ONE_TIME
+ONLINE_PHARMACY
 
 # good enough
 PERCENT_RANDOM
 
 # good enough
-PLING_PLING
-
-# good enough
 PLING_QUERY
 
 # good enough
 PORN_15
 
 # good enough
-PORN_URL_SEX
-
-# good enough
 PREST_NON_ACCREDITED
 
 # good enough
@@ -794,9 +785,6 @@
 REPLICA_WATCH
 
 # good enough
-REPLY_TO_EMPTY
-
-# good enough
 REPTO_OVERQUOTE_THEBAT
 
 # good enough
@@ -848,12 +836,6 @@
 SPF_SOFTFAIL
 
 # good enough
-SPOOF_COM2COM
-
-# good enough
-SPOOF_OURI
-
-# good enough
 STOCK_ALERT
 
 # good enough
@@ -881,16 +863,10 @@
 SUBJECT_NEEDS_ENCODING
 
 # good enough
-SUBJECT_NOVOWEL
-
-# good enough
 SUBJECT_SEXUAL
 
 # good enough
-SUBJ_CONSONANTS
-
-# good enough
-SUBJ_HAS_SPACES
+SUBJ_AS_SEEN
 
 # good enough
 SUBJ_ILLEGAL_CHARS
@@ -899,13 +875,13 @@
 SUBJ_RE_NUM
 
 # good enough
-SUSPICIOUS_RECIPS
+SUBJ_YOUR_FAMILY
 
 # good enough
-TO_CC_NONE
+SUB_HELLO
 
 # good enough
-TO_EMPTY
+SUSPICIOUS_RECIPS
 
 # good enough
 TO_MALFORMED
@@ -929,6 +905,9 @@
 TVD_BODY_END_STAR
 
 # good enough
+TVD_DEAD_JOB
+
+# good enough
 TVD_DEAR_HOMEOWNER
 
 # good enough
@@ -1022,6 +1001,9 @@
 TVD_RATWARE_CB_2
 
 # good enough
+TVD_RATWARE_MSGID_01
+
+# good enough
 TVD_RATWARE_MSGID_02
 
 # good enough
@@ -1031,6 +1013,9 @@
 TVD_SINGLE_SPAN_DIV
 
 # good enough
+TVD_SPACED_SUBJECT_WORD3
+
+# good enough
 TVD_SPACED_WORDS
 
 # good enough
@@ -1055,10 +1040,13 @@
 TVD_UA_FOSTERING
 
 # good enough
-TVD_VIS_HIDDEN
+TVD_UNDER_VALUED
 
-# tflags net
-DNS_FROM_DOB
+# good enough
+TVD_VISIT_PHARMA
+
+# good enough
+TVD_VIS_HIDDEN
 
 # good enough
 FORGED_IMS_HTML
@@ -1073,49 +1061,19 @@
 FORGED_OUTLOOK_TAGS
 
 # good enough
-HS_SYNDICATE_P2
-
-# tflags net
-RCVD_IN_DOB
-
-# tflags net
-RCVD_IN_DSBL
-
-# tflags net
-RCVD_IN_NJABL_DUL
-
-# tflags net
-RCVD_IN_SORBS_DUL
-
-# tflags net
-RCVD_IN_WHOIS_INVALID
-
-# tflags net
-RCVD_IN_XBL
-
-# good enough
-TVD_FW_GRAPHIC_ID1
-
-# good enough
-TVD_PH_1
+HS_PHARMA_1
 
 # good enough
-TVD_PH_BODY_ACCOUNTS_PRE
+RATWARE_MS_HASH
 
 # good enough
-TVD_PH_FR5
+RATWARE_OUTLOOK_NONAME
 
 # good enough
-TVD_RATWARE_MSGID_01
-
-# tflags net
-URIBL_RHS_DOB
+TVD_FW_GRAPHIC_NAME_LONG
 
 # good enough
-ZMIde_REPLICA1
-
-# good enough
-ZMIde_REPLICASURE
+TVD_FW_GRAPHIC_NAME_MID
 
 # good enough
 UNCLAIMED_MONEY
@@ -1123,12 +1081,6 @@
 # good enough
 UNCLOSED_BRACKET
 
-# good enough
-UNDISC_RECIPS
-
-# good enough
-UNIQUE_WORDS
-
 # tflags userconf
 UNPARSEABLE_RELAY
 
@@ -1175,9 +1127,6 @@
 URIBL_WS_SURBL
 
 # good enough
-URI_4YOU
-
-# good enough
 URI_L_PHP
 
 # good enough
@@ -1244,19 +1193,19 @@
 X_PRIORITY_CC
 
 # good enough
-X_PRIORITY_HIGH
+ZMIde_EBAYJOBSURI
 
 # good enough
-ZMIde_EBAYJOBSURI
+ZMIde_REPLICA1
 
 # good enough
 ZMIde_REPLICA2
 
 # good enough
-ZMIde_SEXUALEXPL1
+ZMIde_REPLICASURE
 
-# tflags net
-__RCVD_IN_DOB
+# good enough
+ZMIde_SEXUALEXPL1
 
 # tflags net
 __RCVD_IN_IADB

Modified: spamassassin/branches/bug-3109-shortcircuiting/rules/v320.pre
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/rules/v320.pre?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/rules/v320.pre (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/rules/v320.pre Fri Jul 21 12:06:09 2006
@@ -18,3 +18,8 @@
 # URIDetail - test URIs using detailed URI information
 #
 loadplugin Mail::SpamAssassin::Plugin::URIDetail
+
+# Shortcircuit - stop evaluation early if high-accuracy rules fire
+# 
+# loadplugin Mail::SpamAssassin::Plugin::Shortcircuit
+

Modified: spamassassin/branches/bug-3109-shortcircuiting/sa-learn.raw
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/sa-learn.raw?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/sa-learn.raw (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/sa-learn.raw Fri Jul 21 12:06:09 2006
@@ -563,7 +563,7 @@
  --mbx                             Input sources are in mbx format
  --showdots                        Show progress using dots
  --progress                        Show progress using progress bar
- --no-sync                         Skip syncronizing the database and journal
+ --no-sync                         Skip synchronizing the database and journal
                                    after learning
  -L, --local                       Operate locally, no network accesses
  --import                          Migrate data from older version/non DB_File
@@ -665,7 +665,7 @@
 or not.  Note: This doesn't mean any tokens will actually expire.
 Please see the EXPIRATION section below.
 
-Note: C<--force-expire> also causes the journal data to be syncronized
+Note: C<--force-expire> also causes the journal data to be synchronized
 into the Bayes databases.
 
 =item B<--forget>
@@ -754,7 +754,7 @@
 
 =item B<--no-sync>
 
-Skip the slow syncronization step which normally takes place after
+Skip the slow synchronization step which normally takes place after
 changing database entries.  If you plan to learn from many folders in
 a batch, or to learn many individual messages one-by-one, it is faster
 to use this switch and run C<sa-learn --sync> once all the folders have
@@ -849,7 +849,7 @@
 readable, even if statistics make me break out in hives.
 
 The short semi-inaccurate version: Given training, a spam heuristics engine
-can take the most "spammy" and "hammy" words and apply probablistic
+can take the most "spammy" and "hammy" words and apply probabilistic
 analysis. Furthermore, once given a basis for the analysis, the engine can
 continue to learn iteratively by applying both the non-Bayesian and Bayesian
 rulesets together to create evolving "intelligence".
@@ -885,7 +885,7 @@
 it is fairly easy to understand why.
 
 With Bayesian analysis, it's all probabilities - "because the past says
-it is likely as this falls into a probablistic distribution common to past
+it is likely as this falls into a probabilistic distribution common to past
 spam in your systems". Tell that to your users!  Tell that to the client
 when he asks "what can I do to change this". (By the way, the answer in
 this case is "use whitelisting".)
@@ -1070,12 +1070,12 @@
 it uses in its calculations.  To avoid the contention of having each
 SpamAssassin process attempting to gain write access to the Bayes DB,
 the token timestamps are written to a 'journal' file which will later
-(either automatically or via C<sa-learn --sync>) be used to syncronize
+(either automatically or via C<sa-learn --sync>) be used to synchronize
 the Bayes DB.
 
 Also, through the use of C<bayes_learn_to_journal>, or when using the
 C<--no-sync> option with sa-learn, the actual learning data will take
-be placed into the journal for later syncronization.  This is typically
+be placed into the journal for later synchronization.  This is typically
 useful for high-traffic sites to avoid the same contention as stated
 above.
 
@@ -1213,7 +1213,7 @@
 =over 4
 
 =item C<bayes_auto_expire> is used to specify whether or not SpamAssassin
-ought to opportunistically attempt to expire the Bayes databaase.
+ought to opportunistically attempt to expire the Bayes database.
 The default is 1 (yes).
 
 =item C<bayes_expiry_max_db_size> specifies both the auto-expire token

Modified: spamassassin/branches/bug-3109-shortcircuiting/sa-update.raw
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/sa-update.raw?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/sa-update.raw (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/sa-update.raw Fri Jul 21 12:06:09 2006
@@ -88,8 +88,8 @@
 eval { use Net::DNS; };
 eval { use LWP::UserAgent; };
 eval { use HTTP::Date qw(time2str); };
-eval { use Archive::Tar; };
-eval { use IO::Zlib; };
+eval { use Archive::Tar 1.23; };
+eval { use IO::Zlib 1.04; };
 
 # These should already be available
 use Mail::SpamAssassin;
@@ -401,11 +401,7 @@
     next;
   }
 
-  # ensure dirs exist, upfront
-  unless (-d $UPDDir) {
-    dbg("channel: creating $UPDDir");
-    mkpath([$UPDDir], 0, 0777) or die "fatal: can't create $UPDDir: $!\n";
-  }
+  # ensure tmp dir exists, upfront
   unless (-d $UPDTmp) {
     dbg("channel: creating $UPDTmp");
     mkpath([$UPDTmp], 0, 0777) or die "fatal: can't create $UPDTmp: $!\n";
@@ -751,22 +747,62 @@
   }
   closedir(DIR);
   if (!close(CF)) {
-    warn "write to $CFFTmp failed! attempting to continue";
-    channel_failed("write to $CFFTmp failed");
-    next;
+    die "write to $CFFTmp failed! $!";  # write failed = fatal
   }
 
+  # create a test file, in an attempt to mitigate dangers of incomplete
+  # upgrades.  If we fail to move this file the same way we expect to with the
+  # "real" upgrade files, there's no point in continuing.  (bug 4941)
+  my $testfile = "$UPDTmp/.rename_test.tmp";
+  my $testtofile = "$UPDDir/.rename_test.tmp";
+  open(TST, ">".$testfile) or die "write to $testfile failed! $!";
+  print TST time;
+  close TST or die "close of $testfile failed! $!";
+
   dbg("channel: applying changes to $UPDDir...");
 
-  # too late to stop now!   At this stage, if there are errors,
-  # we have to attempt to carry on regardless, since we've already
-  # blown away the old ruleset.
-
-  # clean out the "real" update dir, and copy from tmp areas
-  if (!clean_update_dir($UPDDir)) {
-    warn("channel: attempt to rm contents failed, attempting to continue anyway");
+  if (-d $UPDDir) {
+    if (!rename($testfile, $testtofile)) {
+      warn "rename $testfile $testtofile failed: $!";
+      unlink ($testfile, $testtofile);
+      die "rename test failed (existing dir), aborting upgrade"
+    }
+
+    unlink $testtofile;
+
+    # ok that worked, too late to stop now!   At this stage, if there are
+    # errors, we have to attempt to carry on regardless, since we've already
+    # blown away the old ruleset.
+    dbg("channel: point of no return for existing $UPDDir");
+
+    # clean out the "real" update dir
+    if (!clean_update_dir($UPDDir)) {
+      warn("channel: attempt to rm contents failed, attempting to continue anyway");
+    }
+
+  } else {
+    # create the dir, if it doesn't exist
+    dbg("channel: creating $UPDDir");
+    if (!mkpath([$UPDDir], 0, 0777)) {
+      rmdir $UPDDir;        # be sure it can't be used (bug 4941)
+      die "fatal: can't create $UPDDir: $!\n";
+    }
+
+    if (!rename($testfile, $testtofile)) {
+      warn "rename $testfile $testtofile failed: $!";
+      unlink ($testfile, $testtofile);
+      rmdir $UPDDir;        # be sure it can't be used (bug 4941)
+      die "rename test failed (new dir), aborting upgrade"
+    }
+
+    unlink $testtofile;
+
+    # ok, that test worked.  it's now likely that the .cf's will
+    # similarly be ok to rename, too.   Too late to stop from here on
+    dbg("channel: point of no return for new $UPDDir");
   }
 
+  # move in the files
   foreach my $file (@files) {
     rename("$UPDTmp/$file", "$UPDDir/$file")
         or warn "rename $UPDTmp/$file $UPDDir/$file failed: $!";
@@ -1005,8 +1041,14 @@
   my $gpghome = '';
   if ($opt{'gpghomedir'}) {
     $gpghome = $opt{'gpghomedir'};
-    $gpghome =~ s/\'/\\\'/gs;
-    $gpghome = "--homedir='$gpghome'";
+    if (Mail::SpamAssassin::Util::am_running_on_windows()) {
+      # windows is single-quote-phobic; bug 4958 cmt 7
+      $gpghome =~ s/\"/\\\"/gs;
+      $gpghome = "--homedir=\"$gpghome\"";
+    } else {
+      $gpghome =~ s/\'/\\\'/gs;
+      $gpghome = "--homedir='$gpghome'";
+    }
   }
   return $gpghome;
 }
@@ -1061,6 +1103,7 @@
 
 sub clean_update_dir {
   my $dir = shift;
+
   unless (opendir(DIR, $dir)) {
     warn "error: can't readdir $dir: $!\n";
     dbg("channel: attempt to readdir failed, channel failed");
@@ -1074,6 +1117,7 @@
     $file = $1;
     if (!unlink "$dir/$file") {
       warn "error: can't remove file $dir/$file: $!\n";
+      closedir(DIR);
       return 0;
     }
   }

Modified: spamassassin/branches/bug-3109-shortcircuiting/spamc/libspamc.c
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/spamc/libspamc.c?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/spamc/libspamc.c (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/spamc/libspamc.c Fri Jul 21 12:06:09 2006
@@ -608,20 +608,20 @@
     }
 
     /* Find the end-of-DATA line */
-    /* if bad format with no end ".\n" will truncate the last two characters of the buffer */
     prev = '\n';
-    for (i = j = 0; (i+2) < (unsigned int) m->msg_len; i++) { /* (i+2) prevents out of bound reference msg[i+2] */
+    for (i = j = 0; i < (unsigned int) m->msg_len; i++) {
 	if (prev == '\n' && m->msg[i] == '.') {
 	    /* Dot at the beginning of a line */
-	    if ((m->msg[i + 1] == '\r' && m->msg[i + 2] == '\n')
-		|| m->msg[i + 1] == '\n') {
+            if (((i+1) == m->msg_len)
+                || ((i+1) < m->msg_len && m->msg[i + 1] == '\n')
+                || ((i+2) < m->msg_len && m->msg[i + 1] == '\r' && m->msg[i + 2] == '\n')) {
 		/* Lone dot! That's all, folks */
 		m->post = m->msg + i;
 		m->post_len = m->msg_len - i;
 		m->msg_len = j;
 		break;
 	    }
-	    else if (m->msg[i + 1] == '.') {
+	    else if ((i+1) < m->msg_len && m->msg[i + 1] == '.') {
 		/* Escaping dot, eliminate. */
 		prev = '.';
 		continue;
@@ -631,6 +631,9 @@
 	m->msg[j++] = m->msg[i];
     }
 
+    /* if bad format with no end "\n.\n", error out */
+    if (m->post == NULL)
+	return EX_DATAERR;
     m->type = MESSAGE_BSMTP;
     m->out = m->msg;
     m->out_len = m->msg_len;

Modified: spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.c
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.c?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.c (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.c Fri Jul 21 12:06:09 2006
@@ -153,7 +153,7 @@
         "                      Timeout in seconds for communications to\n"
         "                      spamd. [default: 600]\n");
     usg("  -s, --max-size size Specify maximum message size, in bytes.\n"
-        "                      [default: 250k]\n");
+        "                      [default: 500k]\n");
     usg("  -u, --username username\n"
         "                      User for spamd to process this message under.\n"
         "                      [default: current user]\n");
@@ -673,7 +673,7 @@
 #endif
 
     /* set some defaults */
-    max_size = 250 * 1024;
+    max_size = 500 * 1024;
     username = NULL;
  
     combo_argc = 1;
@@ -879,6 +879,17 @@
     }
     else if (flags & (SPAMC_LEARN|SPAMC_PING) ) {
         message_cleanup(&m);
+    }
+    else if (flags & SPAMC_SYMBOLS) {
+	/* bug 4991: -y should only output a blank line on connection failure */
+	full_write(out_fd, 1, "\n", 1);
+        message_cleanup(&m);
+        if (use_exit_code) {
+            ret = result;
+        }
+	else if (flags & SPAMC_SAFE_FALLBACK) {
+	    ret = EX_OK;
+	}
     }
     else {
 	message_dump(STDIN_FILENO, out_fd, &m);

Modified: spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.pod
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.pod?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.pod (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.pod Fri Jul 21 12:06:09 2006
@@ -148,11 +148,11 @@
 =item B<-s> I<max_size>, B<--max-size>=I<max_size>
 
 Set the maximum message size which will be sent to spamd -- any bigger than
-this threshold and the message will be returned unprocessed (default: 250 KB).
+this threshold and the message will be returned unprocessed (default: 500 KB).
 If spamc gets handed a message bigger than this, it won't be passed to spamd.
 
 The size is specified in bytes, as a positive integer greater than 0.
-For example, B<-s 250000>.
+For example, B<-s 500000>.
 
 =item B<-S>, B<--ssl>
 

Modified: spamassassin/branches/bug-3109-shortcircuiting/spamd/spamd.raw
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/spamd/spamd.raw?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/spamd/spamd.raw (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/spamd/spamd.raw Fri Jul 21 12:06:09 2006
@@ -1393,7 +1393,7 @@
     push(@extra, "bayes=".sprintf("%06f", $status->{bayes_score}));
   }
   push(@extra, "autolearn=".$status->get_autolearn_status());
-  push(@extra, "shortcircuit=".$status->get_tag("SCTYPE"));
+  push(@extra, $status->get_spamd_result_log_items());
 
   my $yorn = $status->is_spam() ? 'Y' : '.';
   my $score = $status->get_score();

Modified: spamassassin/branches/bug-3109-shortcircuiting/t/gtube.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/t/gtube.t?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/t/gtube.t (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/t/gtube.t Fri Jul 21 12:06:09 2006
@@ -22,6 +22,8 @@
         endif
 ");
 
+$ENV{'LC_ALL'} = 'C';             # a cheat, but we match the description
+
 ok (sarun ("-L -t < data/spam/gtube.eml", \&patterns_run_cb));
 ok_all_patterns();
 

Modified: spamassassin/branches/bug-3109-shortcircuiting/t/rcvd_parser.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/t/rcvd_parser.t?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/t/rcvd_parser.t (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/t/rcvd_parser.t Fri Jul 21 12:06:09 2006
@@ -18,7 +18,7 @@
 
 use lib '.'; use lib 't';
 use SATest; sa_t_init("rcvd_parser");
-use Test; BEGIN { plan tests => 49 };
+use Test; BEGIN { plan tests => 50 };
 
 
 use strict;
@@ -619,6 +619,20 @@
 } => q{
 
 [ ip=70.20.57.51 rdns= helo=KRYPTIK by=host.name ident= envfrom= id=M2006040415284308595 auth= ]
+
+},
+q{
+
+Received: from bar.example.org (bar.example.org [127.0.0.1])
+	(using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits))
+	(Client did not present a certificate)
+	(Authenticated sender: sender.example.net)
+	by foo.example.net (Postfix) with ESMTP id 44A8959ED6B0
+	for <re...@example.com>; Fri, 30 Jun 2006 08:02:00 +0100 (BST) 
+
+} => q{
+
+[ ip=127.0.0.1 rdns=bar.example.org helo=bar.example.org by=foo.example.net ident= envfrom= id=44A8959ED6B0 auth=Postfix ]
 
 },
 );

Modified: spamassassin/branches/bug-3109-shortcircuiting/t/shortcircuit.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/t/shortcircuit.t?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/t/shortcircuit.t (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/t/shortcircuit.t Fri Jul 21 12:06:09 2006
@@ -12,6 +12,10 @@
 
 tstlocalrules ('
 
+  loadplugin Mail::SpamAssassin::Plugin::Shortcircuit
+
+  add_header all Status "_YESNO_, score=_SCORE_ required=_REQD_ tests=_TESTS_ shortcircuit=_SCTYPE_ autolearn=_AUTOLEARN_ version=_VERSION_"
+
   # hits spam/001
   body X_FOO            /Congratulations/
   header X_BAR          From =~ /sb55/