You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by du...@apache.org on 2009/03/02 23:49:51 UTC
svn commit: r749462 - in /spamassassin/trunk: ./ build/ lib/Mail/ lib/Mail/SpamAssassin/ lib/Mail/SpamAssassin/Conf/ lib/Mail/SpamAssassin/Plugin/ masses/ rules/ t/

Author: duncf
Date: Mon Mar  2 22:49:50 2009
New Revision: 749462

URL: http://svn.apache.org/viewvc?rev=749462&view=rev
Log:
Bug 6012: Improve reuse logic to better simulate rule hits. This moves
the reuse logic to a plugin and adds several plugin hooks.

Added:
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Reuse.pm
    spamassassin/trunk/t/reuse.t
Modified:
    spamassassin/trunk/MANIFEST
    spamassassin/trunk/build/mkrules
    spamassassin/trunk/build/parse-rules-for-masses
    spamassassin/trunk/lib/Mail/SpamAssassin.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin.pm
    spamassassin/trunk/masses/mass-check
    spamassassin/trunk/rules/20_dnsbl_tests.cf
    spamassassin/trunk/rules/20_net_tests.cf
    spamassassin/trunk/rules/25_dcc.cf
    spamassassin/trunk/rules/25_pyzor.cf
    spamassassin/trunk/rules/25_razor2.cf
    spamassassin/trunk/rules/25_spf.cf
    spamassassin/trunk/rules/25_uribl.cf

Modified: spamassassin/trunk/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/trunk/MANIFEST?rev=749462&r1=749461&r2=749462&view=diff
==============================================================================
--- spamassassin/trunk/MANIFEST (original)
+++ spamassassin/trunk/MANIFEST Mon Mar  2 22:49:50 2009
@@ -95,6 +95,7 @@
 lib/Mail/SpamAssassin/Plugin/RelayCountry.pm
 lib/Mail/SpamAssassin/Plugin/RelayEval.pm
 lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm
+lib/Mail/SpamAssassin/Plugin/Reuse.pm
 lib/Mail/SpamAssassin/Plugin/Rule2XSBody.pm
 lib/Mail/SpamAssassin/Plugin/SPF.pm
 lib/Mail/SpamAssassin/Plugin/Shortcircuit.pm
@@ -371,6 +372,7 @@
 t/report_safe.t
 t/reportheader.t
 t/reportheader_8bit.t
+t/reuse.t
 t/rule_multiple.t
 t/rule_names.t
 t/rule_tests.t

Modified: spamassassin/trunk/build/mkrules
URL: http://svn.apache.org/viewvc/spamassassin/trunk/build/mkrules?rev=749462&r1=749461&r2=749462&view=diff
==============================================================================
--- spamassassin/trunk/build/mkrules (original)
+++ spamassassin/trunk/build/mkrules Mon Mar  2 22:49:50 2009
@@ -412,9 +412,6 @@
   while (<IN>) {
     my $orig = $_;
 
-    s/^#reuse/reuse/;   # TODO - dirty hack.  we need to fix this to just be
-    # a keyword which the engine ignores, this is absurd! 
-
     s/#.*$//g; s/^\s+//; s/\s+$//;
 
     # drop comments/blank lines from output
@@ -673,9 +670,6 @@
   while (<IN>) {
     my $orig = $_;
 
-    s/^#reuse/reuse/;   # TODO - dirty hack.  we need to fix this to just be
-    # a keyword which the engine ignores, this is absurd! 
-
     s/#.*$//g; s/^\s+//; s/\s+$//;
 
     # drop comments/blank lines from output

Modified: spamassassin/trunk/build/parse-rules-for-masses
URL: http://svn.apache.org/viewvc/spamassassin/trunk/build/parse-rules-for-masses?rev=749462&r1=749461&r2=749462&view=diff
==============================================================================
--- spamassassin/trunk/build/parse-rules-for-masses (original)
+++ spamassassin/trunk/build/parse-rules-for-masses Mon Mar  2 22:49:50 2009
@@ -56,7 +56,6 @@
 $scoreset = 0 if ( !defined $scoreset );
 
 my $rules = { };
-my $reuse = { };
 $rules->{_scoreset} = $scoreset;
 readrules(@rulesdirs);
 
@@ -94,9 +93,6 @@
           $scores_mutable = 1;
         }
 
-	# oh, this is a dirty dirty hack, but we don't need this at runtime
-	s/^#reuse/reuse/;
-
         s/#.*$//g; s/^\s+//; s/\s+$//; next if /^$/;
 
 	# TODO: this could be overwriting stuff
@@ -105,11 +101,16 @@
           $lang = $1;
         }
 
-        if (/^(header|rawbody|body|full|uri|meta|mimeheader)\s+(\S+)\s+(.*)$/) {
+        if (/^(header|rawbody|body|full|uri|meta|mimeheader|reuse)\s+(\S+)\s+(.*)$/) {
           my $type = $1;
           my $name = $2;
           my $val = $3;
 
+          if (exists $rules->{$name}->{type} && $type eq 'reuse') {
+            # "reuse" should be skipped if we already have a rule
+            next;
+          }
+
           $rules->{$name} ||= { };
           $rules->{$name}->{type} = $type;
           $rules->{$name}->{lang} = $lang;
@@ -143,16 +144,7 @@
           $rules->{$name}->{score} = $score;
           $rules->{$name}->{mutable} = $scores_mutable;
 
-        } elsif (/^reuse\s+(.*)$/) {
-	  my ($new, @old) = split(' ', $1);
-	  push @old, $new;
-	  for my $old (@old) {
-	    $reuse->{$old} ||= { };
-	    $reuse->{$old}->{reuse} = $new;
-	  }
-	  $reuse->{$new} ||= { };
-	  $reuse->{$new}->{skip} = 1;
-	}
+        }
       }
       close IN;
     }
@@ -218,7 +210,7 @@
   print OUT "# dumped at ".`date`."\n";
 
   $Data::Dumper::Purity = 1;
-  print OUT Data::Dumper->Dump ([$rules, $scores, $reuse], ['*rules', '*scores', '*reuse']);
+  print OUT Data::Dumper->Dump ([$rules, $scores], ['*rules', '*scores']);
 
   print OUT "1;";
   close OUT;

Modified: spamassassin/trunk/lib/Mail/SpamAssassin.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin.pm?rev=749462&r1=749461&r2=749462&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin.pm Mon Mar  2 22:49:50 2009
@@ -220,6 +220,11 @@
 override the settings for C<rules_filename>, C<site_rules_filename>,
 and C<userprefs_filename>.
 
+=item pre_config_text
+
+Similar to C<config_text>, this text is placed before config_text, so that it
+is parsed ahead of the config files (for example, to load plugins early).
+
 =item post_config_text
 
 Similar to C<config_text>, this text is placed after config_text to allow an
@@ -1650,7 +1655,12 @@
     }
   }
 
-  $self->{config_text} .= $self->{post_config_text} if ($self->{post_config_text});
+  if ($self->{pre_config_text}) {
+    $self->{config_text} = $self->{pre_config_text} . $self->{config_text};
+  }
+  if ($self->{post_config_text}) {
+    $self->{config_text} .= $self->{post_config_text};
+  }
 
   if ($self->{config_text} !~ /\S/) {
     my $m = "config: no configuration text or files found! do you need to run 'sa-update'?\n";

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm?rev=749462&r1=749461&r2=749462&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm Mon Mar  2 22:49:50 2009
@@ -102,7 +102,7 @@
 $TYPE_HEAD_TESTS $TYPE_HEAD_EVALS
 $TYPE_BODY_TESTS $TYPE_BODY_EVALS $TYPE_FULL_TESTS $TYPE_FULL_EVALS
 $TYPE_RAWBODY_TESTS $TYPE_RAWBODY_EVALS $TYPE_URI_TESTS $TYPE_URI_EVALS
-$TYPE_META_TESTS $TYPE_RBL_EVALS
+$TYPE_META_TESTS $TYPE_RBL_EVALS $TYPE_EMPTY_TESTS
 };
 
 @ISA = qw();
@@ -121,6 +121,7 @@
 $TYPE_URI_EVALS     = 0x0011;
 $TYPE_META_TESTS    = 0x0012;
 $TYPE_RBL_EVALS     = 0x0013;
+$TYPE_EMPTY_TESTS   = 0x0014;
 
 my @rule_types = ("body_tests", "uri_tests", "uri_evals",
                   "head_tests", "head_evals", "body_evals", "full_tests",
@@ -2325,6 +2326,38 @@
     }
   });
 
+=item reuse SYMBOLIC_TEST_NAME [ OLD_SYMBOLIC_TEST_NAME_1 ... ]
+
+Defines the name of a test that should be "reused" during the scoring
+process. If a message has an X-Spam-Status header that shows a hit for
+this rule or any of the old rule names given, a hit will be added for
+this rule when B<mass-check --reuse> is used. Examples:
+
+C<reuse SPF_PASS>
+
+C<reuse MY_NET_RULE_V2 MY_NET_RULE_V1>
+
+The actual logic for reuse tests is done by
+B<Mail::SpamAssassin::Plugin::Reuse>.
+
+=cut
+
+  push (@cmds, {
+    setting => 'reuse',
+    is_priv => 1,
+    code => sub {
+      my ($self, $key, $value, $line) = @_;
+      if ($value !~ /\s*(\w+)(?:\s+(?:\w+(?:\s+\w+)*))?\s*$/) {
+        return $Mail::SpamAssassin::Conf::INVALID_VALUE;
+      }
+      my $rule_name = $1;
+      # don't overwrite tests, just define them so scores, priorities work
+      if (!exists $self->{tests}->{$rule_name}) {
+        $self->{parser}->add_test($rule_name, undef, $TYPE_EMPTY_TESTS);
+      }
+    }
+  });
+
 =item tflags SYMBOLIC_TEST_NAME [ {net|nice|learn|userconf|noautolearn|multiple} ]
 
 Used to set flags on a test.  These flags are used in the

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm?rev=749462&r1=749461&r2=749462&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm Mon Mar  2 22:49:50 2009
@@ -699,6 +699,11 @@
 
   # note: this function is called once for system-wide configuration
   # with $isuserconf set to 0, then again for user conf with $isuserconf set to 1.
+  if (!$isuserconf) {
+    $conf->{main}->call_plugins("finish_parsing_start", { conf => $conf });
+  } else {
+    $conf->{main}->call_plugins("user_conf_parsing_start", { conf => $conf });
+  }
 
   $self->trace_meta_dependencies();
   $self->fix_priorities();
@@ -773,6 +778,8 @@
       elsif ($type == $Mail::SpamAssassin::Conf::TYPE_FULL_TESTS) {
         $conf->{full_tests}->{$priority}->{$name} = $text;
       }
+      elsif ($type == $Mail::SpamAssassin::Conf::TYPE_EMPTY_TESTS) {
+      }
       else {
         $self->lint_warn("unknown type $type for $name: $text", $name);
       }
@@ -782,7 +789,6 @@
   $self->lint_trusted_networks();
 
   if (!$isuserconf) {
-    # named this way just in case we ever want a "finish_parsing_start"
     $conf->{main}->call_plugins("finish_parsing_end", { conf => $conf });
   } else {
     $conf->{main}->call_plugins("user_conf_parsing_end", { conf => $conf });
@@ -880,7 +886,10 @@
   my %dups;
   while (my ($name, $text) = each %{$conf->{tests}}) {
     my $type = $conf->{test_types}->{$name};
-    next if ($type & 1); # skip eval tests
+
+    # skip eval and empty tests
+    next if ($type & 1) ||
+      ($type eq $Mail::SpamAssassin::Conf::TYPE_EMPTY_TESTS);
 
     my $tf = ($conf->{tflags}->{$name}||''); $tf =~ s/\s+/ /gs;
     # ensure similar, but differently-typed, rules are not marked as dups;

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=749462&r1=749461&r2=749462&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Mon Mar  2 22:49:50 2009
@@ -2177,6 +2177,12 @@
 sub got_hit {
   my ($self, $rule, $area, %params) = @_;
 
+  my $score = $params{score} || $self->{conf}->{scores}->{$rule};
+
+  # adding a hit does nothing if we don't have a score -- we probably
+  # shouldn't have run it in the first place
+  return unless $score;
+
   # ensure that rule values always result in an *increase* of
   # $self->{tests_already_hit}->{$rule}:
   my $value = $params{value}; if (!$value || $value <= 0) { $value = 1; }
@@ -2194,7 +2200,7 @@
   $params{ruletype} ||= 'unknown';
 
   $self->_handle_hit($rule,
-            $params{score} || $self->{conf}->{scores}->{$rule},
+            $score,
             $area,
             $params{ruletype},
             $self->{conf}->get_description_for_rule($rule) || $rule);

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin.pm?rev=749462&r1=749461&r2=749462&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin.pm Mon Mar  2 22:49:50 2009
@@ -198,6 +198,30 @@
 C<$options> hash reference above.   By storing it on C<conf>, this allows
 per-user and system-wide configuration precedence to be dealt with correctly.
 
+=item $plugin->finish_parsing_start ( { options ... } )
+
+Signals that the system-wide configuration has been completely read,
+but internal data structures are not yet created. It is possible to
+use this hook to dynamically change the configuration already read in
+or add new config options.
+
+C<options> is a reference to a hash containing these options:
+
+=over 4
+
+=item conf
+
+The C<Mail::SpamAssassin::Conf> object on which the configuration
+data should be stored.
+
+=back
+
+Note: there are no guarantees that the internal data structures of
+SpamAssassin will not change from release to release.  In particular to
+this plugin hook, if you modify the rules data structures in a
+third-party plugin, all bets are off until such time that an API is
+present for modifying that configuration data.
+
 =item $plugin->finish_parsing_end ( { options ... } )
 
 Signals that the system-wide configuration parsing has just finished, and
@@ -220,6 +244,32 @@
 third-party plugin, all bets are off until such time that an API is
 present for modifying that configuration data.
 
+=item $plugin->user_conf_parsing_start ( { options ... } )
+
+Signals that the per-user configuration has been completely read, but
+not converted to internal data structures. It is possible to use this
+hook to dynamically change the configuration already read in or add
+new config options.
+
+If C<allow_user_rules> is enabled in the configuration, it is possible
+that additional rules have been added since the C<finish_parsing_start>
+plugin hook was invoked.
+
+=over 4
+
+=item conf
+
+The C<Mail::SpamAssassin::Conf> object on which the configuration
+data should be stored.
+
+=back
+
+Note: there are no guarantees that the internal data structures of
+SpamAssassin will not change from release to release.  In particular to
+this plugin hook, if you modify the rules data structures in a
+third-party plugin, all bets are off until such time that an API is
+present for modifying that configuration data.
+
 =item $plugin->user_conf_parsing_end ( { options ... } )
 
 Signals that the per-user configuration parsing has just finished, and
@@ -238,6 +288,12 @@
 
 =back
 
+Note: there are no guarantees that the internal data structures of
+SpamAssassin will not change from release to release.  In particular to
+this plugin hook, if you modify the rules data structures in a
+third-party plugin, all bets are off until such time that an API is
+present for modifying that configuration data.
+
 =item $plugin->signal_user_changed ( { options ... } )
 
 Signals that the current user has changed for a new one.

Added: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Reuse.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Reuse.pm?rev=749462&view=auto
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Reuse.pm (added)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Reuse.pm Mon Mar  2 22:49:50 2009
@@ -0,0 +1,241 @@
+=head1 NAME
+
+Mail::SpamAssassin::Plugin::Reuse - For reusing old rule hits during a mass-check
+
+=head1 SYNOPSIS
+
+  loadplugin    Mail::SpamAssassin::Plugin::Reuse
+
+  ifplugin      Mail::SpamAssassin::Plugin::Reuse
+
+  reuse NETWORK_RULE [ NETWORK_RULE_OLD_NAME ]
+
+  endif
+
+=head1 DESCRIPTION
+
+The purpose of this plugin is to work in conjunction with B<mass-check
+--reuse> to map rules hit in input messages to rule hits in the
+mass-check output.
+
+=cut
+
+package Mail::SpamAssassin::Plugin::Reuse;
+
+use bytes;
+use strict;
+use warnings;
+
+use Mail::SpamAssassin::Conf;
+use Mail::SpamAssassin::Logger;
+
+use vars qw(@ISA);
+@ISA = qw(Mail::SpamAssassin::Plugin);
+
+# constructor
+sub new {
+  my $invocant = shift;
+  my $samain = shift;
+
+  # some boilerplate...
+  my $class = ref($invocant) || $invocant;
+  my $self = $class->SUPER::new($samain);
+  bless ($self, $class);
+
+  $self->set_config($samain->{conf});
+  # make sure we run last (or close to last) among the finish_parsing_start
+  # hooks, since we need all other rules to be defined
+  $self->register_method_priority("finish_parsing_start", 100);
+  return $self;
+}
+
+sub set_config {
+  my ($self, $conf) = @_;
+  my @cmds = ();
+
+  # reuse CURRENT_NAME ADDITIONAL_NAMES_IN_INPUT ...
+  # e.g.
+  # reuse NET_TEST_V1 NET_TEST_V0
+
+  push (@cmds, { setting => 'reuse',
+                 code => sub {
+                   my ($conf, $key, $value, $line) = @_;
+
+                   if ($value !~ /\s*(\w+)(?:\s+(\w+(?:\s+\w+)*))?\s*$/) {
+                     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
+                   }
+
+                   my $new_name = $1;
+		   my @old_names = ($new_name);
+		   if ($2) {
+		     push @old_names, split (' ', $2);
+		   }
+
+                   dbg("reuse: read rule, old: @old_names new: $new_name");
+
+                   foreach my $old (@old_names) {
+                     push @{$conf->{reuse_tests}->{$new_name}}, $old;
+                   }
+
+               }});
+
+
+  $conf->{parser}->register_commands(\@cmds);
+}
+
+sub finish_parsing_start {
+  my ($self, $opts) = @_;
+
+  my $conf = $opts->{conf};
+
+  dbg("reuse: finish_parsing_start called");
+
+  return 0 if (!exists $conf->{reuse_tests});
+
+  foreach my $rule_name (keys %{$conf->{reuse_tests}}) {
+
+    # If the rule does not exist, add a new EMPTY test, set default score
+    if (!exists $conf->{tests}->{$rule_name}) {
+      dbg("reuse: $rule_name does not exist, adding empty test");
+      $conf->{parser}->add_test($rule_name, undef, $Mail::SpamAssassin::Conf::TYPE_EMPTY_TESTS);
+    }
+    if (!exists $conf->{scores}->{$rule_name}) {
+      my $set_score = ($rule_name =~/^T_/) ? 0.01 : 1.0;
+      $set_score = -$set_score if ( ($conf->{tflags}->{$rule_name}||'') =~ /\bnice\b/ );
+      foreach my $ss (0..3) {
+        $conf->{scoreset}->[$ss]->{$rule_name} = $set_score;
+      }
+    }
+
+    # Figure out when to add any hits -- grab priority and "stage"
+    my $priority = $conf->{priority}->{$rule_name} || 0;
+    my $stage = $self->_get_stage_from_rule($opts->{conf}, $rule_name);
+    $conf->{reuse_tests_order}->{$rule_name} = [ $priority, $stage ];
+
+  }
+}
+
+sub check_start {
+  my ($self, $opts) = @_;
+
+  my $pms = $opts->{permsgstatus};
+
+  # Can we reuse?
+  my $msg = $pms->get_message();
+
+  unless (exists $msg->{metadata}->{reuse_tests_hit}) {
+    dbg("reuse: no old test hits passed in");
+    return 0;
+  }
+  my $old_hash = $msg->{metadata}->{reuse_tests_hit};
+
+  # now go through the rules and priorities and figure out which ones
+  # need to be disabled
+  foreach my $rule (keys %{$pms->{conf}->{reuse_tests}}) {
+
+    dbg("reuse: looking at rule $rule");
+    my ($priority, $stage) = @{$pms->{conf}->{reuse_tests_order}->{$rule}};
+
+    # score set could change after check_start but before we add hits,
+    # so we need to disable the rule in all sets
+    foreach my $ss (0..3) {
+      if (exists $pms->{conf}->{scoreset}->[$ss]->{$rule}) {
+	dbg("reuse: disabling rule $rule in score set $ss");
+	$pms->{reuse_old_scores}->{$rule}->[$ss] =
+	  $pms->{conf}->{scoreset}->[$ss]->{$rule};
+	$pms->{conf}->{scoreset}->[$ss]->{$rule} = 0;
+      }
+    }
+
+    # now, check for hits
+  OLD: foreach my $old_test (@{$pms->{conf}->{reuse_tests}->{$rule}}) {
+      dbg("reuse: looking for rule $old_test");
+      if ($old_hash->{$old_test}) {
+        push @{$pms->{reuse_hits_to_add}->{"$priority $stage"}}, $rule;
+        dbg("reuse: rule $rule hit, will add at priority $priority, stage " .
+	    "$stage");
+        last OLD;
+      }
+    }
+  }
+}
+
+sub check_end {
+  my ($self, $opts) = @_;
+
+  my $pms = $opts->{permsgstatus};
+
+  foreach my $disabled_rule (keys %{$pms->{reuse_old_scores}}) {
+      foreach my $ss (0..3) {
+	next unless exists $pms->{conf}->{scoreset}->[$ss]->{$disabled_rule};
+	$pms->{conf}->{scoreset}->[$ss]->{$disabled_rule} =
+	  $pms->{reuse_old_scores}->{$disabled_rule}->[$ss];
+      }
+  }
+
+  delete $pms->{reuse_old_scores};
+}
+
+sub start_rules {
+  my ($self, $opts) = @_;
+
+  return $self->_add_hits($opts->{permsgstatus}, $opts->{priority},
+			  $opts->{ruletype});
+}
+
+sub _add_hits {
+  my ($self, $pms, $priority, $stage) = @_;
+
+  return unless exists $pms->{reuse_hits_to_add}->{"$priority $stage"};
+  foreach my $rule (@{$pms->{reuse_hits_to_add}->{"$priority $stage"}}) {
+    # Add hit even if rule was originally disabled
+    my $ss = $pms->{conf}->get_score_set();
+    $pms->{conf}->{scores}->{$rule} =
+      $pms->{reuse_old_scores}->{$rule}->[$ss] || 0.001;
+
+    dbg("reuse: registering hit for $rule: score: " .
+	$pms->{conf}->{scores}->{$rule});
+    $pms->got_hit($rule);
+
+    $pms->{conf}->{scores}->{$rule} = 0;
+  }
+}
+
+my %type_to_stage = (
+		     $Mail::SpamAssassin::Conf::TYPE_HEAD_TESTS    => "head",
+		     $Mail::SpamAssassin::Conf::TYPE_HEAD_EVALS    => "eval",
+		     $Mail::SpamAssassin::Conf::TYPE_BODY_TESTS    => "body",
+		     $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS    => "eval",
+		     $Mail::SpamAssassin::Conf::TYPE_FULL_TESTS    => "full",
+		     $Mail::SpamAssassin::Conf::TYPE_FULL_EVALS    => "eval",
+		     $Mail::SpamAssassin::Conf::TYPE_RAWBODY_TESTS => "rawbody",
+		     $Mail::SpamAssassin::Conf::TYPE_RAWBODY_EVALS => "eval",
+		     $Mail::SpamAssassin::Conf::TYPE_URI_TESTS     => "uri",
+		     $Mail::SpamAssassin::Conf::TYPE_URI_EVALS     => "eval",
+		     $Mail::SpamAssassin::Conf::TYPE_META_TESTS    => "meta",
+		     $Mail::SpamAssassin::Conf::TYPE_RBL_EVALS     => "eval",
+		    );
+
+sub _get_stage_from_rule {
+  my  ($self, $conf, $rule) = @_;
+
+  my $type = $conf->{test_types}->{$rule};
+  if ($type && $type == $Mail::SpamAssassin::Conf::TYPE_EMPTY_TESTS) {
+    # this is a "fake" rule... see if the rule "text"/"definition" is
+    # the name of the "parent" rule
+    my $parent = $conf->{tests}->{$rule};
+    if ($parent) {
+      $type = $conf->{test_types}->{$parent};
+    }
+  }
+  if ($type && exists $type_to_stage{$type}) {
+    return $type_to_stage{$type};
+  }
+  else {
+    # Run before the meta rules run so that they can use these hits as
+    # inputs.
+    return "meta";
+  }
+}
+
+

Modified: spamassassin/trunk/masses/mass-check
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/mass-check?rev=749462&r1=749461&r2=749462&view=diff
==============================================================================
--- spamassassin/trunk/masses/mass-check (original)
+++ spamassassin/trunk/masses/mass-check Mon Mar  2 22:49:50 2009
@@ -44,6 +44,7 @@
                 (default = extract all SpamAssassin-encapsulated mails)
   --lint        check rules for syntax before running
   --cf='config line'  Additional line of configuration
+  --pre='config line' Additional line of ".pre" (prepended to configuration)
   --run_post_scan='command'  Run the named command after the 'scan' stage,
                 before starting the 'run' stage
 
@@ -158,8 +159,8 @@
             $opt_cs_ssl $opt_run_post_scan $opt_cs_verbose %client_caches
             %server_caches @cache_tmp_files %min_other_caches
             %unique_cache_completed $opt_cs_schedule_cache $opt_cs_cache
-            $opt_cs_cachedir $opt_cs_max_tries
-	    $tmpfd %rules %reuse %orig_conf %reuse_conf $reuse_rules_loaded_p);
+            $opt_cs_cachedir $opt_cs_max_tries $opt_pre
+	    $tmpfd);
 
 use FindBin;
 
@@ -200,8 +201,8 @@
 $opt_hamlog = "ham.log";
 $opt_spamlog = "spam.log";
 $opt_learn = 0;
-$reuse_rules_loaded_p = 0;
 $opt_cf = [];
+$opt_pre = [];
 
 my $rcvd_bytes = 0;
 my $sent_bytes = 0;
@@ -224,7 +225,8 @@
            "cs_schedule_cache", "cs_cache", "cs_cachedir=s", "cs_max_tries=i",
 	   "before=s" => \&deal_with_before_after,
 	   "after=s" => \&deal_with_before_after,
-           'cf=s' => \@{$opt_cf},
+	   "cf=s" => \@{$opt_cf},
+	   "pre=s" => \@{$opt_pre},
 	   "dir" => sub { $opt_format = "dir"; },
 	   "file" => sub { $opt_format = "file"; },
 	   "mbox" => sub { $opt_format = "mbox"; },
@@ -241,23 +243,6 @@
   die "IO::Socket::SSL required for --cs_ssl!\n";
 }
 
-# rules.pl is for the --reuse option, score set doesn't matter
-if ($opt_reuse) {
-  my $rules_path = "$FindBin::Bin/tmp/rules$$.pl";
-  # some people specify paths relatively, whereas this needs an absolute path,
-  # so "do the right thing"(tm).
-  my $abs_opt_c = File::Spec->rel2abs($opt_c);
-  system("cd $FindBin::Bin; perl ../build/parse-rules-for-masses -d $abs_opt_c -o $rules_path");
-
-  unless (-s $rules_path > 130) {
-    die '--reuse configuration error detected; rules.pl file size is too small'.
-        ' ('.(-s $rules_path)." bytes)\nrules.pl path: $rules_path\n";
-  }
-
-  require $rules_path;
-  unlink $rules_path;
-}
-
 if ($opt_noisy) {
   $opt_progress = 1;        # implies --progress
 }
@@ -290,9 +275,6 @@
   exit;
 }
 
-# either --net or --reuse means we should use set1/set3
-my $use_net_rules = $opt_net || $opt_reuse;
-
 # --lint
 if ($opt_lint) {
   # In theory we could probably use the same spamtest object as below,
@@ -325,23 +307,9 @@
   # the wire at all, and in fact we may be running without a net connection
   push @{$opt_cf}, "dns_available yes\n";
 
-  # --reuse without --net means we need to just zero ALL net rules; skip net
-  # lookups entirely except for the reused ones.  Do this before constructing
-  # the Mail::SpamAssassin object to ensure no network-rules stuff is compiled
-  # in compile_now().  (This config is for the reuse=no case.  We create a
-  # reuse=yes config after compile_now().)
-  if (!$opt_net) {
-    my @zero = (sort grep {
-          (ref($rules{$_}) eq 'HASH')
-                && $rules{$_}->{tflags}
-                && $rules{$_}->{tflags} =~ /\bnet\b/; 
-        } keys %rules);
-
-    foreach my $r (@zero) {
-      push @{$opt_cf}, "score $r 0\n";
-      # warn "--reuse/!--net zeroed $r";
-    }
-  }
+  # need to load M:SA:Plugin:Reuse
+  push @{$opt_pre}, "loadplugin Mail::SpamAssassin::Plugin::Reuse\n";
+
 }
 
 my $user_prefs = "$opt_p/user_prefs";
@@ -355,9 +323,10 @@
     'userstate_dir'     		=> $opt_p,
     'save_pattern_hits'  		=> $opt_loghits,
     'dont_copy_prefs'   		=> 1,
-    'local_tests_only'   		=> $use_net_rules ? 0 : 1,
+    'local_tests_only'   		=> $opt_net ? 0 : 1,
     'only_these_rules'   		=> $opt_rules,
     'ignore_safety_expire_timeout'	=> 1,
+    'pre_config_text'                   => join("\n", @{$opt_pre})."\n",
     'post_config_text'                  => join("\n", @{$opt_cf})."\n",
     PREFIX				=> '',
     DEF_RULES_DIR        		=> $opt_c,
@@ -370,35 +339,6 @@
 $spamtest->read_scoreonly_config("$FindBin::Bin/mass-check.cf");
 $spamtest->read_scoreonly_config($user_prefs);
 
-# generated user_prefs
-if ($opt_reuse) {
-  # copy current prefs if it exists
-  $spamtest->copy_config(undef, \%orig_conf);
-
-  # zeroed scores to reuse_prefs
-  my @zero = (sort grep {
-          defined $reuse{$_}->{skip} 
-        } keys %reuse);
-  zero_rule_scores(@zero);
-
-  $spamtest->copy_config(undef, \%reuse_conf);
-  $reuse_rules_loaded_p = 1;
-}
-
-sub zero_rule_scores {
-  my @zero = @_;
-
-  my $pfile = "$FindBin::Bin/tmp/reuse_prefs";
-  open(PREFS, ">$pfile") or die "Unable to open $pfile: $!\n".
-            "Needed for --reuse to work properly";
-  for my $rule (@zero) {
-    print PREFS "score $rule 0\n";
-  }
-  close PREFS or die "failed to write $pfile";
-  $spamtest->read_scoreonly_config($pfile);
-  unlink $pfile;
-}
-
 my $who = `id -un 2>/dev/null`;
 my $where = `uname -n 2>/dev/null`;
 my $when = `date -u`;
@@ -727,34 +667,11 @@
 
   # get X-Spam-Status: header for rule hit resue
   my $x_spam_status;
+  my $reusing;
   if ($opt_reuse) {
     $x_spam_status = $ma->get_header("X-Spam-Status");
     $x_spam_status and $x_spam_status =~ s/,\s+/,/gs;
   }
-  my @previous; # previous hits, only set if $opt_reuse
-
-  if ($opt_reuse) {
-    if ($x_spam_status
-        && $x_spam_status =~ m/tests=(\S*)/
-        && $x_spam_status !~ /\bshortcircuit=(?:ham|spam|default)\b/)
-    {
-      push @previous, split(/,/, $1);
-
-      # we found previous tests, so move the reuse config into place
-      unless ($reuse_rules_loaded_p) {
-        $spamtest->copy_config(\%reuse_conf, undef);
-        $reuse_rules_loaded_p = 1;
-        dbg "mass-check: reusing hits";
-      }
-    }
-    else {
-      if ($reuse_rules_loaded_p) {
-        $spamtest->copy_config(\%orig_conf, undef);
-        $reuse_rules_loaded_p = 0;
-        dbg "mass-check: not reusing hits";
-      }
-    }
-  }
 
   # remove SpamAssassin markup, if present and the mail was spam
   my $header = $ma->get_header("Received");
@@ -766,6 +683,17 @@
     }
   }
 
+  if ($opt_reuse) {
+    if ($x_spam_status 
+        && $x_spam_status =~ m/tests=(\S*)/
+        && $x_spam_status !~ /\bshortcircuit=(?:ham|spam|default)\b/)
+    {
+      my @previous = split(/,/, $1);
+      $ma->{metadata}->{reuse_tests_hit} = { map {$_ => 1} @previous };
+      $reusing = 1;
+    }
+  }
+
   # plugin hook to cause us to skip messages
   my $skip = $spamtest->call_plugins("mass_check_skip_message", {
         class => $class,
@@ -859,17 +787,12 @@
     }
   }
 
-  if ($reuse_rules_loaded_p) {
-    push(@extra, "reuse=yes");
-  } else {
-    push(@extra, "reuse=no");
-  }
+  push(@extra, "reuse=" . ($reusing ? "yes" : "no"));
 
   # log the scoreset we're in
   {
     my $set = 0;
     if ($opt_net) { $set |= 1; }
-    if ($reuse_rules_loaded_p) { $set |= 1; }
     if ($status && defined $status->{bayes_score}) { $set |= 2; }
     push(@extra, "set=".$set);
   }
@@ -896,15 +819,7 @@
     my @tests;
     push @tests, split(/,/, $status->get_names_of_tests_hit());
     push @tests, split(/,/, $status->get_names_of_subtests_hit());
-    # hit reuse; $opt_reuse
-    if ($x_spam_status) {
-      # generate mapping of hits to remove hits that are marked as skip
-      @tests = grep { !$reuse{$_}->{skip} } @tests;
-      # add hits from previous
-      for (@previous) {
-	push(@tests, $reuse{$_}->{reuse}) if $reuse{$_}->{reuse};
-      }
-    }
+
     $tests = join(",", sort(@tests));
     $extra = join(",", @extra);
   }

Modified: spamassassin/trunk/rules/20_dnsbl_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/20_dnsbl_tests.cf?rev=749462&r1=749461&r2=749462&view=diff
==============================================================================
--- spamassassin/trunk/rules/20_dnsbl_tests.cf (original)
+++ spamassassin/trunk/rules/20_dnsbl_tests.cf Mon Mar  2 22:49:50 2009
@@ -46,34 +46,34 @@
 header __RCVD_IN_NJABL		eval:check_rbl('njabl', 'combined.njabl.org.')
 describe __RCVD_IN_NJABL	Received via a relay in combined.njabl.org
 tflags __RCVD_IN_NJABL		net
-#reuse __RCVD_IN_NJABL
+reuse  __RCVD_IN_NJABL
 
 header RCVD_IN_NJABL_RELAY	eval:check_rbl_sub('njabl', '127.0.0.2')
 describe RCVD_IN_NJABL_RELAY	NJABL: sender is confirmed open relay
 tflags RCVD_IN_NJABL_RELAY	net
-#reuse RCVD_IN_NJABL_RELAY
+reuse  RCVD_IN_NJABL_RELAY
 
 # NJABL DUL: obsoleted by PBL (bug 5187)
 
 header RCVD_IN_NJABL_SPAM	eval:check_rbl_sub('njabl', '127.0.0.4')
 describe RCVD_IN_NJABL_SPAM	NJABL: sender is confirmed spam source
 tflags RCVD_IN_NJABL_SPAM	net
-#reuse RCVD_IN_NJABL_SPAM
+reuse  RCVD_IN_NJABL_SPAM
 
 header RCVD_IN_NJABL_MULTI	eval:check_rbl_sub('njabl', '127.0.0.5')
 describe RCVD_IN_NJABL_MULTI	NJABL: sent through multi-stage open relay
 tflags RCVD_IN_NJABL_MULTI	net
-#reuse RCVD_IN_NJABL_MULTI
+reuse  RCVD_IN_NJABL_MULTI
 
 header RCVD_IN_NJABL_CGI	eval:check_rbl_sub('njabl', '127.0.0.8')
 describe RCVD_IN_NJABL_CGI	NJABL: sender is an open formmail
 tflags RCVD_IN_NJABL_CGI	net
-#reuse RCVD_IN_NJABL_CGI
+reuse  RCVD_IN_NJABL_CGI
 
 header RCVD_IN_NJABL_PROXY	eval:check_rbl_sub('njabl', '127.0.0.9')
 describe RCVD_IN_NJABL_PROXY	NJABL: sender is an open proxy
 tflags RCVD_IN_NJABL_PROXY	net
-#reuse RCVD_IN_NJABL_PROXY
+reuse  RCVD_IN_NJABL_PROXY
 
 # ---------------------------------------------------------------------------
 # SORBS
@@ -85,53 +85,53 @@
 header __RCVD_IN_SORBS		eval:check_rbl('sorbs', 'dnsbl.sorbs.net.')
 describe __RCVD_IN_SORBS	SORBS: sender is listed in SORBS
 tflags __RCVD_IN_SORBS		net
-#reuse __RCVD_IN_SORBS
+reuse  __RCVD_IN_SORBS
 
 header RCVD_IN_SORBS_HTTP	eval:check_rbl_sub('sorbs', '127.0.0.2')
 describe RCVD_IN_SORBS_HTTP	SORBS: sender is open HTTP proxy server
 tflags RCVD_IN_SORBS_HTTP	net
-#reuse RCVD_IN_SORBS_HTTP
+reuse  RCVD_IN_SORBS_HTTP
 
 header RCVD_IN_SORBS_SOCKS	eval:check_rbl_sub('sorbs', '127.0.0.3')
 describe RCVD_IN_SORBS_SOCKS	SORBS: sender is open SOCKS proxy server
 tflags RCVD_IN_SORBS_SOCKS	net
-#reuse RCVD_IN_SORBS_SOCKS
+reuse  RCVD_IN_SORBS_SOCKS
 
 header RCVD_IN_SORBS_MISC	eval:check_rbl_sub('sorbs', '127.0.0.4')
 describe RCVD_IN_SORBS_MISC	SORBS: sender is open proxy server
 tflags RCVD_IN_SORBS_MISC	net
-#reuse RCVD_IN_SORBS_MISC
+reuse  RCVD_IN_SORBS_MISC
 
 header RCVD_IN_SORBS_SMTP	eval:check_rbl_sub('sorbs', '127.0.0.5')
 describe RCVD_IN_SORBS_SMTP	SORBS: sender is open SMTP relay
 tflags RCVD_IN_SORBS_SMTP	net
-#reuse RCVD_IN_SORBS_SMTP
+reuse  RCVD_IN_SORBS_SMTP
 
 # delist: $50 fee
 #header RCVD_IN_SORBS_SPAM	eval:check_rbl_sub('sorbs', '127.0.0.6')
 #describe RCVD_IN_SORBS_SPAM	SORBS: sender is a spam source
 #tflags RCVD_IN_SORBS_SPAM	net
-#reuse RCVD_IN_SORBS_SPAM
+#reuse  RCVD_IN_SORBS_SPAM	RCVD_IN_SORBS_SPAM
 
 header RCVD_IN_SORBS_WEB	eval:check_rbl_sub('sorbs', '127.0.0.7')
 describe RCVD_IN_SORBS_WEB	SORBS: sender is a abuseable web server
 tflags RCVD_IN_SORBS_WEB	net
-#reuse RCVD_IN_SORBS_WEB
+reuse  RCVD_IN_SORBS_WEB
 
 header RCVD_IN_SORBS_BLOCK	eval:check_rbl_sub('sorbs', '127.0.0.8')
 describe RCVD_IN_SORBS_BLOCK	SORBS: sender demands to never be tested
 tflags RCVD_IN_SORBS_BLOCK	net
-#reuse RCVD_IN_SORBS_BLOCK
+reuse  RCVD_IN_SORBS_BLOCK
 
 header RCVD_IN_SORBS_ZOMBIE	eval:check_rbl_sub('sorbs', '127.0.0.9')
 describe RCVD_IN_SORBS_ZOMBIE	SORBS: sender is on a hijacked network
 tflags RCVD_IN_SORBS_ZOMBIE	net
-#reuse RCVD_IN_SORBS_ZOMBIE
+reuse  RCVD_IN_SORBS_ZOMBIE
 
 header RCVD_IN_SORBS_DUL	eval:check_rbl('sorbs-lastexternal', 'dnsbl.sorbs.net.', '127.0.0.10')
 describe RCVD_IN_SORBS_DUL	SORBS: sent directly from dynamic IP address
 tflags RCVD_IN_SORBS_DUL	net
-#reuse RCVD_IN_SORBS_DUL
+reuse  RCVD_IN_SORBS_DUL
 
 # ---------------------------------------------------------------------------
 # Spamhaus SBL+XBL, now called Zen
@@ -142,25 +142,25 @@
 header __RCVD_IN_ZEN            eval:check_rbl('zen', 'zen.spamhaus.org.')
 describe __RCVD_IN_ZEN          Received via a relay in Spamhaus Zen
 tflags __RCVD_IN_ZEN            net
-#reuse __RCVD_IN_ZEN
+reuse  __RCVD_IN_ZEN
 
 # SBL is the Spamhaus Block List: http://www.spamhaus.org/sbl/
 header RCVD_IN_SBL              eval:check_rbl_sub('zen', '127.0.0.2')
 describe RCVD_IN_SBL            Received via a relay in Spamhaus SBL
 tflags RCVD_IN_SBL              net
-#reuse RCVD_IN_SBL
+reuse  RCVD_IN_SBL
 
 # XBL is the Exploits Block List: http://www.spamhaus.org/xbl/
 header RCVD_IN_XBL              eval:check_rbl('zen-lastexternal', 'zen.spamhaus.org.', '127.0.0.[45678]')
 describe RCVD_IN_XBL            Received via a relay in Spamhaus XBL
 tflags RCVD_IN_XBL              net
-#reuse RCVD_IN_XBL
+reuse  RCVD_IN_XBL
 
 # PBL is the Policy Block List: http://www.spamhaus.org/pbl/
 header RCVD_IN_PBL              eval:check_rbl('zen-lastexternal', 'zen.spamhaus.org.', '127.0.0.1[01]')
 describe RCVD_IN_PBL            Received via a relay in Spamhaus PBL
 tflags RCVD_IN_PBL              net
-#reuse RCVD_IN_PBL T_RCVD_IN_PBL_WITH_NJABL_DUL RCVD_IN_NJABL_DUL
+reuse  RCVD_IN_PBL		RCVD_IN_PBL T_RCVD_IN_PBL_WITH_NJABL_DUL RCVD_IN_NJABL_DUL
 
 # ---------------------------------------------------------------------------
 # RFC-Ignorant blacklists (both name and IP based)
@@ -171,25 +171,25 @@
 header DNS_FROM_RFC_DSN		eval:check_rbl_sub('rfci_envfrom', '127.0.0.2')
 describe DNS_FROM_RFC_DSN	Envelope sender in dsn.rfc-ignorant.org
 tflags DNS_FROM_RFC_DSN		net
-#reuse DNS_FROM_RFC_DSN
+reuse  DNS_FROM_RFC_DSN
 
 header DNS_FROM_RFC_BOGUSMX	eval:check_rbl_sub('rfci_envfrom', '127.0.0.8')
 describe DNS_FROM_RFC_BOGUSMX	Envelope sender in bogusmx.rfc-ignorant.org
 tflags DNS_FROM_RFC_BOGUSMX	net
-#reuse DNS_FROM_RFC_BOGUSMX
+reuse  DNS_FROM_RFC_BOGUSMX
 
 # bug 4628: these rules are too unreliable to assign scores to
 header __DNS_FROM_RFC_POST      eval:check_rbl_sub('rfci_envfrom', '127.0.0.3')
 tflags __DNS_FROM_RFC_POST      net
-#reuse __DNS_FROM_RFC_POST DNS_FROM_RFC_POST
+reuse  __DNS_FROM_RFC_POST	DNS_FROM_RFC_POST
 
 header __DNS_FROM_RFC_ABUSE     eval:check_rbl_sub('rfci_envfrom', '127.0.0.4')
 tflags __DNS_FROM_RFC_ABUSE     net
-#reuse __DNS_FROM_RFC_ABUSE DNS_FROM_RFC_ABUSE
+reuse  __DNS_FROM_RFC_ABUSE	DNS_FROM_RFC_ABUSE
 
 header __DNS_FROM_RFC_WHOIS     eval:check_rbl_sub('rfci_envfrom', '127.0.0.5')
 tflags __DNS_FROM_RFC_WHOIS     net
-#reuse __DNS_FROM_RFC_WHOIS DNS_FROM_RFC_WHOIS
+reuse  __DNS_FROM_RFC_WHOIS	DNS_FROM_RFC_WHOIS
 
 # ---------------------------------------------------------------------------
 # Now, single zone BLs follow:
@@ -198,7 +198,7 @@
 header DNS_FROM_AHBL_RHSBL	eval:check_rbl_envfrom('ahbl', 'rhsbl.ahbl.org.')
 describe DNS_FROM_AHBL_RHSBL	Envelope sender listed in dnsbl.ahbl.org
 tflags DNS_FROM_AHBL_RHSBL	net
-#reuse DNS_FROM_AHBL_RHSBL
+reuse  DNS_FROM_AHBL_RHSBL
 
 # ---------------------------------------------------------------------------
 # NOTE: donation tests, see README file for details
@@ -206,7 +206,7 @@
 header RCVD_IN_BL_SPAMCOP_NET	eval:check_rbl_txt('spamcop', 'bl.spamcop.net.', '(?i:spamcop)')
 describe RCVD_IN_BL_SPAMCOP_NET	Received via a relay in bl.spamcop.net
 tflags RCVD_IN_BL_SPAMCOP_NET	net
-#reuse RCVD_IN_BL_SPAMCOP_NET
+reuse  RCVD_IN_BL_SPAMCOP_NET
 
 # ---------------------------------------------------------------------------
 # NOTE: commercial tests, see README file for details
@@ -240,18 +240,18 @@
 header RCVD_IN_BSP_TRUSTED	eval:check_rbl_txt('bsp-firsttrusted', 'sa-trusted.bondedsender.org.', '(?i:bonded)')
 describe RCVD_IN_BSP_TRUSTED	Sender is in Sender Score Certified (trusted relay)
 tflags RCVD_IN_BSP_TRUSTED	net nice
-#reuse RCVD_IN_BSP_TRUSTED
+reuse  RCVD_IN_BSP_TRUSTED
 
 header RCVD_IN_BSP_OTHER	eval:check_rbl_txt('bsp-untrusted', 'sa-other.bondedsender.org.', '(?i:bonded)')
 describe RCVD_IN_BSP_OTHER	Sender is in Sender Score Certified (other relay)
 tflags RCVD_IN_BSP_OTHER	net nice
-#reuse RCVD_IN_BSP_OTHER
+reuse  RCVD_IN_BSP_OTHER
 
 # confirmed-opt-in list; see bug 5476
 header RCVD_IN_SSC_TRUSTED_COI    eval:check_rbl('ssc-firsttrusted', 'plus.bondedsender.org.')
 describe RCVD_IN_SSC_TRUSTED_COI  Sender is in Sender Score Certified (confirmed opt-in)
 tflags RCVD_IN_SSC_TRUSTED_COI    net nice
-#reuse RCVD_IN_SSC_TRUSTED_COI
+reuse  RCVD_IN_SSC_TRUSTED_COI
 
 # ---------------------------------------------------------------------------
 

Modified: spamassassin/trunk/rules/20_net_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/20_net_tests.cf?rev=749462&r1=749461&r2=749462&view=diff
==============================================================================
--- spamassassin/trunk/rules/20_net_tests.cf (original)
+++ spamassassin/trunk/rules/20_net_tests.cf Mon Mar  2 22:49:50 2009
@@ -36,14 +36,14 @@
 meta DIGEST_MULTIPLE		RAZOR2_CHECK + DCC_CHECK + PYZOR_CHECK > 1
 describe DIGEST_MULTIPLE	Message hits more than one network digest check
 tflags DIGEST_MULTIPLE		net
-#reuse DIGEST_MULTIPLE
+reuse  DIGEST_MULTIPLE
 
 ifplugin Mail::SpamAssassin::Plugin::DNSEval
 
 header NO_DNS_FOR_FROM		eval:check_dns_sender()
 describe NO_DNS_FOR_FROM	Envelope sender has no MX or A DNS records
 tflags NO_DNS_FOR_FROM		net
-#reuse NO_DNS_FOR_FROM
+reuse  NO_DNS_FOR_FROM
 
 endif
 

Modified: spamassassin/trunk/rules/25_dcc.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/25_dcc.cf?rev=749462&r1=749461&r2=749462&view=diff
==============================================================================
--- spamassassin/trunk/rules/25_dcc.cf (original)
+++ spamassassin/trunk/rules/25_dcc.cf Mon Mar  2 22:49:50 2009
@@ -28,6 +28,6 @@
 full DCC_CHECK		eval:check_dcc()
 describe DCC_CHECK	Listed in DCC (http://rhyolite.com/anti-spam/dcc/)
 tflags DCC_CHECK	net
-#reuse DCC_CHECK
+reuse  DCC_CHECK
 
 endif

Modified: spamassassin/trunk/rules/25_pyzor.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/25_pyzor.cf?rev=749462&r1=749461&r2=749462&view=diff
==============================================================================
--- spamassassin/trunk/rules/25_pyzor.cf (original)
+++ spamassassin/trunk/rules/25_pyzor.cf Mon Mar  2 22:49:50 2009
@@ -28,6 +28,6 @@
 full PYZOR_CHECK	eval:check_pyzor()
 describe PYZOR_CHECK	Listed in Pyzor (http://pyzor.sf.net/)
 tflags PYZOR_CHECK	net
-#reuse PYZOR_CHECK
+reuse  PYZOR_CHECK
 
 endif

Modified: spamassassin/trunk/rules/25_razor2.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/25_razor2.cf?rev=749462&r1=749461&r2=749462&view=diff
==============================================================================
--- spamassassin/trunk/rules/25_razor2.cf (original)
+++ spamassassin/trunk/rules/25_razor2.cf Mon Mar  2 22:49:50 2009
@@ -28,7 +28,7 @@
 full RAZOR2_CHECK	eval:check_razor2()
 describe RAZOR2_CHECK	Listed in Razor2 (http://razor.sf.net/)
 tflags RAZOR2_CHECK	net
-#reuse RAZOR2_CHECK
+reuse  RAZOR2_CHECK
 
 lang de describe RAZOR2_CHECK	Gelistet im "Razor2"-System (http://razor.sf.net/)
 lang nl describe RAZOR2_CHECK	Gevonden in Razor2 (http://razor.sf.net/)
@@ -53,17 +53,17 @@
 
 full    RAZOR2_CF_RANGE_51_100  eval:check_razor2_range('','51','100')
 tflags  RAZOR2_CF_RANGE_51_100	net
-#reuse	RAZOR2_CF_RANGE_51_100
+reuse 	RAZOR2_CF_RANGE_51_100
 describe RAZOR2_CF_RANGE_51_100	Razor2 gives confidence level above 50%
 
 full	RAZOR2_CF_RANGE_E4_51_100	eval:check_razor2_range('4','51','100')
 tflags  RAZOR2_CF_RANGE_E4_51_100	net
-#reuse	RAZOR2_CF_RANGE_E4_51_100
+reuse 	RAZOR2_CF_RANGE_E4_51_100
 describe RAZOR2_CF_RANGE_E4_51_100	Razor2 gives engine 4 confidence level above 50%
 
 full	RAZOR2_CF_RANGE_E8_51_100	eval:check_razor2_range('8','51','100')
 tflags  RAZOR2_CF_RANGE_E8_51_100	net
-#reuse	RAZOR2_CF_RANGE_E8_51_100
+reuse 	RAZOR2_CF_RANGE_E8_51_100
 describe RAZOR2_CF_RANGE_E8_51_100	Razor2 gives engine 8 confidence level above 50%
 
 lang de describe RAZOR2_CF_RANGE_51_100	Razor2 Spam-Bewertung liegt zwischen 51 und 100

Modified: spamassassin/trunk/rules/25_spf.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/25_spf.cf?rev=749462&r1=749461&r2=749462&view=diff
==============================================================================
--- spamassassin/trunk/rules/25_spf.cf (original)
+++ spamassassin/trunk/rules/25_spf.cf Mon Mar  2 22:49:50 2009
@@ -68,13 +68,13 @@
 tflags SPF_HELO_SOFTFAIL	net
 
 # rules from earlier than current release that can be reused
-#reuse SPF_PASS
-#reuse SPF_FAIL
-#reuse SPF_SOFTFAIL
-#reuse SPF_HELO_PASS
-#reuse SPF_HELO_FAIL
-#reuse SPF_HELO_SOFTFAIL
-#reuse SPF_NEUTRAL
-#reuse SPF_HELO_NEUTRAL
+reuse  SPF_PASS
+reuse  SPF_FAIL
+reuse  SPF_SOFTFAIL
+reuse  SPF_HELO_PASS
+reuse  SPF_HELO_FAIL
+reuse  SPF_HELO_SOFTFAIL
+reuse  SPF_NEUTRAL
+reuse  SPF_HELO_NEUTRAL
 
 endif   # Mail::SpamAssassin::Plugin::SPF

Modified: spamassassin/trunk/rules/25_uribl.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/25_uribl.cf?rev=749462&r1=749461&r2=749462&view=diff
==============================================================================
--- spamassassin/trunk/rules/25_uribl.cf (original)
+++ spamassassin/trunk/rules/25_uribl.cf Mon Mar  2 22:49:50 2009
@@ -37,7 +37,7 @@
 body		URIBL_SBL	eval:check_uridnsbl('URIBL_SBL')
 describe	URIBL_SBL	Contains an URL listed in the SBL blocklist
 tflags		URIBL_SBL	net
-#reuse		URIBL_SBL
+reuse 		URIBL_SBL
 
 ###########################################################################
 ## SURBL
@@ -46,37 +46,37 @@
 body		URIBL_SC_SURBL	eval:check_uridnsbl('URIBL_SC_SURBL')
 describe	URIBL_SC_SURBL	Contains an URL listed in the SC SURBL blocklist
 tflags		URIBL_SC_SURBL	net
-#reuse		URIBL_SC_SURBL
+reuse 		URIBL_SC_SURBL
 
 urirhssub	URIBL_WS_SURBL	multi.surbl.org.	A   4
 body		URIBL_WS_SURBL	eval:check_uridnsbl('URIBL_WS_SURBL')
 describe	URIBL_WS_SURBL	Contains an URL listed in the WS SURBL blocklist
 tflags		URIBL_WS_SURBL	net
-#reuse		URIBL_WS_SURBL
+reuse 		URIBL_WS_SURBL
 
 urirhssub	URIBL_PH_SURBL	multi.surbl.org.	A   8
 body		URIBL_PH_SURBL	eval:check_uridnsbl('URIBL_PH_SURBL')
 describe	URIBL_PH_SURBL	Contains an URL listed in the PH SURBL blocklist
 tflags		URIBL_PH_SURBL	net
-#reuse		URIBL_PH_SURBL
+reuse 		URIBL_PH_SURBL
 
 urirhssub	URIBL_OB_SURBL	multi.surbl.org.	A   16
 body		URIBL_OB_SURBL	eval:check_uridnsbl('URIBL_OB_SURBL')
 describe	URIBL_OB_SURBL	Contains an URL listed in the OB SURBL blocklist
 tflags		URIBL_OB_SURBL	net
-#reuse		URIBL_OB_SURBL
+reuse 		URIBL_OB_SURBL
 
 urirhssub	URIBL_AB_SURBL	multi.surbl.org.	A   32
 body		URIBL_AB_SURBL	eval:check_uridnsbl('URIBL_AB_SURBL')
 describe	URIBL_AB_SURBL	Contains an URL listed in the AB SURBL blocklist
 tflags		URIBL_AB_SURBL	net
-#reuse		URIBL_AB_SURBL
+reuse 		URIBL_AB_SURBL
 
 urirhssub	URIBL_JP_SURBL	multi.surbl.org.	A   64
 body		URIBL_JP_SURBL	eval:check_uridnsbl('URIBL_JP_SURBL')
 describe	URIBL_JP_SURBL	Contains an URL listed in the JP SURBL blocklist
 tflags		URIBL_JP_SURBL	net
-#reuse		URIBL_JP_SURBL
+reuse 		URIBL_JP_SURBL
 
 ###########################################################################
 ## URIBL
@@ -85,19 +85,19 @@
 body		URIBL_BLACK	eval:check_uridnsbl('URIBL_BLACK')
 describe	URIBL_BLACK	Contains an URL listed in the URIBL blacklist
 tflags		URIBL_BLACK	net
-#reuse		URIBL_BLACK
+reuse 		URIBL_BLACK
 
 urirhssub	URIBL_GREY	multi.uribl.com.        A   4
 body		URIBL_GREY	eval:check_uridnsbl('URIBL_GREY')
 describe	URIBL_GREY	Contains an URL listed in the URIBL greylist
 tflags		URIBL_GREY	net
-#reuse		URIBL_GREY
+reuse 		URIBL_GREY
 
 urirhssub	URIBL_RED	multi.uribl.com.        A   8
 body		URIBL_RED	eval:check_uridnsbl('URIBL_RED')
 describe	URIBL_RED	Contains an URL listed in the URIBL redlist
 tflags		URIBL_RED	net
-#reuse		URIBL_RED
+reuse 		URIBL_RED
 
 ###########################################################################
 ## DOMAINS TO SKIP (KNOWN GOOD)

Added: spamassassin/trunk/t/reuse.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/reuse.t?rev=749462&view=auto
==============================================================================
--- spamassassin/trunk/t/reuse.t (added)
+++ spamassassin/trunk/t/reuse.t Mon Mar  2 22:49:50 2009
@@ -0,0 +1,217 @@
+#!/usr/bin/perl -w
+
+BEGIN {
+  if (-e 't/test_dir') { # started from the parent directory (e.g. as "t/reuse.t"): move into t/
+    chdir 't';
+  }
+
+  if (-e 'test_dir') {            # now in the test directory: load modules from ../blib/lib
+    unshift(@INC, '../blib/lib');
+  }
+}
+
+my $prefix = '.';
+if (-e 'test_dir') {            # running from test directory, not ..
+  $prefix = '..';
+}
+
+use strict;
+use SATest; sa_t_init("reuse");
+use Test;
+
+use vars qw(%patterns %anti_patterns $perl_path &patterns_run_cb);
+
+use Mail::SpamAssassin;
+
+plan tests => 37;
+
+# Tests the following cases:
+# - No reuse: no change
+# - Reuse and no X-Spam-Status: no change
+# - Reuse on: metas work
+# - Reuse works with existing tests (disabled)
+# - Reuse works with non-existing tests (they get scores)
+# - Reuse handles multiple "old rule names"
+# - Reuse works in positive and negative cases
+# - Rules defined only by "reuse" can have arbitrary scores and priorities set
+
+tstlocalrules('
+
+# Check that order of reuse/body lines for BODY_RULE_* does not matter
+reuse  BODY_RULE_1
+
+body   BODY_RULE_1    /./
+score  BODY_RULE_1    1.0
+
+body   BODY_RULE_2    /\bfoobar\b/
+score  BODY_RULE_2    1.0
+
+header HEADER_RULE_1  Subject =~ /\bmessage\b/
+
+meta   META_RULE_1    BODY_RULE_1 || BODY_RULE_2
+
+reuse    BODY_RULE_2
+priority BODY_RULE_2  -2
+score    BODY_RULE_2  1.5
+
+reuse    NEW_RULE     OTHER_RULE
+priority NEW_RULE     -3
+score    NEW_RULE     0.5
+
+reuse    OTHER_RULE
+priority OTHER_RULE   -4
+
+reuse    RENAMED_RULE OLD_RULE_1 OLD_RULE_2 OLD_RULE_3
+
+reuse    SCORED_RULE  OLD_RULE_2
+score    SCORED_RULE  2.0
+priority SCORED_RULE -1
+
+');
+
+# reuse on, mail has no X-Spam-Status: nothing to reuse, so the same hits as a run without --reuse are expected
+write_mail(0);
+ok_system("$perl_path -w ../masses/mass-check -c=log/localrules.tmp --reuse --file log/mail.txt > log/noxss.out");
+
+%patterns = (
+             'BODY_RULE_1' => 'BODY_RULE_1',
+             'HEADER_RULE_1' => 'HEADER_RULE_1',
+             'META_RULE_1' => 'META_RULE_1'
+             );
+%anti_patterns = (
+                  'NEW_RULE' => 'NEW_RULE',
+                  'OTHER_RULE' => 'OTHER_RULE',
+                  'RENAMED_RULE' => 'RENAMED_RULE',
+                  'NONEXISTANT_RULE' => 'NONEXISTANT_RULE',
+                  'BODY_RULE_2' => 'BODY_RULE_2',
+                  'SCORED_RULE' => 'SCORED_RULE'
+                  );
+
+checkfile("noxss.out", \&patterns_run_cb);
+ok_all_patterns();
+clear_pattern_counters();
+
+# write mail with X-Spam-Status
+write_mail(1);
+
+# test without reuse: the X-Spam-Status header is present but must not influence the hits
+ok_system("$perl_path -w ../masses/mass-check -c=log/localrules.tmp --file log/mail.txt > log/noreuse.out");
+
+%patterns = (
+             'BODY_RULE_1' => 'BODY_RULE_1',
+             'HEADER_RULE_1' => 'HEADER_RULE_1',
+             'META_RULE_1' => 'META_RULE_1'
+             );
+%anti_patterns = (
+                  'NEW_RULE' => 'NEW_RULE',
+                  'OTHER_RULE' => 'OTHER_RULE',
+                  'RENAMED_RULE' => 'RENAMED_RULE',
+                  'NONEXISTANT_RULE' => 'NONEXISTANT_RULE',
+                  'BODY_RULE_2' => 'BODY_RULE_2',
+                  'SCORED_RULE' => 'SCORED_RULE'
+                  );
+checkfile("noreuse.out", \&patterns_run_cb);
+ok_all_patterns();
+clear_pattern_counters();
+
+# test with reuse: same mail (still carrying X-Spam-Status) and same rules, now with --reuse
+ok_system("$perl_path -w ../masses/mass-check -c=log/localrules.tmp --reuse --file log/mail.txt > log/reuse.out");
+# Rules named in X-Spam-Status, directly or through an old name on a "reuse"
+# line, must be reported; BODY_RULE_1 is marked reuse but is not in that header.
+%patterns = (
+             'HEADER_RULE_1' => 'HEADER_RULE_1',
+             'BODY_RULE_2' => 'BODY_RULE_2',
+             'META_RULE_1' => 'META_RULE_1',
+             'NEW_RULE' => 'NEW_RULE',
+             'OTHER_RULE' => 'OTHER_RULE',
+             'RENAMED_RULE' => 'RENAMED_RULE',
+             'SCORED_RULE' => 'SCORED_RULE',
+             'Y 8' => 'score'   # presumably "spam, total score 8": 1.5+0.5+2.0 plus four unscored hits at an assumed 1.0 each - TODO confirm
+             );
+%anti_patterns = (
+                  'BODY_RULE_1' => 'BODY_RULE_1',
+                  'NONEXISTANT_RULE' => 'NONEXISTANT_RULE'
+                  );
+
+checkfile("reuse.out", \&patterns_run_cb);
+ok_all_patterns();
+clear_pattern_counters();
+
+tstlocalrules('
+
+meta META_RULE_1 RULE_A && !RULE_B
+
+body  RULE_A /./
+reuse RULE_B OTHER_RULE
+
+body  RULE_C / does not hit /
+
+meta META_RULE_2 (RULE_A && RULE_B) || RULE_C
+
+');
+
+write_mail(1);
+
+# test with reuse: meta rules must see reused hits (RULE_B is reused from OTHER_RULE)
+ok_system("$perl_path -w ../masses/mass-check -c=log/localrules.tmp --reuse --file log/mail.txt > log/metareuse.out");
+# RULE_A && RULE_B lets META_RULE_2 hit; !RULE_B keeps META_RULE_1 from hitting.
+%patterns = (
+	     'META_RULE_2' => 'META_RULE_2',
+	     'RULE_A' => 'RULE_A',
+	     'RULE_B' => 'RULE_B',
+             );
+%anti_patterns = (
+	     'META_RULE_1' => 'META_RULE_1',
+	     'RULE_C' => 'RULE_C',
+		 );
+checkfile("metareuse.out", \&patterns_run_cb);
+ok_all_patterns();
+clear_pattern_counters();
+
+
+sub write_mail {    # build the test message and hand it to tstfile()
+    my ($x_spam_status) = @_;    # true: include an X-Spam-Status header
+    # Fixed header block shared by both variants of the message.
+    my $msg = <<EOF;
+Received: from internal.example.com [127.0.0.1] by localhost
+    for recipient\@example.com; Fri, 07 Oct 2002 09:02:00 +0000
+Received: from external.spammer.com (external.spammer.com
+    [150.51.53.1]) by internal.example.com for recipient\@example.com;
+    Fri, 07 Oct 2002 09:01:00 +0000
+Message-ID: <cl...@example.com>
+Date: Mon, 07 Oct 2002 09:00:00 +0000
+From: Sender <se...@this-spammer.com>
+MIME-Version: 1.0
+To: Recipient <re...@example.com>
+Subject: trivial message
+Content-Type: text/plain; charset=us-ascii; format=flowed
+Content-Transfer-Encoding: 7bit
+EOF
+    # Optional header listing earlier rule hits, for the --reuse runs to pick up.
+    if ($x_spam_status) {
+        $msg .= <<END;
+X-Spam-Status: Yes, score=15.3 required=5.0 tests=BODY_RULE_2,
+	NONEXISTANT_RULE,OTHER_RULE,OLD_RULE_2,OLD_RULE_3
+END
+    }
+    # Blank separator lines followed by a one-line body.
+    $msg .= <<END;
+
+
+This is a test message.
+
+END
+    # tstfile() comes from SATest; presumably it writes log/mail.txt - TODO confirm
+    tstfile($msg);
+}
+
+sub ok_system {
+    my $cmd = shift;    # shell command line to execute
+    # Echo the command, run it, and record exactly one ok() result for it.
+    print "\t$cmd\n";
+    system($cmd);
+    # $? is 0 only on a clean exit.  ($? >> 8) alone is also 0 when the child
+    # was killed by a signal, which would wrongly be counted as a pass.
+    ok($? == 0);
+}
+