You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2007/11/16 18:55:08 UTC

svn commit: r595759 - /spamassassin/trunk/masses/mass-check

Author: jm
Date: Fri Nov 16 09:55:08 2007
New Revision: 595759

URL: http://svn.apache.org/viewvc?rev=595759&view=rev
Log:
bug 5711: a first step towards 'mass-check --reuse' without '--net' reusing net hits when they are available

Modified:
    spamassassin/trunk/masses/mass-check

Modified: spamassassin/trunk/masses/mass-check
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/mass-check?rev=595759&r1=595758&r2=595759&view=diff
==============================================================================
--- spamassassin/trunk/masses/mass-check (original)
+++ spamassassin/trunk/masses/mass-check Fri Nov 16 09:55:08 2007
@@ -155,7 +155,7 @@
             %server_caches @cache_tmp_files %min_other_caches
             %unique_cache_completed $opt_cs_schedule_cache $opt_cs_cache
             $opt_cs_cachedir
-	    $tmpfd %reuse %orig_conf %reuse_conf $reuse_rules_loaded_p);
+	    $tmpfd %rules %reuse %orig_conf %reuse_conf $reuse_rules_loaded_p);
 
 use FindBin;
 
@@ -238,17 +238,18 @@
 
 # rules.pl is for the --reuse option, score set doesn't matter
 if ($opt_reuse) {
-  my $rules_path = "$FindBin::Bin/tmp/rules.pl";
+  my $rules_path = "$FindBin::Bin/tmp/rules$$.pl";
   # some people specify paths relatively, whereas this needs an absolute path,
   # so "do the right thing"(tm).
   my $abs_opt_c = File::Spec->rel2abs($opt_c);
-  system("cd $FindBin::Bin; perl ../build/parse-rules-for-masses -d $abs_opt_c");
+  system("cd $FindBin::Bin; perl ../build/parse-rules-for-masses -d $abs_opt_c -o $rules_path");
 
-  require $rules_path;
   unless (-s $rules_path > 130) {
     die '--reuse configuration error detected; rules.pl file size is too small'.
         ' ('.(-s $rules_path)." bytes)\nrules.pl path: $rules_path\n";
   }
+
+  require $rules_path;
 }
 
 if ($opt_noisy) {
@@ -285,6 +286,9 @@
 
 my $user_prefs = "$opt_p/user_prefs";
 
+# either --net or --reuse means we should use set1/set3
+my $use_net_rules = $opt_net || $opt_reuse;
+
 # --lint
 # In theory we could probably use the same spamtest object as below,
 # but since it's probably not expecting that, and we don't want
@@ -298,7 +302,7 @@
     'userstate_dir'     			=> "$opt_p",
     'save_pattern_hits'  			=> $opt_loghits,
     'dont_copy_prefs'   			=> 1,
-    'local_tests_only'   			=> $opt_net ? 0 : 1,
+    'local_tests_only'   			=> $use_net_rules ? 0 : 1,
     'only_these_rules'   			=> $opt_rules,
     'ignore_safety_expire_timeout'		=> 1,
     'post_config_text'                          => join("\n", @{$opt_cf})."\n",
@@ -335,7 +339,7 @@
   'userstate_dir'     			=> "$opt_p",
   'save_pattern_hits'  			=> $opt_loghits,
   'dont_copy_prefs'   			=> 1,
-  'local_tests_only'   			=> $opt_net ? 0 : 1,
+  'local_tests_only'   			=> $use_net_rules ? 0 : 1,
   'only_these_rules'   			=> $opt_rules,
   'ignore_safety_expire_timeout'	=> 1,
   'post_config_text'                    => join("\n", @{$opt_cf})."\n",
@@ -349,23 +353,47 @@
 
 # generated user_prefs
 if ($opt_reuse) {
-  # copy current prefs if it exists
-  $spamtest->copy_config(undef, \%orig_conf);
 
-  # zeroed scores to mass_prefs
-  my @zero = sort grep { defined $reuse{$_}->{skip} } keys %reuse;
-  open(PREFS, "> $opt_p/mass_prefs") || die "Unable to open $opt_p/mass_prefs: $!\nNeeded for --reuse to work properly";
-  for my $zero (@zero) {
-    print PREFS "score $zero 0\n";
+  # --reuse without --net means we need to just zero ALL net rules;
+  # skip net lookups entirely except for the reused ones.  This
+  # config is for the reuse=no case.
+  if (!$opt_net) {
+    zero_scores_for_rules (sort grep {
+          (ref($rules{$_}) eq 'HASH')
+                && $rules{$_}->{tflags}
+                && $rules{$_}->{tflags} =~ /\bnet\b/; 
+        } keys %rules);
+    zero_scores_for_rules (@s);
   }
-  close(PREFS);
 
-  $spamtest->read_scoreonly_config("$opt_p/mass_prefs");
+  # copy current prefs if it exists
+  $spamtest->copy_config(undef, \%orig_conf);
+
+  # zeroed scores to reuse_prefs
+  zero_scores_for_rules (sort grep {
+          defined $reuse{$_}->{skip} 
+        } keys %reuse);
 
   $spamtest->copy_config(undef, \%reuse_conf);
   $reuse_rules_loaded_p = 1;
 }
 
+sub zero_scores_for_rules {
+  my @zero = @_;
+
+  # Overwrite $opt_p/reuse_prefs with one "score RULE 0" line per rule name in @_, then pass that file to $spamtest->read_scoreonly_config.
+  # TODO: this should use a file in ./tmp/, not something that could accidentally be loaded later
+  open(PREFS, ">$opt_p/reuse_prefs") 
+        or die "Unable to open $opt_p/reuse_prefs: $!\n".
+            "Needed for --reuse to work properly";
+
+  for my $rule (@zero) {
+    print PREFS "score $rule 0\n";
+  }
+  close(PREFS);
+  $spamtest->read_scoreonly_config("$opt_p/reuse_prefs");
+}
+
 my $who = `id -un 2>/dev/null`;
 my $where = `uname -n 2>/dev/null`;
 my $when = `date -u`;
@@ -702,14 +730,13 @@
   my $ma = $spamtest->parse($dataref, 1);
 
   # remove SpamAssassin markup, if present and the mail was spam
-  my $header = $ma->get_header("Received");
   my $x_spam_status;
   if ($opt_reuse) {
     # get X-Spam-Status: header for rule hit reuse
     $x_spam_status = $ma->get_header("X-Spam-Status");
   }
-  # previous hits
-  my @previous;
+
+  my @previous; # previous hits, only set if $opt_reuse
   if ($x_spam_status) {
     $x_spam_status =~ s/,\s+/,/gs;
     if ($x_spam_status =~ m/tests=(\S*)/
@@ -731,6 +758,7 @@
     }
   }
 
+  my $header = $ma->get_header("Received");
   if ($header && $header =~ /\bwith SpamAssassin\b/) {
     if (!$opt_deencap || message_should_be_deencapped($ma)) {
       my $new_ma = $spamtest->parse($spamtest->remove_spamassassin_markup($ma), 1);
@@ -861,7 +889,7 @@
     my @tests;
     push @tests, split(/,/, $status->get_names_of_tests_hit());
     push @tests, split(/,/, $status->get_names_of_subtests_hit());
-    # hit reuse
+    # hit reuse; $opt_reuse
     if ($x_spam_status) {
       # generate mapping of hits to remove hits that are marked as skip
       @tests = grep { !$reuse{$_}->{skip} } @tests;