You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2007/11/16 18:55:08 UTC
svn commit: r595759 - /spamassassin/trunk/masses/mass-check
Author: jm
Date: Fri Nov 16 09:55:08 2007
New Revision: 595759
URL: http://svn.apache.org/viewvc?rev=595759&view=rev
Log:
bug 5711: a first step towards 'mass-check --reuse' without '--net' reusing net hits when they are available
Modified:
spamassassin/trunk/masses/mass-check
Modified: spamassassin/trunk/masses/mass-check
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/mass-check?rev=595759&r1=595758&r2=595759&view=diff
==============================================================================
--- spamassassin/trunk/masses/mass-check (original)
+++ spamassassin/trunk/masses/mass-check Fri Nov 16 09:55:08 2007
@@ -155,7 +155,7 @@
%server_caches @cache_tmp_files %min_other_caches
%unique_cache_completed $opt_cs_schedule_cache $opt_cs_cache
$opt_cs_cachedir
- $tmpfd %reuse %orig_conf %reuse_conf $reuse_rules_loaded_p);
+ $tmpfd %rules %reuse %orig_conf %reuse_conf $reuse_rules_loaded_p);
use FindBin;
@@ -238,17 +238,18 @@
# rules.pl is for the --reuse option, score set doesn't matter
if ($opt_reuse) {
- my $rules_path = "$FindBin::Bin/tmp/rules.pl";
+ my $rules_path = "$FindBin::Bin/tmp/rules$$.pl";
# some people specify paths relatively, whereas this needs an absolute path,
# so "do the right thing"(tm).
my $abs_opt_c = File::Spec->rel2abs($opt_c);
- system("cd $FindBin::Bin; perl ../build/parse-rules-for-masses -d $abs_opt_c");
+ system("cd $FindBin::Bin; perl ../build/parse-rules-for-masses -d $abs_opt_c -o $rules_path");
- require $rules_path;
unless (-s $rules_path > 130) {
die '--reuse configuration error detected; rules.pl file size is too small'.
' ('.(-s $rules_path)." bytes)\nrules.pl path: $rules_path\n";
}
+
+ require $rules_path;
}
if ($opt_noisy) {
@@ -285,6 +286,9 @@
my $user_prefs = "$opt_p/user_prefs";
+# either --net or --reuse means we should use set1/set3
+my $use_net_rules = $opt_net || $opt_reuse;
+
# --lint
# In theory we could probably use the same spamtest object as below,
# but since it's probably not expecting that, and we don't want
@@ -298,7 +302,7 @@
'userstate_dir' => "$opt_p",
'save_pattern_hits' => $opt_loghits,
'dont_copy_prefs' => 1,
- 'local_tests_only' => $opt_net ? 0 : 1,
+ 'local_tests_only' => $use_net_rules ? 0 : 1,
'only_these_rules' => $opt_rules,
'ignore_safety_expire_timeout' => 1,
'post_config_text' => join("\n", @{$opt_cf})."\n",
@@ -335,7 +339,7 @@
'userstate_dir' => "$opt_p",
'save_pattern_hits' => $opt_loghits,
'dont_copy_prefs' => 1,
- 'local_tests_only' => $opt_net ? 0 : 1,
+ 'local_tests_only' => $use_net_rules ? 0 : 1,
'only_these_rules' => $opt_rules,
'ignore_safety_expire_timeout' => 1,
'post_config_text' => join("\n", @{$opt_cf})."\n",
@@ -349,23 +353,47 @@
# generated user_prefs
if ($opt_reuse) {
- # copy current prefs if it exists
- $spamtest->copy_config(undef, \%orig_conf);
- # zeroed scores to mass_prefs
- my @zero = sort grep { defined $reuse{$_}->{skip} } keys %reuse;
- open(PREFS, "> $opt_p/mass_prefs") || die "Unable to open $opt_p/mass_prefs: $!\nNeeded for --reuse to work properly";
- for my $zero (@zero) {
- print PREFS "score $zero 0\n";
+ # --reuse without --net means we need to just zero ALL net rules;
+ # skip net lookups entirely except for the reused ones. This
+ # config is for the reuse=no case.
+ if (!$opt_net) {
+ zero_scores_for_rules (sort grep {
+ (ref($rules{$_}) eq 'HASH')
+ && $rules{$_}->{tflags}
+ && $rules{$_}->{tflags} =~ /\bnet\b/;
+ } keys %rules);
+ zero_scores_for_rules (@s);
}
- close(PREFS);
- $spamtest->read_scoreonly_config("$opt_p/mass_prefs");
+ # copy current prefs if it exists
+ $spamtest->copy_config(undef, \%orig_conf);
+
+ # zeroed scores to reuse_prefs
+ zero_scores_for_rules (sort grep {
+ defined $reuse{$_}->{skip}
+ } keys %reuse);
$spamtest->copy_config(undef, \%reuse_conf);
$reuse_rules_loaded_p = 1;
}
+sub zero_scores_for_rules {
+ my @zero = @_;
+
+ # TODO: this should use a file in ./tmp/, not something that could
+ # accidentally be loaded later
+ open(PREFS, ">$opt_p/reuse_prefs")
+ or die "Unable to open $opt_p/reuse_prefs: $!\n".
+ "Needed for --reuse to work properly";
+
+ for my $rule (@zero) {
+ print PREFS "score $rule 0\n";
+ }
+ close(PREFS);
+ $spamtest->read_scoreonly_config("$opt_p/reuse_prefs");
+}
+
my $who = `id -un 2>/dev/null`;
my $where = `uname -n 2>/dev/null`;
my $when = `date -u`;
@@ -702,14 +730,13 @@
my $ma = $spamtest->parse($dataref, 1);
# remove SpamAssassin markup, if present and the mail was spam
- my $header = $ma->get_header("Received");
my $x_spam_status;
if ($opt_reuse) {
# get X-Spam-Status: header for rule hit reuse
$x_spam_status = $ma->get_header("X-Spam-Status");
}
- # previous hits
- my @previous;
+
+ my @previous; # previous hits, only set if $opt_reuse
if ($x_spam_status) {
$x_spam_status =~ s/,\s+/,/gs;
if ($x_spam_status =~ m/tests=(\S*)/
@@ -731,6 +758,7 @@
}
}
+ my $header = $ma->get_header("Received");
if ($header && $header =~ /\bwith SpamAssassin\b/) {
if (!$opt_deencap || message_should_be_deencapped($ma)) {
my $new_ma = $spamtest->parse($spamtest->remove_spamassassin_markup($ma), 1);
@@ -861,7 +889,7 @@
my @tests;
push @tests, split(/,/, $status->get_names_of_tests_hit());
push @tests, split(/,/, $status->get_names_of_subtests_hit());
- # hit reuse
+ # hit reuse; $x_spam_status is only set when $opt_reuse is on
if ($x_spam_status) {
# generate mapping of hits to remove hits that are marked as skip
@tests = grep { !$reuse{$_}->{skip} } @tests;