You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2007/11/18 17:11:21 UTC
svn commit: r596095 - in /spamassassin/trunk: build/nightlymc/
lib/Mail/SpamAssassin/Plugin/ masses/ rules/
Author: jm
Date: Sun Nov 18 08:11:20 2007
New Revision: 596095
URL: http://svn.apache.org/viewvc?rev=596095&view=rev
Log:
bug 5711: allow 'mass-check --reuse' without '--net' to reuse net-rule hits and output mass-check results for scoreset 1, while lines that are not reusable use set 0. Also, fix a few tests to use 'tflags net' if they use network lookups (including calls to lookup_ptr()). Fix nightly mass-checks on the zone to use --reuse to gain this.
Modified:
spamassassin/trunk/build/nightlymc/corpus.doc
spamassassin/trunk/build/nightlymc/corpus.fredt
spamassassin/trunk/build/nightlymc/corpus.jm
spamassassin/trunk/build/nightlymc/corpus.zmi
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
spamassassin/trunk/masses/mass-check
spamassassin/trunk/rules/20_fake_helo_tests.cf
spamassassin/trunk/rules/20_head_tests.cf
spamassassin/trunk/rules/25_spf.cf
Modified: spamassassin/trunk/build/nightlymc/corpus.doc
URL: http://svn.apache.org/viewvc/spamassassin/trunk/build/nightlymc/corpus.doc?rev=596095&r1=596094&r2=596095&view=diff
==============================================================================
--- spamassassin/trunk/build/nightlymc/corpus.doc (original)
+++ spamassassin/trunk/build/nightlymc/corpus.doc Sun Nov 18 08:11:20 2007
@@ -1,5 +1,5 @@
-opts_weekly="--net --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 --net -j 8 -f /home/bbmass/mc-nightly/targets.doc"
-opts_nightly="--cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 -f /home/bbmass/mc-nightly/targets.doc"
+opts_weekly="--net --reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 --net -j 8 -f /home/bbmass/mc-nightly/targets.doc"
+opts_nightly="--reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 -f /home/bbmass/mc-nightly/targets.doc"
tmp=$HOME/tmp
tree=$HOME/svn
prefs_weekly=$HOME/user_prefs.weekly
Modified: spamassassin/trunk/build/nightlymc/corpus.fredt
URL: http://svn.apache.org/viewvc/spamassassin/trunk/build/nightlymc/corpus.fredt?rev=596095&r1=596094&r2=596095&view=diff
==============================================================================
--- spamassassin/trunk/build/nightlymc/corpus.fredt (original)
+++ spamassassin/trunk/build/nightlymc/corpus.fredt Sun Nov 18 08:11:20 2007
@@ -1,5 +1,5 @@
-opts_weekly="--net --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 --net -j 8 -f /home/bbmass/mc-nightly/targets.fredt"
-opts_nightly="--cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 -f /home/bbmass/mc-nightly/targets.fredt"
+opts_weekly="--net --reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 --net -j 8 -f /home/bbmass/mc-nightly/targets.fredt"
+opts_nightly="--reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 -f /home/bbmass/mc-nightly/targets.fredt"
tmp=$HOME/tmp
tree=$HOME/svn
prefs_weekly=$HOME/user_prefs.weekly
Modified: spamassassin/trunk/build/nightlymc/corpus.jm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/build/nightlymc/corpus.jm?rev=596095&r1=596094&r2=596095&view=diff
==============================================================================
--- spamassassin/trunk/build/nightlymc/corpus.jm (original)
+++ spamassassin/trunk/build/nightlymc/corpus.jm Sun Nov 18 08:11:20 2007
@@ -1,5 +1,5 @@
-opts_weekly="--net --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 ham:detect:/home/bbmass/rawcor/jm/ham/* --after="15552000" --tail=25000 --scanprob=0.3 spam:detect:/home/bbmass/rawcor/jm/spam/*"
-opts_nightly="--cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 ham:detect:/home/bbmass/rawcor/jm/ham/* --after="15552000" --tail=25000 --scanprob=0.3 spam:detect:/home/bbmass/rawcor/jm/spam/*"
+opts_weekly="--net --reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 ham:detect:/home/bbmass/rawcor/jm/ham/* --after="15552000" --tail=25000 --scanprob=0.3 spam:detect:/home/bbmass/rawcor/jm/spam/*"
+opts_nightly="--reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 ham:detect:/home/bbmass/rawcor/jm/ham/* --after="15552000" --tail=25000 --scanprob=0.3 spam:detect:/home/bbmass/rawcor/jm/spam/*"
tmp=$HOME/tmp
tree=$HOME/svn
prefs_weekly=$HOME/user_prefs.weekly
Modified: spamassassin/trunk/build/nightlymc/corpus.zmi
URL: http://svn.apache.org/viewvc/spamassassin/trunk/build/nightlymc/corpus.zmi?rev=596095&r1=596094&r2=596095&view=diff
==============================================================================
--- spamassassin/trunk/build/nightlymc/corpus.zmi (original)
+++ spamassassin/trunk/build/nightlymc/corpus.zmi Sun Nov 18 08:11:20 2007
@@ -1,5 +1,5 @@
-opts_weekly="--net --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 --net -j 8 -f /home/bbmass/mc-nightly/targets.zmi"
-opts_nightly="--cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 -f /home/bbmass/mc-nightly/targets.zmi"
+opts_weekly="--net --reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 --net -j 8 -f /home/bbmass/mc-nightly/targets.zmi"
+opts_nightly="--reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 -f /home/bbmass/mc-nightly/targets.zmi"
tmp=$HOME/tmp
tree=$HOME/svn
prefs_weekly=$HOME/user_prefs.weekly
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm?rev=596095&r1=596094&r2=596095&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm Sun Nov 18 08:11:20 2007
@@ -435,10 +435,12 @@
$single_dnsbl = 1;
}
+ my $rhsblrules = $scanner->{uridnsbl_active_rules_rhsbl};
+ my $reviprules = $scanner->{uridnsbl_active_rules_revipbl};
+
if ($single_dnsbl) {
# look up the domain in the RHSBL subset
- my $cf = $scanner->{uridnsbl_active_rules_rhsbl};
- foreach my $rulename (keys %{$cf}) {
+ foreach my $rulename (keys %{$rhsblrules}) {
my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
$self->lookup_single_dnsbl($scanner, $obj, $rulename,
$dom, $rulecf->{zone}, $rulecf->{type});
@@ -447,8 +449,9 @@
$scanner->register_async_rule_start($rulename);
}
- # perform NS, A lookups to look up the domain in the non-RHSBL subset
- if ($dom !~ /^\d+\.\d+\.\d+\.\d+$/) {
+ # perform NS, A lookups to look up the domain in the non-RHSBL subset,
+ # but only if there are active reverse-IP-URIBL rules
+ if ($dom !~ /^\d+\.\d+\.\d+\.\d+$/ && (scalar keys %{$reviprules})) {
$self->lookup_domain_ns($scanner, $obj, $dom);
}
}
@@ -456,8 +459,7 @@
# note that these rules are now underway. important: unless the
# rule hits, in the current design, these will not be considered
# "finished" until harvest_dnsbl_queries() completes
- my $cf = $scanner->{uridnsbl_active_rules_revipbl};
- foreach my $rulename (keys %{$cf}) {
+ foreach my $rulename (keys %{$reviprules}) {
$scanner->register_async_rule_start($rulename);
}
}
Modified: spamassassin/trunk/masses/mass-check
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/mass-check?rev=596095&r1=596094&r2=596095&view=diff
==============================================================================
--- spamassassin/trunk/masses/mass-check (original)
+++ spamassassin/trunk/masses/mass-check Sun Nov 18 08:11:20 2007
@@ -250,6 +250,7 @@
}
require $rules_path;
+ unlink $rules_path;
}
if ($opt_noisy) {
@@ -331,6 +332,30 @@
usage(1) if !@targets;
}
+if ($opt_reuse) {
+ # if we have --reuse, don't bother testing DNS; we shouldn't be hitting
+ # the wire at all, and in fact we may be running without a net connection
+ push @{$opt_cf}, "dns_available yes\n";
+
+ # --reuse without --net means we need to just zero ALL net rules; skip net
+ # lookups entirely except for the reused ones. Do this before constructing
+ # the Mail::SpamAssassin object to ensure no network-rules stuff is compiled
+ # in compile_now(). (This config is for the reuse=no case. We create a
+ # reuse=yes config after compile_now().)
+ if (!$opt_net) {
+ my @zero = (sort grep {
+ (ref($rules{$_}) eq 'HASH')
+ && $rules{$_}->{tflags}
+ && $rules{$_}->{tflags} =~ /\bnet\b/;
+ } keys %rules);
+
+ foreach my $r (@zero) {
+ push @{$opt_cf}, "score $r 0\n";
+ # warn "--reuse/!--net zeroed $r";
+ }
+ }
+}
+
my $spamtest = new Mail::SpamAssassin ({
'debug' => $opt_debug,
'rules_filename' => $opt_c,
@@ -348,49 +373,36 @@
LOCAL_RULES_DIR => '',
});
-$spamtest->compile_now(1);
+$spamtest->compile_now(0); # 0 since we will be reading more configs
$spamtest->read_scoreonly_config("$FindBin::Bin/mass-check.cf");
# generated user_prefs
if ($opt_reuse) {
-
- # --reuse without --net means we need to just zero ALL net rules;
- # skip net lookups entirely except for the reused ones. This
- # config is for the reuse=no case.
- if (!$opt_net) {
- zero_scores_for_rules (sort grep {
- (ref($rules{$_}) eq 'HASH')
- && $rules{$_}->{tflags}
- && $rules{$_}->{tflags} =~ /\bnet\b/;
- } keys %rules);
- }
-
# copy current prefs if it exists
$spamtest->copy_config(undef, \%orig_conf);
# zeroed scores to reuse_prefs
- zero_scores_for_rules (sort grep {
+ my @zero = (sort grep {
defined $reuse{$_}->{skip}
} keys %reuse);
+ zero_rule_scores(@zero);
$spamtest->copy_config(undef, \%reuse_conf);
$reuse_rules_loaded_p = 1;
}
-sub zero_scores_for_rules {
+sub zero_rule_scores {
my @zero = @_;
- # TODO: this should use a file in ./tmp/, not something that could
- # accidentally be loaded later
- open(PREFS, ">$opt_p/reuse_prefs")
- or die "Unable to open $opt_p/reuse_prefs: $!\n".
+ my $pfile = "$FindBin::Bin/tmp/reuse_prefs";
+ open(PREFS, ">$pfile") or die "Unable to open $pfile: $!\n".
"Needed for --reuse to work properly";
-
for my $rule (@zero) {
print PREFS "score $rule 0\n";
}
- close(PREFS);
- $spamtest->read_scoreonly_config("$opt_p/reuse_prefs");
+ close PREFS or die "failed to write $pfile";
+ $spamtest->read_scoreonly_config($pfile);
+ unlink $pfile;
}
my $who = `id -un 2>/dev/null`;
@@ -728,35 +740,38 @@
# parse the message, and force it to complete
my $ma = $spamtest->parse($dataref, 1);
- # remove SpamAssassin markup, if present and the mail was spam
+ # get X-Spam-Status: header for rule hit reuse
my $x_spam_status;
if ($opt_reuse) {
- # get X-Spam-Status: header for rule hit resue
$x_spam_status = $ma->get_header("X-Spam-Status");
+ $x_spam_status =~ s/,\s+/,/gs;
}
-
my @previous; # previous hits, only set if $opt_reuse
- if ($x_spam_status) {
- $x_spam_status =~ s/,\s+/,/gs;
- if ($x_spam_status =~ m/tests=(\S*)/
+
+ if ($opt_reuse) {
+ if ($x_spam_status
+ && $x_spam_status =~ m/tests=(\S*)/
&& $x_spam_status !~ /\bshortcircuit=(?:ham|spam|default)\b/)
{
push @previous, split(/,/, $1);
# we found previous tests, so move the reuse config into place
unless ($reuse_rules_loaded_p) {
- $spamtest->copy_config(\%reuse_conf, undef);
- $reuse_rules_loaded_p = 1;
+ $spamtest->copy_config(\%reuse_conf, undef);
+ $reuse_rules_loaded_p = 1;
+ dbg "mass-check: reusing hits";
}
}
- }
- elsif ($opt_reuse) {
- if ($reuse_rules_loaded_p) {
- $spamtest->copy_config(\%orig_conf, undef);
- $reuse_rules_loaded_p = 0;
+ else {
+ if ($reuse_rules_loaded_p) {
+ $spamtest->copy_config(\%orig_conf, undef);
+ $reuse_rules_loaded_p = 0;
+ dbg "mass-check: not reusing hits";
+ }
}
}
+ # remove SpamAssassin markup, if present and the mail was spam
my $header = $ma->get_header("Received");
if ($header && $header =~ /\bwith SpamAssassin\b/) {
if (!$opt_deencap || message_should_be_deencapped($ma)) {
@@ -861,9 +876,17 @@
if ($reuse_rules_loaded_p) {
push(@extra, "reuse=yes");
- }
- else {
+ } else {
push(@extra, "reuse=no");
+ }
+
+ # log the scoreset we're in
+ {
+ my $set = 0;
+ if ($opt_net) { $set |= 1; }
+ if ($reuse_rules_loaded_p) { $set |= 1; }
+ if ($status && defined $status->{bayes_score}) { $set |= 2; }
+ push(@extra, "set=".$set);
}
if ($opt_client) {
Modified: spamassassin/trunk/rules/20_fake_helo_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/20_fake_helo_tests.cf?rev=596095&r1=596094&r2=596095&view=diff
==============================================================================
--- spamassassin/trunk/rules/20_fake_helo_tests.cf (original)
+++ spamassassin/trunk/rules/20_fake_helo_tests.cf Sun Nov 18 08:11:20 2007
@@ -133,17 +133,24 @@
ifplugin Mail::SpamAssassin::Plugin::RelayEval
+# these perform a lookup_ptr(), hence 'tflags net'
header FAKE_HELO_MSN eval:check_for_rdns_helo_mismatch("msn\.com","msn\.com")
describe FAKE_HELO_MSN Host HELO did not match rDNS: msn.com
+tflags FAKE_HELO_MSN net
header FAKE_HELO_MAIL_COM eval:check_for_rdns_helo_mismatch("mail\.com","mail\.com")
describe FAKE_HELO_MAIL_COM Host HELO did not match rDNS: mail.com
+tflags FAKE_HELO_MAIL_COM net
header FAKE_HELO_EMAIL_COM eval:check_for_rdns_helo_mismatch("email\.com","email\.com")
describe FAKE_HELO_EMAIL_COM Host HELO did not match rDNS: email.com
+tflags FAKE_HELO_EMAIL_COM net
header FAKE_HELO_EXCITE eval:check_for_rdns_helo_mismatch("excite\S*","excite\.com")
describe FAKE_HELO_EXCITE Host HELO did not match rDNS: excite.com
+tflags FAKE_HELO_EXCITE net
header FAKE_HELO_LYCOS eval:check_for_rdns_helo_mismatch("lycos\S*","lycos\.com")
describe FAKE_HELO_LYCOS Host HELO did not match rDNS: lycos.com
+tflags FAKE_HELO_LYCOS net
header FAKE_HELO_YAHOO_CA eval:check_for_rdns_helo_mismatch("yahoo\.ca","yahoo\.ca")
describe FAKE_HELO_YAHOO_CA Host HELO did not match rDNS: yahoo.ca
+tflags FAKE_HELO_YAHOO_CA net
endif
Modified: spamassassin/trunk/rules/20_head_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/20_head_tests.cf?rev=596095&r1=596094&r2=596095&view=diff
==============================================================================
--- spamassassin/trunk/rules/20_head_tests.cf (original)
+++ spamassassin/trunk/rules/20_head_tests.cf Sun Nov 18 08:11:20 2007
@@ -456,8 +456,11 @@
describe MISSING_HEADERS Missing To: header
# this variant is local, using the Received hdr itself...
+# it's still marked "tflags net" though since it uses the same underlying
+# code and can trigger lookup_ptr() using "mass-check --reuse" without "--net"
header ROUND_THE_WORLD_LOCAL eval:check_for_round_the_world_received_helo()
describe ROUND_THE_WORLD_LOCAL Received: says mail sent around the world (HELO)
+tflags ROUND_THE_WORLD_LOCAL net
header DATE_IN_PAST_03_06 eval:check_for_shifted_date('-6', '-3')
describe DATE_IN_PAST_03_06 Date: is 3 to 6 hours before Received: date
Modified: spamassassin/trunk/rules/25_spf.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/25_spf.cf?rev=596095&r1=596094&r2=596095&view=diff
==============================================================================
--- spamassassin/trunk/rules/25_spf.cf (original)
+++ spamassassin/trunk/rules/25_spf.cf Sun Nov 18 08:11:20 2007
@@ -57,8 +57,15 @@
describe SPF_HELO_FAIL SPF: HELO does not match SPF record (fail)
describe SPF_HELO_SOFTFAIL SPF: HELO does not match SPF record (softfail)
-tflags SPF_PASS nice userconf
-tflags SPF_HELO_PASS nice userconf
+# these are "userconf" so that scores are set by hand
+tflags SPF_PASS nice userconf net
+tflags SPF_HELO_PASS nice userconf net
+tflags SPF_NEUTRAL net
+tflags SPF_FAIL net
+tflags SPF_SOFTFAIL net
+tflags SPF_HELO_NEUTRAL net
+tflags SPF_HELO_FAIL net
+tflags SPF_HELO_SOFTFAIL net
# rules from earlier than current release that can be reused
#reuse SPF_PASS