You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2007/11/18 17:11:21 UTC

svn commit: r596095 - in /spamassassin/trunk: build/nightlymc/ lib/Mail/SpamAssassin/Plugin/ masses/ rules/

Author: jm
Date: Sun Nov 18 08:11:20 2007
New Revision: 596095

URL: http://svn.apache.org/viewvc?rev=596095&view=rev
Log:
bug 5711: allow 'mass-check --reuse' without '--net' to reuse net-rule hits and output mass-check results for scoreset 1, while lines that are not reusable use set 0.  Also, fix a few tests to use 'tflags net' if they use network lookups (including calls to lookup_ptr()).  Fix nightly mass-checks on the zone to use --reuse to take advantage of this.

Modified:
    spamassassin/trunk/build/nightlymc/corpus.doc
    spamassassin/trunk/build/nightlymc/corpus.fredt
    spamassassin/trunk/build/nightlymc/corpus.jm
    spamassassin/trunk/build/nightlymc/corpus.zmi
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
    spamassassin/trunk/masses/mass-check
    spamassassin/trunk/rules/20_fake_helo_tests.cf
    spamassassin/trunk/rules/20_head_tests.cf
    spamassassin/trunk/rules/25_spf.cf

Modified: spamassassin/trunk/build/nightlymc/corpus.doc
URL: http://svn.apache.org/viewvc/spamassassin/trunk/build/nightlymc/corpus.doc?rev=596095&r1=596094&r2=596095&view=diff
==============================================================================
--- spamassassin/trunk/build/nightlymc/corpus.doc (original)
+++ spamassassin/trunk/build/nightlymc/corpus.doc Sun Nov 18 08:11:20 2007
@@ -1,5 +1,5 @@
-opts_weekly="--net --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 --net -j 8 -f /home/bbmass/mc-nightly/targets.doc"
-opts_nightly="--cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 -f /home/bbmass/mc-nightly/targets.doc"
+opts_weekly="--net --reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 --net -j 8 -f /home/bbmass/mc-nightly/targets.doc"
+opts_nightly="--reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 -f /home/bbmass/mc-nightly/targets.doc"
 tmp=$HOME/tmp
 tree=$HOME/svn
 prefs_weekly=$HOME/user_prefs.weekly

Modified: spamassassin/trunk/build/nightlymc/corpus.fredt
URL: http://svn.apache.org/viewvc/spamassassin/trunk/build/nightlymc/corpus.fredt?rev=596095&r1=596094&r2=596095&view=diff
==============================================================================
--- spamassassin/trunk/build/nightlymc/corpus.fredt (original)
+++ spamassassin/trunk/build/nightlymc/corpus.fredt Sun Nov 18 08:11:20 2007
@@ -1,5 +1,5 @@
-opts_weekly="--net --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 --net -j 8 -f /home/bbmass/mc-nightly/targets.fredt"
-opts_nightly="--cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 -f /home/bbmass/mc-nightly/targets.fredt"
+opts_weekly="--net --reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 --net -j 8 -f /home/bbmass/mc-nightly/targets.fredt"
+opts_nightly="--reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 -f /home/bbmass/mc-nightly/targets.fredt"
 tmp=$HOME/tmp
 tree=$HOME/svn
 prefs_weekly=$HOME/user_prefs.weekly

Modified: spamassassin/trunk/build/nightlymc/corpus.jm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/build/nightlymc/corpus.jm?rev=596095&r1=596094&r2=596095&view=diff
==============================================================================
--- spamassassin/trunk/build/nightlymc/corpus.jm (original)
+++ spamassassin/trunk/build/nightlymc/corpus.jm Sun Nov 18 08:11:20 2007
@@ -1,5 +1,5 @@
-opts_weekly="--net --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 ham:detect:/home/bbmass/rawcor/jm/ham/* --after="15552000" --tail=25000 --scanprob=0.3 spam:detect:/home/bbmass/rawcor/jm/spam/*"
-opts_nightly="--cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 ham:detect:/home/bbmass/rawcor/jm/ham/* --after="15552000" --tail=25000 --scanprob=0.3 spam:detect:/home/bbmass/rawcor/jm/spam/*"
+opts_weekly="--net --reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 ham:detect:/home/bbmass/rawcor/jm/ham/* --after="15552000" --tail=25000 --scanprob=0.3 spam:detect:/home/bbmass/rawcor/jm/spam/*"
+opts_nightly="--reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 ham:detect:/home/bbmass/rawcor/jm/ham/* --after="15552000" --tail=25000 --scanprob=0.3 spam:detect:/home/bbmass/rawcor/jm/spam/*"
 tmp=$HOME/tmp
 tree=$HOME/svn
 prefs_weekly=$HOME/user_prefs.weekly

Modified: spamassassin/trunk/build/nightlymc/corpus.zmi
URL: http://svn.apache.org/viewvc/spamassassin/trunk/build/nightlymc/corpus.zmi?rev=596095&r1=596094&r2=596095&view=diff
==============================================================================
--- spamassassin/trunk/build/nightlymc/corpus.zmi (original)
+++ spamassassin/trunk/build/nightlymc/corpus.zmi Sun Nov 18 08:11:20 2007
@@ -1,5 +1,5 @@
-opts_weekly="--net --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 --net -j 8 -f /home/bbmass/mc-nightly/targets.zmi"
-opts_nightly="--cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 -f /home/bbmass/mc-nightly/targets.zmi"
+opts_weekly="--net --reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 --net -j 8 -f /home/bbmass/mc-nightly/targets.zmi"
+opts_nightly="--reuse --cache --cachedir=/tmpfs/aicache_nightly --cs_schedule_cache --cs_cachedir=/export/home/bbmass/cache --restart=500 --after="15552000" --tail=15000 -f /home/bbmass/mc-nightly/targets.zmi"
 tmp=$HOME/tmp
 tree=$HOME/svn
 prefs_weekly=$HOME/user_prefs.weekly

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm?rev=596095&r1=596094&r2=596095&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm Sun Nov 18 08:11:20 2007
@@ -435,10 +435,12 @@
     $single_dnsbl = 1;
   }
 
+  my $rhsblrules = $scanner->{uridnsbl_active_rules_rhsbl};
+  my $reviprules = $scanner->{uridnsbl_active_rules_revipbl};
+
   if ($single_dnsbl) {
     # look up the domain in the RHSBL subset
-    my $cf = $scanner->{uridnsbl_active_rules_rhsbl};
-    foreach my $rulename (keys %{$cf}) {
+    foreach my $rulename (keys %{$rhsblrules}) {
       my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
       $self->lookup_single_dnsbl($scanner, $obj, $rulename,
 				 $dom, $rulecf->{zone}, $rulecf->{type});
@@ -447,8 +449,9 @@
       $scanner->register_async_rule_start($rulename);
     }
 
-    # perform NS, A lookups to look up the domain in the non-RHSBL subset
-    if ($dom !~ /^\d+\.\d+\.\d+\.\d+$/) {
+    # perform NS, A lookups to look up the domain in the non-RHSBL subset,
+    # but only if there are active reverse-IP-URIBL rules
+    if ($dom !~ /^\d+\.\d+\.\d+\.\d+$/ && (scalar keys %{$reviprules})) {
       $self->lookup_domain_ns($scanner, $obj, $dom);
     }
   }
@@ -456,8 +459,7 @@
   # note that these rules are now underway.   important: unless the
   # rule hits, in the current design, these will not be considered
   # "finished" until harvest_dnsbl_queries() completes
-  my $cf = $scanner->{uridnsbl_active_rules_revipbl};
-  foreach my $rulename (keys %{$cf}) {
+  foreach my $rulename (keys %{$reviprules}) {
     $scanner->register_async_rule_start($rulename);
   }
 }

Modified: spamassassin/trunk/masses/mass-check
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/mass-check?rev=596095&r1=596094&r2=596095&view=diff
==============================================================================
--- spamassassin/trunk/masses/mass-check (original)
+++ spamassassin/trunk/masses/mass-check Sun Nov 18 08:11:20 2007
@@ -250,6 +250,7 @@
   }
 
   require $rules_path;
+  unlink $rules_path;
 }
 
 if ($opt_noisy) {
@@ -331,6 +332,30 @@
   usage(1) if !@targets;
 }
 
+if ($opt_reuse) {
+  # if we have --reuse, don't bother testing DNS; we shouldn't be hitting
+  # the wire at all, and in fact we may be running without a net connection
+  push @{$opt_cf}, "dns_available yes\n";
+
+  # --reuse without --net means we need to just zero ALL net rules; skip net
+  # lookups entirely except for the reused ones.  Do this before constructing
+  # the Mail::SpamAssassin object to ensure no network-rules stuff is compiled
+  # in compile_now().  (This config is for the reuse=no case.  We create a
+  # reuse=yes config after compile_now().)
+  if (!$opt_net) {
+    my @zero = (sort grep {
+          (ref($rules{$_}) eq 'HASH')
+                && $rules{$_}->{tflags}
+                && $rules{$_}->{tflags} =~ /\bnet\b/; 
+        } keys %rules);
+
+    foreach my $r (@zero) {
+      push @{$opt_cf}, "score $r 0\n";
+      # warn "--reuse/!--net zeroed $r";
+    }
+  }
+}
+
 my $spamtest = new Mail::SpamAssassin ({
   'debug'              			=> $opt_debug,
   'rules_filename'     			=> $opt_c,
@@ -348,49 +373,36 @@
   LOCAL_RULES_DIR      			=> '',
 });
 
-$spamtest->compile_now(1);
+$spamtest->compile_now(0);      # 0 since we will be reading more configs
 $spamtest->read_scoreonly_config("$FindBin::Bin/mass-check.cf");
 
 # generated user_prefs
 if ($opt_reuse) {
-
-  # --reuse without --net means we need to just zero ALL net rules;
-  # skip net lookups entirely except for the reused ones.  This
-  # config is for the reuse=no case.
-  if (!$opt_net) {
-    zero_scores_for_rules (sort grep {
-          (ref($rules{$_}) eq 'HASH')
-                && $rules{$_}->{tflags}
-                && $rules{$_}->{tflags} =~ /\bnet\b/; 
-        } keys %rules);
-  }
-
   # copy current prefs if it exists
   $spamtest->copy_config(undef, \%orig_conf);
 
   # zeroed scores to reuse_prefs
-  zero_scores_for_rules (sort grep {
+  my @zero = (sort grep {
           defined $reuse{$_}->{skip} 
         } keys %reuse);
+  zero_rule_scores(@zero);
 
   $spamtest->copy_config(undef, \%reuse_conf);
   $reuse_rules_loaded_p = 1;
 }
 
-sub zero_scores_for_rules {
+sub zero_rule_scores {
   my @zero = @_;
 
-  # TODO: this should use a file in ./tmp/, not something that could
-  # accidentally be loaded later
-  open(PREFS, ">$opt_p/reuse_prefs") 
-        or die "Unable to open $opt_p/reuse_prefs: $!\n".
+  my $pfile = "$FindBin::Bin/tmp/reuse_prefs";
+  open(PREFS, ">$pfile") or die "Unable to open $pfile: $!\n".
             "Needed for --reuse to work properly";
-
   for my $rule (@zero) {
     print PREFS "score $rule 0\n";
   }
-  close(PREFS);
-  $spamtest->read_scoreonly_config("$opt_p/reuse_prefs");
+  close PREFS or die "failed to write $pfile";
+  $spamtest->read_scoreonly_config($pfile);
+  unlink $pfile;
 }
 
 my $who = `id -un 2>/dev/null`;
@@ -728,35 +740,38 @@
   # parse the message, and force it to complete
   my $ma = $spamtest->parse($dataref, 1);
 
-  # remove SpamAssassin markup, if present and the mail was spam
+  # get X-Spam-Status: header for rule hit resue
   my $x_spam_status;
   if ($opt_reuse) {
-    # get X-Spam-Status: header for rule hit resue
     $x_spam_status = $ma->get_header("X-Spam-Status");
+    $x_spam_status =~ s/,\s+/,/gs;
   }
-
   my @previous; # previous hits, only set if $opt_reuse
-  if ($x_spam_status) {
-    $x_spam_status =~ s/,\s+/,/gs;
-    if ($x_spam_status =~ m/tests=(\S*)/
+
+  if ($opt_reuse) {
+    if ($x_spam_status
+        && $x_spam_status =~ m/tests=(\S*)/
         && $x_spam_status !~ /\bshortcircuit=(?:ham|spam|default)\b/)
     {
       push @previous, split(/,/, $1);
 
       # we found previous tests, so move the reuse config into place
       unless ($reuse_rules_loaded_p) {
-	$spamtest->copy_config(\%reuse_conf, undef);
-	$reuse_rules_loaded_p = 1;
+        $spamtest->copy_config(\%reuse_conf, undef);
+        $reuse_rules_loaded_p = 1;
+        dbg "mass-check: reusing hits";
       }
     }
-  }
-  elsif ($opt_reuse) {
-    if ($reuse_rules_loaded_p) {
-      $spamtest->copy_config(\%orig_conf, undef);
-      $reuse_rules_loaded_p = 0;
+    else {
+      if ($reuse_rules_loaded_p) {
+        $spamtest->copy_config(\%orig_conf, undef);
+        $reuse_rules_loaded_p = 0;
+        dbg "mass-check: not reusing hits";
+      }
     }
   }
 
+  # remove SpamAssassin markup, if present and the mail was spam
   my $header = $ma->get_header("Received");
   if ($header && $header =~ /\bwith SpamAssassin\b/) {
     if (!$opt_deencap || message_should_be_deencapped($ma)) {
@@ -861,9 +876,17 @@
 
   if ($reuse_rules_loaded_p) {
     push(@extra, "reuse=yes");
-  }
-  else {
+  } else {
     push(@extra, "reuse=no");
+  }
+
+  # log the scoreset we're in
+  {
+    my $set = 0;
+    if ($opt_net) { $set |= 1; }
+    if ($reuse_rules_loaded_p) { $set |= 1; }
+    if ($status && defined $status->{bayes_score}) { $set |= 2; }
+    push(@extra, "set=".$set);
   }
 
   if ($opt_client) {

Modified: spamassassin/trunk/rules/20_fake_helo_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/20_fake_helo_tests.cf?rev=596095&r1=596094&r2=596095&view=diff
==============================================================================
--- spamassassin/trunk/rules/20_fake_helo_tests.cf (original)
+++ spamassassin/trunk/rules/20_fake_helo_tests.cf Sun Nov 18 08:11:20 2007
@@ -133,17 +133,24 @@
 
 ifplugin Mail::SpamAssassin::Plugin::RelayEval
 
+# these perform a lookup_ptr(), hence 'tflags net'
 header FAKE_HELO_MSN		eval:check_for_rdns_helo_mismatch("msn\.com","msn\.com")
 describe FAKE_HELO_MSN		Host HELO did not match rDNS: msn.com
+tflags FAKE_HELO_MSN		net
 header FAKE_HELO_MAIL_COM	eval:check_for_rdns_helo_mismatch("mail\.com","mail\.com")
 describe FAKE_HELO_MAIL_COM	Host HELO did not match rDNS: mail.com
+tflags FAKE_HELO_MAIL_COM	net
 header FAKE_HELO_EMAIL_COM	eval:check_for_rdns_helo_mismatch("email\.com","email\.com")
 describe FAKE_HELO_EMAIL_COM	Host HELO did not match rDNS: email.com
+tflags FAKE_HELO_EMAIL_COM	net
 header FAKE_HELO_EXCITE		eval:check_for_rdns_helo_mismatch("excite\S*","excite\.com")
 describe FAKE_HELO_EXCITE	Host HELO did not match rDNS: excite.com
+tflags FAKE_HELO_EXCITE	        net
 header FAKE_HELO_LYCOS		eval:check_for_rdns_helo_mismatch("lycos\S*","lycos\.com")
 describe FAKE_HELO_LYCOS	Host HELO did not match rDNS: lycos.com
+tflags FAKE_HELO_LYCOS	        net
 header FAKE_HELO_YAHOO_CA	eval:check_for_rdns_helo_mismatch("yahoo\.ca","yahoo\.ca")
 describe FAKE_HELO_YAHOO_CA	Host HELO did not match rDNS: yahoo.ca
+tflags FAKE_HELO_YAHOO_CA	net
 
 endif

Modified: spamassassin/trunk/rules/20_head_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/20_head_tests.cf?rev=596095&r1=596094&r2=596095&view=diff
==============================================================================
--- spamassassin/trunk/rules/20_head_tests.cf (original)
+++ spamassassin/trunk/rules/20_head_tests.cf Sun Nov 18 08:11:20 2007
@@ -456,8 +456,11 @@
 describe MISSING_HEADERS	Missing To: header
 
 # this variant is local, using the Received hdr itself...
+# it's still marked "tflags net" though since it uses the same underlying
+# code and can trigger lookup_ptr() using "mass-check --reuse" without "--net"
 header ROUND_THE_WORLD_LOCAL	eval:check_for_round_the_world_received_helo()
 describe ROUND_THE_WORLD_LOCAL	Received: says mail sent around the world (HELO)
+tflags ROUND_THE_WORLD_LOCAL	net
 
 header DATE_IN_PAST_03_06	eval:check_for_shifted_date('-6', '-3')
 describe DATE_IN_PAST_03_06	Date: is 3 to 6 hours before Received: date

Modified: spamassassin/trunk/rules/25_spf.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/25_spf.cf?rev=596095&r1=596094&r2=596095&view=diff
==============================================================================
--- spamassassin/trunk/rules/25_spf.cf (original)
+++ spamassassin/trunk/rules/25_spf.cf Sun Nov 18 08:11:20 2007
@@ -57,8 +57,15 @@
 describe SPF_HELO_FAIL		SPF: HELO does not match SPF record (fail)
 describe SPF_HELO_SOFTFAIL	SPF: HELO does not match SPF record (softfail)
 
-tflags SPF_PASS			nice userconf
-tflags SPF_HELO_PASS		nice userconf
+# these are "userconf" so that scores are set by hand
+tflags SPF_PASS			nice userconf net
+tflags SPF_HELO_PASS		nice userconf net
+tflags SPF_NEUTRAL		net
+tflags SPF_FAIL		        net
+tflags SPF_SOFTFAIL		net
+tflags SPF_HELO_NEUTRAL	        net
+tflags SPF_HELO_FAIL		net
+tflags SPF_HELO_SOFTFAIL	net
 
 # rules from earlier than current release that can be reused
 #reuse SPF_PASS