You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/04/10 16:11:13 UTC

svn commit: r392957 - in /spamassassin/branches/bug-3109-shortcircuiting: MANIFEST lib/Mail/SpamAssassin/Conf/Parser.pm lib/Mail/SpamAssassin/Constants.pm lib/Mail/SpamAssassin/PerMsgStatus.pm t/priorities.t

Author: jm
Date: Mon Apr 10 07:11:11 2006
New Revision: 392957

URL: http://svn.apache.org/viewcvs?rev=392957&view=rev
Log:
bug 3109 patch 3467: automatically set meta-rule priorities, by tracing priorities from the 'top' rule through all its dependencies, ensuring they will all run before, or at the same time as, the top-level rule; obsoletes META_TEST_MIN_PRIORITY

Added:
    spamassassin/branches/bug-3109-shortcircuiting/t/priorities.t
Modified:
    spamassassin/branches/bug-3109-shortcircuiting/MANIFEST
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/Parser.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Constants.pm
    spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm

Modified: spamassassin/branches/bug-3109-shortcircuiting/MANIFEST
URL: http://svn.apache.org/viewcvs/spamassassin/branches/bug-3109-shortcircuiting/MANIFEST?rev=392957&r1=392956&r2=392957&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/MANIFEST (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/MANIFEST Mon Apr 10 07:11:11 2006
@@ -347,6 +347,7 @@
 t/plugin.t
 t/plugin_file.t
 t/prefs_include.t
+t/priorities.t
 t/razor2.t
 t/rcvd_parser.t
 t/recips.t

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/Parser.pm
URL: http://svn.apache.org/viewcvs/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/Parser.pm?rev=392957&r1=392956&r2=392957&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/Parser.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/Parser.pm Mon Apr 10 07:11:11 2006
@@ -648,6 +648,9 @@
   my ($self) = @_;
   my $conf = $self->{conf};
 
+  $self->trace_meta_dependencies();
+  $self->fix_priorities();
+
   while (my ($name, $text) = each %{$conf->{tests}}) {
     my $type = $conf->{test_types}->{$name};
     my $priority = $conf->{priority}->{$name} || 0;
@@ -710,15 +713,6 @@
         $conf->{head_tests}->{$priority}->{$name} = $text;
       }
       elsif ($type == $Mail::SpamAssassin::Conf::TYPE_META_TESTS) {
-        # Meta Tests must have a priority of at least META_TEST_MIN_PRIORITY,
-        # if it's lower then reset the value
-        if ($priority < META_TEST_MIN_PRIORITY) {
-          # we need to lower the count of the old priority and raise the
-          # count of the new priority
-          $conf->{priorities}->{$priority}--;
-          $priority = META_TEST_MIN_PRIORITY;
-          $conf->{priorities}->{$priority}++;
-        }
         $conf->{meta_tests}->{$priority}->{$name} = $text;
       }
       elsif ($type == $Mail::SpamAssassin::Conf::TYPE_URI_TESTS) {
@@ -743,6 +737,88 @@
 
   delete $conf->{tests};                # free it up
   delete $conf->{priority};             # free it up
+}
+
+sub trace_meta_dependencies {
+  my ($self) = @_;
+  my $conf = $self->{conf};
+  $conf->{meta_dependencies} = { };
+
+  foreach my $name (keys %{$conf->{tests}}) {
+    next unless ($conf->{test_types}->{$name}
+                    == $Mail::SpamAssassin::Conf::TYPE_META_TESTS);
+
+    my $deps = [ ];
+
+    # Lex the rule into tokens using a rather simple RE method ...
+    my $rule = $conf->{tests}->{$name};
+    my $lexer = ARITH_EXPRESSION_LEXER;
+    my @tokens = ($rule =~ m/$lexer/g);
+
+    # Go through each token in the meta rule
+    foreach my $token (@tokens) {
+
+      # Numbers can't be rule names
+      next if ($token =~ /^(?:\W+|\d+)$/);
+
+      # If the token is a rule name, add it as a dependency
+      next unless exists $conf->{tests}->{$token};
+      push @{$deps}, $token;
+    }
+
+    # this is too noisy
+    # if (scalar @$deps > 0) {
+    # dbg("rules: meta rule $name depends on ".join(' ', @$deps));
+    # }
+
+    $conf->{meta_dependencies}->{$name} = $deps;
+  }
+}
+
+sub fix_priorities {
+  my ($self) = @_;
+  my $conf = $self->{conf};
+
+  die unless $conf->{meta_dependencies};    # order requirement
+  my $pri = $conf->{priority};
+  my $alreadydone = { };
+
+  # sort into priority order, lowest first -- this way we ensure that if we
+  # rearrange the pri of a rule deep in recursion here, then later iterate in
+  # *this* loop to that rule, we cannot accidentally increase its priority.
+
+  foreach my $rule (sort {
+            $pri->{$a} <=> $pri->{$b}
+          } keys %{$pri})
+  {
+    $self->_fix_pri_recurse($conf, $rule, $rule, $alreadydone);
+  }
+}
+
+sub _fix_pri_recurse {
+  my ($self, $conf, $rule, $shorter, $alreadydone) = @_;
+
+  # avoid infinite recursion by only looking at each rule once
+  return if $alreadydone->{$rule};
+  $alreadydone->{$rule} = 1;
+
+  # we only need to worry about meta rules -- they are the
+  # only type of rules who depend on other rules
+  my $deps = $conf->{meta_dependencies}->{$rule};
+  return unless (defined $deps);
+
+  my $basepri = $conf->{priority}->{$rule};
+
+  foreach my $dep (@$deps) {
+    my $deppri = $conf->{priority}->{$dep};
+    if ($deppri > $basepri) {
+      dbg("rules: meta $shorter (pri $basepri) depends on $dep (pri $deppri): fixing");
+      $conf->{priority}->{$dep} = $basepri;
+    }
+
+    # and recurse
+    $self->_fix_pri_recurse($conf, $dep, $shorter, $alreadydone);
+  }
 }
 
 ###########################################################################

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Constants.pm
URL: http://svn.apache.org/viewcvs/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Constants.pm?rev=392957&r1=392956&r2=392957&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Constants.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Constants.pm Mon Apr 10 07:11:11 2006
@@ -34,7 +34,7 @@
 );
 # These are generic constants that may be used across several modules
 @SA_VARS = qw(
-	META_TEST_MIN_PRIORITY HARVEST_DNSBL_PRIORITY MBX_SEPARATOR
+	HARVEST_DNSBL_PRIORITY MBX_SEPARATOR
 	MAX_BODY_LINE_LENGTH MAX_HEADER_KEY_LENGTH MAX_HEADER_VALUE_LENGTH
 	MAX_HEADER_LENGTH ARITH_EXPRESSION_LEXER AI_TIME_UNKNOWN
 );
@@ -259,7 +259,6 @@
 
 # ---------------------------------------------------------------------------
 
-use constant META_TEST_MIN_PRIORITY => -500;
 use constant HARVEST_DNSBL_PRIORITY =>  500;
 
 # regular expression that matches message separators in The University of

Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewcvs/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=392957&r1=392956&r2=392957&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm Mon Apr 10 07:11:11 2006
@@ -190,13 +190,6 @@
        $self->{resolver}->finish_socket() if $self->{resolver};
       }
 
-      # since meta tests must have a priority of META_TEST_MIN_PRIORITY or
-      # higher then there is no reason to even call the do_meta_tests method
-      # if we are less than that.
-      if ($priority >= META_TEST_MIN_PRIORITY) {
-	$self->do_meta_tests($priority);
-      }
-
       # do head tests
       $self->do_head_tests($priority);
       $self->do_head_eval_tests($priority);
@@ -211,6 +204,8 @@
       $self->do_full_tests($priority, \$fulltext);
       $self->do_full_eval_tests($priority, \$fulltext);
 
+      $self->do_meta_tests($priority);
+
       # we may need to call this more often than once through the loop, but
       # it needs to be done at least once, either at the beginning or the end.
       $self->{main}->call_plugins ("check_tick", { permsgstatus => $self });
@@ -2547,9 +2542,10 @@
   return if (exists $self->{shortcircuit_type});
 
   dbg("rules: running meta tests; score so far=" . $self->{score} );
+  my $conf = $self->{conf};
 
   my $doing_user_rules = 
-    $self->{conf}->{user_rules_to_compile}->{$Mail::SpamAssassin::Conf::TYPE_META_TESTS};
+    $conf->{user_rules_to_compile}->{$Mail::SpamAssassin::Conf::TYPE_META_TESTS};
 
   # clean up priority value so it can be used in a subroutine name
   my $clean_priority;
@@ -2568,11 +2564,11 @@
   my $evalstr = '';
 
   # Get the list of meta tests
-  my @metas = keys %{ $self->{conf}{meta_tests}->{$priority} };
+  my @metas = keys %{ $conf->{meta_tests}->{$priority} };
 
   # Go through each rule and figure out what we need to do
   foreach $rulename (@metas) {
-    my $rule   = $self->{conf}->{meta_tests}->{$priority}->{$rulename};
+    my $rule   = $conf->{meta_tests}->{$priority}->{$rulename};
     my $token;
 
     # Lex the rule into tokens using a rather simple RE method ...
@@ -2582,8 +2578,8 @@
     # Set the rule blank to start
     $meta{$rulename} = "";
 
-    # By default, there are no dependencies for a rule
-    @{ $rule_deps{$rulename} } = ();
+    # List dependencies that are meta tests in the same priority band
+    $rule_deps{$rulename} = [ ];
 
     # Go through each token in the meta rule
     foreach $token (@tokens) {
@@ -2598,7 +2594,7 @@
 
         # If the token is another meta rule, add it as a dependency
         push (@{ $rule_deps{$rulename} }, $token)
-          if (exists $self->{conf}{meta_tests}->{$priority}->{$token});
+          if (exists $conf->{meta_tests}->{$priority}->{$token});
       }
     }
   }

Added: spamassassin/branches/bug-3109-shortcircuiting/t/priorities.t
URL: http://svn.apache.org/viewcvs/spamassassin/branches/bug-3109-shortcircuiting/t/priorities.t?rev=392957&view=auto
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/t/priorities.t (added)
+++ spamassassin/branches/bug-3109-shortcircuiting/t/priorities.t Mon Apr 10 07:11:11 2006
@@ -0,0 +1,113 @@
+#!/usr/bin/perl
+
+BEGIN {
+  if (-e 't/test_dir') { # if we are running "t/priorities.t", kluge around ...
+    chdir 't';
+  }
+  if (-e 'test_dir') {            # running from test directory, not ..
+    unshift(@INC, '../blib/lib');
+    unshift(@INC, '../lib');
+  }
+}
+
+my $prefix = '.';
+if (-e 'test_dir') {            # running from test directory, not ..
+  $prefix = '..';
+}
+
+use SATest; sa_t_init("priorities");
+use strict;
+use Test; BEGIN { plan tests => 8 };
+
+use Mail::SpamAssassin;
+
+tstlocalrules (q{
+
+  priority USER_IN_WHITELIST     -1000
+  priority USER_IN_DEF_WHITELIST -1000
+  priority USER_IN_ALL_SPAM_TO   -1000
+  priority SUBJECT_IN_WHITELIST  -1000
+
+  priority ALL_TRUSTED            -950
+
+  priority SUBJECT_IN_BLACKLIST   -900
+  priority USER_IN_BLACKLIST_TO   -900
+  priority USER_IN_BLACKLIST      -900
+
+  priority BAYES_99               -400
+
+  header XX_RCVD_IN_NJABL_MULTI     eval:check_rbl_sub('njabl', '127.0.0.5')
+  tflags XX_RCVD_IN_NJABL_MULTI     net
+  score XX_RCVD_IN_NJABL_MULTI      1
+
+  meta SC_URIBL_SURBL  (URIBL_BLACK && (URIBL_SC_SURBL || URIBL_JP_SURBL || URIBL_OB_SURBL ) && XX_RCVD_IN_NJABL_MULTI)
+  meta SC_URIBL_HASH   ((URIBL_BLACK || URIBL_SC_SURBL || URIBL_JP_SURBL || URIBL_OB_SURBL) && (RAZOR2_CHECK || DCC_CHECK || PYZOR_CHECK))
+  meta SC_URIBL_SBL    ((URIBL_BLACK || URIBL_SC_SURBL || URIBL_JP_SURBL || URIBL_OB_SURBL) && URIBL_SBL)
+  meta SC_URIBL_BAYES  ((URIBL_BLACK || URIBL_SC_SURBL || URIBL_JP_SURBL || URIBL_OB_SURBL) && BAYES_99)
+
+  shortcircuit SC_URIBL_SURBL        spam
+  shortcircuit SC_URIBL_HASH         spam
+  shortcircuit SC_URIBL_SBL          spam
+  shortcircuit SC_URIBL_BAYES        spam
+
+  priority SC_URIBL_SURBL            -530
+  priority SC_URIBL_HASH             -510
+  priority SC_URIBL_SBL              -510
+  priority SC_URIBL_BAYES            -510
+
+  shortcircuit DIGEST_MULTIPLE       spam
+  priority DIGEST_MULTIPLE           -300
+
+});
+
+my $sa = create_saobj({
+  dont_copy_prefs => 1,
+  # debug => 1
+});
+
+$sa->init(0); # parse rules
+ok($sa);
+my $conf = $sa->{conf};
+sub assert_rule_pri;
+
+ok assert_rule_pri 'USER_IN_WHITELIST', -1000;
+
+ok assert_rule_pri 'SC_URIBL_SURBL', -530;
+ok assert_rule_pri 'SC_URIBL_HASH', -510;
+ok assert_rule_pri 'SC_URIBL_SBL', -510;
+ok assert_rule_pri 'SC_URIBL_BAYES', -510;
+ok assert_rule_pri 'XX_RCVD_IN_NJABL_MULTI', -530;
+
+# SC_URIBL_BAYES will have overridden its base priority setting
+ok assert_rule_pri 'BAYES_99', -510;
+
+
+# ---------------------------------------------------------------------------
+
+sub assert_rule_pri {
+  my ($r, $pri) = @_;
+
+  if (defined $conf->{rbl_evals}->{$r}) {
+    # ignore rbl_evals; they do not use the priority system at all
+    return 1;
+  }
+
+  foreach my $ruletype (qw(
+    body_tests head_tests meta_tests uri_tests rawbody_tests full_tests
+    full_evals rawbody_evals head_evals body_evals
+  ))
+  {
+    if (defined $conf->{$ruletype}->{$pri}->{$r}) {
+      return 1;
+    }
+    foreach my $foundpri (keys %{$conf->{priorities}}) {
+      next unless (defined $conf->{$ruletype}->{$foundpri}->{$r});
+      warn "FAIL: rule '$r' not found at priority $pri; found at $foundpri\n";
+      return 0;
+    }
+  }
+
+  warn "FAIL: no rule '$r' found of any type at any priority\n";
+  return 0;
+}
+