You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/04/10 16:11:13 UTC
svn commit: r392957 - in /spamassassin/branches/bug-3109-shortcircuiting:
MANIFEST lib/Mail/SpamAssassin/Conf/Parser.pm
lib/Mail/SpamAssassin/Constants.pm lib/Mail/SpamAssassin/PerMsgStatus.pm
t/priorities.t
Author: jm
Date: Mon Apr 10 07:11:11 2006
New Revision: 392957
URL: http://svn.apache.org/viewcvs?rev=392957&view=rev
Log:
bug 3109 patch 3467: automatically set meta-rule priorities, by tracing priorities from the 'top' rule through all its dependencies, ensuring they will all run before, or at the same time as, the top-level rule; obsoletes META_TEST_MIN_PRIORITY
Added:
spamassassin/branches/bug-3109-shortcircuiting/t/priorities.t
Modified:
spamassassin/branches/bug-3109-shortcircuiting/MANIFEST
spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/Parser.pm
spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Constants.pm
spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm
Modified: spamassassin/branches/bug-3109-shortcircuiting/MANIFEST
URL: http://svn.apache.org/viewcvs/spamassassin/branches/bug-3109-shortcircuiting/MANIFEST?rev=392957&r1=392956&r2=392957&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/MANIFEST (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/MANIFEST Mon Apr 10 07:11:11 2006
@@ -347,6 +347,7 @@
t/plugin.t
t/plugin_file.t
t/prefs_include.t
+t/priorities.t
t/razor2.t
t/rcvd_parser.t
t/recips.t
Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/Parser.pm
URL: http://svn.apache.org/viewcvs/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/Parser.pm?rev=392957&r1=392956&r2=392957&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/Parser.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf/Parser.pm Mon Apr 10 07:11:11 2006
@@ -648,6 +648,9 @@
my ($self) = @_;
my $conf = $self->{conf};
+ $self->trace_meta_dependencies();
+ $self->fix_priorities();
+
while (my ($name, $text) = each %{$conf->{tests}}) {
my $type = $conf->{test_types}->{$name};
my $priority = $conf->{priority}->{$name} || 0;
@@ -710,15 +713,6 @@
$conf->{head_tests}->{$priority}->{$name} = $text;
}
elsif ($type == $Mail::SpamAssassin::Conf::TYPE_META_TESTS) {
- # Meta Tests must have a priority of at least META_TEST_MIN_PRIORITY,
- # if it's lower then reset the value
- if ($priority < META_TEST_MIN_PRIORITY) {
- # we need to lower the count of the old priority and raise the
- # count of the new priority
- $conf->{priorities}->{$priority}--;
- $priority = META_TEST_MIN_PRIORITY;
- $conf->{priorities}->{$priority}++;
- }
$conf->{meta_tests}->{$priority}->{$name} = $text;
}
elsif ($type == $Mail::SpamAssassin::Conf::TYPE_URI_TESTS) {
@@ -743,6 +737,88 @@
delete $conf->{tests}; # free it up
delete $conf->{priority}; # free it up
+}
+
+sub trace_meta_dependencies {
+ my ($self) = @_;
+ my $conf = $self->{conf};
+ $conf->{meta_dependencies} = { };
+
+ foreach my $name (keys %{$conf->{tests}}) {
+ next unless ($conf->{test_types}->{$name}
+ == $Mail::SpamAssassin::Conf::TYPE_META_TESTS);
+
+ my $deps = [ ];
+
+ # Lex the rule into tokens using a rather simple RE method ...
+ my $rule = $conf->{tests}->{$name};
+ my $lexer = ARITH_EXPRESSION_LEXER;
+ my @tokens = ($rule =~ m/$lexer/g);
+
+ # Go through each token in the meta rule
+ foreach my $token (@tokens) {
+
+ # Numbers can't be rule names
+ next if ($token =~ /^(?:\W+|\d+)$/);
+
+ # If the token is a rule name, add it as a dependency
+ next unless exists $conf->{tests}->{$token};
+ push @{$deps}, $token;
+ }
+
+ # this is too noisy
+ # if (scalar @$deps > 0) {
+ # dbg("rules: meta rule $name depends on ".join(' ', @$deps));
+ # }
+
+ $conf->{meta_dependencies}->{$name} = $deps;
+ }
+}
+
+sub fix_priorities {
+ my ($self) = @_;
+ my $conf = $self->{conf};
+
+ die unless $conf->{meta_dependencies}; # order requirement
+ my $pri = $conf->{priority};
+ my $alreadydone = { };
+
+ # sort into priority order, lowest first -- this way we ensure that if we
+ # rearrange the pri of a rule deep in recursion here, then later iterate in
+ # *this* loop to that rule, we cannot accidentally increase its priority.
+
+ foreach my $rule (sort {
+ $pri->{$a} <=> $pri->{$b}
+ } keys %{$pri})
+ {
+ $self->_fix_pri_recurse($conf, $rule, $rule, $alreadydone);
+ }
+}
+
+sub _fix_pri_recurse {
+ my ($self, $conf, $rule, $shorter, $alreadydone) = @_;
+
+ # avoid infinite recursion by only looking at each rule once
+ return if $alreadydone->{$rule};
+ $alreadydone->{$rule} = 1;
+
+ # we only need to worry about meta rules -- they are the
+ # only type of rules who depend on other rules
+ my $deps = $conf->{meta_dependencies}->{$rule};
+ return unless (defined $deps);
+
+ my $basepri = $conf->{priority}->{$rule};
+
+ foreach my $dep (@$deps) {
+ my $deppri = $conf->{priority}->{$dep};
+ if ($deppri > $basepri) {
+ dbg("rules: meta $shorter (pri $basepri) depends on $dep (pri $deppri): fixing");
+ $conf->{priority}->{$dep} = $basepri;
+ }
+
+ # and recurse
+ $self->_fix_pri_recurse($conf, $dep, $shorter, $alreadydone);
+ }
}
###########################################################################
Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Constants.pm
URL: http://svn.apache.org/viewcvs/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Constants.pm?rev=392957&r1=392956&r2=392957&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Constants.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Constants.pm Mon Apr 10 07:11:11 2006
@@ -34,7 +34,7 @@
);
# These are generic constants that may be used across several modules
@SA_VARS = qw(
- META_TEST_MIN_PRIORITY HARVEST_DNSBL_PRIORITY MBX_SEPARATOR
+ HARVEST_DNSBL_PRIORITY MBX_SEPARATOR
MAX_BODY_LINE_LENGTH MAX_HEADER_KEY_LENGTH MAX_HEADER_VALUE_LENGTH
MAX_HEADER_LENGTH ARITH_EXPRESSION_LEXER AI_TIME_UNKNOWN
);
@@ -259,7 +259,6 @@
# ---------------------------------------------------------------------------
-use constant META_TEST_MIN_PRIORITY => -500;
use constant HARVEST_DNSBL_PRIORITY => 500;
# regular expression that matches message separators in The University of
Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewcvs/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=392957&r1=392956&r2=392957&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm Mon Apr 10 07:11:11 2006
@@ -190,13 +190,6 @@
$self->{resolver}->finish_socket() if $self->{resolver};
}
- # since meta tests must have a priority of META_TEST_MIN_PRIORITY or
- # higher then there is no reason to even call the do_meta_tests method
- # if we are less than that.
- if ($priority >= META_TEST_MIN_PRIORITY) {
- $self->do_meta_tests($priority);
- }
-
# do head tests
$self->do_head_tests($priority);
$self->do_head_eval_tests($priority);
@@ -211,6 +204,8 @@
$self->do_full_tests($priority, \$fulltext);
$self->do_full_eval_tests($priority, \$fulltext);
+ $self->do_meta_tests($priority);
+
# we may need to call this more often than once through the loop, but
# it needs to be done at least once, either at the beginning or the end.
$self->{main}->call_plugins ("check_tick", { permsgstatus => $self });
@@ -2547,9 +2542,10 @@
return if (exists $self->{shortcircuit_type});
dbg("rules: running meta tests; score so far=" . $self->{score} );
+ my $conf = $self->{conf};
my $doing_user_rules =
- $self->{conf}->{user_rules_to_compile}->{$Mail::SpamAssassin::Conf::TYPE_META_TESTS};
+ $conf->{user_rules_to_compile}->{$Mail::SpamAssassin::Conf::TYPE_META_TESTS};
# clean up priority value so it can be used in a subroutine name
my $clean_priority;
@@ -2568,11 +2564,11 @@
my $evalstr = '';
# Get the list of meta tests
- my @metas = keys %{ $self->{conf}{meta_tests}->{$priority} };
+ my @metas = keys %{ $conf->{meta_tests}->{$priority} };
# Go through each rule and figure out what we need to do
foreach $rulename (@metas) {
- my $rule = $self->{conf}->{meta_tests}->{$priority}->{$rulename};
+ my $rule = $conf->{meta_tests}->{$priority}->{$rulename};
my $token;
# Lex the rule into tokens using a rather simple RE method ...
@@ -2582,8 +2578,8 @@
# Set the rule blank to start
$meta{$rulename} = "";
- # By default, there are no dependencies for a rule
- @{ $rule_deps{$rulename} } = ();
+ # List dependencies that are meta tests in the same priority band
+ $rule_deps{$rulename} = [ ];
# Go through each token in the meta rule
foreach $token (@tokens) {
@@ -2598,7 +2594,7 @@
# If the token is another meta rule, add it as a dependency
push (@{ $rule_deps{$rulename} }, $token)
- if (exists $self->{conf}{meta_tests}->{$priority}->{$token});
+ if (exists $conf->{meta_tests}->{$priority}->{$token});
}
}
}
Added: spamassassin/branches/bug-3109-shortcircuiting/t/priorities.t
URL: http://svn.apache.org/viewcvs/spamassassin/branches/bug-3109-shortcircuiting/t/priorities.t?rev=392957&view=auto
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/t/priorities.t (added)
+++ spamassassin/branches/bug-3109-shortcircuiting/t/priorities.t Mon Apr 10 07:11:11 2006
@@ -0,0 +1,113 @@
+#!/usr/bin/perl
+
+BEGIN {
+ if (-e 't/test_dir') { # if we are running "t/priorities.t", kluge around ...
+ chdir 't';
+ }
+ if (-e 'test_dir') { # running from test directory, not ..
+ unshift(@INC, '../blib/lib');
+ unshift(@INC, '../lib');
+ }
+}
+
+my $prefix = '.';
+if (-e 'test_dir') { # running from test directory, not ..
+ $prefix = '..';
+}
+
+use SATest; sa_t_init("priorities");
+use strict;
+use Test; BEGIN { plan tests => 8 };
+
+use Mail::SpamAssassin;
+
+tstlocalrules (q{
+
+ priority USER_IN_WHITELIST -1000
+ priority USER_IN_DEF_WHITELIST -1000
+ priority USER_IN_ALL_SPAM_TO -1000
+ priority SUBJECT_IN_WHITELIST -1000
+
+ priority ALL_TRUSTED -950
+
+ priority SUBJECT_IN_BLACKLIST -900
+ priority USER_IN_BLACKLIST_TO -900
+ priority USER_IN_BLACKLIST -900
+
+ priority BAYES_99 -400
+
+ header XX_RCVD_IN_NJABL_MULTI eval:check_rbl_sub('njabl', '127.0.0.5')
+ tflags XX_RCVD_IN_NJABL_MULTI net
+ score XX_RCVD_IN_NJABL_MULTI 1
+
+ meta SC_URIBL_SURBL (URIBL_BLACK && (URIBL_SC_SURBL || URIBL_JP_SURBL || URIBL_OB_SURBL ) && XX_RCVD_IN_NJABL_MULTI)
+ meta SC_URIBL_HASH ((URIBL_BLACK || URIBL_SC_SURBL || URIBL_JP_SURBL || URIBL_OB_SURBL) && (RAZOR2_CHECK || DCC_CHECK || PYZOR_CHECK))
+ meta SC_URIBL_SBL ((URIBL_BLACK || URIBL_SC_SURBL || URIBL_JP_SURBL || URIBL_OB_SURBL) && URIBL_SBL)
+ meta SC_URIBL_BAYES ((URIBL_BLACK || URIBL_SC_SURBL || URIBL_JP_SURBL || URIBL_OB_SURBL) && BAYES_99)
+
+ shortcircuit SC_URIBL_SURBL spam
+ shortcircuit SC_URIBL_HASH spam
+ shortcircuit SC_URIBL_SBL spam
+ shortcircuit SC_URIBL_BAYES spam
+
+ priority SC_URIBL_SURBL -530
+ priority SC_URIBL_HASH -510
+ priority SC_URIBL_SBL -510
+ priority SC_URIBL_BAYES -510
+
+ shortcircuit DIGEST_MULTIPLE spam
+ priority DIGEST_MULTIPLE -300
+
+});
+
+my $sa = create_saobj({
+ dont_copy_prefs => 1,
+ # debug => 1
+});
+
+$sa->init(0); # parse rules
+ok($sa);
+my $conf = $sa->{conf};
+sub assert_rule_pri;
+
+ok assert_rule_pri 'USER_IN_WHITELIST', -1000;
+
+ok assert_rule_pri 'SC_URIBL_SURBL', -530;
+ok assert_rule_pri 'SC_URIBL_HASH', -510;
+ok assert_rule_pri 'SC_URIBL_SBL', -510;
+ok assert_rule_pri 'SC_URIBL_BAYES', -510;
+ok assert_rule_pri 'XX_RCVD_IN_NJABL_MULTI', -530;
+
+# SC_URIBL_BAYES will have overridden its base priority setting
+ok assert_rule_pri 'BAYES_99', -510;
+
+
+# ---------------------------------------------------------------------------
+
+sub assert_rule_pri {
+ my ($r, $pri) = @_;
+
+ if (defined $conf->{rbl_evals}->{$r}) {
+ # ignore rbl_evals; they do not use the priority system at all
+ return 1;
+ }
+
+ foreach my $ruletype (qw(
+ body_tests head_tests meta_tests uri_tests rawbody_tests full_tests
+ full_evals rawbody_evals head_evals body_evals
+ ))
+ {
+ if (defined $conf->{$ruletype}->{$pri}->{$r}) {
+ return 1;
+ }
+ foreach my $foundpri (keys %{$conf->{priorities}}) {
+ next unless (defined $conf->{$ruletype}->{$foundpri}->{$r});
+ warn "FAIL: rule '$r' not found at priority $pri; found at $foundpri\n";
+ return 0;
+ }
+ }
+
+ warn "FAIL: no rule '$r' found of any type at any priority\n";
+ return 0;
+}
+