You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/07/21 21:06:11 UTC
svn commit: r424421 - in /spamassassin/branches/bug-3109-shortcircuiting: ./
build/automc/ build/buildbot/ lib/Mail/ lib/Mail/SpamAssassin/
lib/Mail/SpamAssassin/Locker/ lib/Mail/SpamAssassin/Message/Metadata/
lib/Mail/SpamAssassin/Plugin/ lib/Mail/Spa...
Author: jm
Date: Fri Jul 21 12:06:09 2006
New Revision: 424421
URL: http://svn.apache.org/viewvc?rev=424421&view=rev
Log:
latest code from bug 3109, and merge up to r424414
Removed:
spamassassin/branches/bug-3109-shortcircuiting/build/automc/populate_cor
spamassassin/branches/bug-3109-shortcircuiting/masses/rule-qa/automc/post-comments
spamassassin/branches/bug-3109-shortcircuiting/masses/rule-qa/automc/scrape-bugzilla
Modified:
spamassassin/branches/bug-3109-shortcircuiting/INSTALL
spamassassin/branches/bug-3109-shortcircuiting/MANIFEST
spamassassin/branches/bug-3109-shortcircuiting/build/buildbot/bbmass_master.cfg
spamassassin/branches/bug-3109-shortcircuiting/build/buildbot/master.cfg
spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin.pm
spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/AICache.pm
spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf.pm
spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/DBBasedAddrList.pm
spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Dns.pm
spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/EvalTests.pm
spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Locker/Flock.pm
spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm
spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message/Metadata/Received.pm
spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm
spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin.pm
spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/AWL.pm
spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util.pm
spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util/DependencyInfo.pm
spamassassin/branches/bug-3109-shortcircuiting/procmailrc.example
spamassassin/branches/bug-3109-shortcircuiting/rules/10_default_prefs.cf
spamassassin/branches/bug-3109-shortcircuiting/rules/60_shortcircuit.cf
spamassassin/branches/bug-3109-shortcircuiting/rules/active.list
spamassassin/branches/bug-3109-shortcircuiting/rules/v320.pre
spamassassin/branches/bug-3109-shortcircuiting/sa-learn.raw
spamassassin/branches/bug-3109-shortcircuiting/sa-update.raw
spamassassin/branches/bug-3109-shortcircuiting/spamc/libspamc.c
spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.c
spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.pod
spamassassin/branches/bug-3109-shortcircuiting/spamd/spamd.raw
spamassassin/branches/bug-3109-shortcircuiting/t/gtube.t
spamassassin/branches/bug-3109-shortcircuiting/t/rcvd_parser.t
spamassassin/branches/bug-3109-shortcircuiting/t/shortcircuit.t
Modified: spamassassin/branches/bug-3109-shortcircuiting/INSTALL
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/INSTALL?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/INSTALL (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/INSTALL Fri Jul 21 12:06:09 2006
@@ -339,11 +339,13 @@
- IO::Zlib (from CPAN)
Used by sa-update to uncompress update archives.
+ Version 1.04 or later is required.
- Archive::Tar (from CPAN)
Used by sa-update to expand update archives.
+ Version 1.23 or later is required.
- Encode::Detect (from CPAN)
Modified: spamassassin/branches/bug-3109-shortcircuiting/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/MANIFEST?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/MANIFEST (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/MANIFEST Fri Jul 21 12:06:09 2006
@@ -82,6 +82,7 @@
lib/Mail/SpamAssassin/Plugin/Razor2.pm
lib/Mail/SpamAssassin/Plugin/RelayCountry.pm
lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm
+lib/Mail/SpamAssassin/Plugin/Shortcircuit.pm
lib/Mail/SpamAssassin/Plugin/SPF.pm
lib/Mail/SpamAssassin/Plugin/SpamCop.pm
lib/Mail/SpamAssassin/Plugin/Test.pm
@@ -331,6 +332,7 @@
t/debug.t
t/desc_wrap.t
t/dnsbl.t
+t/get_headers.t
t/gtube.t
t/hashcash.t
t/html_colors.t
@@ -340,6 +342,7 @@
t/ip_addrs.t
t/lang_lint.t
t/lang_pl_tests.t
+t/lint_nocreate_prefs.t
t/memory_cycles.t
t/meta.t
t/metadata.t
@@ -483,3 +486,4 @@
t/dkim.t
t/uribl.t
t/shortcircuit.t
+t/spamc_y.t
Modified: spamassassin/branches/bug-3109-shortcircuiting/build/buildbot/bbmass_master.cfg
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/build/buildbot/bbmass_master.cfg?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/build/buildbot/bbmass_master.cfg (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/build/buildbot/bbmass_master.cfg Fri Jul 21 12:06:09 2006
@@ -150,7 +150,7 @@
# the Waterfall 'status' entry, but at an externally-visible host name which
# the buildbot cannot on its own.
-c['buildbotURL'] = "http://spamassassin.zones.apache.org/preflight/"
+c['buildbotURL'] = "http://bbmass.spamassassin.org:8011/"
# finally we define the name that the buildmaster has been waiting for.
Modified: spamassassin/branches/bug-3109-shortcircuiting/build/buildbot/master.cfg
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/build/buildbot/master.cfg?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/build/buildbot/master.cfg (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/build/buildbot/master.cfg Fri Jul 21 12:06:09 2006
@@ -314,7 +314,7 @@
# the Waterfall 'status' entry, but at an externally-visible host name which
# the buildbot cannot on its own.
-c['buildbotURL'] = "http://spamassassin.zones.apache.org:8010/"
+c['buildbotURL'] = "http://buildbot.spamassassin.org:8010/"
# finally we define the name that the buildmaster has been waiting for.
Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin.pm Fri Jul 21 12:06:09 2006
@@ -1248,8 +1248,13 @@
$self->{lint_rules} = $self->{conf}->{lint_rules} = 1;
$self->{syntax_errors} = 0;
+ my $olddcp = $self->{dont_copy_prefs};
+ $self->{dont_copy_prefs} = 1;
+
$self->init(1);
$self->{syntax_errors} += $self->{conf}->{errors};
+
+ $self->{dont_copy_prefs} = $olddcp; # revert back to previous
my $mail = $self->parse(\@testmsg, 1);
my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,
Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/AICache.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/AICache.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/AICache.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/AICache.pm Fri Jul 21 12:06:09 2006
@@ -60,10 +60,13 @@
my $use_cache = 1;
+ # be sure to use rel2abs() here, since otherwise relative paths
+ # are broken by the prefix stuff
if ($self->{type} eq 'dir') {
$self->{cache_file} = File::Spec->catdir(
$self->{prefix},
- $self->{path}, '.spamassassin_cache');
+ File::Spec->rel2abs($self->{path}),
+ '.spamassassin_cache');
$self->{cache_mtime} = (stat($self->{cache_file}))[9] || 0;
}
@@ -71,7 +74,7 @@
my @split = File::Spec->splitpath($self->{path});
$self->{cache_file} = File::Spec->catdir(
$self->{prefix},
- $split[1],
+ File::Spec->rel2abs($split[1]),
join('_', '.spamassassin_cache', $self->{type}, $split[2]));
$self->{cache_mtime} = (stat($self->{cache_file}))[9] || 0;
Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Conf.pm Fri Jul 21 12:06:09 2006
@@ -43,8 +43,8 @@
directories.
The C<#> character starts a comment, which continues until end of line.
-B<NOTE:> using the C<#> character in the regular expression rules requires
-escaping. i.e.: C<\#>
+B<NOTE:> if the C<#> character is to be used as part of a rule or
+configuration option, it must be escaped with a backslash. i.e.: C<\#>
Whitespace in the files is not significant, but please note that starting a
line with whitespace is deprecated, as we reserve its use for multi-line rule
@@ -2218,146 +2218,6 @@
type => $CONF_TYPE_HASH_KEY_VALUE
});
-=item shortcircuit SYMBOLIC_TEST_NAME {ham|spam|on|off}
-
-Shortcircuiting a test will force all other pending rules to be skipped, if
-that test is hit.
-
-Recomended usage is to use C<priority> to set rules with strong S/O values (ie.
-1.0) to be run first, and make instant spam or ham classification based on
-that.
-
-To override a test that uses shortcircuiting, you can set the classification
-type to C<off>.
-
-=over 4
-
-=item on
-
-Shortcircuits the rest of the tests, but does not make a strict classification
-of spam or ham. Rather, it uses the default score for the rule being
-shortcircuited. This would allow you, for example, to define a rule such as
-
-=over 4
-
- body TEST /test/
- describe TEST test rule that scores barely over spam threshold
- score TEST 5.5
- priority TEST -100
- shortcircuit TEST on
-
-=back
-
-The result of a message hitting the above rule would be a final score of 5.5,
-as opposed to 100 (default) if it were classified as spam.
-
-=item off
-
-Disables shortcircuiting on said rule.
-
-=item spam
-
-Shortcircuit the rule using a set of defaults; override the default score of
-this rule with the score from C<shortcircuit_spam_score>, set the
-C<noautolearn> tflag, and set priority to C<-100>. In other words,
-equivalent to:
-
-=over 4
-
- shortcircuit TEST on
- priority TEST -100
- score TEST 100
- tflags TEST noautolearn
-
-=back
-
-=item ham
-
-Shortcircuit the rule using a set of defaults; override the default score of
-this rule with the score from C<shortcircuit_ham_score>, set the C<noautolearn>
-and C<nice> tflags, and set priority to C<-100>. In other words, equivalent
-to:
-
-=over 4
-
- shortcircuit TEST on
- priority TEST -100
- score TEST -100
- tflags TEST noautolearn nice
-
-=back
-
-=back
-
-=cut
-
- push (@cmds, {
- setting => 'shortcircuit',
- code => sub {
- my ($self, $key, $value, $line) = @_;
- my ($rule,$type);
- unless (defined $value && $value !~ /^$/) {
- return $MISSING_REQUIRED_VALUE;
- }
- if ($value =~ /^(\S+)\s+(\S+)$/) {
- $rule=$1;
- $type=$2;
- } else {
- return $INVALID_VALUE;
- }
-
- if ($type =~ m/^(?:spam|ham)$/) {
- dbg("shortcircuit: adding $rule using abbreviation $type");
-
- # set the defaults:
- $self->{shortcircuit}->{$rule} = $type;
- $self->{priority}->{$rule} = -100;
-
- my $tf = $self->{tflags}->{$rule};
- $self->{tflags}->{$rule} = ($tf ? $tf." " : "") .
- ($type eq 'ham' ? "nice " : "") .
- "noautolearn";
- }
- elsif ($type eq "on") {
- $self->{shortcircuit}->{$rule} = "on";
- }
- elsif ($type eq "off") {
- delete $self->{shortcircuit}->{$rule};
- }
- else {
- return $INVALID_VALUE;
- }
- }
- });
-
-=item shortcircuit_spam_score n.nn (default: 100)
-
-When shortcircuit is used on a rule, and the shortcircuit classification type
-is set to C<spam>, this value should be applied in place of the default score
-for that rule.
-
-=cut
-
- push (@cmds, {
- setting => 'shortcircuit_spam_score',
- default => 100,
- type => $CONF_TYPE_NUMERIC
- });
-
-=item shortcircuit_ham_score n.nn (default: -100)
-
-When shortcircuit is used on a rule, and the shortcircuit classification type
-is set to C<ham>, this value should be applied in place of the default score
-for that rule.
-
-=cut
-
- push (@cmds, {
- setting => 'shortcircuit_ham_score',
- default => -100,
- type => $CONF_TYPE_NUMERIC
- });
-
=back
=head1 ADMINISTRATOR SETTINGS
@@ -2894,9 +2754,6 @@
_DCCR_ DCC's results
_PYZOR_ Pyzor results
_RBL_ full results for positive RBL queries in DNS URI format
- _SC_ shortcircuit status (classification and rule name)
- _SCRULE_ rulename that caused the shortcircuit
- _SCTYPE_ shortcircuit classification ("spam", "ham", "default", "none")
_LANGUAGES_ possible languages of mail
_PREVIEW_ content preview
_REPORT_ terse report of tests hit (for header reports)
Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/DBBasedAddrList.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/DBBasedAddrList.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/DBBasedAddrList.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/DBBasedAddrList.pm Fri Jul 21 12:06:09 2006
@@ -55,8 +55,6 @@
'locked_file' => ''
};
- my $path;
-
my @order = split (' ', $main->{conf}->{auto_whitelist_db_modules});
my $dbm_module = Mail::SpamAssassin::Util::first_available_module (@order);
if (!$dbm_module) {
@@ -64,43 +62,38 @@
$main->{conf}->{auto_whitelist_db_modules}."\n";
}
- my $umask = umask 0;
- if(defined($main->{conf}->{auto_whitelist_path})) # if undef then don't worry -- empty hash!
- {
- $path = $main->sed_path ($main->{conf}->{auto_whitelist_path});
+ # if undef then don't worry -- empty hash!
+ if (defined($main->{conf}->{auto_whitelist_path})) {
+ my $path = $main->sed_path($main->{conf}->{auto_whitelist_path});
+ my ($mod1, $mod2);
if ($main->{locker}->safe_lock
- ($path, 30, $main->{conf}->{auto_whitelist_file_mode}))
+ ($path, 30, $main->{conf}->{auto_whitelist_file_mode}))
{
$self->{locked_file} = $path;
- $self->{is_locked} = 1;
- dbg("auto-whitelist: tie-ing to DB file of type $dbm_module R/W in $path");
- tie %{$self->{accum}},$dbm_module,$path,
- O_RDWR|O_CREAT, #open rw w/lock
- (oct ($main->{conf}->{auto_whitelist_file_mode}) & 0666)
- or goto failed_to_tie;
-
- } else {
+ $self->{is_locked} = 1;
+ ($mod1, $mod2) = ('R/W', O_RDWR | O_CREAT);
+ }
+ else {
$self->{is_locked} = 0;
- dbg("auto-whitelist: tie-ing to DB file of type $dbm_module R/O in $path");
- tie %{$self->{accum}},$dbm_module,$path,
- O_RDONLY, #open ro w/o lock
- (oct ($main->{conf}->{auto_whitelist_file_mode}) & 0666)
- or goto failed_to_tie;
+ ($mod1, $mod2) = ('R/O', O_RDONLY);
}
+
+ dbg("auto-whitelist: tie-ing to DB file of type $dbm_module $mod1 in $path");
+
+ if (! tie %{ $self->{accum} }, $dbm_module, $path, $mod2,
+ oct($main->{conf}->{auto_whitelist_file_mode}) ) {
+ my $err = $!; # might get overwritten later
+ if ($self->{is_locked}) {
+ $self->{main}->{locker}->safe_unlock($self->{locked_file});
+ $self->{is_locked} = 0;
+ }
+ die "auto-whitelist: cannot open auto_whitelist_path $path: $err\n";
+ }
}
- umask $umask;
bless ($self, $class);
return $self;
-
-failed_to_tie:
- umask $umask;
- if ($self->{is_locked}) {
- $self->{main}->{locker}->safe_unlock ($self->{locked_file});
- $self->{is_locked} = 0;
- }
- die "auto-whitelist: cannot open auto_whitelist_path $path: $!\n";
}
###########################################################################
Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Dns.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Dns.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Dns.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Dns.pm Fri Jul 21 12:06:09 2006
@@ -206,7 +206,7 @@
}
if (!defined $self->{tests_already_hit}->{$rule}) {
- $self->got_hit($rule, "RBL: ");
+ $self->got_hit($rule, "RBL: ", ruletype => "dnsbl");
}
}
@@ -247,7 +247,7 @@
++$self->{sender_host_fail} == 2)
{
for my $rule (@{$rules}) {
- $self->got_hit($rule, "DNS: ");
+ $self->got_hit($rule, "DNS: ", ruletype => "dns");
}
}
@@ -308,7 +308,7 @@
my $untainted = $1;
$subtest = $untainted;
- $self->got_hit($rule, "SenderBase: ") if !$undef && eval $subtest;
+ $self->got_hit($rule, "SenderBase: ", ruletype => "dnsbl") if !$undef && eval $subtest;
}
# bitmask
elsif ($subtest =~ /^\d+$/) {
Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/EvalTests.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/EvalTests.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/EvalTests.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/EvalTests.pm Fri Jul 21 12:06:09 2006
@@ -3082,13 +3082,16 @@
}
# came up on the users@ list, look for multipart/alternative parts which
-# include non-text parts -- skip multipart/related parts which occurs in ham
+# include non-text parts -- skip certain types which occur normally in ham
sub check_ma_non_text {
my $self = shift;
foreach my $map ($self->{msg}->find_parts(qr@^multipart/alternative$@i)) {
foreach my $p ($map->find_parts(qr/./, 1, 0)) {
- return 1 if ($p->{'type'} !~ m@^text/@i && $p->{'type'} !~ m@^multipart/related$@i);
+ next if (lc $p->{'type'} eq 'multipart/related');
+ next if (lc $p->{'type'} eq 'application/rtf');
+ next if ($p->{'type'} =~ m@^text/@i);
+ return 1;
}
}
Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Locker/Flock.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Locker/Flock.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Locker/Flock.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Locker/Flock.pm Fri Jul 21 12:06:09 2006
@@ -51,13 +51,14 @@
my @stat;
$max_retries ||= 30;
- $mode ||= 0700;
+ $mode ||= 0600;
+ $mode = oct $mode if $mode =~ /^0/; # accept number or string
my $lock_file = "$path.mutex";
- my $umask = umask (oct($mode) ^ 0700);
+ my $umask = umask(~$mode);
my $fh = new IO::File();
- if (!$fh->open ("$lock_file", O_RDWR|O_CREAT)) {
+ if (!$fh->open ($lock_file, O_RDWR|O_CREAT)) {
umask $umask; # just in case
die "locker: safe_lock: cannot create lockfile $lock_file: $!\n";
}
Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm Fri Jul 21 12:06:09 2006
@@ -60,7 +60,8 @@
my @stat;
$max_retries ||= 30;
- $mode ||= 0700;
+ $mode ||= 0600;
+ $mode = oct $mode if $mode =~ /^0/; # accept number or string
my $lock_file = "$path.lock";
my $hname = Mail::SpamAssassin::Util::fq_hostname();
@@ -70,7 +71,7 @@
# keep this for unlocking
$self->{lock_tmp} = $lock_tmp;
- my $umask = umask (oct($mode) ^ 0700);
+ my $umask = umask(~$mode);
if (!open(LTMP, ">$lock_tmp")) {
umask $umask; # just in case
die "locker: safe_lock: cannot create tmp lockfile $lock_tmp for $lock_file: $!\n";
Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message/Metadata/Received.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message/Metadata/Received.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message/Metadata/Received.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Message/Metadata/Received.pm Fri Jul 21 12:06:09 2006
@@ -414,6 +414,10 @@
elsif (/\) by .+ \(\d{1,2}\.\d\.\d{3}(?:\.\d{1,3})?\) \(authenticated as .+\) id /) {
$auth = 'CriticalPath';
}
+ # Postfix 2.3 and later with "smtpd_sasl_authenticated_header yes"
+ elsif (/\)\s+\(Authenticated sender:\s+\S+\)\s+by\s+\S+\s+\(Postfix\)\s+with\s+/) {
+ $auth = 'Postfix';
+ }
if (/^from /) {
# try to catch enveloper senders
Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/PerMsgStatus.pm Fri Jul 21 12:06:09 2006
@@ -59,11 +59,14 @@
use Mail::SpamAssassin::Logger;
use vars qw{
- @ISA
+ @ISA @TEMPORARY_METHODS
};
@ISA = qw();
+# methods defined by the compiled ruleset; deleted in finish_tests()
+@TEMPORARY_METHODS = ();
+
###########################################################################
sub new {
@@ -78,12 +81,13 @@
'test_logs' => '',
'test_names_hit' => [ ],
'subtest_names_hit' => [ ],
+ 'spamd_result_log_items' => [ ],
'tests_already_hit' => { },
'hdr_cache' => { },
'rule_errors' => 0,
'disable_auto_learning' => 0,
'auto_learn_status' => undef,
- 'conf' => $main->{conf},
+ 'conf' => $main->{conf},
'async' => Mail::SpamAssassin::AsyncLoop->new($main)
};
@@ -171,13 +175,14 @@
next unless ($self->{conf}->{priorities}->{$priority} > 0);
# if shortcircuiting is hit, we skip all other priorities...
- last if (exists $self->{shortcircuit_type});
+ last if $self->have_shortcircuited();
dbg("check: running tests for priority: $priority");
# only harvest the dnsbl queries once priority HARVEST_DNSBL_PRIORITY
# has been reached and then only run once
- if ($priority >= HARVEST_DNSBL_PRIORITY && $needs_dnsbl_harvest_p && !exists $self->{shortcircuit_type})
+ if ($priority >= HARVEST_DNSBL_PRIORITY && $needs_dnsbl_harvest_p
+ && !$self->have_shortcircuited())
{
# harvest the DNS results
$self->harvest_dnsbl_queries();
@@ -213,20 +218,15 @@
# sanity check, it is possible that no rules >= HARVEST_DNSBL_PRIORITY ran so the harvest
# may not have run yet. Check, and if so, go ahead and harvest here.
if ($needs_dnsbl_harvest_p) {
- if (!exists $self->{shortcircuit_type}) {
+ if (!$self->have_shortcircuited()) {
# harvest the DNS results
$self->harvest_dnsbl_queries();
}
# finish the DNS results
+ # TODO: this should be consolidated with the identical code above
$self->rbl_finish();
-
- if (!exists $self->{shortcircuit_type}) {
- # TODO: should we call this even if we're short-circuiting?
- # in URIDNSBL, it used to be a time-consuming operation.
- $self->{main}->call_plugins("check_post_dnsbl", { permsgstatus => $self });
- }
-
+ $self->{main}->call_plugins ("check_post_dnsbl", { permsgstatus => $self });
$self->{resolver}->finish_socket() if $self->{resolver};
}
@@ -1142,6 +1142,46 @@
###########################################################################
+# public API for plugins
+
+=item $status->set_spamd_result_item($subref)
+
+Set an entry for the spamd result log line. C<$subref> should be a code
+reference for a subroutine which will return a string in C<'name=VALUE'>
+format, similar to the other entries in the spamd result line:
+
+ Jul 17 14:10:47 radish spamd[16670]: spamd: result: Y 22 - ALL_NATURAL,
+ DATE_IN_FUTURE_03_06,DIET_1,DRUGS_ERECTILE,DRUGS_PAIN,
+ TEST_FORGED_YAHOO_RCVD,TEST_INVALID_DATE,TEST_NOREALNAME,
+ TEST_NORMAL_HTTP_TO_IP,UNDISC_RECIPS scantime=0.4,size=3138,user=jm,
+ uid=1000,required_score=5.0,rhost=localhost,raddr=127.0.0.1,
+ rport=33153,mid=<9PS291LhupY>,autolearn=spam
+
+C<name> and C<VALUE> must not contain C<=> or C<,> characters, as it
+is important that these log lines are easy to parse.
+
+The code reference will be called by spamd after the message has been scanned,
+and the C<PerMsgStatus::check()> method has returned.
+
+=cut
+
+sub set_spamd_result_item {
+ my ($self, $ref) = @_;
+ push @{$self->{spamd_result_log_items}}, $ref;
+}
+
+# called by spamd
+sub get_spamd_result_log_items {
+ my ($self) = @_;
+ my @ret = ();
+ foreach my $ref (@{$self->{spamd_result_log_items}}) {
+ push @ret, &$ref;
+ }
+ return @ret;
+}
+
+###########################################################################
+
sub _get_tag_value_for_yesno {
my $self = shift;
@@ -1251,17 +1291,6 @@
AUTOLEARN => sub { return $self->get_autolearn_status(); },
- SC => sub {
- my $rule = $self->{shortcircuit_rule};
- my $type = $self->{shortcircuit_type};
- return "$rule ($type)" if ($rule);
- return "no";
- },
-
- SCRULE => sub { return ($self->{shortcircuit_rule} || "none") ; },
-
- SCTYPE => sub { return ($self->{shortcircuit_type} || "no") ; },
-
TESTS => sub {
my $arg = (shift || ',');
return (join($arg, sort(@{$self->{test_names_hit}})) || "none");
@@ -1382,6 +1411,13 @@
undef &{'_meta_tests_'.$clean_priority};
}
}
+
+ foreach my $method (@TEMPORARY_METHODS) {
+ if (defined &{$method}) {
+ undef &{$method};
+ }
+ }
+ @TEMPORARY_METHODS = (); # clear for next time
}
@@ -1670,9 +1706,7 @@
}
sub hit_rule_plugin_code {
- my ($self, $rulename, $ruletype) = @_;
-
- return '' unless exists($self->{should_log_rule_hits}) || $self->{main}->have_plugin("hit_rule");
+ my ($self, $rulename, $ruletype, $loop_break_directive) = @_;
# note: keep this in 'single quotes' to avoid the $ & performance hit,
# unless specifically requested by the caller. Also split the
@@ -1685,7 +1719,6 @@
$debug_code = '
dbg("rules: ran '.$ruletype.' rule '.$rulename.' ======> got hit: \"" . '.
$match.' . "\"");
-
';
}
@@ -1696,15 +1729,14 @@
';
}
- my $plugin_code = '';
- if ($self->{main}->have_plugin("hit_rule")) {
- $plugin_code = '
- $self->{main}->call_plugins ("hit_rule", { permsgstatus => $self, rulename => \''.$rulename.'\', ruletype => \''.$ruletype.'\' });
- ';
+ # if we're not running "tflags multiple", break out of the matching
+ # loop this way
+ my $multiple_code = '';
+ if ($self->{conf}->{tflags}->{$rulename} !~ /\bmultiple\b/) {
+ $multiple_code = $loop_break_directive.';';
}
- return $debug_code.$save_hits_code.$plugin_code.'
- ';
+ return $debug_code.$save_hits_code.$multiple_code;
}
sub ran_rule_plugin_code {
@@ -1730,7 +1762,7 @@
my ($self, $priority) = @_;
local ($_);
- return if (exists $self->{shortcircuit_type});
+ return if $self->have_shortcircuited();
# note: we do this only once for all head pattern tests. Only
# eval tests need to use stuff in here.
@@ -1787,10 +1819,8 @@
my($self,$text) = @_;
'.$self->hash_line_for_rule($rulename).'
while ($text '.$testtype.'~ '.$pat.'g) {
- $self->got_hit (q#'.$rulename.'#, q{});
- '. $self->hit_rule_plugin_code($rulename, "header") . '
- # Ok, we hit, stop now.
- last unless $self->{conf}->{tflags}->{q{'.$rulename.'}} =~ /\bmultiple\b/;
+ $self->got_hit(q#'.$rulename.'#, "", ruletype => "header");
+ '. $self->hit_rule_plugin_code($rulename, "header", "last") . '
}
}';
@@ -1851,7 +1881,7 @@
my ($self, $priority, $textary) = @_;
local ($_);
- return if (exists $self->{shortcircuit_type});
+ return if $self->have_shortcircuited();
dbg("rules: running body-text per-line regexp tests; score so far=".$self->{score});
@@ -1895,10 +1925,8 @@
'.$self->hash_line_for_rule($rulename).'
pos = 0;
while ('.$pat.'g) {
- $self->got_pattern_hit(q{'.$rulename.'}, "BODY: ");
- '. $self->hit_rule_plugin_code($rulename, "body") . '
- # Ok, we hit, stop now.
- return unless $self->{conf}->{tflags}->{q{'.$rulename.'}} =~ /\bmultiple\b/;
+ $self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body");
+ '. $self->hit_rule_plugin_code($rulename, "body", "return") . '
}
}
}
@@ -2267,7 +2295,7 @@
my ($self, $priority, @uris) = @_;
local ($_);
- return if (exists $self->{shortcircuit_type});
+ return if $self->have_shortcircuited();
dbg("uri: running uri tests; score so far=".$self->{score});
@@ -2311,10 +2339,8 @@
'.$self->hash_line_for_rule($rulename).'
pos = 0;
while ('.$pat.'g) {
- $self->got_pattern_hit(q{'.$rulename.'}, "URI: ");
- '. $self->hit_rule_plugin_code($rulename, "uri") . '
- # Ok, we hit, stop now.
- return unless $self->{conf}->{tflags}->{q{'.$rulename.'}} =~ /\bmultiple\b/;
+ $self->got_hit(q{'.$rulename.'}, "URI: ", ruletype => "uri");
+ '. $self->hit_rule_plugin_code($rulename, "uri", "return") .'
}
}
}
@@ -2361,7 +2387,7 @@
my ($self, $priority, $textary) = @_;
local ($_);
- return if (exists $self->{shortcircuit_type});
+ return if $self->have_shortcircuited();
dbg("rules: running raw-body-text per-line regexp tests; score so far=".$self->{score});
@@ -2404,10 +2430,8 @@
'.$self->hash_line_for_rule($rulename).'
pos = 0;
while ('.$pat.'g) {
- $self->got_pattern_hit(q{'.$rulename.'}, "RAW: ");
- '. $self->hit_rule_plugin_code($rulename, "rawbody") . '
- # Ok, we hit, stop now.
- return unless $self->{conf}->{tflags}->{q{'.$rulename.'}} =~ /\bmultiple\b/;
+ $self->got_hit(q{'.$rulename.'}, "RAW: ", ruletype => "rawbody");
+ '. $self->hit_rule_plugin_code($rulename, "rawbody", "return") . '
}
}
}
@@ -2454,7 +2478,7 @@
my ($self, $priority, $fullmsgref) = @_;
local ($_);
- return if (exists $self->{shortcircuit_type});
+ return if $self->have_shortcircuited();
dbg("rules: running full-text regexp tests; score so far=".$self->{score});
@@ -2484,10 +2508,8 @@
'.$self->hash_line_for_rule($rulename).'
pos $$fullmsgref = 0;
while ($$fullmsgref =~ '.$pat.'g) {
- $self->got_pattern_hit(q{'.$rulename.'}, "FULL: ");
- '. $self->hit_rule_plugin_code($rulename, "full") . '
- # Ok, we hit, stop now.
- last unless $self->{conf}->{tflags}->{q{'.$rulename.'}} =~ /\bmultiple\b/;
+ $self->got_hit(q{'.$rulename.'}, "FULL: ", ruletype => "full");
+ '. $self->hit_rule_plugin_code($rulename, "full", "last") . '
}
'.$self->ran_rule_plugin_code($rulename, "full").'
}
@@ -2531,25 +2553,32 @@
sub do_head_eval_tests {
my ($self, $priority) = @_;
return unless (defined($self->{conf}->{head_evals}->{$priority}));
- $self->run_eval_tests ($self->{conf}->{head_evals}->{$priority}, '');
+ $self->run_eval_tests ($Mail::SpamAssassin::Conf::TYPE_HEAD_EVALS,
+ $self->{conf}->{head_evals}->{$priority}, '', $priority);
}
sub do_body_eval_tests {
my ($self, $priority, $bodystring) = @_;
return unless (defined($self->{conf}->{body_evals}->{$priority}));
- $self->run_eval_tests ($self->{conf}->{body_evals}->{$priority}, 'BODY: ', $bodystring);
+ $self->run_eval_tests ($Mail::SpamAssassin::Conf::TYPE_BODY_EVALS,
+ $self->{conf}->{body_evals}->{$priority}, 'BODY: ',
+ $priority, $bodystring);
}
sub do_rawbody_eval_tests {
my ($self, $priority, $bodystring) = @_;
return unless (defined($self->{conf}->{rawbody_evals}->{$priority}));
- $self->run_eval_tests ($self->{conf}->{rawbody_evals}->{$priority}, 'RAW: ', $bodystring);
+ $self->run_eval_tests ($Mail::SpamAssassin::Conf::TYPE_RAWBODY_EVALS,
+ $self->{conf}->{rawbody_evals}->{$priority}, 'RAW: ',
+ $priority, $bodystring);
}
sub do_full_eval_tests {
my ($self, $priority, $fullmsgref) = @_;
return unless (defined($self->{conf}->{full_evals}->{$priority}));
- $self->run_eval_tests ($self->{conf}->{full_evals}->{$priority}, '', $fullmsgref);
+ $self->run_eval_tests ($Mail::SpamAssassin::Conf::TYPE_FULL_EVALS,
+ $self->{conf}->{full_evals}->{$priority}, '',
+ $priority, $fullmsgref);
}
###########################################################################
@@ -2558,7 +2587,7 @@
my ($self, $priority) = @_;
local ($_);
- return if (exists $self->{shortcircuit_type});
+ return if $self->have_shortcircuited();
dbg("rules: running meta tests; score so far=" . $self->{score} );
my $conf = $self->{conf};
@@ -2615,6 +2644,13 @@
$meta{$rulename} .= "\$h->{'$token'} ";
$setup_rules{$token}=1;
+ if (!exists $conf->{scores}->{$token}) {
+ info("rules: meta test $rulename has undefined dependency '$token'");
+ }
+ elsif ($conf->{scores}->{$token} == 0) {
+ info("rules: meta test $rulename has dependency '$token' with a zero score");
+ }
+
# If the token is another meta rule, add it as a dependency
push (@{ $rule_deps{$rulename} }, $token)
if (exists $conf->{meta_tests}->{$priority}->{$token});
@@ -2654,8 +2690,10 @@
}
# Add this meta rule to the eval line
- $evalstr .= ' $r = '.$meta{$metas[$i]}.";\n";
- $evalstr .= ' if ($r) { $self->got_hit (q#'.$metas[$i].'#, "", $r); }'."\n";
+ $evalstr .= '
+ $r = '.$meta{$metas[$i]}.';
+ if ($r) { $self->got_hit(q#'.$metas[$i].'#, "", ruletype => "meta", value => $r); }
+ ';
splice @metas, $i--, 1; # remove this rule from our list
}
@@ -2740,88 +2778,172 @@
###########################################################################
sub run_eval_tests {
- my ($self, $evalhash, $prepend2desc, @extraevalargs) = @_;
+ my ($self, $testtype, $evalhash, $prepend2desc, $priority, @extraevalargs) = @_;
local ($_);
- return if (exists $self->{shortcircuit_type});
-
+ return if $self->have_shortcircuited();
+
+ my $doing_user_rules = $self->{conf}->{user_rules_to_compile}->{$testtype};
+
+ # clean up priority value so it can be used in a subroutine name
+ my $clean_priority;
+ ($clean_priority = $priority) =~ s/-/neg/;
+
+ my $scoreset = $self->{conf}->get_score_set();
+
+ my $methodname = '_eval_tests'.
+ '_type'.$testtype .
+ '_pri'.$clean_priority .
+ '_set'.$scoreset;
+
+ # Some of the rules are scoreset specific, so we need additional
+ # subroutines to handle those
+ if (defined &{'Mail::SpamAssassin::PerMsgStatus::'.$methodname}
+ && !$doing_user_rules)
+ {
+ no strict "refs";
+ &{'Mail::SpamAssassin::PerMsgStatus::'.$methodname}($self,@extraevalargs);
+ use strict "refs";
+ return;
+ }
+
# look these up once in advance to save repeated lookups in loop below
- my $debugenabled = would_log('dbg');
- my $scoresref = $self->{conf}->{scores};
my $tflagsref = $self->{conf}->{tflags};
my $have_start_rules = $self->{main}->have_plugin("start_rules");
my $have_ran_rule = $self->{main}->have_plugin("ran_rule");
- my $scoreset = $self->{conf}->get_score_set();
- while (my ($rulename, $test) = each %{$evalhash}) {
- last if (exists $self->{shortcircuit_type});
-
- # Score of 0, skip it.
- my $score = $scoresref->{$rulename};
- next unless $score;
-
- # If the rule is a net rule, and we're in a non-net scoreset, skip it.
- next if ((($scoreset & 1) == 0) &&
- $tflagsref->{$rulename} &&
- $tflagsref->{$rulename} =~ /\bnet\b/);
-
- # If the rule is a bayes rule, and we're in a non-bayes scoreset, skip it.
- next if ((($scoreset & 2) == 0) &&
- $tflagsref->{$rulename} &&
- $tflagsref->{$rulename} =~ /\bbayes\b/);
+ # the buffer for the evaluated code
+ my $evalstr = q{ };
+$evalstr .= q{ my $function; };
+
+ # conditionally include the dbg in the eval str
+ my $dbgstr = q{ };
+ if (would_log('dbg')) {
+ $dbgstr = q{
+ dbg("rules: ran eval rule $rulename ======> got hit ($result)");
+ };
+ }
- my $result;
- $self->{test_log_msgs} = (); # clear test state
+ while (my ($rulename, $test) = each %{$evalhash})
+ {
+ if ($tflagsref->{$rulename}) {
+ # If the rule is a net rule, and we are in a non-net scoreset, skip it.
+ if ($tflagsref->{$rulename} =~ /\bnet\b/) {
+ next if (($scoreset & 1) == 0);
+ }
+ # If the rule is a bayes rule, and we are in a non-bayes scoreset, skip it.
+ if ($tflagsref->{$rulename} =~ /\bbayes\b/) {
+ next if (($scoreset & 2) == 0);
+ }
+ }
my ($function, @args) = @{$test};
- unshift(@args, @extraevalargs);
- # check to make sure the function is defined
- if (!$self->can ($function)) {
- my $pluginobj = $self->{conf}->{eval_plugins}->{$function};
- if ($pluginobj) {
- # we have a plugin for this. eval its function
- $self->register_plugin_eval_glue ($pluginobj, $function);
- } else {
- dbg("rules: no method found for eval test $function");
- }
- }
+ $evalstr .= '
+ $rulename = q#'.$rulename.'#;
+ $self->{test_log_msgs} = ();
+ ';
- # let plugins get the name of the rule that's currently being
- # run
- $self->{current_rule_name} = $rulename;
+ # only need to set current_rule_name for plugin evals
+ if ($self->{conf}->{eval_plugins}->{$function}) {
+ # let plugins get the name of the rule that is currently being run,
+ # and ensure their eval functions exist
+ $evalstr .= '
+ $self->{current_rule_name} = $rulename;
+ $self->register_plugin_eval_glue(q#'.$function.'#);
+ ';
+ }
+ # this stuff is quite slow, and totally superfluous if
+ # no plugin is loaded for those hooks
if ($have_start_rules) {
- $self->{main}->call_plugins("start_rules", { permsgstatus => $self, ruletype => "eval" });
+ $evalstr .= '
+ $self->{main}->call_plugins("start_rules", {
+ permsgstatus => $self, ruletype => "eval"
+ });
+ ';
}
- eval {
- $result = $self->$function(@args);
- };
-
- if ($@) {
- warn "rules: failed to run $rulename test, skipping:\n" . "\t($@)\n";
- $self->{rule_errors}++;
- next;
+ my $argstr = '';
+ if (scalar @args > 0) {
+ $argstr = ',' . join (', ', map { "q#".$_."#" } @args);
}
+ $evalstr .= '
+ eval {
+ $result = $self->' . $function . ' (@extraevalargs '. $argstr .' );
+ };
+ if ($@) { $self->handle_eval_rule_errors($rulename); }
+ ';
+
if ($have_ran_rule) {
- $self->{main}->call_plugins("ran_rule", { permsgstatus => $self, ruletype => "eval", rulename => $rulename });
+ $evalstr .= '
+ $self->{main}->call_plugins("ran_rule", {
+ permsgstatus => $self, ruletype => "eval", rulename => $rulename
+ });
+ ';
}
- if ($result) {
- $self->got_hit ($rulename, $prepend2desc, $result);
- dbg("rules: ran eval rule $rulename ======> got hit ($result)") if $debugenabled;
- $self->{main}->call_plugins("hit_rule", { permsgstatus => $self, ruletype => "eval", rulename => $rulename });
+ $evalstr .= '
+ if ($result) {
+ $self->got_hit($rulename, $prepend2desc, ruletype => "eval", value => $result);
+ '.$dbgstr.'
+ }
+ ';
+ }
+
+ # nothing done in the loop, that means no rules
+ return unless ($evalstr);
+
+ $evalstr = <<"EOT";
+{
+ package Mail::SpamAssassin::PerMsgStatus;
+
+ sub ${methodname} {
+ my (\$self, \@extraevalargs) = \@_;
+
+ my \$prepend2desc = q#$prepend2desc#;
+ my \$rulename;
+ my \$result;
+
+ $evalstr
}
+
+ 1;
+}
+EOT
+
+ eval $evalstr;
+
+ push (@TEMPORARY_METHODS, $methodname);
+
+ if ($@) {
+ warn "rules: failed to compile eval tests, skipping some: $@\n";
+ $self->{rule_errors}++;
}
+ else {
+ no strict "refs";
+ &{'Mail::SpamAssassin::PerMsgStatus::'.$methodname}($self,@extraevalargs);
+ use strict "refs";
+ }
+}
+
+# use a separate sub here, for brevity
+sub handle_eval_rule_errors {
+ my ($self, $rulename) = @_;
+ warn "rules: failed to run $rulename test, skipping:\n\t($@)\n";
+ $self->{rule_errors}++;
}
sub register_plugin_eval_glue {
- my ($self, $pluginobj, $function) = @_;
+ my ($self, $function) = @_;
+
+ # return if it's not an eval_plugin function
+ return if (!exists $self->{conf}->{eval_plugins}->{$function});
- # stop reporting this -- it's too noisy!
- # dbg("plugin: registering glue method for $function ($pluginobj)");
+ # return if it's been registered already
+ return if ($self->can ($function) &&
+ defined &{'Mail::SpamAssassin::PerMsgStatus::'.$function});
my $evalstr = <<"ENDOFEVAL";
{
@@ -2842,13 +2964,15 @@
warn "rules: failed to run header tests, skipping some: $@\n";
$self->{rule_errors}++;
}
+
+ # ensure this method is deleted if finish_tests() is called
+ push (@TEMPORARY_METHODS, $function);
}
###########################################################################
sub run_rbl_eval_tests {
my ($self, $evalhash) = @_;
- my ($rulename, $pat, @args);
local ($_);
if ($self->{main}->{local_tests_only}) {
@@ -2879,10 +3003,12 @@
###########################################################################
-sub got_pattern_hit {
- my ($self, $rulename, $prefix) = @_;
-
- $self->got_hit ($rulename, $prefix);
+sub have_shortcircuited
+{
+ my ($self) = @_;
+ return 1 if $self->{main}->call_plugins ("have_shortcircuited", {
+ permsgstatus => $self
+ });
}
###########################################################################
@@ -2907,16 +3033,17 @@
$self->{test_log_msgs} = ();
}
+# internal API, called only by got_hit()
+# TODO: refactor and merge this into that function
sub _handle_hit {
- my ($self, $rule, $score, $area, $desc, $scrule) = @_;
+ my ($self, $rule, $score, $area, $ruletype, $desc) = @_;
- # if this was a shortcircuited rule hit, lets do some cleanup first
- if ($scrule) {
- undef $self->{test_names_hit}; # reset rule hits
- $self->{score} = 0; # reset score
- $self->{tag_data}->{REPORT} = ''; # reset tag data
- $self->{tag_data}->{SUMMARY} = ''; # reset tag data
- }
+ $self->{main}->call_plugins ("hit_rule", {
+ permsgstatus => $self,
+ rulename => $rule,
+ ruletype => $ruletype,
+ score => $score
+ });
# ignore meta-match sub-rules.
if ($rule =~ /^__/) { push(@{$self->{subtest_names_hit}}, $rule); return; }
@@ -2967,44 +3094,77 @@
$wrapped;
}
+###########################################################################
+
+=item $status->got_hit ($rulename, $desc_prepend [, name => value, ...])
+
+Register a hit against a rule in the ruleset.
+
+There are two mandatory arguments. These are C<$rulename>, the name of the rule
+that fired, and C<$desc_prepend>, which is a short string that will be
+prepended to the rule's C<describe> string in output reports.
+
+In addition, callers can supplement that with the following optional
+data:
+
+=over 4
+
+=item score => $num
+
+Optional: the score to use for the rule hit. If unspecified,
+the value from the C<Mail::SpamAssassin::Conf> object's C<{scores}>
+hash will be used.
+
+=item value => $num
+
+Optional: the value to assign to the rule; the default value is C<1>.
+I<tflags multiple> rules use values of greater than 1 to indicate
+multiple hits. This value is accessible to meta rules.
+
+=item ruletype => $type
+
+Optional, but recommended: the rule type string. This is used in the
+C<hit_rule> plugin call, called by this method. If unset, I<'unknown'> is
+used.
+
+=back
+
+Backwards compatibility: the two mandatory arguments have been part of this API
+since SpamAssassin 2.x. The optional I<name=E<gt>value> pairs, however, are a
+new addition in SpamAssassin 3.2.0.
+
+=cut
+
sub got_hit {
- my ($self, $rule, $area, $value) = @_;
- $value ||= 1;
+ my ($self, $rule, $area, %params) = @_;
+
+ return if $self->have_shortcircuited();
- return if (exists $self->{shortcircuit_type});
+ # ensure that rule values always result in an *increase* of
+ # $self->{tests_already_hit}->{$rule}:
+ my $value = $params{value}; if (!$value || $value <= 0) { $value = 1; }
+
+ # default ruletype, if not specified:
+ $params{ruletype} ||= 'unknown';
my $already_hit = $self->{tests_already_hit}->{$rule} || 0;
$self->{tests_already_hit}->{$rule} = $already_hit + $value;
- # only allow each test to be scored once per mail
+ # only allow each test to be scored once per mail, once we
+ # get into this method ('tflags multiple' rules must be dealt
+ # with in callers to this method)
return if ($already_hit);
- my $desc = $self->{conf}->{descriptions}->{$rule};
- $desc ||= $rule;
-
- my $score = $self->{conf}->{scores}->{$rule};
-
- my $sctype = $self->{conf}->{shortcircuit}->{$rule};
- if ($sctype) {
- $self->{shortcircuit_rule} = $rule;
- if ($sctype eq 'on') { # guess by rule score
- $self->{shortcircuit_type} = ($score < 0 ? 'ham' : 'spam');
- dbg("shortcircuit: s/c due to $rule, using score of $score");
- }
- else {
- $self->{shortcircuit_type} = $sctype;
- if ($sctype eq 'ham') {
- $score = $self->{conf}->{shortcircuit_ham_score};
- } else {
- $score = $self->{conf}->{shortcircuit_spam_score};
- }
- dbg("shortcircuit: s/c $sctype due to $rule, using score of $score");
- }
- }
-
- $self->_handle_hit($rule, $score, $area, $desc, $self->{shortcircuit_rule});
+ $self->_handle_hit($rule,
+ $params{score} || $self->{conf}->{scores}->{$rule},
+ $area,
+ $params{ruletype},
+ ($self->{conf}->{descriptions}->{$rule} || $rule));
}
+###########################################################################
+
+# TODO: this needs API doc
sub test_log {
my ($self, $msg) = @_;
while ($msg =~ s/^(.{30,48})\s//) {
Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin.pm Fri Jul 21 12:06:09 2006
@@ -429,6 +429,10 @@
The name of the rule that fired.
+=item score
+
+The rule's score in the active scoreset.
+
=back
=item $plugin->ran_rule ( { options ... } )
Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/AWL.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/AWL.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/AWL.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Plugin/AWL.pm Fri Jul 21 12:06:09 2006
@@ -235,7 +235,7 @@
type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING
});
-=item auto_whitelist_file_mode (default: 0700)
+=item auto_whitelist_file_mode (default: 0600)
The file mode bits used for the automatic-whitelist directory or file.
@@ -248,7 +248,7 @@
push (@cmds, {
setting => 'auto_whitelist_file_mode',
is_admin => 1,
- default => '0700',
+ default => '0600',
type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
});
@@ -380,11 +380,7 @@
}
if ($delta != 0) {
- # We have to use the private _handle_hit method here because we want
- # to pass in a dynamically generated score. Perhaps we should extend
- # handle_hit or add a handle_dynamic_hit method to help here.
- $pms->_handle_hit("AWL", $delta, "AWL: ",
- $pms->{conf}->{descriptions}->{AWL});
+ $pms->got_hit("AWL", "AWL: ", ruletype => 'eval', score => $delta);
}
$whitelist->finish();
Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util.pm Fri Jul 21 12:06:09 2006
@@ -421,14 +421,40 @@
# Time::Local (v1.10 at least) throws warnings when the dates cause
# a 32-bit overflow. So force a min/max for year.
if ($yyyy > 2037) {
- dbg("util: date after supported range, forcing year to 2037: $date");
+ dbg("util: year after supported range, forcing year to 2037: $date");
$yyyy = 2037;
}
elsif ($yyyy < 1970) {
- dbg("util: date before supported range, forcing year to 1970: $date");
+ dbg("util: year before supported range, forcing year to 1970: $date");
$yyyy = 1971;
}
+ # Fudge invalid times so that we get a usable date.
+ if ($ss > 59) {
+ dbg("util: second after supported range, forcing second to 59: $date");
+ $ss = 59;
+ }
+ elsif ($ss < 0) {
+ dbg("util: second before supported range, forcing second to 00: $date");
+ $ss = "00";
+ }
+ if ($mm > 59) {
+ dbg("util: minute after supported range, forcing minute to 59: $date");
+ $mm = 59;
+ }
+ elsif ($mm < 0) {
+ dbg("util: minute before supported range, forcing minute to 00: $date");
+ $mm = "00";
+ }
+ if ($hh > 23) {
+ dbg("util: hour after supported range, forcing hour to 23: $date");
+ $hh = 23;
+ }
+ elsif ($hh < 0) {
+ dbg("util: hour before supported range, forcing hour to 00: $date");
+ $hh = "00";
+ }
+
my $time;
eval { # could croak
$time = timegm($ss, $mm, $hh, $dd, $mmm-1, $yyyy);
@@ -1206,7 +1232,7 @@
# use a traditional open(FOO, "cmd |")
my $cmd = join(' ', @cmdline);
- if ($stdinfile) { $cmd .= " < '$stdinfile'"; }
+ if ($stdinfile) { $cmd .= qq/ < "$stdinfile"/; }
if ($duperr2out) { $cmd .= " 2>&1"; }
return open ($fh, $cmd.'|');
}
Modified: spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util/DependencyInfo.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util/DependencyInfo.pm?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util/DependencyInfo.pm (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/lib/Mail/SpamAssassin/Util/DependencyInfo.pm Fri Jul 21 12:06:09 2006
@@ -156,13 +156,13 @@
},
{
module => 'Archive::Tar',
- version => '0.00',
+ version => '1.23',
desc => 'The "sa-update" script requires this module to access tar update
archive files.',
},
{
module => 'IO::Zlib',
- version => '0.00',
+ version => '1.04',
desc => 'The "sa-update" script requires this module to access compressed
update archive files.',
},
Modified: spamassassin/branches/bug-3109-shortcircuiting/procmailrc.example
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/procmailrc.example?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/procmailrc.example (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/procmailrc.example Fri Jul 21 12:06:09 2006
@@ -11,8 +11,8 @@
# Pipe the mail through spamassassin (replace 'spamassassin' with 'spamc'
# if you use the spamc/spamd combination)
#
-# The condition line ensures that only messages smaller than 250 kB
-# (250 * 1024 = 256000 bytes) are processed by SpamAssassin. Most spam
+# The condition line ensures that only messages smaller than 500 kB
+# (500 * 1024 = 512000 bytes) are processed by SpamAssassin. Most spam
# isn't bigger than a few k and working with big messages can bring
# SpamAssassin to its knees.
#
@@ -20,7 +20,7 @@
# at 1 time, to keep the load down.
#
:0fw: spamassassin.lock
-* < 256000
+* < 512000
| spamassassin
# Mails with a score of 15 or higher are almost certainly spam (with 0.05%
Modified: spamassassin/branches/bug-3109-shortcircuiting/rules/10_default_prefs.cf
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/rules/10_default_prefs.cf?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/rules/10_default_prefs.cf (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/rules/10_default_prefs.cf Fri Jul 21 12:06:09 2006
@@ -33,7 +33,7 @@
report
report Content preview: _PREVIEW_
report
-report Content analysis details: (_SCORE_ points, _REQD_ required, s/c _SCTYPE_)
+report Content analysis details: (_SCORE_ points, _REQD_ required)
report
report " pts rule name description"
report ---- ---------------------- --------------------------------------------------
@@ -92,7 +92,7 @@
# FROM_HAS_MIXED_NUMS3,HOME_EMPLOYMENT,INVALID_DATE,INVALID_MSGID
# LINES_OF_YELLING,MSGID_HAS_NO_AT,NO_REAL_NAME,ONCE_IN_LIFETIME
# UNDISC_RECIPS autolearn=spam version=2.60-cvs
-add_header all Status "_YESNO_, score=_SCORE_ required=_REQD_ tests=_TESTS_ shortcircuit=_SCTYPE_ autolearn=_AUTOLEARN_ version=_VERSION_"
+add_header all Status "_YESNO_, score=_SCORE_ required=_REQD_ tests=_TESTS_ autolearn=_AUTOLEARN_ version=_VERSION_"
###########################################################################
# Default prefs values: users can override these in their
Modified: spamassassin/branches/bug-3109-shortcircuiting/rules/60_shortcircuit.cf
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/rules/60_shortcircuit.cf?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/rules/60_shortcircuit.cf (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/rules/60_shortcircuit.cf Fri Jul 21 12:06:09 2006
@@ -23,6 +23,8 @@
###########################################################################
# HIGH PRIORITY RULES
+ifplugin Mail::SpamAssassin::Plugin::Shortcircuit
+
priority USER_IN_WHITELIST -1000
priority USER_IN_DEF_WHITELIST -1000
priority USER_IN_ALL_SPAM_TO -1000
@@ -52,4 +54,6 @@
# shortcircuit BAYES_99 spam
# shortcircuit BAYES_00 ham
+
+endif # Mail::SpamAssassin::Plugin::Shortcircuit
Modified: spamassassin/branches/bug-3109-shortcircuiting/rules/active.list
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/rules/active.list?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/rules/active.list (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/rules/active.list Fri Jul 21 12:06:09 2006
@@ -1,8 +1,5 @@
# active ruleset list, automatically generated from http://ruleqa.spamassassin.org/
-# with results from: net-bb-doc net-bb-zmi net-daf net-parkerm net-theo
-
-# good enough
-ADDR_NUMS_AT_BIGSITE
+# with results from: bb-doc bb-jm bb-zmi cthielen daf parkerm theo wtogami zmi
# good enough
ADVANCE_FEE_3
@@ -14,6 +11,9 @@
AWL
# good enough
+AXB_FAKETZ
+
+# good enough
BAD_ENC_HEADER
# good enough
@@ -74,9 +74,6 @@
DATE_SPAMWARE_Y2K
# good enough
-DAV_NON_HOTMAIL
-
-# good enough
DEAR_WINNER
# tflags net
@@ -119,6 +116,9 @@
DRUGS_DIET
# good enough
+DRUGS_ERECTILE
+
+# good enough
DRUGS_ERECTILE_OBFU
# good enough
@@ -131,18 +131,15 @@
DRUGS_SLEEP_EREC
# good enough
-DRUG_ED_GENERIC
+DRUG_DOSAGE
# good enough
-DRUG_ED_ONLINE
+DRUG_ED_GENERIC
# good enough
DRUG_ED_SILD
# good enough
-EARN_PER_WEEK
-
-# good enough
EMPTY_MESSAGE
# good enough
@@ -158,12 +155,6 @@
EXCUSE_24
# good enough
-FAKE_HELO_EMAIL_COM
-
-# good enough
-FAKE_HELO_EXCITE
-
-# good enough
FAKE_HELO_LYCOS
# good enough
@@ -221,10 +212,10 @@
FH_RCVD_WITHSMTPFOR
# good enough
-FORGED_AOL_TAGS
+FM_CLAIM_IPOD
# good enough
-FORGED_EUDORAMAIL_RCVD
+FORGED_AOL_TAGS
# good enough
FORGED_HOTMAIL_RCVD
@@ -302,28 +293,28 @@
FROM_BLANK_NAME
# good enough
-FROM_DOMAIN_NOVOWEL
-
-# good enough
FROM_ENDS_IN_NUMS
# good enough
FROM_HAS_MIXED_NUMS
# good enough
-FROM_HAS_ULINE_NUMS
-
-# good enough
FROM_ILLEGAL_CHARS
# good enough
FROM_LOCAL_NOVOWEL
# good enough
-FROM_NO_LOWER
+FROM_NO_USER
# good enough
-FROM_NO_USER
+FROM_OFFERS
+
+# good enough
+FR_WWW_DOMAIN_23SUBDIR
+
+# good enough
+FS_START_DOYOU2
# good enough
FUZZY_MERIDIA
@@ -332,11 +323,20 @@
FUZZY_SPRM
# good enough
+FUZZY_STOCK
+
+# good enough
FU_HOODIA
# good enough
GAPPY_SUBJECT
+# good enough
+GEO_QUERY_STRING
+
+# good enough
+GMD_FAKETZ
+
# tflags net
HABEAS_ACCREDITED_COI
@@ -371,6 +371,9 @@
HASHCASH_HIGH
# good enough
+HEADER_COUNT_CTYPE
+
+# good enough
HEADER_SPAM
# good enough
@@ -410,14 +413,17 @@
HS_GETMEOFF
# good enough
-HS_MEETUP_FOR_SEX
+HS_INDEX_PARAM
# good enough
-HS_PHARMA_1
+HS_MEETUP_FOR_SEX
# good enough
HS_SUBJ_ONLINE_PHARMACEUTICAL
+# good enough
+HS_SYNDICATE_P2
+
# tflags userconf
HTML_CHARSET_FARAWAY
@@ -437,9 +443,6 @@
HTTP_EXCESSIVE_ESCAPES
# good enough
-INFO_TLD
-
-# good enough
INTERRUPTUS
# good enough
@@ -488,6 +491,9 @@
KAM_STOCKTIP24
# good enough
+KAM_STOCKTIP3
+
+# good enough
KAM_STOCKTIP6
# good enough
@@ -509,19 +515,19 @@
MID_DEGREES
# good enough
+MID_MJW_STOX
+
+# good enough
MILLION_USD
# good enough
MIME_BAD_ISO_CHARSET
# good enough
-MIME_BASE64_BLANKS
+MIME_BASE64_TEXT
# good enough
-MIME_BASE64_NO_NAME
-
-# good enough
-MIME_BASE64_TEXT
+MIME_BOUND_ALLHEX_17
# good enough
MIME_BOUND_DD_DIGITS
@@ -545,13 +551,10 @@
MISSING_MIMEOLE
# good enough
-MISSING_MIME_HB_SEP
-
-# good enough
-MISSING_SUBJECT
+MORE_SEX
# good enough
-ML_MARKETING
+MPART_ALT_DIFF
# good enough
MSGID_DOLLARS_RANDOM
@@ -577,9 +580,6 @@
# good enough
MSGID_YAHOO_CAPS
-# good enough
-NOT_ADVISOR
-
# tflags net
NO_DNS_FOR_FROM
@@ -602,27 +602,18 @@
NUMERIC_HTTP_ADDR
# good enough
-OBSCURED_EMAIL
-
-# good enough
-ONE_TIME
+ONLINE_PHARMACY
# good enough
PERCENT_RANDOM
# good enough
-PLING_PLING
-
-# good enough
PLING_QUERY
# good enough
PORN_15
# good enough
-PORN_URL_SEX
-
-# good enough
PREST_NON_ACCREDITED
# good enough
@@ -794,9 +785,6 @@
REPLICA_WATCH
# good enough
-REPLY_TO_EMPTY
-
-# good enough
REPTO_OVERQUOTE_THEBAT
# good enough
@@ -848,12 +836,6 @@
SPF_SOFTFAIL
# good enough
-SPOOF_COM2COM
-
-# good enough
-SPOOF_OURI
-
-# good enough
STOCK_ALERT
# good enough
@@ -881,16 +863,10 @@
SUBJECT_NEEDS_ENCODING
# good enough
-SUBJECT_NOVOWEL
-
-# good enough
SUBJECT_SEXUAL
# good enough
-SUBJ_CONSONANTS
-
-# good enough
-SUBJ_HAS_SPACES
+SUBJ_AS_SEEN
# good enough
SUBJ_ILLEGAL_CHARS
@@ -899,13 +875,13 @@
SUBJ_RE_NUM
# good enough
-SUSPICIOUS_RECIPS
+SUBJ_YOUR_FAMILY
# good enough
-TO_CC_NONE
+SUB_HELLO
# good enough
-TO_EMPTY
+SUSPICIOUS_RECIPS
# good enough
TO_MALFORMED
@@ -929,6 +905,9 @@
TVD_BODY_END_STAR
# good enough
+TVD_DEAD_JOB
+
+# good enough
TVD_DEAR_HOMEOWNER
# good enough
@@ -1022,6 +1001,9 @@
TVD_RATWARE_CB_2
# good enough
+TVD_RATWARE_MSGID_01
+
+# good enough
TVD_RATWARE_MSGID_02
# good enough
@@ -1031,6 +1013,9 @@
TVD_SINGLE_SPAN_DIV
# good enough
+TVD_SPACED_SUBJECT_WORD3
+
+# good enough
TVD_SPACED_WORDS
# good enough
@@ -1055,10 +1040,13 @@
TVD_UA_FOSTERING
# good enough
-TVD_VIS_HIDDEN
+TVD_UNDER_VALUED
-# tflags net
-DNS_FROM_DOB
+# good enough
+TVD_VISIT_PHARMA
+
+# good enough
+TVD_VIS_HIDDEN
# good enough
FORGED_IMS_HTML
@@ -1073,49 +1061,19 @@
FORGED_OUTLOOK_TAGS
# good enough
-HS_SYNDICATE_P2
-
-# tflags net
-RCVD_IN_DOB
-
-# tflags net
-RCVD_IN_DSBL
-
-# tflags net
-RCVD_IN_NJABL_DUL
-
-# tflags net
-RCVD_IN_SORBS_DUL
-
-# tflags net
-RCVD_IN_WHOIS_INVALID
-
-# tflags net
-RCVD_IN_XBL
-
-# good enough
-TVD_FW_GRAPHIC_ID1
-
-# good enough
-TVD_PH_1
+HS_PHARMA_1
# good enough
-TVD_PH_BODY_ACCOUNTS_PRE
+RATWARE_MS_HASH
# good enough
-TVD_PH_FR5
+RATWARE_OUTLOOK_NONAME
# good enough
-TVD_RATWARE_MSGID_01
-
-# tflags net
-URIBL_RHS_DOB
+TVD_FW_GRAPHIC_NAME_LONG
# good enough
-ZMIde_REPLICA1
-
-# good enough
-ZMIde_REPLICASURE
+TVD_FW_GRAPHIC_NAME_MID
# good enough
UNCLAIMED_MONEY
@@ -1123,12 +1081,6 @@
# good enough
UNCLOSED_BRACKET
-# good enough
-UNDISC_RECIPS
-
-# good enough
-UNIQUE_WORDS
-
# tflags userconf
UNPARSEABLE_RELAY
@@ -1175,9 +1127,6 @@
URIBL_WS_SURBL
# good enough
-URI_4YOU
-
-# good enough
URI_L_PHP
# good enough
@@ -1244,19 +1193,19 @@
X_PRIORITY_CC
# good enough
-X_PRIORITY_HIGH
+ZMIde_EBAYJOBSURI
# good enough
-ZMIde_EBAYJOBSURI
+ZMIde_REPLICA1
# good enough
ZMIde_REPLICA2
# good enough
-ZMIde_SEXUALEXPL1
+ZMIde_REPLICASURE
-# tflags net
-__RCVD_IN_DOB
+# good enough
+ZMIde_SEXUALEXPL1
# tflags net
__RCVD_IN_IADB
Modified: spamassassin/branches/bug-3109-shortcircuiting/rules/v320.pre
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/rules/v320.pre?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/rules/v320.pre (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/rules/v320.pre Fri Jul 21 12:06:09 2006
@@ -18,3 +18,8 @@
# URIDetail - test URIs using detailed URI information
#
loadplugin Mail::SpamAssassin::Plugin::URIDetail
+
+# Shortcircuit - stop evaluation early if high-accuracy rules fire
+#
+# loadplugin Mail::SpamAssassin::Plugin::Shortcircuit
+
Modified: spamassassin/branches/bug-3109-shortcircuiting/sa-learn.raw
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/sa-learn.raw?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/sa-learn.raw (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/sa-learn.raw Fri Jul 21 12:06:09 2006
@@ -563,7 +563,7 @@
--mbx Input sources are in mbx format
--showdots Show progress using dots
--progress Show progress using progress bar
- --no-sync Skip syncronizing the database and journal
+ --no-sync Skip synchronizing the database and journal
after learning
-L, --local Operate locally, no network accesses
--import Migrate data from older version/non DB_File
@@ -665,7 +665,7 @@
or not. Note: This doesn't mean any tokens will actually expire.
Please see the EXPIRATION section below.
-Note: C<--force-expire> also causes the journal data to be syncronized
+Note: C<--force-expire> also causes the journal data to be synchronized
into the Bayes databases.
=item B<--forget>
@@ -754,7 +754,7 @@
=item B<--no-sync>
-Skip the slow syncronization step which normally takes place after
+Skip the slow synchronization step which normally takes place after
changing database entries. If you plan to learn from many folders in
a batch, or to learn many individual messages one-by-one, it is faster
to use this switch and run C<sa-learn --sync> once all the folders have
@@ -849,7 +849,7 @@
readable, even if statistics make me break out in hives.
The short semi-inaccurate version: Given training, a spam heuristics engine
-can take the most "spammy" and "hammy" words and apply probablistic
+can take the most "spammy" and "hammy" words and apply probabilistic
analysis. Furthermore, once given a basis for the analysis, the engine can
continue to learn iteratively by applying both the non-Bayesian and Bayesian
rulesets together to create evolving "intelligence".
@@ -885,7 +885,7 @@
it is fairly easy to understand why.
With Bayesian analysis, it's all probabilities - "because the past says
-it is likely as this falls into a probablistic distribution common to past
+it is likely as this falls into a probabilistic distribution common to past
spam in your systems". Tell that to your users! Tell that to the client
when he asks "what can I do to change this". (By the way, the answer in
this case is "use whitelisting".)
@@ -1070,12 +1070,12 @@
it uses in its calculations. To avoid the contention of having each
SpamAssassin process attempting to gain write access to the Bayes DB,
the token timestamps are written to a 'journal' file which will later
-(either automatically or via C<sa-learn --sync>) be used to syncronize
+(either automatically or via C<sa-learn --sync>) be used to synchronize
the Bayes DB.
Also, through the use of C<bayes_learn_to_journal>, or when using the
C<--no-sync> option with sa-learn, the actual learning data will
-be placed into the journal for later syncronization. This is typically
+be placed into the journal for later synchronization. This is typically
useful for high-traffic sites to avoid the same contention as stated
above.
@@ -1213,7 +1213,7 @@
=over 4
=item C<bayes_auto_expire> is used to specify whether or not SpamAssassin
-ought to opportunistically attempt to expire the Bayes databaase.
+ought to opportunistically attempt to expire the Bayes database.
The default is 1 (yes).
=item C<bayes_expiry_max_db_size> specifies both the auto-expire token
Modified: spamassassin/branches/bug-3109-shortcircuiting/sa-update.raw
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/sa-update.raw?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/sa-update.raw (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/sa-update.raw Fri Jul 21 12:06:09 2006
@@ -88,8 +88,8 @@
eval { use Net::DNS; };
eval { use LWP::UserAgent; };
eval { use HTTP::Date qw(time2str); };
-eval { use Archive::Tar; };
-eval { use IO::Zlib; };
+eval { use Archive::Tar 1.23; };
+eval { use IO::Zlib 1.04; };
# These should already be available
use Mail::SpamAssassin;
@@ -401,11 +401,7 @@
next;
}
- # ensure dirs exist, upfront
- unless (-d $UPDDir) {
- dbg("channel: creating $UPDDir");
- mkpath([$UPDDir], 0, 0777) or die "fatal: can't create $UPDDir: $!\n";
- }
+ # ensure tmp dir exists, upfront
unless (-d $UPDTmp) {
dbg("channel: creating $UPDTmp");
mkpath([$UPDTmp], 0, 0777) or die "fatal: can't create $UPDTmp: $!\n";
@@ -751,22 +747,62 @@
}
closedir(DIR);
if (!close(CF)) {
- warn "write to $CFFTmp failed! attempting to continue";
- channel_failed("write to $CFFTmp failed");
- next;
+ die "write to $CFFTmp failed! $!"; # write failed = fatal
}
+ # create a test file, in an attempt to mitigate dangers of incomplete
+ # upgrades. If we fail to move this file the same way we expect to with the
+ # "real" upgrade files, there's no point in continuing. (bug 4941)
+ my $testfile = "$UPDTmp/.rename_test.tmp";
+ my $testtofile = "$UPDDir/.rename_test.tmp";
+ open(TST, ">".$testfile) or die "write to $testfile failed! $!";
+ print TST time;
+ close TST or die "close of $testfile failed! $!";
+
dbg("channel: applying changes to $UPDDir...");
- # too late to stop now! At this stage, if there are errors,
- # we have to attempt to carry on regardless, since we've already
- # blown away the old ruleset.
-
- # clean out the "real" update dir, and copy from tmp areas
- if (!clean_update_dir($UPDDir)) {
- warn("channel: attempt to rm contents failed, attempting to continue anyway");
+ if (-d $UPDDir) {
+ if (!rename($testfile, $testtofile)) {
+ warn "rename $testfile $testtofile failed: $!";
+ unlink ($testfile, $testtofile);
+ die "rename test failed (existing dir), aborting upgrade"
+ }
+
+ unlink $testtofile;
+
+ # ok that worked, too late to stop now! At this stage, if there are
+ # errors, we have to attempt to carry on regardless, since we've already
+ # blown away the old ruleset.
+ dbg("channel: point of no return for existing $UPDDir");
+
+ # clean out the "real" update dir
+ if (!clean_update_dir($UPDDir)) {
+ warn("channel: attempt to rm contents failed, attempting to continue anyway");
+ }
+
+ } else {
+ # create the dir, if it doesn't exist
+ dbg("channel: creating $UPDDir");
+ if (!mkpath([$UPDDir], 0, 0777)) {
+ rmdir $UPDDir; # be sure it can't be used (bug 4941)
+ die "fatal: can't create $UPDDir: $!\n";
+ }
+
+ if (!rename($testfile, $testtofile)) {
+ warn "rename $testfile $testtofile failed: $!";
+ unlink ($testfile, $testtofile);
+ rmdir $UPDDir; # be sure it can't be used (bug 4941)
+ die "rename test failed (new dir), aborting upgrade"
+ }
+
+ unlink $testtofile;
+
+ # ok, that test worked. it's now likely that the .cf's will
+ # similarly be ok to rename, too. Too late to stop from here on
+ dbg("channel: point of no return for new $UPDDir");
}
+ # move in the files
foreach my $file (@files) {
rename("$UPDTmp/$file", "$UPDDir/$file")
or warn "rename $UPDTmp/$file $UPDDir/$file failed: $!";
@@ -1005,8 +1041,14 @@
my $gpghome = '';
if ($opt{'gpghomedir'}) {
$gpghome = $opt{'gpghomedir'};
- $gpghome =~ s/\'/\\\'/gs;
- $gpghome = "--homedir='$gpghome'";
+ if (Mail::SpamAssassin::Util::am_running_on_windows()) {
+ # windows is single-quote-phobic; bug 4958 cmt 7
+ $gpghome =~ s/\"/\\\"/gs;
+ $gpghome = "--homedir=\"$gpghome\"";
+ } else {
+ $gpghome =~ s/\'/\\\'/gs;
+ $gpghome = "--homedir='$gpghome'";
+ }
}
return $gpghome;
}
@@ -1061,6 +1103,7 @@
sub clean_update_dir {
my $dir = shift;
+
unless (opendir(DIR, $dir)) {
warn "error: can't readdir $dir: $!\n";
dbg("channel: attempt to readdir failed, channel failed");
@@ -1074,6 +1117,7 @@
$file = $1;
if (!unlink "$dir/$file") {
warn "error: can't remove file $dir/$file: $!\n";
+ closedir(DIR);
return 0;
}
}
Modified: spamassassin/branches/bug-3109-shortcircuiting/spamc/libspamc.c
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/spamc/libspamc.c?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/spamc/libspamc.c (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/spamc/libspamc.c Fri Jul 21 12:06:09 2006
@@ -608,20 +608,20 @@
}
/* Find the end-of-DATA line */
- /* if bad format with no end ".\n" will truncate the last two characters of the buffer */
prev = '\n';
- for (i = j = 0; (i+2) < (unsigned int) m->msg_len; i++) { /* (i+2) prevents out of bound reference msg[i+2] */
+ for (i = j = 0; i < (unsigned int) m->msg_len; i++) {
if (prev == '\n' && m->msg[i] == '.') {
/* Dot at the beginning of a line */
- if ((m->msg[i + 1] == '\r' && m->msg[i + 2] == '\n')
- || m->msg[i + 1] == '\n') {
+ if (((i+1) == m->msg_len)
+ || ((i+1) < m->msg_len && m->msg[i + 1] == '\n')
+ || ((i+2) < m->msg_len && m->msg[i + 1] == '\r' && m->msg[i + 2] == '\n')) {
/* Lone dot! That's all, folks */
m->post = m->msg + i;
m->post_len = m->msg_len - i;
m->msg_len = j;
break;
}
- else if (m->msg[i + 1] == '.') {
+ else if ((i+1) < m->msg_len && m->msg[i + 1] == '.') {
/* Escaping dot, eliminate. */
prev = '.';
continue;
@@ -631,6 +631,9 @@
m->msg[j++] = m->msg[i];
}
+ /* if bad format with no end "\n.\n", error out */
+ if (m->post == NULL)
+ return EX_DATAERR;
m->type = MESSAGE_BSMTP;
m->out = m->msg;
m->out_len = m->msg_len;
Modified: spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.c
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.c?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.c (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.c Fri Jul 21 12:06:09 2006
@@ -153,7 +153,7 @@
" Timeout in seconds for communications to\n"
" spamd. [default: 600]\n");
usg(" -s, --max-size size Specify maximum message size, in bytes.\n"
- " [default: 250k]\n");
+ " [default: 500k]\n");
usg(" -u, --username username\n"
" User for spamd to process this message under.\n"
" [default: current user]\n");
@@ -673,7 +673,7 @@
#endif
/* set some defaults */
- max_size = 250 * 1024;
+ max_size = 500 * 1024;
username = NULL;
combo_argc = 1;
@@ -879,6 +879,17 @@
}
else if (flags & (SPAMC_LEARN|SPAMC_PING) ) {
message_cleanup(&m);
+ }
+ else if (flags & SPAMC_SYMBOLS) {
+ /* bug 4991: -y should only output a blank line on connection failure */
+ full_write(out_fd, 1, "\n", 1);
+ message_cleanup(&m);
+ if (use_exit_code) {
+ ret = result;
+ }
+ else if (flags & SPAMC_SAFE_FALLBACK) {
+ ret = EX_OK;
+ }
}
else {
message_dump(STDIN_FILENO, out_fd, &m);
Modified: spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.pod
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.pod?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.pod (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/spamc/spamc.pod Fri Jul 21 12:06:09 2006
@@ -148,11 +148,11 @@
=item B<-s> I<max_size>, B<--max-size>=I<max_size>
Set the maximum message size which will be sent to spamd -- any bigger than
-this threshold and the message will be returned unprocessed (default: 250 KB).
+this threshold and the message will be returned unprocessed (default: 500 KB).
If spamc gets handed a message bigger than this, it won't be passed to spamd.
The size is specified in bytes, as a positive integer greater than 0.
-For example, B<-s 250000>.
+For example, B<-s 500000>.
=item B<-S>, B<--ssl>
Modified: spamassassin/branches/bug-3109-shortcircuiting/spamd/spamd.raw
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/spamd/spamd.raw?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/spamd/spamd.raw (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/spamd/spamd.raw Fri Jul 21 12:06:09 2006
@@ -1393,7 +1393,7 @@
push(@extra, "bayes=".sprintf("%06f", $status->{bayes_score}));
}
push(@extra, "autolearn=".$status->get_autolearn_status());
- push(@extra, "shortcircuit=".$status->get_tag("SCTYPE"));
+ push(@extra, $status->get_spamd_result_log_items());
my $yorn = $status->is_spam() ? 'Y' : '.';
my $score = $status->get_score();
Modified: spamassassin/branches/bug-3109-shortcircuiting/t/gtube.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/t/gtube.t?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/t/gtube.t (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/t/gtube.t Fri Jul 21 12:06:09 2006
@@ -22,6 +22,8 @@
endif
");
+$ENV{'LC_ALL'} = 'C'; # a cheat, but we match the description
+
ok (sarun ("-L -t < data/spam/gtube.eml", \&patterns_run_cb));
ok_all_patterns();
Modified: spamassassin/branches/bug-3109-shortcircuiting/t/rcvd_parser.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/t/rcvd_parser.t?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/t/rcvd_parser.t (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/t/rcvd_parser.t Fri Jul 21 12:06:09 2006
@@ -18,7 +18,7 @@
use lib '.'; use lib 't';
use SATest; sa_t_init("rcvd_parser");
-use Test; BEGIN { plan tests => 49 };
+use Test; BEGIN { plan tests => 50 };
use strict;
@@ -619,6 +619,20 @@
} => q{
[ ip=70.20.57.51 rdns= helo=KRYPTIK by=host.name ident= envfrom= id=M2006040415284308595 auth= ]
+
+},
+q{
+
+Received: from bar.example.org (bar.example.org [127.0.0.1])
+ (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits))
+ (Client did not present a certificate)
+ (Authenticated sender: sender.example.net)
+ by foo.example.net (Postfix) with ESMTP id 44A8959ED6B0
+ for <re...@example.com>; Fri, 30 Jun 2006 08:02:00 +0100 (BST)
+
+} => q{
+
+[ ip=127.0.0.1 rdns=bar.example.org helo=bar.example.org by=foo.example.net ident= envfrom= id=44A8959ED6B0 auth=Postfix ]
},
);
Modified: spamassassin/branches/bug-3109-shortcircuiting/t/shortcircuit.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/bug-3109-shortcircuiting/t/shortcircuit.t?rev=424421&r1=424420&r2=424421&view=diff
==============================================================================
--- spamassassin/branches/bug-3109-shortcircuiting/t/shortcircuit.t (original)
+++ spamassassin/branches/bug-3109-shortcircuiting/t/shortcircuit.t Fri Jul 21 12:06:09 2006
@@ -12,6 +12,10 @@
tstlocalrules ('
+ loadplugin Mail::SpamAssassin::Plugin::Shortcircuit
+
+ add_header all Status "_YESNO_, score=_SCORE_ required=_REQD_ tests=_TESTS_ shortcircuit=_SCTYPE_ autolearn=_AUTOLEARN_ version=_VERSION_"
+
# hits spam/001
body X_FOO /Congratulations/
header X_BAR From =~ /sb55/