You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2006/07/24 00:51:12 UTC
svn commit: r424846 - in /spamassassin/trunk: ./ lib/Mail/
lib/Mail/SpamAssassin/ lib/Mail/SpamAssassin/Plugin/ rules/
Author: felicity
Date: Sun Jul 23 15:51:07 2006
New Revision: 424846
URL: http://svn.apache.org/viewvc?rev=424846&view=rev
Log:
get rid of EvalTests and move the code into plugins.
Added:
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Bayes.pm
- copied unchanged from r357409, spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/Bayes.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/BodyEval.pm
- copied, changed from r357409, spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/BodyEval.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DNSEval.pm
- copied, changed from r357409, spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/DNSEval.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm
- copied, changed from r357409, spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm
- copied, changed from r357409, spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm
- copied, changed from r357409, spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/RelayEval.pm
- copied unchanged from r357409, spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/RelayEval.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIEval.pm
- copied, changed from r357409, spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/URIEval.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/WLBLEval.pm
- copied, changed from r357409, spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/WLBLEval.pm
Removed:
spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm
spamassassin/trunk/rules/20_body_tests.cf
Modified:
spamassassin/trunk/MANIFEST
spamassassin/trunk/lib/Mail/SpamAssassin.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Dns.pm
spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
spamassassin/trunk/rules/20_dnsbl_tests.cf
spamassassin/trunk/rules/20_head_tests.cf
spamassassin/trunk/rules/20_html_tests.cf
spamassassin/trunk/rules/20_net_tests.cf
spamassassin/trunk/rules/23_bayes.cf
spamassassin/trunk/rules/60_whitelist.cf
spamassassin/trunk/rules/v320.pre
Modified: spamassassin/trunk/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/trunk/MANIFEST?rev=424846&r1=424845&r2=424846&view=diff
==============================================================================
--- spamassassin/trunk/MANIFEST (original)
+++ spamassassin/trunk/MANIFEST Sun Jul 23 15:51:07 2006
@@ -47,7 +47,6 @@
lib/Mail/SpamAssassin/DBBasedAddrList.pm
lib/Mail/SpamAssassin/Dns.pm
lib/Mail/SpamAssassin/DnsResolver.pm
-lib/Mail/SpamAssassin/EvalTests.pm
lib/Mail/SpamAssassin/HTML.pm
lib/Mail/SpamAssassin/Locales.pm
lib/Mail/SpamAssassin/Locker.pm
@@ -72,15 +71,22 @@
lib/Mail/SpamAssassin/Plugin/AccessDB.pm
lib/Mail/SpamAssassin/Plugin/AntiVirus.pm
lib/Mail/SpamAssassin/Plugin/AutoLearnThreshold.pm
+lib/Mail/SpamAssassin/Plugin/Bayes.pm
+lib/Mail/SpamAssassin/Plugin/BodyEval.pm
lib/Mail/SpamAssassin/Plugin/DCC.pm
lib/Mail/SpamAssassin/Plugin/DKIM.pm
+lib/Mail/SpamAssassin/Plugin/DNSEval.pm
lib/Mail/SpamAssassin/Plugin/DomainKeys.pm
+lib/Mail/SpamAssassin/Plugin/HTMLEval.pm
lib/Mail/SpamAssassin/Plugin/Hashcash.pm
+lib/Mail/SpamAssassin/Plugin/HeaderEval.pm
lib/Mail/SpamAssassin/Plugin/HTTPSMismatch.pm
+lib/Mail/SpamAssassin/Plugin/MIMEEval.pm
lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm
lib/Mail/SpamAssassin/Plugin/Pyzor.pm
lib/Mail/SpamAssassin/Plugin/Razor2.pm
lib/Mail/SpamAssassin/Plugin/RelayCountry.pm
+lib/Mail/SpamAssassin/Plugin/RelayEval.pm
lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm
lib/Mail/SpamAssassin/Plugin/SPF.pm
lib/Mail/SpamAssassin/Plugin/SpamCop.pm
@@ -88,6 +94,8 @@
lib/Mail/SpamAssassin/Plugin/TextCat.pm
lib/Mail/SpamAssassin/Plugin/URIDetail.pm
lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
+lib/Mail/SpamAssassin/Plugin/URIEval.pm
+lib/Mail/SpamAssassin/Plugin/WLBLEval.pm
lib/Mail/SpamAssassin/Plugin/WhiteListSubject.pm
lib/Mail/SpamAssassin/PluginHandler.pm
lib/Mail/SpamAssassin/Reporter.pm
@@ -427,7 +435,6 @@
tools/test_extract
build/mkrules
rules/10_default_prefs.cf
-rules/20_body_tests.cf
rules/20_dnsbl_tests.cf
rules/20_head_tests.cf
rules/20_html_tests.cf
Modified: spamassassin/trunk/lib/Mail/SpamAssassin.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin.pm?rev=424846&r1=424845&r2=424846&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin.pm Sun Jul 23 15:51:07 2006
@@ -1171,6 +1171,7 @@
my $mail = $self->parse(\@testmsg, 1);
my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,
{ disable_auto_learning => 1 } );
+
# We want to turn off the bayes rules for this test msg
my $use_bayes_rules_value = $self->{conf}->{use_bayes_rules};
$self->{conf}->{use_bayes_rules} = 0;
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm?rev=424846&r1=424845&r2=424846&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm Sun Jul 23 15:51:07 2006
@@ -672,8 +672,10 @@
return 0 unless $self->{use_ignores};
- my $ignore = $PMS->check_from_in_list('bayes_ignore_from')
- || $PMS->check_to_in_list('bayes_ignore_to');
+ my $ig_from = $self->{main}->call_plugins ("check_wb_list", { permsgstatus => $PMS, type => 'from', list => 'bayes_ignore_from' });
+ my $ig_to = $self->{main}->call_plugins ("check_wb_list", { permsgstatus => $PMS, type => 'to', list => 'bayes_ignore_to' });
+
+ my $ignore = $ig_from || $ig_to;
dbg("bayes: not using bayes, bayes_ignore_from or _to rule") if $ignore;
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm?rev=424846&r1=424845&r2=424846&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm Sun Jul 23 15:51:07 2006
@@ -259,300 +259,6 @@
=back
-=head2 WHITELIST AND BLACKLIST OPTIONS
-
-=over 4
-
-=item whitelist_from add@ress.com
-
-Used to specify addresses which send mail that is often tagged (incorrectly) as
-spam. If you want to whitelist your own domain, be aware that spammers will
-often impersonate the domain of the recipient. The recommended solution is to
-instead use C<whitelist_from_rcvd> as explained below.
-
-Whitelist and blacklist addresses are now file-glob-style patterns, so
-C<friend@somewhere.com>, C<*@isp.com>, or C<*.domain.net> will all work.
-Specifically, C<*> and C<?> are allowed, but all other metacharacters are not.
-Regular expressions are not used for security reasons.
-
-Multiple addresses per line, separated by spaces, is OK. Multiple
-C<whitelist_from> lines is also OK.
-
-The headers checked for whitelist addresses are as follows: if C<Resent-From>
-is set, use that; otherwise check all addresses taken from the following
-set of headers:
-
- Envelope-Sender
- Resent-Sender
- X-Envelope-From
- From
-
-In addition, the "envelope sender" data, taken from the SMTP envelope
-data where this is available, is looked up.
-
-e.g.
-
- whitelist_from joe@example.com fred@example.com
- whitelist_from *@example.com
-
-=cut
-
- push (@cmds, {
- setting => 'whitelist_from',
- type => $CONF_TYPE_ADDRLIST
- });
-
-=item unwhitelist_from add@ress.com
-
-Used to override a default whitelist_from entry, so for example a distribution
-whitelist_from can be overridden in a local.cf file, or an individual user can
-override a whitelist_from entry in their own C<user_prefs> file.
-The specified email address has to match exactly the address previously
-used in a whitelist_from line.
-
-e.g.
-
- unwhitelist_from joe@example.com fred@example.com
- unwhitelist_from *@example.com
-
-=cut
-
- push (@cmds, {
- command => 'unwhitelist_from',
- setting => 'whitelist_from',
- code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value
- });
-
-=item whitelist_from_rcvd addr@lists.sourceforge.net sourceforge.net
-
-Use this to supplement the whitelist_from addresses with a check against the
-Received headers. The first parameter is the address to whitelist, and the
-second is a string to match the relay's rDNS.
-
-This string is matched against the reverse DNS lookup used during the handover
-from the internet to your internal network's mail exchangers. It can
-either be the full hostname, or the domain component of that hostname. In
-other words, if the host that connected to your MX had an IP address that
-mapped to 'sendinghost.spamassassin.org', you should specify
-C<sendinghost.spamassassin.org> or just C<spamassassin.org> here.
-
-Note that this requires that C<internal_networks> be correct. For simple
-cases, it will be, but for a complex network, or running with DNS checks
-off or with C<-L>, you may get better results by setting that parameter.
-
-e.g.
-
- whitelist_from_rcvd joe@example.com example.com
- whitelist_from_rcvd *@axkit.org sergeant.org
-
-=item def_whitelist_from_rcvd addr@lists.sourceforge.net sourceforge.net
-
-Same as C<whitelist_from_rcvd>, but used for the default whitelist entries
-in the SpamAssassin distribution. The whitelist score is lower, because
-these are often targets for spammer spoofing.
-
-=cut
-
- push (@cmds, {
- setting => 'whitelist_from_rcvd',
- code => sub {
- my ($self, $key, $value, $line) = @_;
- unless (defined $value && $value !~ /^$/) {
- return $MISSING_REQUIRED_VALUE;
- }
- unless ($value =~ /^\S+\s+\S+$/) {
- return $INVALID_VALUE;
- }
- $self->{parser}->add_to_addrlist_rcvd ('whitelist_from_rcvd',
- split(/\s+/, $value));
- }
- });
-
- push (@cmds, {
- setting => 'def_whitelist_from_rcvd',
- code => sub {
- my ($self, $key, $value, $line) = @_;
- unless (defined $value && $value !~ /^$/) {
- return $MISSING_REQUIRED_VALUE;
- }
- unless ($value =~ /^\S+\s+\S+$/) {
- return $INVALID_VALUE;
- }
- $self->{parser}->add_to_addrlist_rcvd ('def_whitelist_from_rcvd',
- split(/\s+/, $value));
- }
- });
-
-=item whitelist_allows_relays add@ress.com
-
-Specify addresses which are in C<whitelist_from_rcvd> that sometimes
-send through a mail relay other than the listed ones. By default mail
-with a From address that is in C<whitelist_from_rcvd> that does not match
-the relay will trigger a forgery rule. Including the address in
-C<whitelist_allows_relay> prevents that.
-
-Whitelist and blacklist addresses are now file-glob-style patterns, so
-C<friend@somewhere.com>, C<*@isp.com>, or C<*.domain.net> will all work.
-Specifically, C<*> and C<?> are allowed, but all other metacharacters are not.
-Regular expressions are not used for security reasons.
-
-Multiple addresses per line, separated by spaces, is OK. Multiple
-C<whitelist_allows_relays> lines is also OK.
-
-The specified email address does not have to match exactly the address
-previously used in a whitelist_from_rcvd line as it is compared to the
-address in the header.
-
-e.g.
-
- whitelist_allows_relays joe@example.com fred@example.com
- whitelist_allows_relays *@example.com
-
-=cut
-
- push (@cmds, {
- setting => 'whitelist_allows_relays',
- type => $CONF_TYPE_ADDRLIST
- });
-
-=item unwhitelist_from_rcvd add@ress.com
-
-Used to override a default whitelist_from_rcvd entry, so for example a
-distribution whitelist_from_rcvd can be overridden in a local.cf file,
-or an individual user can override a whitelist_from_rcvd entry in
-their own C<user_prefs> file.
-
-The specified email address has to match exactly the address previously
-used in a whitelist_from_rcvd line.
-
-e.g.
-
- unwhitelist_from_rcvd joe@example.com fred@example.com
- unwhitelist_from_rcvd *@axkit.org
-
-=cut
-
- push (@cmds, {
- setting => 'unwhitelist_from_rcvd',
- code => sub {
- my ($self, $key, $value, $line) = @_;
- unless (defined $value && $value !~ /^$/) {
- return $MISSING_REQUIRED_VALUE;
- }
- unless ($value =~ /^(?:\S+(?:\s+\S+)*)$/) {
- return $INVALID_VALUE;
- }
- $self->{parser}->remove_from_addrlist_rcvd('whitelist_from_rcvd',
- split (/\s+/, $value));
- $self->{parser}->remove_from_addrlist_rcvd('def_whitelist_from_rcvd',
- split (/\s+/, $value));
- }
- });
-
-=item blacklist_from add@ress.com
-
-Used to specify addresses which send mail that is often tagged (incorrectly) as
-non-spam, but which the user doesn't want. Same format as C<whitelist_from>.
-
-=cut
-
- push (@cmds, {
- setting => 'blacklist_from',
- type => $CONF_TYPE_ADDRLIST
- });
-
-=item unblacklist_from add@ress.com
-
-Used to override a default blacklist_from entry, so for example a
-distribution blacklist_from can be overridden in a local.cf file, or
-an individual user can override a blacklist_from entry in their own
-C<user_prefs> file. The specified email address has to match exactly
-the address previously used in a blacklist_from line.
-
-
-e.g.
-
- unblacklist_from joe@example.com fred@example.com
- unblacklist_from *@spammer.com
-
-=cut
-
-
- push (@cmds, {
- command => 'unblacklist_from',
- setting => 'blacklist_from',
- code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value
- });
-
-
-=item whitelist_to add@ress.com
-
-If the given address appears as a recipient in the message headers
-(Resent-To, To, Cc, obvious envelope recipient, etc.) the mail will
-be whitelisted. Useful if you're deploying SpamAssassin system-wide,
-and don't want some users to have their mail filtered. Same format
-as C<whitelist_from>.
-
-There are three levels of To-whitelisting, C<whitelist_to>, C<more_spam_to>
-and C<all_spam_to>. Users in the first level may still get some spammish
-mails blocked, but users in C<all_spam_to> should never get mail blocked.
-
-The headers checked for whitelist addresses are as follows: if C<Resent-To> or
-C<Resent-Cc> are set, use those; otherwise check all addresses taken from the
-following set of headers:
-
- To
- Cc
- Apparently-To
- Delivered-To
- Envelope-Recipients
- Apparently-Resent-To
- X-Envelope-To
- Envelope-To
- X-Delivered-To
- X-Original-To
- X-Rcpt-To
- X-Real-To
-
-=item more_spam_to add@ress.com
-
-See above.
-
-=item all_spam_to add@ress.com
-
-See above.
-
-=cut
-
- push (@cmds, {
- setting => 'whitelist_to',
- type => $CONF_TYPE_ADDRLIST
- });
- push (@cmds, {
- setting => 'more_spam_to',
- type => $CONF_TYPE_ADDRLIST
- });
- push (@cmds, {
- setting => 'all_spam_to',
- type => $CONF_TYPE_ADDRLIST
- });
-
-=item blacklist_to add@ress.com
-
-If the given address appears as a recipient in the message headers
-(Resent-To, To, Cc, obvious envelope recipient, etc.) the mail will
-be blacklisted. Same format as C<blacklist_from>.
-
-=cut
-
-
- push (@cmds, {
- setting => 'blacklist_to',
- type => $CONF_TYPE_ADDRLIST
- });
-
-=back
-
=head2 BASIC MESSAGE TAGGING OPTIONS
=over 4
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm?rev=424846&r1=424845&r2=424846&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm Sun Jul 23 15:51:07 2006
@@ -37,6 +37,7 @@
HARVEST_DNSBL_PRIORITY MBX_SEPARATOR
MAX_BODY_LINE_LENGTH MAX_HEADER_KEY_LENGTH MAX_HEADER_VALUE_LENGTH
MAX_HEADER_LENGTH ARITH_EXPRESSION_LEXER AI_TIME_UNKNOWN
+ CHARSETS_LIKELY_TO_FP_AS_CAPS
);
%EXPORT_TAGS = (
@@ -305,5 +306,10 @@
# second pass (when the message is actually read + processed) the received
# date is calculated. this value signifies "unknown" from the first pass.
use constant AI_TIME_UNKNOWN => 0;
+
+# Charsets which use capital letters heavily in their encoded representation.
+use constant CHARSETS_LIKELY_TO_FP_AS_CAPS => qr{[-_a-z0-9]*(?:
+ koi|jp|jis|euc|gb|big5|isoir|cp1251|georgianps|pt154|tis
+ )[-_a-z0-9]*}ix;
1;
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Dns.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Dns.pm?rev=424846&r1=424845&r2=424846&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Dns.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Dns.pm Sun Jul 23 15:51:07 2006
@@ -841,4 +841,12 @@
###########################################################################
+# interface called by SPF plugin
+sub check_for_from_dns {
+ my ($self, $pms) = @_;
+ if (defined $pms->{sender_host_fail}) {
+ return ($pms->{sender_host_fail} == 2); # both MX and A need to fail
+ }
+}
+
1;
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=424846&r1=424845&r2=424846&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Sun Jul 23 15:51:07 2006
@@ -52,7 +52,6 @@
use warnings;
use Mail::SpamAssassin::Constants qw(:sa);
-use Mail::SpamAssassin::EvalTests;
use Mail::SpamAssassin::AsyncLoop;
use Mail::SpamAssassin::Conf;
use Mail::SpamAssassin::Util;
@@ -3265,6 +3264,105 @@
unlink $self->{fulltext_tmpfile};
$self->{fulltext_tmpfile} = undef;
}
+}
+
+###########################################################################
+
+sub all_from_addrs {
+ my ($self) = @_;
+
+ if (exists $self->{all_from_addrs}) { return @{$self->{all_from_addrs}}; }
+
+ my @addrs;
+
+ # Resent- headers take priority, if present. see bug 672
+ # http://www.hughes-family.org/bugzilla/show_bug.cgi?id=672
+ my $resent = $self->get('Resent-From');
+ if (defined $resent && $resent =~ /\S/) {
+ @addrs = $self->{main}->find_all_addrs_in_line ($resent);
+
+ }
+ else {
+ # bug 2292: Used to use find_all_addrs_in_line() with the same
+ # headers, but that would catch addresses in comments which caused
+ # FNs for things like whitelist_from. Since all of these are From
+ # headers, there should only be 1 address in each anyway, so use the
+ # :addr code...
+ # bug 3366: some addresses come in as 'foo@bar...', which is invalid.
+ # so deal with the multiple periods.
+ @addrs = grep { defined($_) && length($_) > 0 } map { tr/././s; $_; }
+ ($self->get('From:addr'), # std
+ $self->get('Envelope-Sender:addr'), # qmail: new-inject(1)
+ $self->get('Resent-Sender:addr'), # procmailrc manpage
+ $self->get('X-Envelope-From:addr'), # procmailrc manpage
+ $self->get('EnvelopeFrom:addr')); # SMTP envelope
+ # http://www.cs.tut.fi/~jkorpela/headers.html is useful here
+ }
+
+ # Remove duplicate addresses
+ my %addrs = map { $_ => 1 } @addrs;
+ @addrs = keys %addrs;
+
+ dbg("eval: all '*From' addrs: " . join(" ", @addrs));
+ $self->{all_from_addrs} = \@addrs;
+ return @addrs;
+}
+
+sub all_to_addrs {
+ my ($self) = @_;
+
+ if (exists $self->{all_to_addrs}) { return @{$self->{all_to_addrs}}; }
+
+ my @addrs;
+
+ # Resent- headers take priority, if present. see bug 672
+ # http://www.hughes-family.org/bugzilla/show_bug.cgi?id=672
+ my $resent = $self->get('Resent-To') . $self->get('Resent-Cc');
+ if (defined $resent && $resent =~ /\S/) {
+ @addrs = $self->{main}->find_all_addrs_in_line (
+ $self->get('Resent-To') . # std, rfc822
+ $self->get('Resent-Cc')); # std, rfc822
+
+ } else {
+ # OK, a fetchmail trick: try to find the recipient address from
+ # the most recent 3 Received lines. This is required for sendmail,
+ # since it does not add a helpful header like exim, qmail
+ # or Postfix do.
+ #
+ my $rcvd = $self->get('Received');
+ $rcvd =~ s/\n[ \t]+/ /gs;
+ $rcvd =~ s/\n+/\n/gs;
+
+ my @rcvdlines = split(/\n/, $rcvd, 4); pop @rcvdlines; # forget last one
+ my @rcvdaddrs = ();
+ foreach my $line (@rcvdlines) {
+ if ($line =~ / for (\S+\@\S+);/) { push (@rcvdaddrs, $1); }
+ }
+
+ @addrs = $self->{main}->find_all_addrs_in_line (
+ join(" ", @rcvdaddrs)."\n" .
+ $self->get('To') . # std
+ $self->get('Apparently-To') . # sendmail, from envelope
+ $self->get('Delivered-To') . # Postfix, poss qmail
+ $self->get('Envelope-Recipients') . # qmail: new-inject(1)
+ $self->get('Apparently-Resent-To') . # procmailrc manpage
+ $self->get('X-Envelope-To') . # procmailrc manpage
+ $self->get('Envelope-To') . # exim
+ $self->get('X-Delivered-To') . # procmail quick start
+ $self->get('X-Original-To') . # procmail quick start
+ $self->get('X-Rcpt-To') . # procmail quick start
+ $self->get('X-Real-To') . # procmail quick start
+ $self->get('Cc')); # std
+ # those are taken from various sources; thanks to Nancy McGough, who
+ # noted some in <http://www.ii.com/internet/robots/procmail/qs/#envelope>
+ }
+
+ dbg("eval: all '*To' addrs: " . join(" ", @addrs));
+ $self->{all_to_addrs} = \@addrs;
+ return @addrs;
+
+# http://www.cs.tut.fi/~jkorpela/headers.html is useful here, also
+# http://www.exim.org/pipermail/exim-users/Week-of-Mon-20001009/021672.html
}
###########################################################################
Copied: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/BodyEval.pm (from r357409, spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/BodyEval.pm)
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/BodyEval.pm?p2=spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/BodyEval.pm&p1=spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/BodyEval.pm&r1=357409&r2=424846&rev=424846&view=diff
==============================================================================
--- spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/BodyEval.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/BodyEval.pm Sun Jul 23 15:51:07 2006
@@ -42,6 +42,7 @@
$self->register_eval_rule("multipart_alternative_difference");
$self->register_eval_rule("multipart_alternative_difference_count");
$self->register_eval_rule("check_blank_line_ratio");
+ $self->register_eval_rule("tvd_vertical_words");
return $self;
}
@@ -187,7 +188,6 @@
return;
}
-
sub check_blank_line_ratio {
my ($self, $pms, $fulltext, $min, $max, $minlines) = @_;
@@ -213,6 +213,34 @@
return (($min == 0 && $pms->{blank_line_ratio}->{$minlines} <= $max) ||
($pms->{blank_line_ratio}->{$minlines} > $min &&
$pms->{blank_line_ratio}->{$minlines} <= $max));
+}
+
+sub tvd_vertical_words {
+ my ($self, $pms, $text, $min, $max) = @_;
+
+ # klugy
+ $max = 101 if ($max >= 100);
+
+ if (!defined $pms->{tvd_vertical_words}) {
+ $pms->{tvd_vertical_words} = 0;
+
+ foreach (@{$text}) {
+ my $l = length $_;
+ next unless ($l > 5);
+ my $spaces = tr/ / /;
+ my $nonspaces = $l - $spaces;
+ my $pct;
+ if ($spaces > $nonspaces || $nonspaces == 0) {
+ $pct = 100;
+ }
+ else {
+ $pct = int(100*$spaces/$nonspaces);
+ }
+ $pms->{tvd_vertical_words} = $pct if ($pct > $pms->{tvd_vertical_words});
+ }
+ }
+
+ return 1 if ($pms->{tvd_vertical_words} >= $min && $pms->{tvd_vertical_words} < $max);
}
1;
Copied: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DNSEval.pm (from r357409, spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/DNSEval.pm)
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DNSEval.pm?p2=spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DNSEval.pm&p1=spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/DNSEval.pm&r1=357409&r2=424846&rev=424846&view=diff
==============================================================================
--- spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/DNSEval.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DNSEval.pm Sun Jul 23 15:51:07 2006
@@ -37,19 +37,36 @@
my $self = $class->SUPER::new($mailsaobject);
bless ($self, $class);
- # the important bit!
- $self->register_eval_rule("check_rbl_accreditor");
- $self->register_eval_rule("check_rbl");
- $self->register_eval_rule("check_rbl_txt");
- $self->register_eval_rule("check_rbl_sub");
- $self->register_eval_rule("check_rbl_results_for");
- $self->register_eval_rule("check_rbl_from_host");
- $self->register_eval_rule("check_rbl_envfrom");
- $self->register_eval_rule("check_dns_sender");
+ # this is done this way so that the same list can be used here and in
+ # check_start()
+ $self->{'evalrules'} = [
+ 'check_rbl_accreditor',
+ 'check_rbl',
+ 'check_rbl_txt',
+ 'check_rbl_sub',
+ 'check_rbl_results_for',
+ 'check_rbl_from_host',
+ 'check_rbl_envfrom',
+ 'check_dns_sender',
+ ];
+
+ foreach(@{$self->{'evalrules'}}) {
+ $self->register_eval_rule($_);
+ }
return $self;
}
+# this is necessary because PMS::run_rbl_eval_tests() calls these functions
+# directly as part of PMS
+sub check_start {
+ my ($self, $opts) = @_;
+
+ foreach(@{$self->{'evalrules'}}) {
+ $opts->{'permsgstatus'}->register_plugin_eval_glue($_);
+ }
+}
+
sub ip_list_uniq_and_strip_private {
my ($self, @origips) = @_;
my @ips = ();
@@ -72,10 +89,10 @@
my ($self, $pms, $rule, $set, $rbl_server, $subtest, $accreditor) = @_;
if (!defined $pms->{accreditor_tag}) {
- $pms->message_accreditor_tag($pms);
+ $self->message_accreditor_tag($pms);
}
if ($pms->{accreditor_tag}->{$accreditor}) {
- $pms->check_rbl_backend($pms, $rule, $set, $rbl_server, 'A', $subtest);
+ $self->check_rbl_backend($pms, $rule, $set, $rbl_server, 'A', $subtest);
}
return 0;
}
@@ -274,13 +291,13 @@
# this only checks the address host name and not the domain name because
# using the domain name had much worse results for dsn.rfc-ignorant.org
sub check_rbl_from_host {
- _check_rbl_addresses(@_, $_[0]->all_from_addrs());
+ _check_rbl_addresses(@_, $_[1]->all_from_addrs());
}
# this only checks the address host name and not the domain name because
# using the domain name had much worse results for dsn.rfc-ignorant.org
sub check_rbl_envfrom {
- _check_rbl_addresses(@_, $_[0]->get('EnvelopeFrom:addr'));
+ _check_rbl_addresses(@_, $_[1]->get('EnvelopeFrom:addr'));
}
sub _check_rbl_addresses {
Copied: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm (from r357409, spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm)
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm?p2=spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm&p1=spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm&r1=357409&r2=424846&rev=424846&view=diff
==============================================================================
--- spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm Sun Jul 23 15:51:07 2006
@@ -46,6 +46,8 @@
$self->register_eval_rule("html_title_subject_ratio");
$self->register_eval_rule("html_text_not_match");
$self->register_eval_rule("html_range");
+ $self->register_eval_rule("check_iframe_src");
+ $self->register_eval_rule("check_html_uri_only");
return $self;
}
@@ -183,6 +185,34 @@
# if we get here everything should be a number
return ($test > $min && $test <= $max);
}
+}
+
+sub check_iframe_src {
+ my ($self, $pms) = @_;
+
+ foreach my $v ( values %{$pms->{html}->{uri_detail}} ) {
+ return 1 if $v->{types}->{iframe};
+ }
+
+ return 0;
+}
+
+sub check_html_uri_only {
+ my ($self, $pms) = @_;
+
+ # Find out if there are any multipart/alternative parts in the message
+ my @ma = $pms->{msg}->find_parts(qr@^multipart/alternative\b@i);
+
+ # If there are no multipart/alternative sections, skip this test.
+ return if (!@ma);
+
+ # At this point, we're not actually checking the alternates, just the entire
+ # message.
+ foreach my $v ( values %{$pms->{html}->{uri_detail}} ) {
+ return 0 if (exists $v->{types}->{parsed});
+ }
+
+ return 1;
}
1;
Copied: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm (from r357409, spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm)
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm?p2=spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm&p1=spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm&r1=357409&r2=424846&rev=424846&view=diff
==============================================================================
--- spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm Sun Jul 23 15:51:07 2006
@@ -995,7 +995,16 @@
elsif ($test eq "user") {
my $to = $full_to;
$to =~ s/\@.*//;
- return $subject =~ /^\s*\Q$to\E,\s/i; # "user,\s" case insensitive
+ my $subj = $subject;
+ $subj =~ s/^\s+//;
+ $subj =~ s/\s+$//;
+
+ return $subject =~ /^(?:
+ (?:re|fw):\s*(?:\w+\s+)?\Q$to\E$
+ |(?-i:\Q$to\E)\s*[,:;!?-](?:$|\s)
+ |\Q$to\E$
+ |,\s*\Q$to\E[,:;!?-]$
+ )/ix;
}
return 0;
}
Copied: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm (from r357409, spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm)
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm?p2=spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm&p1=spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm&r1=357409&r2=424846&rev=424846&view=diff
==============================================================================
--- spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm Sun Jul 23 15:51:07 2006
@@ -43,6 +43,7 @@
$self->register_eval_rule("check_msg_parse_flags");
$self->register_eval_rule("check_for_faraway_charset");
$self->register_eval_rule("check_for_uppercase");
+ $self->register_eval_rule("check_ma_non_text");
return $self;
}
@@ -86,7 +87,6 @@
0;
}
-
sub check_for_mime {
my ($self, $pms, undef, $test) = @_;
@@ -449,6 +449,23 @@
if ($type =~ /charset='([^']+)'/i) { return $1; }
if ($type =~ /charset=(\S+)/i) { return $1; }
return undef;
+}
+
+# came up on the users@ list, look for multipart/alternative parts which
+# include non-text parts -- skip certain types which occur normally in ham
+sub check_ma_non_text {
+ my($self, $pms) = @_;
+
+ foreach my $map ($pms->{msg}->find_parts(qr@^multipart/alternative$@i)) {
+ foreach my $p ($map->find_parts(qr/./, 1, 0)) {
+ next if (lc $p->{'type'} eq 'multipart/related');
+ next if (lc $p->{'type'} eq 'application/rtf');
+ next if ($p->{'type'} =~ m@^text/@i);
+ return 1;
+ }
+ }
+
+ return 0;
}
1;
Copied: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIEval.pm (from r357409, spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/URIEval.pm)
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIEval.pm?p2=spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIEval.pm&p1=spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/URIEval.pm&r1=357409&r2=424846&rev=424846&view=diff
==============================================================================
--- spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/URIEval.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIEval.pm Sun Jul 23 15:51:07 2006
@@ -82,7 +82,10 @@
next if ($k !~ m%^https?:/*(?:[^\@/]+\@)?\d+\.\d+\.\d+\.\d+%i);
foreach (@{$v->{anchor_text}}) {
next if (m%^https:/*(?:[^\@/]+\@)?\d+\.\d+\.\d+\.\d+%i);
- return 1 if (m%https:%i);
+ if (m%https:%i) {
+ keys %{$self->{html}->{uri_detail}}; # resets iterator, bug 4829
+ return 1;
+ }
}
}
Copied: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/WLBLEval.pm (from r357409, spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/WLBLEval.pm)
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/WLBLEval.pm?p2=spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/WLBLEval.pm&p1=spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/WLBLEval.pm&r1=357409&r2=424846&rev=424846&view=diff
==============================================================================
--- spamassassin/branches/tvd-evaltoplugin/lib/Mail/SpamAssassin/Plugin/WLBLEval.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/WLBLEval.pm Sun Jul 23 15:51:07 2006
@@ -355,7 +355,7 @@
sub check_from_in_blacklist {
my ($self, $pms) = @_;
local ($_);
- foreach $_ ($self->all_from_addrs($pms)) {
+ foreach $_ ($pms->all_from_addrs()) {
if ($self->_check_whitelist ($self->{main}->{conf}->{blacklist_from}, $_)) {
return 1;
}
@@ -365,7 +365,7 @@
sub check_to_in_blacklist {
my ($self, $pms) = @_;
local ($_);
- foreach $_ ($self->all_to_addrs($pms)) {
+ foreach $_ ($pms->all_to_addrs()) {
if ($self->_check_whitelist ($self->{main}->{conf}->{blacklist_to}, $_)) {
return 1;
}
@@ -375,7 +375,7 @@
sub check_to_in_whitelist {
my ($self, $pms) = @_;
local ($_);
- foreach $_ ($self->all_to_addrs($pms)) {
+ foreach $_ ($pms->all_to_addrs()) {
if ($self->_check_whitelist ($self->{main}->{conf}->{whitelist_to}, $_)) {
return 1;
}
@@ -385,7 +385,7 @@
sub check_to_in_more_spam {
my ($self, $pms) = @_;
local ($_);
- foreach $_ ($self->all_to_addrs($pms)) {
+ foreach $_ ($pms->all_to_addrs()) {
if ($self->_check_whitelist ($self->{main}->{conf}->{more_spam_to}, $_)) {
return 1;
}
@@ -395,7 +395,7 @@
sub check_to_in_all_spam {
my ($self, $pms) = @_;
local ($_);
- foreach $_ ($self->all_to_addrs($pms)) {
+ foreach $_ ($pms->all_to_addrs()) {
if ($self->_check_whitelist ($self->{main}->{conf}->{all_spam_to}, $_)) {
return 1;
}
@@ -410,7 +410,7 @@
return;
}
- foreach my $addr ($self->all_from_addrs($pms)) {
+ foreach my $addr ($pms->all_from_addrs()) {
if ($self->_check_whitelist ($list_ref, $addr)) {
return 1;
}
@@ -444,7 +444,7 @@
return;
}
- foreach my $addr ($self->all_to_addrs($pms)) {
+ foreach my $addr ($pms->all_to_addrs()) {
if ($self->_check_whitelist ($list_ref, $addr)) {
return 1;
}
@@ -487,7 +487,7 @@
my ($self, $pms) = @_;
my $found_match = 0;
local ($_);
- foreach $_ ($self->all_from_addrs($pms)) {
+ foreach $_ ($pms->all_from_addrs()) {
if ($self->_check_whitelist ($self->{main}->{conf}->{whitelist_from}, $_)) {
$pms->{from_in_whitelist} = 1;
return;
@@ -512,7 +512,7 @@
my ($self, $pms) = @_;
my $found_match = 0;
local ($_);
- foreach $_ ($self->all_from_addrs($pms)) {
+ foreach $_ ($pms->all_from_addrs()) {
my $wh = $self->_check_whitelist_rcvd ($pms, $self->{main}->{conf}->{def_whitelist_from_rcvd}, $_);
if ($wh == 1) {
$pms->{from_in_default_whitelist} = 1;
@@ -595,103 +595,6 @@
}
return 0;
-}
-
-sub all_from_addrs {
- my ($self, $pms) = @_;
-
- if (exists $pms->{all_from_addrs}) { return @{$pms->{all_from_addrs}}; }
-
- my @addrs;
-
- # Resent- headers take priority, if present. see bug 672
- # http://www.hughes-family.org/bugzilla/show_bug.cgi?id=672
- my $resent = $pms->get('Resent-From');
- if (defined $resent && $resent =~ /\S/) {
- @addrs = $self->{main}->find_all_addrs_in_line ($resent);
-
- }
- else {
- # bug 2292: Used to use find_all_addrs_in_line() with the same
- # headers, but the would catch addresses in comments which caused
- # FNs for things like whitelist_from. Since all of these are From
- # headers, there should only be 1 address in each anyway, so use the
- # :addr code...
- # bug 3366: some addresses come in as 'foo@bar...', which is invalid.
- # so deal with the multiple periods.
- @addrs = grep { defined($_) && length($_) > 0 } map { tr/././s; $_; }
- ($pms->get('From:addr'), # std
- $pms->get('Envelope-Sender:addr'), # qmail: new-inject(1)
- $pms->get('Resent-Sender:addr'), # procmailrc manpage
- $pms->get('X-Envelope-From:addr'), # procmailrc manpage
- $pms->get('EnvelopeFrom:addr')); # SMTP envelope
- # http://www.cs.tut.fi/~jkorpela/headers.html is useful here
- }
-
- # Remove duplicate addresses
- my %addrs = map { $_ => 1 } @addrs;
- @addrs = keys %addrs;
-
- dbg("eval: all '*From' addrs: " . join(" ", @addrs));
- $pms->{all_from_addrs} = \@addrs;
- return @addrs;
-}
-
-sub all_to_addrs {
- my ($self, $pms) = @_;
-
- if (exists $pms->{all_to_addrs}) { return @{$pms->{all_to_addrs}}; }
-
- my @addrs;
-
- # Resent- headers take priority, if present. see bug 672
- # http://www.hughes-family.org/bugzilla/show_bug.cgi?id=672
- my $resent = $pms->get('Resent-To') . $pms->get('Resent-Cc');
- if (defined $resent && $resent =~ /\S/) {
- @addrs = $self->{main}->find_all_addrs_in_line (
- $pms->get('Resent-To') . # std, rfc822
- $pms->get('Resent-Cc')); # std, rfc822
-
- } else {
- # OK, a fetchmail trick: try to find the recipient address from
- # the most recent 3 Received lines. This is required for sendmail,
- # since it does not add a helpful header like exim, qmail
- # or Postfix do.
- #
- my $rcvd = $pms->get('Received');
- $rcvd =~ s/\n[ \t]+/ /gs;
- $rcvd =~ s/\n+/\n/gs;
-
- my @rcvdlines = split(/\n/, $rcvd, 4); pop @rcvdlines; # forget last one
- my @rcvdaddrs = ();
- foreach my $line (@rcvdlines) {
- if ($line =~ / for (\S+\@\S+);/) { push (@rcvdaddrs, $1); }
- }
-
- @addrs = $self->{main}->find_all_addrs_in_line (
- join(" ", @rcvdaddrs)."\n" .
- $pms->get('To') . # std
- $pms->get('Apparently-To') . # sendmail, from envelope
- $pms->get('Delivered-To') . # Postfix, poss qmail
- $pms->get('Envelope-Recipients') . # qmail: new-inject(1)
- $pms->get('Apparently-Resent-To') . # procmailrc manpage
- $pms->get('X-Envelope-To') . # procmailrc manpage
- $pms->get('Envelope-To') . # exim
- $pms->get('X-Delivered-To') . # procmail quick start
- $pms->get('X-Original-To') . # procmail quick start
- $pms->get('X-Rcpt-To') . # procmail quick start
- $pms->get('X-Real-To') . # procmail quick start
- $pms->get('Cc')); # std
- # those are taken from various sources; thanks to Nancy McGough, who
- # noted some in <http://www.ii.com/internet/robots/procmail/qs/#envelope>
- }
-
- dbg("eval: all '*To' addrs: " . join(" ", @addrs));
- $pms->{all_to_addrs} = \@addrs;
- return @addrs;
-
-# http://www.cs.tut.fi/~jkorpela/headers.html is useful here, also
-# http://www.exim.org/pipermail/exim-users/Week-of-Mon-20001009/021672.html
}
1;
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm?rev=424846&r1=424845&r2=424846&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm Sun Jul 23 15:51:07 2006
@@ -1416,6 +1416,20 @@
###########################################################################
+sub get_my_locales {
+ my ($ok_locales) = @_;
+
+ my @locales = split(' ', $ok_locales);
+ my $lang = $ENV{'LC_ALL'};
+ $lang ||= $ENV{'LANGUAGE'};
+ $lang ||= $ENV{'LC_MESSAGES'};
+ $lang ||= $ENV{'LANG'};
+ push (@locales, $lang) if defined($lang);
+ return @locales;
+}
+
+###########################################################################
+
1;
=back
Modified: spamassassin/trunk/rules/20_dnsbl_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/20_dnsbl_tests.cf?rev=424846&r1=424845&r2=424846&view=diff
==============================================================================
--- spamassassin/trunk/rules/20_dnsbl_tests.cf (original)
+++ spamassassin/trunk/rules/20_dnsbl_tests.cf Sun Jul 23 15:51:07 2006
@@ -24,6 +24,10 @@
require_version @@VERSION@@
+###########################################################################
+
+ifplugin Mail::SpamAssassin::Plugin::DNSEval
+
# See the Mail::SpamAssassin::Conf manual page for details of how to use
# check_rbl().
@@ -333,3 +337,5 @@
# header HABEAS_CHECKED eval:check_rbl_accreditor('accredit-firsttrusted', 'sa-accredit.habeas.com.', '127\.\d+\.\d+\.[6789]\d', 'habeas')
# describe HABEAS_CHECKED Habeas Checked
# tflags HABEAS_CHECKED net nice
+
+endif
Modified: spamassassin/trunk/rules/20_head_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/20_head_tests.cf?rev=424846&r1=424845&r2=424846&view=diff
==============================================================================
--- spamassassin/trunk/rules/20_head_tests.cf (original)
+++ spamassassin/trunk/rules/20_head_tests.cf Sun Jul 23 15:51:07 2006
@@ -24,4 +24,10 @@
require_version @@VERSION@@
+###########################################################################
+
+ifplugin Mail::SpamAssassin::Plugin::HeaderEval
+
header __ENV_AND_HDR_FROM_MATCH eval:check_for_matching_env_and_hdr_from()
+
+endif
Modified: spamassassin/trunk/rules/20_html_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/20_html_tests.cf?rev=424846&r1=424845&r2=424846&view=diff
==============================================================================
--- spamassassin/trunk/rules/20_html_tests.cf (original)
+++ spamassassin/trunk/rules/20_html_tests.cf Sun Jul 23 15:51:07 2006
@@ -28,6 +28,76 @@
#
# please sort these by eval type then name
+meta HTML_SHORT_LINK_IMG_1 __HTML_LENGTH_0000_1024 && __HTML_LINK_IMAGE
+meta HTML_SHORT_LINK_IMG_2 __HTML_LENGTH_1024_1536 && __HTML_LINK_IMAGE
+meta HTML_SHORT_LINK_IMG_3 __HTML_LENGTH_1536_2048 && __HTML_LINK_IMAGE
+describe HTML_SHORT_LINK_IMG_1 HTML is very short with a linked image
+describe HTML_SHORT_LINK_IMG_2 HTML is very short with a linked image
+describe HTML_SHORT_LINK_IMG_3 HTML is very short with a linked image
+
+meta HTML_SHORT_COMMENT (__HTML_LENGTH_512 && __COMMENT_EXISTS)
+describe HTML_SHORT_COMMENT HTML is very short with HTML comments
+
+meta HTML_SHORT_CENTER (__HTML_LENGTH_384 && __TAG_EXISTS_CENTER)
+describe HTML_SHORT_CENTER HTML is very short with CENTER tag
+
+meta HTML_TITLE_LONG __HTML_TITLE_120 && !__MIME_ATTACHMENT
+describe HTML_TITLE_LONG HTML title is very long
+
+meta HTML_TITLE_SUBJ_DIFF __HTML_TITLE_SUBJ_DIFF && !__MIME_ATTACHMENT
+
+meta HTML_CHARSET_FARAWAY (__HTML_CHARSET_FARAWAY && __HIGHBITS)
+describe HTML_CHARSET_FARAWAY A foreign language charset used in HTML markup
+tflags HTML_CHARSET_FARAWAY userconf
+
+meta HTML_MIME_NO_HTML_TAG MIME_HTML_ONLY && !__TAG_EXISTS_HTML
+describe HTML_MIME_NO_HTML_TAG HTML-only message, but there is no HTML tag
+
+meta HTML_MISSING_CTYPE (!__MIME_HTML && HTML_MESSAGE)
+describe HTML_MISSING_CTYPE Message is HTML without HTML Content-Type
+
+###########################################################################
+# rawbody HTML tests
+
+rawbody HIDE_WIN_STATUS /<[^>]+onMouseOver=[^>]+window\.status=/i
+describe HIDE_WIN_STATUS Javascript to hide URLs in browser
+
+rawbody __OBFUSCATING_COMMENT_A /\w(?:<![^>]*>)+\w/
+rawbody __OBFUSCATING_COMMENT_B /[^\s>](?:<![^>]*>)+[^\s<]/
+meta OBFUSCATING_COMMENT ((__OBFUSCATING_COMMENT_A && HTML_MESSAGE) || (__OBFUSCATING_COMMENT_B && MIME_HTML_ONLY))
+describe OBFUSCATING_COMMENT HTML comments which obfuscate text
+
+# spams that are assembled from a Javascript array
+# look for the XOR op
+rawbody __JS_FROMCHARCODE /String\.fromCharCode\s*\(\s*\S+\s*\[\s*\S+\s*\]\s*\^/
+rawbody __JS_DOCWRITE /document\.write/
+meta JS_FROMCHARCODE (__JS_FROMCHARCODE && __JS_DOCWRITE)
+describe JS_FROMCHARCODE Document is built from a Javascript charcode array
+
+# A-Z, a-z, 0-9
+rawbody ENTITY_DEC_ALPHANUM /\&\#0*(?:4[89]|5[0-7]|6[5-9][78]\d|9[0789]|1[01]\d|12[012])\;/
+describe ENTITY_DEC_ALPHANUM HTML contains needlessly encoded characters
+
+# ! $ % ' ( ) , - . / : ; = ? @ _
+# a good possible rule that may resurface
+#rawbody ENTITY_DEC_OTHER /\&\#0*(?:3[3679]|4[014567]|5[89]|6[134]|95)\;/
+#describe ENTITY_DEC_OTHER HTML contains needlessly encoded punctuation
+
+# thanks to Bob Menschel for this one; bug 4116
+rawbody HTML_EHTML2 m'</html></html>'i
+describe HTML_EHTML2 HTML has doubled end HTML tag
+
+# bug 3070
+rawbody HTML_TINY_FONT /\<.*font\-size\:[ \"]*[01][^0-9]+.*\>/i
+describe HTML_TINY_FONT body contains 1 or 0-point font
+
+body __HIGHBITS /(?:[\x80-\xff].?){4}/
+# note: __HIGHBITS is used by HTML_CHARSET_FARAWAY
+
+###########################################################################
+
+ifplugin Mail::SpamAssassin::Plugin::HTMLEval
+
# HTML control test, HTML spam rules should all have better S/O than this
body HTML_MESSAGE eval:html_test('html')
describe HTML_MESSAGE HTML included in message
@@ -263,96 +333,41 @@
body __HTML_LENGTH_0000_1024 eval:html_range('length', '0', '1024')
body __HTML_LENGTH_1024_1536 eval:html_range('length', '1024', '1536')
body __HTML_LENGTH_1536_2048 eval:html_range('length', '1536', '2048')
-meta HTML_SHORT_LINK_IMG_1 __HTML_LENGTH_0000_1024 && __HTML_LINK_IMAGE
-meta HTML_SHORT_LINK_IMG_2 __HTML_LENGTH_1024_1536 && __HTML_LINK_IMAGE
-meta HTML_SHORT_LINK_IMG_3 __HTML_LENGTH_1536_2048 && __HTML_LINK_IMAGE
-describe HTML_SHORT_LINK_IMG_1 HTML is very short with a linked image
-describe HTML_SHORT_LINK_IMG_2 HTML is very short with a linked image
-describe HTML_SHORT_LINK_IMG_3 HTML is very short with a linked image
body HTML_SHORT_LENGTH eval:html_eval('length', '< 170')
describe HTML_SHORT_LENGTH HTML is extremely short
body __HTML_LENGTH_512 eval:html_eval('length', '< 512')
body __COMMENT_EXISTS eval:html_text_match('comment', '<!.*?>')
-meta HTML_SHORT_COMMENT (__HTML_LENGTH_512 && __COMMENT_EXISTS)
-describe HTML_SHORT_COMMENT HTML is very short with HTML comments
body __HTML_LENGTH_384 eval:html_eval('length', '< 384')
body __TAG_EXISTS_CENTER eval:html_tag_exists('center')
-meta HTML_SHORT_CENTER (__HTML_LENGTH_384 && __TAG_EXISTS_CENTER)
-describe HTML_SHORT_CENTER HTML is very short with CENTER tag
body HTML_TITLE_EMPTY eval:html_text_not_match('title', '(?s)\S')
describe HTML_TITLE_EMPTY HTML title contains no text
-# __MIME_ATTACHMENT also used in 20_meta_tests.cf
-body __MIME_ATTACHMENT eval:check_for_mime('mime_attachment')
-
body __HTML_TITLE_120 eval:html_text_match('title', '.{120}')
-meta HTML_TITLE_LONG __HTML_TITLE_120 && !__MIME_ATTACHMENT
-describe HTML_TITLE_LONG HTML title is very long
body __HTML_TITLE_SUBJ_DIFF eval:html_title_subject_ratio('3.5')
-meta HTML_TITLE_SUBJ_DIFF __HTML_TITLE_SUBJ_DIFF && !__MIME_ATTACHMENT
body HTML_TITLE_UNTITLED eval:html_text_match('title', '(?i)(?:untitled|new page \d+)')
describe HTML_TITLE_UNTITLED HTML title contains "Untitled"
-###########################################################################
-# meta tests
-
body __HTML_CHARSET_FARAWAY eval:html_charset_faraway()
-meta HTML_CHARSET_FARAWAY (__HTML_CHARSET_FARAWAY && __HIGHBITS)
-describe HTML_CHARSET_FARAWAY A foreign language charset used in HTML markup
-tflags HTML_CHARSET_FARAWAY userconf
-
-meta HTML_MIME_NO_HTML_TAG MIME_HTML_ONLY && !__TAG_EXISTS_HTML
-describe HTML_MIME_NO_HTML_TAG HTML-only message, but there is no HTML tag
-
-meta HTML_MISSING_CTYPE (!__MIME_HTML && HTML_MESSAGE)
-describe HTML_MISSING_CTYPE Message is HTML without HTML Content-Type
-
-###########################################################################
-# rawbody HTML tests
-
-rawbody HIDE_WIN_STATUS /<[^>]+onMouseOver=[^>]+window\.status=/i
-describe HIDE_WIN_STATUS Javascript to hide URLs in browser
-
-rawbody __OBFUSCATING_COMMENT_A /\w(?:<![^>]*>)+\w/
-rawbody __OBFUSCATING_COMMENT_B /[^\s>](?:<![^>]*>)+[^\s<]/
-meta OBFUSCATING_COMMENT ((__OBFUSCATING_COMMENT_A && HTML_MESSAGE) || (__OBFUSCATING_COMMENT_B && MIME_HTML_ONLY))
-describe OBFUSCATING_COMMENT HTML comments which obfuscate text
-
-# spams that are assembled from a Javascript array
-# look for the XOR op
-rawbody __JS_FROMCHARCODE /String\.fromCharCode\s*\(\s*\S+\s*\[\s*\S+\s*\]\s*\^/
-rawbody __JS_DOCWRITE /document\.write/
-meta JS_FROMCHARCODE (__JS_FROMCHARCODE && __JS_DOCWRITE)
-describe JS_FROMCHARCODE Document is built from a Javascript charcode array
-# A-Z, a-z, 0-9
-rawbody ENTITY_DEC_ALPHANUM /\&\#0*(?:4[89]|5[0-7]|6[5-9][78]\d|9[0789]|1[01]\d|12[012])\;/
-describe ENTITY_DEC_ALPHANUM HTML contains needlessly encoded characters
+body HTML_IFRAME_SRC eval:check_iframe_src()
+describe HTML_IFRAME_SRC Message has HTML IFRAME tag with SRC URI
-# ! $ % ' ( ) , - . / : ; = ? @ _
-# a good possible rule that may resurface
-#rawbody ENTITY_DEC_OTHER /\&\#0*(?:3[3679]|4[014567]|5[89]|6[134]|95)\;/
-#describe ENTITY_DEC_OTHER HTML contains needlessly encoded punctuation
+body URI_HTML_ONLY eval:check_html_uri_only()
+describe URI_HTML_ONLY URIs only found in HTML part of multipart/alternative message
-# thanks to Bob Menschel for this one; bug 4116
-rawbody HTML_EHTML2 m'</html></html>'i
-describe HTML_EHTML2 HTML has doubled end HTML tag
+endif
-# bug 3070
-rawbody HTML_TINY_FONT /\<.*font\-size\:[ \"]*[01][^0-9]+.*\>/i
-describe HTML_TINY_FONT body contains 1 or 0-point font
+###########################################################################
-body __HIGHBITS /(?:[\x80-\xff].?){4}/
-# note: __HIGHBITS is used by HTML_CHARSET_FARAWAY
+ifplugin Mail::SpamAssassin::Plugin::MIMEEval
-body HTML_IFRAME_SRC eval:check_iframe_src()
-describe HTML_IFRAME_SRC Message has HTML IFRAME tag with SRC URI
+# __MIME_ATTACHMENT also used in 20_meta_tests.cf
+body __MIME_ATTACHMENT eval:check_for_mime('mime_attachment')
-body URI_HTML_ONLY eval:check_html_uri_only()
-describe URI_HTML_ONLY URIs only found in HTML part of multipart/alternative message
+endif
Modified: spamassassin/trunk/rules/20_net_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/20_net_tests.cf?rev=424846&r1=424845&r2=424846&view=diff
==============================================================================
--- spamassassin/trunk/rules/20_net_tests.cf (original)
+++ spamassassin/trunk/rules/20_net_tests.cf Sun Jul 23 15:51:07 2006
@@ -40,17 +40,26 @@
tflags DIGEST_MULTIPLE net
#reuse DIGEST_MULTIPLE
-# ---------------------------------------------------------------------------
-# Other DNS tests
+###########################################################################
+
+ifplugin Mail::SpamAssassin::Plugin::DNSEval
header NO_DNS_FOR_FROM eval:check_dns_sender()
describe NO_DNS_FOR_FROM Envelope sender has no MX or A DNS records
tflags NO_DNS_FOR_FROM net
#reuse NO_DNS_FOR_FROM
+endif
+
+###########################################################################
+
+ifplugin Mail::SpamAssassin::Plugin::HeaderEval
+
# this variant uses a DNS reverse lookup. so now we can use a version
# of this test without a net connection (ROUND_THE_WORLD_LOCAL)
header ROUND_THE_WORLD eval:check_for_round_the_world_received_revdns()
describe ROUND_THE_WORLD Received: says mail sent around the world (DNS)
tflags ROUND_THE_WORLD net
#reuse ROUND_THE_WORLD
+
+endif
Modified: spamassassin/trunk/rules/23_bayes.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/23_bayes.cf?rev=424846&r1=424845&r2=424846&view=diff
==============================================================================
--- spamassassin/trunk/rules/23_bayes.cf (original)
+++ spamassassin/trunk/rules/23_bayes.cf Sun Jul 23 15:51:07 2006
@@ -26,6 +26,8 @@
###########################################################################
+ifplugin Mail::SpamAssassin::Plugin::Bayes
+
body BAYES_00 eval:check_bayes('0.00', '0.01')
body BAYES_05 eval:check_bayes('0.01', '0.05')
body BAYES_20 eval:check_bayes('0.05', '0.20')
@@ -59,3 +61,5 @@
describe BAYES_80 Bayesian spam probability is 80 to 95%
describe BAYES_95 Bayesian spam probability is 95 to 99%
describe BAYES_99 Bayesian spam probability is 99 to 100%
+
+endif
Modified: spamassassin/trunk/rules/60_whitelist.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/60_whitelist.cf?rev=424846&r1=424845&r2=424846&view=diff
==============================================================================
--- spamassassin/trunk/rules/60_whitelist.cf (original)
+++ spamassassin/trunk/rules/60_whitelist.cf Sun Jul 23 15:51:07 2006
@@ -28,6 +28,8 @@
# user slip-up could result in scribbling side-effects in the bayes
# db as a result -- which is hard to remedy.
+ifplugin Mail::SpamAssassin::Plugin::WLBLEval
+
header USER_IN_BLACKLIST eval:check_from_in_blacklist()
describe USER_IN_BLACKLIST From: address is in the user's black-list
tflags USER_IN_BLACKLIST userconf noautolearn
@@ -102,3 +104,4 @@
def_whitelist_from_rcvd *@warehouse.com warehouse.com
def_whitelist_from_rcvd *@*.efax.com efax.com
+endif
Modified: spamassassin/trunk/rules/v320.pre
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/v320.pre?rev=424846&r1=424845&r2=424846&view=diff
==============================================================================
--- spamassassin/trunk/rules/v320.pre (original)
+++ spamassassin/trunk/rules/v320.pre Sun Jul 23 15:51:07 2006
@@ -18,3 +18,15 @@
# URIDetail - test URIs using detailed URI information
#
loadplugin Mail::SpamAssassin::Plugin::URIDetail
+
+# Plugins which used to be EvalTests.pm
+# broken out into separate plugins
+loadplugin Mail::SpamAssassin::Plugin::Bayes
+loadplugin Mail::SpamAssassin::Plugin::BodyEval
+loadplugin Mail::SpamAssassin::Plugin::DNSEval
+loadplugin Mail::SpamAssassin::Plugin::HTMLEval
+loadplugin Mail::SpamAssassin::Plugin::HeaderEval
+loadplugin Mail::SpamAssassin::Plugin::MIMEEval
+loadplugin Mail::SpamAssassin::Plugin::RelayEval
+loadplugin Mail::SpamAssassin::Plugin::URIEval
+loadplugin Mail::SpamAssassin::Plugin::WLBLEval