You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by qu...@apache.org on 2004/04/20 04:20:11 UTC
svn commit: rev 10113 - in incubator/spamassassin/trunk: lib/Mail/SpamAssassin rules
Author: quinlan
Date: Mon Apr 19 19:20:10 2004
New Revision: 10113
Modified:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Received.pm
incubator/spamassassin/trunk/rules/70_testing.cf
Log:
add cleaner version of MSGID_FROM_MTA*
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm Mon Apr 19 19:20:10 2004
@@ -326,6 +326,71 @@
}
}
+# Eval test, variant 1: is a relay's Received "id" a case-sensitive substring of the MESSAGEID value?
+sub message_id_from_mta_1 {
+ my ($self) = @_;
+ # Returns 1 on a match, 0 on no match, and a bare return (undef/empty list) when no ID is available.
+ my $id = $self->get('MESSAGEID'); # NOTE(review): presumably a pseudo-header covering Message-ID-style headers -- confirm in get()
+ return unless defined($id) && $id; # undef, '' and '0' all count as "no ID"
+ # Only runs when there is at least one untrusted relay; checks relays_untrusted[0], then every trusted relay.
+ if ($self->{num_relays_untrusted} > 0) {
+ for my $rcvd (@{$self->{relays_untrusted}}[0], @{$self->{relays_trusted}})
+ { # {id} is the token Received.pm captures after "id" in the Received header; relays with a false id are skipped
+ return 1 if $rcvd->{id} && (index($id, $rcvd->{id}) != -1);
+ }
+ }
+ return 0;
+}
+
+# Eval test, variant 2: is a relay's Received "id" a case-sensitive substring of Resent-Message-ID (else Message-ID)?
+sub message_id_from_mta_2 {
+ my ($self) = @_;
+ # Returns 1 on a match, 0 on no match, and a bare return (undef/empty list) when no ID is available.
+ my $id = $self->get('Resent-Message-ID') || $self->get('Message-ID'); # Resent-Message-ID wins when it is true
+ return unless defined($id) && $id; # undef, '' and '0' all count as "no ID"
+ # Only runs when there is at least one untrusted relay; checks relays_untrusted[0], then every trusted relay.
+ if ($self->{num_relays_untrusted} > 0) {
+ for my $rcvd (@{$self->{relays_untrusted}}[0], @{$self->{relays_trusted}})
+ { # {id} is the token Received.pm captures after "id" in the Received header; relays with a false id are skipped
+ return 1 if $rcvd->{id} && (index($id, $rcvd->{id}) != -1);
+ }
+ }
+ return 0;
+}
+
+
+# Eval test, variant 3: is a relay's Received "id" a case-insensitive substring of the MESSAGEID value?
+sub message_id_from_mta_3 {
+ my ($self) = @_;
+ # Returns 1 on a match, 0 on no match, and a bare return (undef/empty list) when no ID is available.
+ my $id = $self->get('MESSAGEID'); # NOTE(review): presumably a pseudo-header covering Message-ID-style headers -- confirm in get()
+ return unless defined($id) && $id; # undef, '' and '0' all count as "no ID"
+ # Only runs when there is at least one untrusted relay; checks relays_untrusted[0], then every trusted relay.
+ if ($self->{num_relays_untrusted} > 0) {
+ for my $rcvd (@{$self->{relays_untrusted}}[0], @{$self->{relays_trusted}})
+ { # both sides are lowercased before the substring test; relays with a false {id} are skipped
+ return 1 if $rcvd->{id} && (index(lc($id), lc($rcvd->{id})) != -1);
+ }
+ }
+ return 0;
+}
+
+# Eval test, variant 4: is a relay's Received "id" a case-insensitive substring of Resent-Message-ID (else Message-ID)?
+sub message_id_from_mta_4 {
+ my ($self) = @_;
+ # Returns 1 on a match, 0 on no match, and a bare return (undef/empty list) when no ID is available.
+ my $id = $self->get('Resent-Message-ID') || $self->get('Message-ID'); # Resent-Message-ID wins when it is true
+ return unless defined($id) && $id; # undef, '' and '0' all count as "no ID"
+ # Only runs when there is at least one untrusted relay; checks relays_untrusted[0], then every trusted relay.
+ if ($self->{num_relays_untrusted} > 0) {
+ for my $rcvd (@{$self->{relays_untrusted}}[0], @{$self->{relays_trusted}})
+ { # both sides are lowercased before the substring test; relays with a false {id} are skipped
+ return 1 if $rcvd->{id} && (index(lc($id), lc($rcvd->{id})) != -1);
+ }
+ }
+ return 0;
+}
+
###########################################################################
# FORGED_RCVD_TRAIL
@@ -619,9 +684,9 @@
my $rcvd = $self->get ('Received');
- if ( $self->get("Resent-From") && $self->get("Resent-To") ) {
+ if ($self->get("Resent-From") && $self->get("Resent-To")) {
my $xrcvd = $self->get("X-Received");
- $rcvd = $xrcvd if ( $xrcvd );
+ $rcvd = $xrcvd if $xrcvd;
}
$rcvd =~ s/\s+/ /gs; # just spaces, simplify the regexp
@@ -906,7 +971,7 @@
my $list_ref = $self->{conf}{$list};
warn "Could not find list $list" unless defined $list_ref;
- foreach my $addr ( all_from_addrs $self ) {
+ foreach my $addr (all_from_addrs $self) {
return 1 if _check_whitelist $self $list_ref, $addr;
}
@@ -920,7 +985,7 @@
my $list_ref = $self->{conf}{$list};
warn "Could not find list $list" unless defined $list_ref;
- foreach my $addr ( all_to_addrs $self ) {
+ foreach my $addr (all_to_addrs $self) {
return 1 if _check_whitelist $self $list_ref, $addr;
}
@@ -1375,7 +1440,7 @@
# in @fullips. It includes the IPs that are trusted, but
# not in internal_networks.
my @fullexternal = map {
- (!$_->{internal}) ? ( $_->{ip} ) : ()
+ (!$_->{internal}) ? ($_->{ip}) : ()
} @{$self->{relays_trusted}};
push (@fullexternal, @fullips); # add untrusted set too
@@ -1425,7 +1490,7 @@
{
push(@ips, @originating);
if ($1 eq "first") {
- @ips = ( $ips[0] );
+ @ips = ($ips[0]);
}
else {
shift @ips;
@@ -1669,10 +1734,10 @@
my %counts1 = ();
my %counts2 = ();
- foreach ( split(//, lc $addr1) ) {
+ foreach (split(//, lc $addr1)) {
$counts1{$_}++;
}
- foreach ( split(//, lc $addr2) ) {
+ foreach (split(//, lc $addr2)) {
$counts2{$_}++;
}
@@ -1903,7 +1968,7 @@
sub received_within_months {
# filters out some false positives in old corpus mail - Allen
- my($self,$min,$max) = @_;
+ my ($self,$min,$max) = @_;
if (!exists($self->{date_received})) {
$self->_check_date_received();
@@ -1949,7 +2014,7 @@
$self->{received_header_times} = [ () ];
$self->{received_fetchmail_time} = undef;
- my(@received);
+ my (@received);
my $received = $self->get('Received');
if (defined($received) && length($received)) {
@received = grep {$_ =~ m/\S/} (split(/\n/,$received));
@@ -1961,7 +2026,7 @@
}
# handle fetchmail headers
- my(@local);
+ my (@local);
if (($received[0] =~
m/\bfrom (?:localhost\s|(?:\S+ ){1,2}\S*\b127\.0\.0\.1\b)/) ||
($received[0] =~ m/qmail \d+ invoked by uid \d+/)) {
@@ -1978,7 +2043,7 @@
my $rcvd;
if (scalar(@local)) {
- my(@fetchmail_times);
+ my (@fetchmail_times);
foreach $rcvd (@local) {
if ($rcvd =~ m/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+)/) {
my $date = $1;
@@ -1999,7 +2064,7 @@
}
}
- my(@header_times);
+ my (@header_times);
foreach $rcvd (@received) {
if ($rcvd =~ m/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+)/) {
my $date = $1;
@@ -2022,7 +2087,7 @@
sub _check_date_received {
my $self = $_[0];
- my(@dates_poss);
+ my (@dates_poss);
$self->{date_received} = 0;
@@ -2037,7 +2102,7 @@
if (!exists($self->{received_header_times})) {
$self->_get_received_header_times();
}
- my(@received_header_times) = @{ $self->{received_header_times} };
+ my (@received_header_times) = @{ $self->{received_header_times} };
if (scalar(@received_header_times)) {
push @dates_poss, $received_header_times[0];
}
@@ -2078,13 +2143,13 @@
if (!exists($self->{received_header_times})) {
$self->_get_received_header_times();
}
- my(@header_times) = @{ $self->{received_header_times} };
+ my (@header_times) = @{ $self->{received_header_times} };
if (!scalar(@header_times)) {
return; # archived mail?
}
- my(@diffs) = map {$self->{date_header_time} - $_} (@header_times);
+ my (@diffs) = map {$self->{date_header_time} - $_} (@header_times);
# if the last Received: header has no difference, then we choose to
# exclude it
@@ -2585,9 +2650,9 @@
$self->{mime_suspect_name} = 0;
# Get all parts ...
- foreach my $p ( $self->{msg}->find_parts(qr/./) ) {
+ foreach my $p ($self->{msg}->find_parts(qr/./)) {
# message headers
- my($ctype, $boundary, $charset, $name) = Mail::SpamAssassin::Util::parse_content_type($p->get_header("content-type"));
+ my ($ctype, $boundary, $charset, $name) = Mail::SpamAssassin::Util::parse_content_type($p->get_header("content-type"));
if ($ctype eq 'multipart/alternative') {
$self->{mime_multipart_alternative} = 1;
@@ -2606,7 +2671,7 @@
# If we're not in a leaf node in the tree, there will be no raw
# section, so skip it.
- if ( ! $p->is_leaf() ) {
+ if (! $p->is_leaf()) {
next;
}
@@ -2615,8 +2680,8 @@
$part_bytes[$part] = 0 if $cd !~ /attachment/;
my $previous = '';
- foreach ( @{$p->raw()} ) {
- if ( $cte =~ /base64/i ) {
+ foreach (@{$p->raw()}) {
+ if ($cte =~ /base64/i) {
if ($previous =~ /^\s*$/ && /^\s*$/) {
$self->{mime_base64_blanks} = 1;
}
@@ -2629,7 +2694,7 @@
$self->{mime_html_no_charset} = 0;
}
if ($self->{mime_multipart_alternative} && $cd !~ /attachment/ &&
- ( $ctype eq 'text/plain' || $ctype eq 'text/html' ) ) {
+ ($ctype eq 'text/plain' || $ctype eq 'text/html')) {
$part_bytes[$part] += length;
}
@@ -2701,7 +2766,7 @@
my ($self) = @_;
return 0 unless ($self->is_razor2_available());
- return $self->{razor2_result} if ( defined $self->{razor2_result} );
+ return $self->{razor2_result} if (defined $self->{razor2_result});
# note: we don't use $fulltext. instead we get the raw message,
# unfiltered, for razor2 to check. ($fulltext removes MIME
@@ -2744,7 +2809,7 @@
# unfiltered, for DCC to check. ($fulltext removes MIME
# parts etc.)
my $full = $self->{msg}->get_pristine();
- if ( $have_dccifd ) {
+ if ($have_dccifd) {
return $self->dccifd_lookup (\$full);
} else {
return $self->dcc_lookup (\$full);
@@ -2915,22 +2980,22 @@
my ($self, $hdr, $min, $max) = @_;
my %uniq = ();
my @hdrs = grep(!$uniq{$_}++, $self->{msg}->get_header ($hdr));
- return ( scalar @hdrs >= $min && scalar @hdrs <= $max );
+ return (scalar @hdrs >= $min && scalar @hdrs <= $max);
}
sub check_blank_line_ratio {
my ($self, $fulltext, $min, $max, $minlines) = @_;
- if ( !defined $minlines || $minlines < 1 ) {
+ if (!defined $minlines || $minlines < 1) {
$minlines = 1;
}
$fulltext = $self->get_decoded_body_text_array();
- if ( ! exists $self->{blank_line_ratio}->{$minlines} ) {
- my($blank) = 0;
- if ( scalar @{$fulltext} >= $minlines ) {
- foreach my $line ( @{$fulltext} ) {
- next if ( $line =~ /\S/ );
+ if (! exists $self->{blank_line_ratio}->{$minlines}) {
+ my ($blank) = 0;
+ if (scalar @{$fulltext} >= $minlines) {
+ foreach my $line (@{$fulltext}) {
+ next if ($line =~ /\S/);
$blank++;
}
$self->{blank_line_ratio}->{$minlines} = 100 * $blank / scalar @{$fulltext};
@@ -2940,11 +3005,13 @@
}
}
- return ( ($min == 0 && $self->{blank_line_ratio}->{$minlines} <= $max) || ($self->{blank_line_ratio}->{$minlines} > $min && $self->{blank_line_ratio}->{$minlines} <= $max) );
+ return (($min == 0 && $self->{blank_line_ratio}->{$minlines} <= $max) ||
+ ($self->{blank_line_ratio}->{$minlines} > $min &&
+ $self->{blank_line_ratio}->{$minlines} <= $max));
}
sub check_access_database {
- my($self, $path) = @_;
+ my ($self, $path) = @_;
if (!HAS_DB_FILE) {
return 0;
@@ -2956,20 +3023,20 @@
$path = $self->{main}->sed_path ($path);
dbg("Tie-ing to DB file R/O in $path");
- if ( tie %access,"DB_File",$path, O_RDONLY ) {
+ if (tie %access,"DB_File",$path, O_RDONLY) {
my @lookfor = ();
# Look for "From:" versions as well!
- foreach my $from ( $self->all_from_addrs() ) {
+ foreach my $from ($self->all_from_addrs()) {
# $user."\@"
# rotate through $domain and check
- my($user,$domain) = split(/\@/, $from,2);
+ my ($user,$domain) = split(/\@/, $from,2);
push(@lookfor, "From:$from",$from);
- if ( $user ) {
+ if ($user) {
push(@lookfor, "From:$user\@", "$user\@");
}
- if ( $domain ) {
- while( $domain =~ /\./ ) {
+ if ($domain) {
+ while ($domain =~ /\./) {
push(@lookfor, "From:$domain", $domain);
$domain =~ s/^[^.]*\.//;
}
@@ -2978,13 +3045,13 @@
}
# we can only match this if we have at least 1 untrusted header
- if ( $self->{num_relays_untrusted} > 0 ) {
+ if ($self->{num_relays_untrusted} > 0) {
my $lastunt = $self->{relays_untrusted}->[0];
# If there was a reverse lookup, use it in a lookup
- if ( ! $lastunt->{no_reverse_dns} ) {
+ if (! $lastunt->{no_reverse_dns}) {
my $rdns = $lastunt->{lc_rdns};
- while( $rdns =~ /\./ ) {
+ while($rdns =~ /\./) {
push(@lookfor, "From:$rdns", $rdns);
$rdns =~ s/^[^.]*\.//;
}
@@ -2992,9 +3059,9 @@
}
# do both IP and net (rotate over IP)
- my($ip) = $lastunt->{ip};
+ my ($ip) = $lastunt->{ip};
$ip =~ tr/0-9.//cd;
- while( $ip =~ /\./ ) {
+ while($ip =~ /\./) {
push(@lookfor, "From:$ip", $ip);
$ip =~ s/\.[^.]*$//;
}
@@ -3003,15 +3070,15 @@
my $retval = 0;
my %cache = ();
- foreach ( @lookfor ) {
- next if ( $cache{$_}++ );
+ foreach (@lookfor) {
+ next if ($cache{$_}++);
dbg("accessdb: Looking for $_");
# Some systems put a null at the end of the key, most don't...
my $result = $access{$_} || $access{"$_\000"} || next;
- my($type) = split(/\W/,$result);
- if ( exists $ok{$type} ) {
+ my ($type) = split(/\W/,$result);
+ if (exists $ok{$type}) {
dbg("accessdb: hit OK: $type, $_");
$retval = 0;
last;
@@ -3237,7 +3304,7 @@
# not all perls understand what "inf" means, so we need to do
# non-numeric tests! urg!
if (!defined $max || $max eq "inf") {
- return ( $test eq "inf" ) ? 1 : ($test > $min);
+ return ($test eq "inf") ? 1 : ($test > $min);
}
elsif ($test eq "inf") {
# $max < inf, so $test == inf means $test > $max
@@ -3252,9 +3319,9 @@
###########################################################################
sub multipart_alternative_difference {
- my($self, $fulltext, $min, $max) = @_;
+ my ($self, $fulltext, $min, $max) = @_;
- $self->_multipart_alternative_difference() unless ( exists $self->{madiff} );
+ $self->_multipart_alternative_difference() unless (exists $self->{madiff});
if (($min == 0 || $self->{madiff} > $min) &&
($max eq "undef" || $self->{madiff} <= $max)) {
@@ -3264,7 +3331,7 @@
}
sub _multipart_alternative_difference {
- my($self) = @_;
+ my ($self) = @_;
$self->{madiff} = 0;
# Find all multipart/alternative parts in the message
@@ -3286,15 +3353,15 @@
}
# Go through each of the multipart parts
- foreach my $part ( @ma ) {
+ foreach my $part (@ma) {
my %html = ();
my %text = ();
# limit our search to text-based parts
my @txt = $part->find_parts(qr@^text\b@i);
- foreach my $text ( @txt ) {
+ foreach my $text (@txt) {
# we only care about the rendered version of the part
- my($type, $rnd) = $text->rendered();
+ my ($type, $rnd) = $text->rendered();
# parse the rendered text into tokens. assume they are whitespace
# separated, and ignore anything that doesn't have a word-character
@@ -3302,8 +3369,8 @@
# points, horizontal lines, etc. this assumes that punctuation
# in one part will be the same in other parts.
#
- if ( $type eq 'text/html' ) {
- foreach my $w ( grep(/\w/,split(/\s+/,$rnd)) ) {
+ if ($type eq 'text/html') {
+ foreach my $w (grep(/\w/,split(/\s+/,$rnd))) {
#dbg("HTML: $w");
$html{$w}++;
}
@@ -3315,7 +3382,7 @@
}
}
else {
- foreach my $w ( grep(/\w/,split(/\s+/,$rnd)) ) {
+ foreach my $w (grep(/\w/,split(/\s+/,$rnd))) {
#dbg("TEXT: $w");
$text{$w}++;
}
@@ -3324,12 +3391,12 @@
# How many HTML tokens do we have at the start?
my $orig = keys %html;
- next if ( $orig == 0 );
+ next if ($orig == 0);
# If the token appears at least as many times in the text part as
# in the html part, remove it from the list of html tokens.
- while( my($k,$v) = each %text ) {
- delete $html{$k} if ( exists $html{$k} && $html{$k}-$text{$k} < 1 );
+ while(my ($k,$v) = each %text) {
+ delete $html{$k} if (exists $html{$k} && $html{$k}-$text{$k} < 1);
}
#map { dbg("LEFT: $_") } keys %html;
@@ -3339,7 +3406,7 @@
# a 0% difference rate. Calculate it here, and record the difference
# if it's been the highest so far in this message.
my $diff = scalar(keys %html)/$orig*100;
- $self->{madiff} = $diff if ( $diff > $self->{madiff} );
+ $self->{madiff} = $diff if ($diff > $self->{madiff});
dbg(sprintf "madiff: left: %d, orig: %d, max-difference: %0.2f%%", scalar(keys %html), $orig, $self->{madiff});
}
@@ -3350,7 +3417,7 @@
###########################################################################
sub check_domain_ratio {
- my($self, $body, $ratio) = @_;
+ my ($self, $body, $ratio) = @_;
my $length = (length(join('', @{$body})) || 1);
if (!defined $self->{uri_domain_count}) {
$self->get_uri_list();
@@ -3362,18 +3429,18 @@
###########################################################################
sub check_for_http_redirector {
- my($self) = @_;
+ my ($self) = @_;
- foreach ( $self->get_uri_list() ) {
+ foreach ($self->get_uri_list()) {
while (s{^https?://([^/:\?]+).+?(https?://([^/:\?]+).+)$}{$2}g) {
- my($redir, $dest) = ($1,$3);
+ my ($redir, $dest) = ($1,$3);
foreach ($redir,$dest) {
# Strip down to domain.tld
s/^(?:.+\.)([^.]+\.[^.]+)$/$1/;
# make sure we do things case-insensitively
$_ = lc $_;
}
- next if ( $redir eq $dest );
+ next if ($redir eq $dest);
dbg("redirect: found $redir to $dest, flagging");
return 1;
}
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Received.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Received.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Received.pm Mon Apr 19 19:20:10 2004
@@ -365,6 +365,7 @@
my $helo = '';
my $rdns = '';
my $by = '';
+ my $id = '';
my $ident = '';
my $envfrom = '';
my $mta_looked_up_dns = 0;
@@ -383,6 +384,11 @@
# some MTAs will swap position of rdns and helo -- so we can't
# simply use simplistic regexps.
+ # try to catch unique message identifier
+ if (/\sid\s+<?([^\s<>;]{3,})/) {
+ $id = $1;
+ }
+
if (/^from /) {
# try to catch enveloper senders
if (/envelope-(?:sender|from)[ =](\S+)\b/) {
@@ -1025,6 +1031,7 @@
ip => $ip,
by => $by,
helo => $helo,
+ id => $id,
ident => $ident,
envfrom => $envfrom,
lc_by => (lc $by),
@@ -1068,7 +1075,7 @@
# of entries must be preserved, so that regexps that assume that
# e.g. "ip" comes before "helo" will still work.
#
- my $asstr = "[ ip=$ip rdns=$rdns helo=$helo by=$by ident=$ident envfrom=$envfrom intl=0 ]";
+ my $asstr = "[ ip=$ip rdns=$rdns helo=$helo by=$by ident=$ident envfrom=$envfrom intl=0 id=$id ]";
dbg ("received-header: parsed as $asstr");
$relay->{as_string} = $asstr;
Modified: incubator/spamassassin/trunk/rules/70_testing.cf
==============================================================================
--- incubator/spamassassin/trunk/rules/70_testing.cf (original)
+++ incubator/spamassassin/trunk/rules/70_testing.cf Mon Apr 19 19:20:10 2004
@@ -478,13 +478,21 @@
body T_NO_COST /\bno (?:(?:extra|hidden) )?(?:cost|charge)\b/i
# URL obfuscation services
-uri T_URL_SHORTEN_1 m{http://makeashorterlink.com/}i
-uri T_URL_SHORTEN_2 m{http://shorl.com/}i
-uri T_URL_SHORTEN_3 m{http://tinyurl.com/}i
-uri T_URL_SHORTEN_4 m{http://xrl.us/}i
-uri T_URL_SHORTEN_5 m{http://snipurl.com/}i
-uri T_URL_SHORTEN_6 m{http://lin.kz/}i
+uri T_URL_SHORTEN_1 m{http://makeashorterlink\.com/}i
+uri T_URL_SHORTEN_2 m{http://shorl\.com/}i
+uri T_URL_SHORTEN_3 m{http://tinyurl\.com/}i
+uri T_URL_SHORTEN_4 m{http://xrl\.us/}i
+uri T_URL_SHORTEN_5 m{http://snipurl\.com/}i
+uri T_URL_SHORTEN_6 m{http://lin\.kz/}i
+
+# possible replacement for MSGID_FROM_MTA_SHORT
+# maybe delete MSGID_FROM_MTA_LATER and MSGID_FROM_MTA_BACKUP too
+header T_MSGID_FROM_MTA_1 eval:message_id_from_mta_1()
+header T_MSGID_FROM_MTA_2 eval:message_id_from_mta_2()
+header T_MSGID_FROM_MTA_3 eval:message_id_from_mta_3()
+header T_MSGID_FROM_MTA_4 eval:message_id_from_mta_4()
# bug 3268
body T_REDIRECTOR eval:check_for_http_redirector()
describe T_REDIRECTOR Message has HTTP redirector URI
+