You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by mm...@apache.org on 2009/11/24 17:32:51 UTC
svn commit: r883770 - in /spamassassin/trunk:
lib/Mail/SpamAssassin/AutoWhitelist.pm lib/Mail/SpamAssassin/Plugin/AWL.pm
lib/Mail/SpamAssassin/SQLBasedAddrList.pm sql/README.awl sql/awl_mysql.sql
sql/awl_pg.sql t/data/spam/004
Author: mmartinec
Date: Tue Nov 24 16:32:51 2009
New Revision: 883770
URL: http://svn.apache.org/viewvc?rev=883770&view=rev
Log:
Bug 6203: make AWL CIDR mask configurable: auto_whitelist_ipv4_mask_len
and auto_whitelist_ipv6_mask_len; update README.awl and sql/awl_*.sql
accordingly (increasing awl.ip field width); 'fix' the t/data/spam/004
sample mail to avoid a test failing with a /24 net mask;
avoid race condition in SQLBasedAddrList.pm when multiple processes
try to insert-or-update an awl SQL record: try INSERT first, and if
that fails go for UPDATE.
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/AutoWhitelist.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/AWL.pm
spamassassin/trunk/lib/Mail/SpamAssassin/SQLBasedAddrList.pm
spamassassin/trunk/sql/README.awl
spamassassin/trunk/sql/awl_mysql.sql
spamassassin/trunk/sql/awl_pg.sql
spamassassin/trunk/t/data/spam/004
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/AutoWhitelist.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/AutoWhitelist.pm?rev=883770&r1=883769&r2=883770&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/AutoWhitelist.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/AutoWhitelist.pm Tue Nov 24 16:32:51 2009
@@ -45,7 +45,7 @@
use warnings;
use bytes;
use re 'taint';
-use NetAddr::IP;
+use NetAddr::IP qw(:upper); # ensure IPv6 string case even if default changes
use Mail::SpamAssassin;
use Mail::SpamAssassin::Logger;
@@ -64,18 +64,20 @@
$class = ref($class) || $class;
my ($main, $msg) = @_;
+ my $conf = $main->{conf};
my $self = {
- 'main' => $main,
+ main => $main,
+ factor => $conf->{auto_whitelist_factor},
+ ipv4_mask_len => $conf->{auto_whitelist_ipv4_mask_len},
+ ipv6_mask_len => $conf->{auto_whitelist_ipv6_mask_len},
};
- $self->{factor} = $main->{conf}->{auto_whitelist_factor};
-
my $factory;
if ($main->{pers_addr_list_factory}) {
$factory = $main->{pers_addr_list_factory};
}
else {
- my $type = $main->{conf}->{auto_whitelist_factory};
+ my $type = $conf->{auto_whitelist_factory};
if ($type =~ /^([_A-Za-z0-9:]+)$/) {
$type = untaint_var($type);
eval '
@@ -278,39 +280,68 @@
###########################################################################
-sub pack_addr {
- my ($self, $addr, $origip) = @_;
-
- $addr = lc $addr;
- $addr =~ s/[\000\;\'\"\!\|]/_/gs; # paranoia
+sub ip_to_awl_key {
+ my ($self, $origip) = @_;
+ my $result;
local $1;
if (!defined $origip) {
- # could not find an IP address to use, could be localhost mail or from
- # the user running "add-addr-to-*".
- $origip = 'none';
+ # could not find an IP address to use
} elsif ($origip =~ /^ (\d{1,3} \. \d{1,3}) \. \d{1,3} \. \d{1,3} $/xs) {
- $origip = $1;
- } elsif ($origip =~ /:/ &&
+ my $mask_len = $self->{ipv4_mask_len};
+ $mask_len = 16 if !defined $mask_len;
+ # handle the default and easy cases manually
+ if ($mask_len == 32) {
+ $result = $origip;
+ } elsif ($mask_len == 16) {
+ $result = $1;
+ } else {
+ my $origip_obj = NetAddr::IP->new($origip . '/' . $mask_len);
+ if (!defined $origip_obj) { # invalid IPv4 address
+ dbg("auto-whitelist: bad IPv4 address $origip");
+ } else {
+ $result = $origip_obj->network->addr;
+ $result =~s/(\.0){1,3}\z//; # truncate zero tail
+ }
+ }
+ } elsif ($origip =~ /:/ && # triage
$origip =~
/^ [0-9a-f]{0,4} (?: : [0-9a-f]{0,4} | \. [0-9]{1,3} ){2,9} $/xsi) {
# looks like an IPv6 address
- my $origip_obj = NetAddr::IP->new6($origip);
+ my $mask_len = $self->{ipv6_mask_len};
+ $mask_len = 48 if !defined $mask_len;
+ my $origip_obj = NetAddr::IP->new6($origip . '/' . $mask_len);
if (!defined $origip_obj) { # invalid IPv6 address
dbg("auto-whitelist: bad IPv6 address $origip");
- $origip = 'junk-' . $origip;
} else {
- $origip = $origip_obj->full6; # string in a canonical form
- $origip =~ s/(:[0-9a-f]{4}){5}\z//si; # keep only the /48 network addr
- $origip .= '::'; # although it wastes 2 chars, it's nice to make it
- # look like a syntactically correct IPv6 address
+ $result = $origip_obj->network->full6; # string in a canonical form
+ $result =~ s/(:0000){1,7}\z/::/; # compress zero tail
}
} else {
dbg("auto-whitelist: bad IP address $origip");
- $origip =~ s/[^0-9A-Fa-f:.]/_/gs; # paranoia
- $origip = 'junk-' . $origip;
}
- $origip = substr($origip,0,16) if length($origip) > 16; # awl.ip field
+ if (defined $result && length($result) > 39) { # just in case, keep under
+ $result = substr($result,0,39); # the awl.ip field size
+ }
+ dbg("auto-whitelist: IP masking %s -> %s", $origip,$result);
+ return $result;
+}
+
+###########################################################################
+
+sub pack_addr {
+ my ($self, $addr, $origip) = @_;
+
+ $addr = lc $addr;
+ $addr =~ s/[\000\;\'\"\!\|]/_/gs; # paranoia
+
+ if (!defined $origip) {
+ # could not find an IP address to use, could be localhost mail
+ # or from the user running "add-addr-to-*".
+ $origip = 'none';
+ } else {
+ $origip = $self->ip_to_awl_key($origip);
+ }
return $addr . "|ip=" . $origip;
}
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/AWL.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/AWL.pm?rev=883770&r1=883769&r2=883770&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/AWL.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/AWL.pm Tue Nov 24 16:32:51 2009
@@ -148,6 +148,67 @@
type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
});
+=item auto_whitelist_ipv4_mask_len n (default: 16, range [0..32])
+
+The AWL database keeps only the specified number of most-significant bits
+of an IPv4 address in its fields, so that different individual IP addresses
+within a subnet belonging to the same owner are managed under a single
+database record. As we have no information available on the allocated
+address ranges of senders, this CIDR mask length is only an approximation.
+The default is 16 bits, corresponding to a former class B. Increase the
+number if a finer granularity is desired, e.g. to 24 (class C) or 32.
+A value 0 is allowed but is not particularly useful, as it would treat the
+whole internet as a single organization. The number need not be a multiple
+of 8, any split is allowed.
+
+=cut
+
+ push (@cmds, {
+ setting => 'auto_whitelist_ipv4_mask_len',
+ default => 16,
+ type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
+ code => sub {
+ my ($self, $key, $value, $line) = @_;
+ if (!defined $value || $value eq '') {
+ return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
+ } elsif ($value !~ /^\d+$/ || $value < 0 || $value > 32) {
+ return $Mail::SpamAssassin::Conf::INVALID_VALUE;
+ }
+ $self->{auto_whitelist_ipv4_mask_len} = $value;
+ }
+ });
+
+=item auto_whitelist_ipv6_mask_len n (default: 48, range [0..128])
+
+The AWL database keeps only the specified number of most-significant bits
+of an IPv6 address in its fields, so that different individual IP addresses
+within a subnet belonging to the same owner are managed under a single
+database record. As we have no information available on the allocated address
+ranges of senders, this CIDR mask length is only an approximation. The default
+is 48 bits, corresponding to an address range commonly allocated to individual
+(smaller) organizations. Increase the number for a finer granularity, e.g.
+to 64 or 96 or 128, or decrease for wider ranges, e.g. 32. A value 0 is
+allowed but is not particularly useful, as it would treat the whole internet
+as a single organization. The number need not be a multiple of 4, any split
+is allowed.
+
+=cut
+
+ push (@cmds, {
+ setting => 'auto_whitelist_ipv6_mask_len',
+ default => 48,
+ type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
+ code => sub {
+ my ($self, $key, $value, $line) = @_;
+ if (!defined $value || $value eq '') {
+ return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
+ } elsif ($value !~ /^\d+$/ || $value < 0 || $value > 128) {
+ return $Mail::SpamAssassin::Conf::INVALID_VALUE;
+ }
+ $self->{auto_whitelist_ipv6_mask_len} = $value;
+ }
+ });
+
=item user_awl_sql_override_username
Used by the SQLBasedAddrList storage implementation.
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/SQLBasedAddrList.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/SQLBasedAddrList.pm?rev=883770&r1=883769&r2=883770&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/SQLBasedAddrList.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/SQLBasedAddrList.pm Tue Nov 24 16:32:51 2009
@@ -136,7 +136,8 @@
my $dbh = DBI->connect($dsn, $dbuser, $dbpass, {'PrintError' => 0});
if(!$dbh) {
- dbg("auto-whitelist: sql-based unable to connect to database ($dsn) : " . DBI::errstr);
+ info("auto-whitelist: sql-based unable to connect to database (%s) : %s",
+ $dsn, DBI::errstr);
return undef;
}
@@ -203,26 +204,28 @@
my $sql = "SELECT count, totscore FROM $self->{tablename} " .
"WHERE username = ? AND email = ?";
- my @args = ($email);
- if ($self->{_with_awl_signer}) {
- my @signedby = !defined $signedby ? '' : split(' ', lc $signedby);
- if (@signedby == 1) {
+ my @args = ( $email );
+ if (!$self->{_with_awl_signer}) {
+ $sql .= " AND ip = ?";
+ push(@args, $ip);
+ } else {
+ my @signedby = !defined $signedby ? () : split(' ', lc $signedby);
+ if (!@signedby) {
+ $sql .= " AND signedby = '' AND ip = ?";
+ push(@args, $ip);
+ } elsif (@signedby == 1) {
$sql .= " AND signedby = ?";
} elsif (@signedby > 1) {
$sql .= " AND signedby IN (" . join(',', ('?') x @signedby) . ")";
}
push(@args, @signedby);
}
- if (!$self->{_with_awl_signer} || !defined $signedby || $signedby eq '') {
- $sql .= " AND ip = ?";
- push(@args, $ip);
- }
my $sth = $self->{dbh}->prepare($sql);
my $rc = $sth->execute($self->{_username}, @args);
if (!$rc) { # there was an error, but try to go on
- my $err = $self->{dbh}->errstr;
- dbg("auto-whitelist: sql-based get_addr_entry: SQL error: $err");
+ info("auto-whitelist: sql-based get_addr_entry %s: SQL error: %s",
+ join('|',@args), $sth->errstr);
$entry->{count} = 0;
$entry->{totscore} = 0;
}
@@ -240,7 +243,7 @@
$cnt++;
}
dbg("auto-whitelist: sql-based get_addr_entry: %s for %s",
- $cnt ? "found $cnt entry" : 'no entries found',
+ $cnt ? "found $cnt entries" : 'no entries found',
join('|',@args) );
}
$sth->finish();
@@ -279,62 +282,81 @@
return $entry unless $email ne '' && (defined $ip || defined $signedby);
- if ($entry->{exists_p}) { # entry already exists, so just update
- my(@args) = ($score, $self->{_username}, $email);
+ # try inserting first, and if that fails we'll do the update; this way
+ # we avoid to large extent a race condition between multiple processes
+
+ my $inserted = 0;
+
+ { my @fields = qw(username email ip count totscore);
+ my @signedby;
+ if ($self->{_with_awl_signer}) {
+ push(@fields, 'signedby');
+ @signedby = !defined $signedby ? () : split(' ', lc $signedby);
+ @signedby = ( '' ) if !@signedby;
+ }
+ my @args = ($self->{_username}, $email, $ip, 1, $score);
+ my $sql = sprintf("INSERT INTO %s (%s) VALUES (%s)", $self->{tablename},
+ join(',', @fields), join(',', ('?') x @fields));
+ my $sth = $self->{dbh}->prepare($sql);
+
+ if (!$self->{_with_awl_signer}) {
+ my $rc = $sth->execute(@args);
+ if (!$rc) {
+ dbg("auto-whitelist: sql-based add_score/insert %s: SQL error: %s",
+ join('|',@args), $sth->errstr);
+ } else {
+ dbg("auto-whitelist: sql-based add_score/insert ".
+ "score %s: %s", $score, join('|',@args));
+ $inserted = 1; $entry->{exists_p} = 1;
+ }
+ } else {
+ for my $s (@signedby) {
+ my $rc = $sth->execute(@args, $s);
+ if (!$rc) {
+ dbg("auto-whitelist: sql-based add_score/insert %s: SQL error: %s",
+ join('|',@args,$s), $sth->errstr);
+ } else {
+ dbg("auto-whitelist: sql-based add_score/insert ".
+ "score %s: %s", $score, join('|',@args,$s));
+ $inserted = 1; $entry->{exists_p} = 1;
+ }
+ }
+ }
+ }
+
+ if (!$inserted) {
+ # insert failed, assume primary key constraint, so try the update
+
my $sql = "UPDATE $self->{tablename} ".
"SET count = count + 1, totscore = totscore + ? ".
"WHERE username = ? AND email = ?";
- my @signedby;
+ my(@args) = ($score, $self->{_username}, $email);
if ($self->{_with_awl_signer}) {
- @signedby = !defined $signedby ? '' : split(' ', lc $signedby);
- if (@signedby == 1) {
+ my @signedby = !defined $signedby ? () : split(' ', lc $signedby);
+ if (!@signedby) {
+ $sql .= " AND signedby = ''";
+ } elsif (@signedby == 1) {
$sql .= " AND signedby = ?";
} elsif (@signedby > 1) {
$sql .= " AND signedby IN (" . join(',', ('?') x @signedby) . ")";
}
push(@args, @signedby);
}
- if (!$self->{_with_awl_signer} || !defined $signedby || $signedby eq '') {
- $sql .= " AND ip = ?";
- push(@args, $ip);
- }
+ $sql .= " AND ip = ?";
+ push(@args, $ip);
+
my $sth = $self->{dbh}->prepare($sql);
my $rc = $sth->execute(@args);
if (!$rc) {
- my $err = $self->{dbh}->errstr;
- dbg("auto-whitelist: sql-based add_score/update: SQL error: $err");
- }
- else {
- dbg("auto-whitelist: sql-based add_score: ".
+ info("auto-whitelist: sql-based add_score/update %s: SQL error: %s",
+ join('|',@args), $sth->errstr);
+ } else {
+ dbg("auto-whitelist: sql-based add_score/update ".
"new count: %s, new totscore: %s for %s",
- $entry->{count}, $entry->{totscore}, join('|',@args[2..$#args]));
- }
- $sth->finish();
- }
- else { # no entry yet, so insert a new entry
- my @fields = qw(username email ip count totscore);
- my @args = ($self->{_username}, $email, $ip, 1, $score);
- my @signedby;
- if ($self->{_with_awl_signer}) {
- push(@fields, 'signedby');
- @signedby = !defined $signedby ? '' : split(' ', lc $signedby);
- }
- my $sql = sprintf("INSERT INTO %s (%s) VALUES (%s)", $self->{tablename},
- join(',', @fields), join(',', ('?') x @fields));
- my $sth = $self->{dbh}->prepare($sql);
- for my $s (@signedby) {
- # loop runs for at least one element, possibly an empty signing domain
- my $rc = $sth->execute(@args, (@fields > @args ? $s : ()) );
- if (!$rc) {
- my $err = $self->{dbh}->errstr;
- dbg("auto-whitelist: sql-based add_score/insert: SQL error: $err");
- }
+ $entry->{count}, $entry->{totscore}, join('|',@args));
+ $entry->{exists_p} = 1;
}
- $entry->{exists_p} = 1;
- dbg("auto-whitelist: sql-based add_score: created new entry ".
- "with totscore %s: %s", $score, join('|',@args[2..$#args]));
- $sth->finish();
}
return $entry;
@@ -387,8 +409,8 @@
my $rc = $sth->execute(@args);
if (!$rc) {
- my $err = $self->{dbh}->errstr;
- dbg("auto-whitelist: sql-based remove_entry: SQL error: $err");
+ info("auto-whitelist: sql-based remove_entry %s: SQL error: %s",
+ join('|',@args), $sth->errstr);
}
else {
# We might normally have a dbg saying we removed the address
Modified: spamassassin/trunk/sql/README.awl
URL: http://svn.apache.org/viewvc/spamassassin/trunk/sql/README.awl?rev=883770&r1=883769&r2=883770&view=diff
==============================================================================
--- spamassassin/trunk/sql/README.awl (original)
+++ spamassassin/trunk/sql/README.awl Tue Nov 24 16:32:51 2009
@@ -74,7 +74,7 @@
username varchar(100) # this is the username whose e-mail is being filtered
email varchar(200) # this is the address key
- ip varchar(16) # this is the ip key (fits IPv4/24 or IPv6/48)
+ ip varchar(40) # this is the ip key (fits IPv4 IPv6)
count int(11) # this is the message counter
totscore float # this is the total calculated score
signedby varchar(255) # a DKIM or DomainKeys signing domain(s)
@@ -108,7 +108,7 @@
CREATE TABLE awl (
username varchar(100) NOT NULL default '',
email varchar(255) NOT NULL default '',
- ip varchar(16) NOT NULL default '',
+ ip varchar(40) NOT NULL default '',
count int(11) NOT NULL default '0',
totscore float NOT NULL default '0',
signedby varchar(255) NOT NULL default '',
@@ -137,11 +137,11 @@
auto_whitelist_distinguish_signed 1
To extend a field awl.ip on an existing table to be able to fit
-an IPv6 addresses (its /48 network part) or an IPv4 address (/24):
+an IPv6 addresses (39 characters would suffice) or an IPv4 address:
under MySQL:
- ALTER TABLE awl MODIFY ip varchar(16);
+ ALTER TABLE awl MODIFY ip varchar(40);
under PostgreSQL:
- ALTER TABLE awl ALTER ip TYPE varchar(16);
+ ALTER TABLE awl ALTER ip TYPE varchar(40);
Once you have created the database and added the table, just add the
Modified: spamassassin/trunk/sql/awl_mysql.sql
URL: http://svn.apache.org/viewvc/spamassassin/trunk/sql/awl_mysql.sql?rev=883770&r1=883769&r2=883770&view=diff
==============================================================================
--- spamassassin/trunk/sql/awl_mysql.sql (original)
+++ spamassassin/trunk/sql/awl_mysql.sql Tue Nov 24 16:32:51 2009
@@ -1,7 +1,7 @@
CREATE TABLE awl (
username varchar(100) NOT NULL default '',
email varchar(255) NOT NULL default '',
- ip varchar(16) NOT NULL default '',
+ ip varchar(40) NOT NULL default '',
count int(11) NOT NULL default '0',
totscore float NOT NULL default '0',
signedby varchar(255) NOT NULL default '',
Modified: spamassassin/trunk/sql/awl_pg.sql
URL: http://svn.apache.org/viewvc/spamassassin/trunk/sql/awl_pg.sql?rev=883770&r1=883769&r2=883770&view=diff
==============================================================================
--- spamassassin/trunk/sql/awl_pg.sql (original)
+++ spamassassin/trunk/sql/awl_pg.sql Tue Nov 24 16:32:51 2009
@@ -1,7 +1,7 @@
CREATE TABLE awl (
username varchar(100) NOT NULL default '',
email varchar(255) NOT NULL default '',
- ip varchar(16) NOT NULL default '',
+ ip varchar(40) NOT NULL default '',
count bigint NOT NULL default '0',
totscore float NOT NULL default '0',
signedby varchar(255) NOT NULL default '',
Modified: spamassassin/trunk/t/data/spam/004
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/data/spam/004?rev=883770&r1=883769&r2=883770&view=diff
==============================================================================
--- spamassassin/trunk/t/data/spam/004 (original)
+++ spamassassin/trunk/t/data/spam/004 Tue Nov 24 16:32:51 2009
@@ -16,7 +16,7 @@
Received: from ns.sakakura-kk.co.jp (ns.sakakura-kk.co.jp [61.119.13.18])
by mail.netnoteinc.com (Postfix) with ESMTP id 4FD6F1143D6 for
<jm...@netnoteinc.com>; Thu, 6 Dec 2001 23:58:02 +0000 (Eire)
-Received: from plain (ZHONGXIN [212.17.88.134]) by ns.sakakura-kk.co.jp
+Received: from plain (ZHONGXIN [212.17.35.134]) by ns.sakakura-kk.co.jp
with SMTP (Microsoft Exchange Internet Mail Service Version 5.5.2653.13)
id YJQP6DKP; Fri, 7 Dec 2001 08:15:01 +0900
Received: from 144.137.3.98 (SquirrelMail authenticated user jmmail) by