You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by mm...@apache.org on 2010/06/29 01:29:03 UTC
svn commit: r958790 - in /spamassassin/trunk/lib/Mail/SpamAssassin: Conf.pm
Plugin/WLBLEval.pm
Author: mmartinec
Date: Mon Jun 28 23:29:02 2010
New Revision: 958790
URL: http://svn.apache.org/viewvc?rev=958790&view=rev
Log:
Bug 6458 - add enlist_uri_host and delist_uri_host conf directives,
allowing for arbitrarily named URI lists, each associated with
its own scoring rule
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/WLBLEval.pm
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm?rev=958790&r1=958789&r2=958790&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm Mon Jun 28 23:29:02 2010
@@ -642,74 +642,135 @@ e.g.
});
-=item blacklist_uri_host host-or-domain ...
+=item enlist_uri_host (listname) host ...
+
+Adds one or more host names or domain names to a named list of URI domains.
+The named list can then be consulted through a check_uri_host_listed()
+eval rule, which takes the list name as an argument. Parentheses around
+a list name are literal - a required syntax.
+
+Host names may optionally be prefixed by an exclamation mark '!', which
+produces false as a result if this entry matches. This makes it easier
+to exclude some subdomains when their superdomain is listed, for example:
+
+ enlist_uri_host (MYLIST) !sub1.example.com !sub2.example.com example.com
-Adds one or more host names to a list of blacklisted URI domains.
+No wildcards are supported, but subdomains do match implicitly. Lists
+are independent. Search for each named list starts by looking up the
+full hostname first, then leading fields are progressively stripped off
+(e.g.: sub.example.com, example.com, com) until a match is found or we run
+out of fields. The first matching entry (the most specific) determines if
+a lookup yielded a true (no '!' prefix) or a false ('!'-prefixed) result.
-No wildcards are supported, but subdomains do match implicitly. There is
-only one combined list for black- and whitelisting of host names in URIs.
-Search starts by looking up the full hostname first, then leading fields
-are progressively stripped off (e.g.: sub.example.com, example.com, com)
-until a match is found or we run out of fields. The first matching entry
-(the most specific) determines if a lookup yielded a blacklisted or a
-whitelisted result.
-
-If an URL contains an IP address in place of a host name, the
-black- (or white-) list must specify the exact same IP address.
-
-A domain cannot be both blacklisted and whitelisted at the same time, the
-last directive prevails. Use the unlist_uri_host directive to neutralize
-previous blacklist_uri_host and whitelist_uri_host settings.
+If a URL found in a message contains an IP address in place of a host name,
+the given list must specify the exact same IP address (instead of a host name)
+in order to match.
+
+Use the delist_uri_host directive to neutralize previous enlist_uri_host
+settings. Listnames 'BLACK' and 'WHITE' have their shorthand directives
+blacklist_uri_host and whitelist_uri_host and default rules, but are
+otherwise not special or reserved.
=cut
push (@cmds, {
- command => 'blacklist_uri_host',
- setting => 'wblist_uri_host',
+ command => 'enlist_uri_host',
+ setting => 'uri_host_lists',
type => $CONF_TYPE_ADDRLIST,
code => sub {
my($conf, $key, $value, $line) = @_;
- my $listref = $conf->{wblist_uri_host};
- $conf->{wblist_uri_host} = $listref = {} if !$listref;
- $listref->{$_} = +1 for split(' ', lc $value);
+ local($1,$2);
+ if ($value !~ /^ \( (.*?) \) \s+ (.*) \z/sx) {
+ return $MISSING_REQUIRED_VALUE;
+ }
+ my $listname = $1; # corresponds to arg in check_uri_host_in_wblist()
+ # note: must not factor out dereferencing, as otherwise
+ # subhashes would spring up in a copy and be lost
+ foreach my $host ( split(' ', lc $2) ) {
+ my $v = $host =~ s/^!// ? 0 : 1;
+ $conf->{uri_host_lists}{$listname}{$host} = $v;
+ }
}
});
-=item whitelist_uri_host host-or-domain ...
+=item delist_uri_host [ (listname) ] host ...
-Adds one or more host names to a list of whitelisted URI domains.
-See blacklist_uri_host directive for details.
+Removes one or more specified host names from a named list of URI domains.
+Removing an unlisted name is ignored (is not an error). Listname is optional,
+if specified then just the named list is affected, otherwise hosts are
+removed from all URI host lists created so far. Parentheses around a list
+name are a required syntax.
+
+Note that directives in configuration files are processed in sequence,
+the delist_uri_host only applies to previously listed entries and has
+no effect on enlisted entries in yet-to-be-processed directives.
+
+For convenience (similarity to the enlist_uri_host directive) hostnames
+may be prefixed by an exclamation mark, which is stripped off from each
+name and has no meaning here.
=cut
push (@cmds, {
- command => 'whitelist_uri_host',
- setting => 'wblist_uri_host',
+ command => 'delist_uri_host',
+ setting => 'uri_host_lists',
+ type => $CONF_TYPE_ADDRLIST,
+ code => sub {
+ my($conf, $key, $value, $line) = @_;
+ local($1,$2);
+ if ($value !~ /^ (?: \( (.*?) \) \s+ )? (.*) \z/sx) {
+ return $MISSING_REQUIRED_VALUE;
+ }
+ my @listnames = defined $1 ? $1 : keys %{$conf->{uri_host_lists}};
+ my @args = split(' ', lc $2);
+ foreach my $listname (@listnames) {
+ foreach my $host (@args) {
+ my $v = $host =~ s/^!// ? 0 : 1;
+ delete $conf->{uri_host_lists}{$listname}{$host};
+ }
+ }
+ }
+ });
+
+=item blacklist_uri_host host-or-domain ...
+
+Is a shorthand for a directive: enlist_uri_host (BLACK) host ...
+
+Please see directives enlist_uri_host and delist_uri_host for details.
+
+=cut
+
+ push (@cmds, {
+ command => 'blacklist_uri_host',
+ setting => 'uri_host_lists',
type => $CONF_TYPE_ADDRLIST,
code => sub {
my($conf, $key, $value, $line) = @_;
- my $listref = $conf->{wblist_uri_host};
- $conf->{wblist_uri_host} = $listref = {} if !$listref;
- $listref->{$_} = -1 for split(' ', lc $value);
+ foreach my $host ( split(' ', lc $value) ) {
+ my $v = $host =~ s/^!// ? 0 : 1;
+ $conf->{uri_host_lists}{'BLACK'}{$host} = $v;
+ }
}
});
-=item unlist_uri_host host-or-domain ...
+=item whitelist_uri_host host-or-domain ...
-Removes one or more specified host names from a list of black- or whitelisted
-URI domains. Removing an unlisted name is ignored (is not an error).
+Is a shorthand for a directive: enlist_uri_host (WHITE) host ...
+
+Please see directives enlist_uri_host and delist_uri_host for details.
=cut
push (@cmds, {
- command => 'unlist_uri_host',
- setting => 'wblist_uri_host',
+ command => 'whitelist_uri_host',
+ setting => 'uri_host_lists',
type => $CONF_TYPE_ADDRLIST,
code => sub {
my($conf, $key, $value, $line) = @_;
- my $listref = $conf->{wblist_uri_host};
- $conf->{wblist_uri_host} = $listref = {} if !$listref;
- delete $listref->{$_} for split(' ', lc $value);
+ foreach my $host ( split(' ', lc $value) ) {
+ my $v = $host =~ s/^!// ? 0 : 1;
+ $conf->{uri_host_lists}{'WHITE'}{$host} = $v;
+ }
}
});
@@ -4171,6 +4232,8 @@ sub clone {
$dest = $self;
}
+ my %done;
+
# keys that should not be copied in ->clone().
# bug 4179: include want_rebuild_for_type, so that if a user rule
# is defined, its method will be recompiled for future scans in
@@ -4180,22 +4243,30 @@ sub clone {
scoreset scores want_rebuild_for_type
);
+ # special cases. first, skip anything that cannot be changed
+ # by users, and the stuff we take care of here
+ foreach my $var (@NON_COPIED_KEYS) {
+ $done{$var} = undef;
+ }
+
# keys that should can be copied using a ->clone() method, in ->clone()
my @CLONABLE_KEYS = qw(
internal_networks trusted_networks msa_networks
);
- my %done;
-
- # special cases. first, skip anything that cannot be changed
- # by users, and the stuff we take care of here
foreach my $key (@CLONABLE_KEYS) {
$dest->{$key} = $source->{$key}->clone();
$done{$key} = undef;
}
- foreach my $var (@NON_COPIED_KEYS) {
- $done{$var} = undef;
+ # two-level hash(es)
+ foreach my $key ('uri_host_lists') {
+ my $v = $source->{$key};
+ my $dest_key_ref = $dest->{$key} = {}; # must start from scratch!
+ while(my($k2,$v2) = each %{$v}) {
+ %{$dest_key_ref->{$k2}} = %{$v2};
+ }
+ $done{$key} = undef;
}
# bug 4179: be smarter about cloning the rule-type structures;
@@ -4309,7 +4380,7 @@ sub sa_die { Mail::SpamAssassin::sa_die(
sub feature_originating_ip_headers { 1 }
sub feature_dns_local_ports_permit_avoid { 1 }
sub feature_bayes_auto_learn_on_error { 1 }
-sub feature_uri_host_wblist { 1 }
+sub feature_uri_host_listed { 1 }
###########################################################################
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/WLBLEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/WLBLEval.pm?rev=958790&r1=958789&r2=958790&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/WLBLEval.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/WLBLEval.pm Mon Jun 28 23:29:02 2010
@@ -51,8 +51,7 @@ sub new {
$self->register_eval_rule("check_from_in_default_whitelist");
$self->register_eval_rule("check_forged_in_default_whitelist");
$self->register_eval_rule("check_mailfrom_matches_rcvd");
- $self->register_eval_rule("check_uri_host_in_blacklist");
- $self->register_eval_rule("check_uri_host_in_whitelist");
+ $self->register_eval_rule("check_uri_host_listed");
return $self;
}
@@ -361,94 +360,94 @@ sub _check_whitelist {
sub check_uri_host_in_blacklist {
my ($self, $pms) = @_;
- my $conf = $self->{main}{conf};
- my($host_bl, $host_wl) =
- $self->_check_uri_wblist($pms, $conf->{wblist_uri_host});
- if (defined $host_bl) {
- dbg("rules: uri host blacklisted: $host_bl");
- $pms->test_log("URI: $host_bl");
- return 1;
- }
- return 0;
+ $self->check_uri_host_listed($pms, 'BLACK');
}
sub check_uri_host_in_whitelist {
my ($self, $pms) = @_;
- my $conf = $self->{main}{conf};
- my($host_bl, $host_wl) =
- $self->_check_uri_wblist($pms, $conf->{wblist_uri_host});
- if (defined $host_wl) {
- dbg("rules: uri host whitelisted: $host_wl");
- $pms->test_log("URI: $host_wl");
- return 1;
+ $self->check_uri_host_listed($pms, 'WHITE');
+}
+
+sub check_uri_host_listed {
+ my ($self, $pms, $subname) = @_;
+ my $host_enlisted_ref = $self->_check_uri_host_listed($pms);
+ if ($host_enlisted_ref) {
+ my $matched_host = $host_enlisted_ref->{$subname};
+ if ($matched_host) {
+ dbg("rules: uri host enlisted (%s): %s", $subname, $matched_host);
+ $pms->test_log("URI: $matched_host");
+ return 1;
+ }
}
return 0;
}
-sub _check_uri_wblist {
- my ($self, $pms, $wb_hashref) = @_;
+sub _check_uri_host_listed {
+ my ($self, $pms) = @_;
- if ($pms->{'uri_wblisted'}) {
- # just provide a cached result
- } elsif (!$wb_hashref || !%$wb_hashref) {
- $pms->{'uri_wblisted'} = [ undef, undef ];
- } else {
- my $host_blacklisted;
- my $host_whitelisted;
- $wb_hashref = {} if !$wb_hashref;
- if (would_log("dbg","rules")) {
- dbg("rules: check_uri_wblist: %s",
- join(', ', map { $_.'='.$wb_hashref->{$_} } sort keys %$wb_hashref));
- }
- # obtain a full list of html-parsed domains
- my $uris = $pms->get_uri_detail_list();
- my %seen;
- while (my($uri,$info) = each %$uris) {
- next if $uri =~ /^mailto:/i; # we may want to skip mailto: uris (?)
- while (my($host,$domain) = each( %{$info->{hosts}} )) { # typically one
- next if $seen{$host};
- $seen{$host} = 1;
- local($1,$2);
- my @query_keys;
- if ($host =~ /^\[(.*)\]\z/) { # looks like an address literal
- @query_keys = ( $1 );
- } elsif ($host =~ /^\d+\.\d+\.\d+\.\d+\z/) { # IPv4 address
- @query_keys = ( $host );
- } elsif ($host ne '') {
- my($h) = $host;
- for (;;) {
- push(@query_keys, $h); # sub.example.com, example.com, com
- last if $h !~ s{^([^.]*)\.(.*)\z}{$2}s;
- }
- if (@query_keys > 10) { # sanity limit, keep the tail
- @query_keys = @query_keys[$#query_keys-9 .. $#query_keys];
- }
+ if ($pms->{'uri_host_enlisted'}) {
+ return $pms->{'uri_host_enlisted'}; # just provide a cached result
+ }
+
+ my $uri_lists_href = $self->{main}{conf}{uri_host_lists};
+ if (!$uri_lists_href || !%$uri_lists_href) {
+ $pms->{'uri_host_enlisted'} = {}; # no URI host lists
+ return $pms->{'uri_host_enlisted'};
+ }
+
+ my %host_enlisted;
+ my @uri_listnames = sort keys %$uri_lists_href;
+ if (would_log("dbg","rules")) {
+ foreach my $nm (@uri_listnames) {
+ dbg("rules: check_uri_host_listed: (%s) %s",
+ $nm, join(', ', map { $uri_lists_href->{$nm}{$_} ? $_ : '!'.$_ }
+ sort keys %{$uri_lists_href->{$nm}}));
+ }
+ }
+ # obtain a complete list of html-parsed domains
+ my $uris = $pms->get_uri_detail_list();
+ my %seen;
+ while (my($uri,$info) = each %$uris) {
+ next if $uri =~ /^mailto:/i; # we may want to skip mailto: uris (?)
+ while (my($host,$domain) = each( %{$info->{hosts}} )) { # typically one
+ next if $seen{$host};
+ $seen{$host} = 1;
+ local($1,$2);
+ my @query_keys;
+ if ($host =~ /^\[(.*)\]\z/) { # looks like an address literal
+ @query_keys = ( $1 );
+ } elsif ($host =~ /^\d+\.\d+\.\d+\.\d+\z/) { # IPv4 address
+ @query_keys = ( $host );
+ } elsif ($host ne '') {
+ my($h) = $host;
+ for (;;) {
+ shift @query_keys if @query_keys > 10; # sanity limit, keep tail
+ push(@query_keys, $h); # sub.example.com, example.com, com
+ last if $h !~ s{^([^.]*)\.(.*)\z}{$2}s;
}
- my $wb_verdict; # positive=blacklisted; negative=whitelisted
+ }
+ foreach my $nm (@uri_listnames) {
my $match;
- for my $q (@query_keys) {
- $wb_verdict = $wb_hashref->{$q};
- if ($wb_verdict) { $match = $q; last }
+ my $verdict;
+ my $hash_nm_ref = $uri_lists_href->{$nm};
+ foreach my $q (@query_keys) {
+ $verdict = $hash_nm_ref->{$q};
+ if (defined $verdict) {
+ $match = $q eq $host ? $host : "$host ($q)";
+ $match = '!' if !$verdict;
+ last;
+ }
}
- if (!$wb_verdict) {
- # dbg("rules: check_uri_wblist %s, NO MATCH for %s, search: %s",
- # $uri, $host, join(', ',@query_keys));
- } elsif ($wb_verdict > 0) {
- $host_blacklisted = $host;
- $host_blacklisted .= " ($match)" if $match ne $host;
- dbg("rules: check_uri_wblist %s, BLACK: %s, search: %s",
- $uri, $host_blacklisted, join(', ',@query_keys));
- } elsif ($wb_verdict < 0) {
- $host_whitelisted = $host;
- $host_whitelisted .= " ($match)" if $match ne $host;
- dbg("rules: check_uri_wblist %s, WHITE: %s, search: %s",
- $uri, $host_whitelisted, join(', ',@query_keys));
+ if (defined $verdict) {
+ $host_enlisted{$nm} = $match if $verdict;
+ dbg("rules: check_uri_host_listed %s, (%s): %s, search: %s",
+ $uri, $nm, $match, join(', ',@query_keys));
}
}
}
- $pms->{'uri_wblisted'} = [ $host_blacklisted, $host_whitelisted ];
}
- return @{ $pms->{'uri_wblisted'} };
+ $pms->{'uri_host_enlisted'} = \%host_enlisted;
+ return $pms->{'uri_host_enlisted'};
}
1;