You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2008/11/24 17:17:12 UTC
svn commit: r720217 - in /spamassassin/trunk: lib/Mail/SpamAssassin/Conf.pm
lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm t/uri.t
Author: jm
Date: Mon Nov 24 08:17:10 2008
New Revision: 720217
URL: http://svn.apache.org/viewvc?rev=720217&view=rev
Log:
bug 6013: add 'util_rb_3tld', allowing 3-level TLDs to be listed in URIBLs and allowing new 3TLDs to be added from rule updates
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm
spamassassin/trunk/t/uri.t
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm?rev=720217&r1=720216&r2=720217&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm Mon Nov 24 08:17:10 2008
@@ -2575,6 +2575,33 @@
}
});
+=item util_rb_3tld 3tld1.some.tld 3tld2.other.tld ...
+
+This option allows the addition of new 3rd-level TLDs (3TLD) to the
+RegistrarBoundaries code. Updates to the list usually happen when new
+versions of SpamAssassin are released, but sometimes it's necessary to add in
+new 3TLDs faster than a release can occur. 3TLDs include things like
+demon.co.uk, plc.co.im, etc.
+
+=cut
+
+ push (@cmds, {
+   setting => 'util_rb_3tld',
+   is_admin => 1,
+   # Handler for 'util_rb_3tld': validates the whitespace-separated list of
+   # names in $value and records each one, lowercased, as a key in
+   # %Mail::SpamAssassin::Util::RegistrarBoundaries::THREE_LEVEL_DOMAINS.
+   # Returns $MISSING_REQUIRED_VALUE for an undefined or empty value and
+   # $INVALID_VALUE when any entry is not of the form label.label.label.
+   code => sub {
+     my ($self, $key, $value, $line) = @_;
+     unless (defined $value && $value !~ /^$/) {
+       return $MISSING_REQUIRED_VALUE;
+     }
+     # Every entry, not just the first, must consist of exactly three
+     # dot-separated labels.  The repeated group previously matched only two
+     # labels, so "a.b.c d.e.f" was rejected while "a.b.c d.e" was accepted.
+     unless ($value =~ /^[^\s.]+\.[^\s.]+\.[^\s.]+(?:\s+[^\s.]+\.[^\s.]+\.[^\s.]+)*$/) {
+       return $INVALID_VALUE;
+     }
+     foreach (split(/\s+/, $value)) {
+       $Mail::SpamAssassin::Util::RegistrarBoundaries::THREE_LEVEL_DOMAINS{lc $_} = 1;
+     }
+   }
+ });
+
=item bayes_path /path/filename (default: ~/.spamassassin/bayes)
This is the directory and filename for Bayes databases. Several databases
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm?rev=720217&r1=720216&r2=720217&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm Mon Nov 24 08:17:10 2008
@@ -26,7 +26,7 @@
use re 'taint';
use vars qw (
- @ISA %TWO_LEVEL_DOMAINS %US_STATES %VALID_TLDS
+ @ISA %TWO_LEVEL_DOMAINS %THREE_LEVEL_DOMAINS %US_STATES %VALID_TLDS
);
# The list of currently-valid TLDs for the DNS system.
@@ -250,7 +250,7 @@
}
# This is required because the .us domain is nuts. See $THREE_LEVEL_DOMAINS
-# and $FOUR_LEVEL_DOMAINS below.
+# below.
#
foreach (qw/
ak al ar az ca co ct dc de fl ga gu hi ia id il in ks ky la ma md me mi
@@ -260,6 +260,12 @@
$US_STATES{$_} = 1;
}
+foreach (qw/
+ demon.co.uk esc.edu.ar lkd.co.im plc.co.im
+ /) {
+ $THREE_LEVEL_DOMAINS{$_} = 1;
+}
+
###########################################################################
=item ($hostname, $domain) = split_domain ($fqdn)
@@ -307,14 +313,13 @@
# demon.co.uk
# esc.edu.ar
# [^\.]+\.${US_STATES}\.us
- if ($domparts[2] eq 'uk' || $domparts[2] eq 'ar' || $domparts[2] eq 'im') {
- my $temp = join('.', @domparts);
- last if ($temp eq 'demon.co.uk' || $temp eq 'esc.edu.ar' ||
- $temp eq 'lkd.co.im' || $temp eq 'plc.co.im');
- }
- elsif ($domparts[2] eq 'us') {
+ if ($domparts[2] eq 'us') {
last if ($US_STATES{$domparts[1]});
}
+ else {
+ my $temp = join(".", @domparts);
+ last if ($THREE_LEVEL_DOMAINS{$temp});
+ }
}
elsif (@domparts == 2) {
# co.uk, etc.
Modified: spamassassin/trunk/t/uri.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/uri.t?rev=720217&r1=720216&r2=720217&view=diff
==============================================================================
--- spamassassin/trunk/t/uri.t (original)
+++ spamassassin/trunk/t/uri.t Mon Nov 24 08:17:10 2008
@@ -23,12 +23,22 @@
use Mail::SpamAssassin::HTML;
use Mail::SpamAssassin::Util;
-plan tests => 91;
+plan tests => 95;
##############################################
+
+tstlocalrules ('
+
+ util_rb_2tld live.com
+ util_rb_3tld three.3ldlive.com
+
+');
+
# initialize SpamAssassin
-my $sa = create_saobj({'dont_copy_prefs' => 1});
+my $sa = create_saobj({'dont_copy_prefs' => 1,
+ # 'debug' => 1
+});
$sa->init(0); # parse rules
@@ -320,3 +330,9 @@
ok(try($base, "g#s/../x", "http://a/b/c/g#s/../x"));
ok(try($base, "http:g", "http://a/b/c/g"));
+# uses the util_rb_*tld lines above
+ok(try_domains('WWW.LIVE.com', 'www.live.com'));
+ok(try_domains('WWW.foo.LIVE.com', 'foo.live.com'));
+ok(try_domains('WWW.three.3ldLIVE.com', 'www.three.3ldlive.com'));
+ok(try_domains('WWW.foo.basicLIVE.com', 'basiclive.com'));
+