You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2008/11/24 17:17:12 UTC

svn commit: r720217 - in /spamassassin/trunk: lib/Mail/SpamAssassin/Conf.pm lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm t/uri.t

Author: jm
Date: Mon Nov 24 08:17:10 2008
New Revision: 720217

URL: http://svn.apache.org/viewvc?rev=720217&view=rev
Log:
bug 6013: add 'util_rb_3tld', allowing 3-level TLDs to be listed in URIBLs and allowing new 3TLDs to be added from rule updates

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm
    spamassassin/trunk/t/uri.t

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm?rev=720217&r1=720216&r2=720217&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm Mon Nov 24 08:17:10 2008
@@ -2575,6 +2575,33 @@
     }
   });
 
+=item util_rb_3tld 3tld1.some.tld 3tld2.other.tld ...
+
+This option adds new 3rd-level TLDs (3TLDs) to the RegistrarBoundaries
+code.  The built-in list is normally updated only when a new version of
+SpamAssassin is released, but sometimes new 3TLDs need to be added sooner
+than a release can occur.  Examples of 3TLDs are demon.co.uk and
+plc.co.im.
+
+=cut
+
+  # util_rb_3tld (flagged is_admin): add each listed 3-level domain, lower-cased.
+  push (@cmds, {
+    setting => 'util_rb_3tld',
+    is_admin => 1,
+    code => sub {
+      my ($self, $key, $value, $line) = @_;
+      return $MISSING_REQUIRED_VALUE unless (defined $value && $value !~ /^$/);
+      # Every whitespace-separated entry must have exactly three dot-separated labels.
+      unless ($value =~ /^[^\s.]+\.[^\s.]+\.[^\s.]+(?:\s+[^\s.]+\.[^\s.]+\.[^\s.]+)*$/) {
+	return $INVALID_VALUE;
+      }
+      foreach (split(/\s+/, $value)) {
+        $Mail::SpamAssassin::Util::RegistrarBoundaries::THREE_LEVEL_DOMAINS{lc $_} = 1;
+      }
+    }
+  });
+
 =item bayes_path /path/filename	(default: ~/.spamassassin/bayes)
 
 This is the directory and filename for Bayes databases.  Several databases

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm?rev=720217&r1=720216&r2=720217&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm Mon Nov 24 08:17:10 2008
@@ -26,7 +26,7 @@
 use re 'taint';
 
 use vars qw (
-  @ISA %TWO_LEVEL_DOMAINS %US_STATES %VALID_TLDS
+  @ISA %TWO_LEVEL_DOMAINS %THREE_LEVEL_DOMAINS %US_STATES %VALID_TLDS
 );
 
 # The list of currently-valid TLDs for the DNS system.
@@ -250,7 +250,7 @@
 }
 
 # This is required because the .us domain is nuts. See $THREE_LEVEL_DOMAINS
-# and $FOUR_LEVEL_DOMAINS below.
+# below.
 #
 foreach (qw/
   ak al ar az ca co ct dc de fl ga gu hi ia id il in ks ky la ma md me mi 
@@ -260,6 +260,12 @@
   $US_STATES{$_} = 1;
 }
 
+# Seed the 3-level registrar boundaries; each name is stored as a hash key
+# with a true value so a joined three-label name can be looked up directly.
+$THREE_LEVEL_DOMAINS{$_} = 1 for qw/
+  demon.co.uk esc.edu.ar lkd.co.im plc.co.im
+ /;
+
 ###########################################################################
 
 =item ($hostname, $domain) = split_domain ($fqdn)
@@ -307,14 +313,13 @@
 	# demon.co.uk
 	# esc.edu.ar
 	# [^\.]+\.${US_STATES}\.us
-	if ($domparts[2] eq 'uk' || $domparts[2] eq 'ar' || $domparts[2] eq 'im') {
-	  my $temp = join('.', @domparts);
-	  last if ($temp eq 'demon.co.uk' || $temp eq 'esc.edu.ar' ||
-	  	   $temp eq 'lkd.co.im' || $temp eq 'plc.co.im');
-	}
-	elsif ($domparts[2] eq 'us') {
+	if ($domparts[2] eq 'us') {
           last if ($US_STATES{$domparts[1]});
 	}
+        else {
+          my $temp = join(".", @domparts);
+          last if ($THREE_LEVEL_DOMAINS{$temp});
+        }
       }
       elsif (@domparts == 2) {
 	# co.uk, etc.

Modified: spamassassin/trunk/t/uri.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/uri.t?rev=720217&r1=720216&r2=720217&view=diff
==============================================================================
--- spamassassin/trunk/t/uri.t (original)
+++ spamassassin/trunk/t/uri.t Mon Nov 24 08:17:10 2008
@@ -23,12 +23,22 @@
 use Mail::SpamAssassin::HTML;
 use Mail::SpamAssassin::Util;
 
-plan tests => 91;
+plan tests => 95;
 
 ##############################################
 
+
+tstlocalrules ('
+
+  util_rb_2tld live.com
+  util_rb_3tld three.3ldlive.com
+
+');
+
 # initialize SpamAssassin
-my $sa = create_saobj({'dont_copy_prefs' => 1});
+my $sa = create_saobj({'dont_copy_prefs' => 1,
+        # 'debug' => 1
+});
 
 $sa->init(0); # parse rules
 
@@ -320,3 +330,9 @@
 ok(try($base, "g#s/../x", "http://a/b/c/g#s/../x"));
 ok(try($base, "http:g", "http://a/b/c/g"));
 
+# uses the util_rb_*tld lines above
+ok(try_domains('WWW.LIVE.com', 'www.live.com'));
+ok(try_domains('WWW.foo.LIVE.com', 'foo.live.com'));
+ok(try_domains('WWW.three.3ldLIVE.com', 'www.three.3ldlive.com'));
+ok(try_domains('WWW.foo.basicLIVE.com', 'basiclive.com'));
+