You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2005/03/23 04:10:14 UTC

svn commit: r158720 - in spamassassin/trunk: lib/Mail/SpamAssassin/Util.pm t/data/spam/009 t/uri.t

Author: jm
Date: Tue Mar 22 19:10:13 2005
New Revision: 158720

URL: http://svn.apache.org/viewcvs?view=rev&rev=158720
Log:
bug 4213: deal with evil URL-encoding tricks (ampersand in URL hostname, %-encoding in the scheme portion of the URL)

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
    spamassassin/trunk/t/data/spam/009
    spamassassin/trunk/t/uri.t

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm?view=diff&r1=158719&r2=158720
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm Tue Mar 22 19:10:13 2005
@@ -873,7 +873,12 @@
   $uri =~ s,#.*$,,gs;			# drop fragment
   $uri =~ s#^[a-z]+:/{0,2}##gsi;	# drop the protocol
   $uri =~ s,^[^/]*\@,,gs;		# username/passwd
-  $uri =~ s,[/\?\&].*$,,gs;		# path/cgi params
+
+  # strip path and CGI params.  note: bug 4213 shows that "&" should
+  # *not* be likewise stripped here -- it's permitted in hostnames by
+  # some common MUAs!
+  $uri =~ s,[/\?].*$,,gs;              
+
   $uri =~ s,:\d*$,,gs;			# port, bug 4191: sometimes the # is missing
 
   return if $uri =~ /\%/;         # skip undecoded URIs.
@@ -915,6 +920,20 @@
     # http:www.foo.biz -> http://www.foo.biz
     $nuri =~ s#^(https?:)/{0,2}#$1//#i;
 
+    # *always* make a dup with all %-encoding decoded, since
+    # important parts of the URL may be encoded (such as the
+    # scheme). (bug 4213)
+    if ($nuri =~ /\%[0-9a-fA-F]{2}/) {
+      $nuri = Mail::SpamAssassin::Util::url_encode($nuri);
+    }
+
+    # www.foo.biz -> http://www.foo.biz
+    # unschemed URI?  assume a default of "http://" as most 
+    # HTML-displaying MUAs would
+    if ($nuri !~ /^[-_a-z0-9]+:/) {
+      $nuri =~ s/^/http:\/\//g;
+    }
+
     # http://www.foo.biz?id=3 -> http://www.foo.biz/?id=3
     $nuri =~ s@^(https?://[^/?]+)\?@$1/?@i;
 
@@ -939,15 +958,6 @@
       # of the URI according to RFC 1738 that's invalid, and the tested
       # browsers (Firefox, IE) remove them before usage...
       if ($host =~ tr/\000-\040\200-\377//d) {
-        push(@nuris, join ('', $proto, $host, $rest));
-      }
-
-      # deal with the %## encoding if necessary
-      # only worry about decoding stuff as an obfuscation technique?
-      # encoding isn't allowed in anything but $rest, so just deal with it
-      # there.
-      if ($rest =~ /\%[0-9a-fA-F]{2}/) {
-        $rest = Mail::SpamAssassin::Util::url_encode($rest);
         push(@nuris, join ('', $proto, $host, $rest));
       }
 

Modified: spamassassin/trunk/t/data/spam/009
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/t/data/spam/009?view=diff&r1=158719&r2=158720
==============================================================================
--- spamassassin/trunk/t/data/spam/009 (original)
+++ spamassassin/trunk/t/data/spam/009 Tue Mar 22 19:10:13 2005
@@ -31,6 +31,11 @@
 <a href=http://1113343455/>testing IP</a>
 <a href="http://slashdot.org@1113343456/">another</a>
 
+<p>
+cut and paste spamassassin.org for hot spamfiltering action
+address@foo.net
+</p>
+
 	  (that should do it...)
 </body></html>
 
@@ -50,5 +55,4 @@
 <BR> To be Removed Send An Email To twema2001@yahoo.com.</FONT><FONT  
 COLOR="#ff0000" SIZE=5 PTSIZE=16><B><BR> </FONT><FONT  COLOR="#000000" SIZE=3 
 PTSIZE=10></B><BR> </P><P ALIGN=LEFT><BR> <BR> </HTML>
-
 

Modified: spamassassin/trunk/t/uri.t
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/t/uri.t?view=diff&r1=158719&r2=158720
==============================================================================
--- spamassassin/trunk/t/uri.t (original)
+++ spamassassin/trunk/t/uri.t Tue Mar 22 19:10:13 2005
@@ -21,7 +21,7 @@
 use Mail::SpamAssassin::HTML;
 use Mail::SpamAssassin::Util;
 
-plan tests => 63;
+plan tests => 75;
 
 ##############################################
 
@@ -49,6 +49,9 @@
 ok ($urimap{'http://66.92.69.222/'});
 ok ($urimap{'http://66.92.69.223/'});
 ok ($urimap{'http://66.92.69.224/'});
+ok ($urimap{'spamassassin.org'});
+ok (!$urimap{'CUMSLUTS.'});
+ok (!$urimap{'CUMSLUTS..VIRGIN'});
 
 ##############################################
 
@@ -70,6 +73,7 @@
 }
 
 ok(try_domains('javascript:{some crap}', undef));
+# mailtos are now ignored by uri_to_domain (bug 4201)
 ok(try_domains('mailto:nobody@example.com', undef));
 ok(try_domains('http://66.92.69.221/', '66.92.69.221'));
 ok(try_domains('http://www.spamassassin.org:8080/lists.html', 'spamassassin.org'));
@@ -80,6 +84,14 @@
 ok(try_domains('http:/www.spamassassin.org/lists.html', 'spamassassin.org'));
 ok(try_domains('http:www.spamassassin.org/lists.html', 'spamassassin.org'));
 ok(try_domains('http://kung.pao.com.cn', 'pao.com.cn'));
+ok(try_domains('kung.pao.com.cn', 'pao.com.cn'));
+ok(try_domains('kung-pao.com.cn', 'kung-pao.com.cn'));
+ok(try_domains('username:password@www.spamassassin.org/lists.html', 'spamassassin.org'));
+ok(try_domains('spamassassin.org', 'spamassassin.org'));
+ok(try_domains('SPAMASSASSIN.ORG', 'spamassassin.org'));
+ok(try_domains('WWW.SPAMASSASSIN.ORG', 'spamassassin.org'));
+ok(try_domains('spamassassin.txt', undef));
+ok(try_domains('longer.url.but.not.spamassassin.txt', undef));
 
 ##############################################
 
@@ -138,6 +150,13 @@
   ['http://www.kluge.net/']
   ));
 
+ok(try_canon([
+   'http%3A//ebg&vosxfov.com%2Eget%72xspecials%2Enet/b/Tr3f0amG'
+   ], [
+   'http%3A//ebg&vosxfov.com%2Eget%72xspecials%2Enet/b/Tr3f0amG',
+   'http://ebg&vosxfov.com.getrxspecials.net/b/Tr3f0amG'
+   ]));
+
 ##############################################
 
 sub try {
@@ -199,3 +218,4 @@
 ok(try($base, "g#s/./x", "http://a/b/c/g#s/./x"));
 ok(try($base, "g#s/../x", "http://a/b/c/g#s/../x"));
 ok(try($base, "http:g", "http://a/b/c/g"));
+