You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2005/03/23 04:10:14 UTC
svn commit: r158720 - in spamassassin/trunk: lib/Mail/SpamAssassin/Util.pm t/data/spam/009 t/uri.t
Author: jm
Date: Tue Mar 22 19:10:13 2005
New Revision: 158720
URL: http://svn.apache.org/viewcvs?view=rev&rev=158720
Log:
bug 4213: deal with evil URL-encoding tricks (ampersand in URL hostname, %-encoding in the scheme portion of the URL)
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
spamassassin/trunk/t/data/spam/009
spamassassin/trunk/t/uri.t
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm?view=diff&r1=158719&r2=158720
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm Tue Mar 22 19:10:13 2005
@@ -873,7 +873,12 @@
$uri =~ s,#.*$,,gs; # drop fragment
$uri =~ s#^[a-z]+:/{0,2}##gsi; # drop the protocol
$uri =~ s,^[^/]*\@,,gs; # username/passwd
- $uri =~ s,[/\?\&].*$,,gs; # path/cgi params
+
+ # strip path and CGI params. note: bug 4213 shows that "&" should
+ # *not* be likewise stripped here -- it's permitted in hostnames by
+ # some common MUAs!
+ $uri =~ s,[/\?].*$,,gs;
+
$uri =~ s,:\d*$,,gs; # port, bug 4191: sometimes the # is missing
return if $uri =~ /\%/; # skip undecoded URIs.
@@ -915,6 +920,20 @@
# http:www.foo.biz -> http://www.foo.biz
$nuri =~ s#^(https?:)/{0,2}#$1//#i;
+ # *always* make a dup with all %-encoding decoded, since
+ # important parts of the URL may be encoded (such as the
+ # scheme). (bug 4213)
+ if ($nuri =~ /\%[0-9a-fA-F]{2}/) {
+ $nuri = Mail::SpamAssassin::Util::url_encode($nuri);
+ }
+
+ # www.foo.biz -> http://www.foo.biz
+ # unschemed URI? assume a default of "http://" as most
+ # HTML-displaying MUAs would
+ if ($nuri !~ /^[-_a-z0-9]+:/) {
+ $nuri =~ s/^/http:\/\//g;
+ }
+
# http://www.foo.biz?id=3 -> http://www.foo.biz/?id=3
$nuri =~ s@^(https?://[^/?]+)\?@$1/?@i;
@@ -939,15 +958,6 @@
# of the URI according to RFC 1738 that's invalid, and the tested
# browsers (Firefox, IE) remove them before usage...
if ($host =~ tr/\000-\040\200-\377//d) {
- push(@nuris, join ('', $proto, $host, $rest));
- }
-
- # deal with the %## encoding if necessary
- # only worry about decoding stuff as an obfuscation technique?
- # encoding isn't allowed in anything but $rest, so just deal with it
- # there.
- if ($rest =~ /\%[0-9a-fA-F]{2}/) {
- $rest = Mail::SpamAssassin::Util::url_encode($rest);
push(@nuris, join ('', $proto, $host, $rest));
}
Modified: spamassassin/trunk/t/data/spam/009
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/t/data/spam/009?view=diff&r1=158719&r2=158720
==============================================================================
--- spamassassin/trunk/t/data/spam/009 (original)
+++ spamassassin/trunk/t/data/spam/009 Tue Mar 22 19:10:13 2005
@@ -31,6 +31,11 @@
<a href=http://1113343455/>testing IP</a>
<a href="http://slashdot.org@1113343456/">another</a>
+<p>
+cut and paste spamassassin.org for hot spamfiltering action
+address@foo.net
+</p>
+
(that should do it...)
</body></html>
@@ -50,5 +55,4 @@
<BR> To be Removed Send An Email To twema2001@yahoo.com.</FONT><FONT
COLOR="#ff0000" SIZE=5 PTSIZE=16><B><BR> </FONT><FONT COLOR="#000000" SIZE=3
PTSIZE=10></B><BR> </P><P ALIGN=LEFT><BR> <BR> </HTML>
-
Modified: spamassassin/trunk/t/uri.t
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/t/uri.t?view=diff&r1=158719&r2=158720
==============================================================================
--- spamassassin/trunk/t/uri.t (original)
+++ spamassassin/trunk/t/uri.t Tue Mar 22 19:10:13 2005
@@ -21,7 +21,7 @@
use Mail::SpamAssassin::HTML;
use Mail::SpamAssassin::Util;
-plan tests => 63;
+plan tests => 75;
##############################################
@@ -49,6 +49,9 @@
ok ($urimap{'http://66.92.69.222/'});
ok ($urimap{'http://66.92.69.223/'});
ok ($urimap{'http://66.92.69.224/'});
+ok ($urimap{'spamassassin.org'});
+ok (!$urimap{'CUMSLUTS.'});
+ok (!$urimap{'CUMSLUTS..VIRGIN'});
##############################################
@@ -70,6 +73,7 @@
}
ok(try_domains('javascript:{some crap}', undef));
+# mailtos are now ignored by uri_to_domain (bug 4201)
ok(try_domains('mailto:nobody@example.com', undef));
ok(try_domains('http://66.92.69.221/', '66.92.69.221'));
ok(try_domains('http://www.spamassassin.org:8080/lists.html', 'spamassassin.org'));
@@ -80,6 +84,14 @@
ok(try_domains('http:/www.spamassassin.org/lists.html', 'spamassassin.org'));
ok(try_domains('http:www.spamassassin.org/lists.html', 'spamassassin.org'));
ok(try_domains('http://kung.pao.com.cn', 'pao.com.cn'));
+ok(try_domains('kung.pao.com.cn', 'pao.com.cn'));
+ok(try_domains('kung-pao.com.cn', 'kung-pao.com.cn'));
+ok(try_domains('username:password@www.spamassassin.org/lists.html', 'spamassassin.org'));
+ok(try_domains('spamassassin.org', 'spamassassin.org'));
+ok(try_domains('SPAMASSASSIN.ORG', 'spamassassin.org'));
+ok(try_domains('WWW.SPAMASSASSIN.ORG', 'spamassassin.org'));
+ok(try_domains('spamassassin.txt', undef));
+ok(try_domains('longer.url.but.not.spamassassin.txt', undef));
##############################################
@@ -138,6 +150,13 @@
['http://www.kluge.net/']
));
+ok(try_canon([
+ 'http%3A//ebg&vosxfov.com%2Eget%72xspecials%2Enet/b/Tr3f0amG'
+ ], [
+ 'http%3A//ebg&vosxfov.com%2Eget%72xspecials%2Enet/b/Tr3f0amG',
+ 'http://ebg&vosxfov.com.getrxspecials.net/b/Tr3f0amG'
+ ]));
+
##############################################
sub try {
@@ -199,3 +218,4 @@
ok(try($base, "g#s/./x", "http://a/b/c/g#s/./x"));
ok(try($base, "g#s/../x", "http://a/b/c/g#s/../x"));
ok(try($base, "http:g", "http://a/b/c/g"));
+