You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2005/02/21 08:02:31 UTC
svn commit: r154646 - in spamassassin/trunk: lib/Mail/SpamAssassin/Util.pm
rules/70_testing.cf t/uri.t
Author: felicity
Date: Sun Feb 20 23:02:28 2005
New Revision: 154646
URL: http://svn.apache.org/viewcvs?view=rev&rev=154646
Log:
bug 4146: URIs with invalid chars in the host section would be missed by the uribl checks. Rework the uri canonify code, and add a test and rules for the issue.
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
spamassassin/trunk/rules/70_testing.cf
spamassassin/trunk/t/uri.t
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm?view=diff&r1=154645&r2=154646
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm Sun Feb 20 23:02:28 2005
@@ -910,17 +910,12 @@
$nuri =~ s#^(https?:)/{0,2}#$1//#i;
# http://www.foo.biz?id=3 -> http://www.foo.biz/?id=3
- $nuri =~ s@^(https?://[^/?]+)\?@$1/?@;
+ $nuri =~ s@^(https?://[^/?]+)\?@$1/?@i;
# deal with encoding of chars, this is just the set of printable
# chars minus ' ' (that is, dec 33-126, hex 21-7e)
$nuri =~ s/\&\#0*(3[3-9]|[4-9]\d|1[01]\d|12[0-6]);/sprintf "%c",$1/ge;
- $nuri =~ s/\&\#x0*(2[1-9]|[3-6][a-f0-9]|7[0-9a-e]);/sprintf "%c",hex($1)/gei;
-
- # deal with the %## encoding if necessary
- if ($nuri =~ /\%[0-9a-fA-F]{2}/) {
- $nuri = Mail::SpamAssassin::Util::url_encode($nuri);
- }
+ $nuri =~ s/\&\#x0*(2[1-9]|[3-6][a-fA-F0-9]|7[0-9a-eA-E]);/sprintf "%c",hex($1)/ge;
# put the new URI on the new list if it's different
if ($nuri ne $uri) {
@@ -929,11 +924,35 @@
# deal with weird hostname parts, remove user/pass, etc.
if ($nuri =~ m{^(https?://)([^/]+)(\/.*)?$}i) {
- my ($proto, $host, $rest) = ($1,$2,$3);
+ my($proto, $host, $rest) = ($1,$2,$3);
# not required
$rest ||= '';
+ # bug 4146: strip control chars, spaces and 8-bit chars from the host
+ # portion of the URI. RFC 1738 does not allow them there, and the tested
+ # browsers (Firefox, IE) remove them before usage...
+ if ($host =~ tr/\000-\040\200-\377//d) {
+ push(@nuris, join ('', $proto, $host, $rest));
+ }
+
+ # deal with the %## encoding if necessary
+ # only worry about decoding stuff as an obfuscation technique?
+ # encoding isn't allowed in anything but $rest, so just deal with it
+ # there.
+ if ($rest =~ /\%[0-9a-fA-F]{2}/) {
+ $rest = Mail::SpamAssassin::Util::url_encode($rest);
+ push(@nuris, join ('', $proto, $host, $rest));
+ }
+
+ # deal with http redirectors. strip off one level of redirector
+ # and add back to the array. the foreach loop will go over those
+ # and deal appropriately.
+ # bug 3308: redirectors like yahoo only need one '/' ... <grrr>
+ if ($rest =~ m{(https?:/{0,2}.+)$}i) {
+ push(@uris, $1);
+ }
+
########################
## TVD: known issue, if host has multiple combinations of the following,
## all permutations will be put onto @nuris. shouldn't be an issue.
@@ -952,6 +971,7 @@
if ($host =~ s/[^0-9A-Za-z]+$//) {
push(@nuris, join ('', $proto, $host, $rest));
}
+
########################
# deal with 'http://213.172.0x1f.13/', decode encoded octets
@@ -974,14 +994,7 @@
elsif ($host =~ /^[0-9]+$/) {
push(@nuris, join ('', $proto, decode_ulong_to_ip($host), $rest));
}
- }
- # deal with http redirectors. strip off one level of redirector
- # and add back to the array. the foreach loop will go over those
- # and deal appropriately.
- # bug 3308: redirectors like yahoo only need one '/' ... <grrr>
- if ($nuri =~ m{^https?://.+?(https?:/{0,2}.+)$}i) {
- push(@uris, $1);
}
}
Modified: spamassassin/trunk/rules/70_testing.cf
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/rules/70_testing.cf?view=diff&r1=154645&r2=154646
==============================================================================
--- spamassassin/trunk/rules/70_testing.cf (original)
+++ spamassassin/trunk/rules/70_testing.cf Sun Feb 20 23:02:28 2005
@@ -381,3 +381,6 @@
header __RATWARE_NAME_ID eval:check_ratware_name_id()
meta T_RATWARE_NAME_ID __RATWARE_0_TZ_DATE && __RATWARE_NAME_ID
+
+uri T_HTTP_BAD_HOST_CHAR m@^https?://[^/]*[\000-\040\200-\377]@i
+uri T_HTTP_BAD_HOST_CTRL m@^https?://[^/]*[\000-\037]@i
Modified: spamassassin/trunk/t/uri.t
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/t/uri.t?view=diff&r1=154645&r2=154646
==============================================================================
--- spamassassin/trunk/t/uri.t (original)
+++ spamassassin/trunk/t/uri.t Sun Feb 20 23:02:28 2005
@@ -125,6 +125,7 @@
]));
ok(try_canon(['http://images.google.ca/imgres?imgurl=gmib.free.fr/viagra.jpg&imgrefurl=http://www.google.com/url?q=http://www.google.com/url?q=%68%74%74%70%3A%2F%2F%77%77%77%2E%65%78%70%61%67%65%2E%63%6F%6D%2F%6D%61%6E%67%65%72%33%32'],
+
[
'http://images.google.ca/imgres?imgurl=gmib.free.fr/viagra.jpg&imgrefurl=http://www.google.com/url?q=http://www.google.com/url?q=%68%74%74%70%3A%2F%2F%77%77%77%2E%65%78%70%61%67%65%2E%63%6F%6D%2F%6D%61%6E%67%65%72%33%32',
'http://images.google.ca/imgres?imgurl=gmib.free.fr/viagra.jpg&imgrefurl=http://www.google.com/url?q=http://www.google.com/url?q=http://www.expage.com/manger32',
@@ -132,6 +133,10 @@
'http://www.google.com/url?q=http://www.expage.com/manger32',
'http://www.google.com/url?q=http://www.google.com/url?q=http://www.expage.com/manger32',
]));
+
+ok(try_canon(["http://www.kl\nuge.n\net/"],
+ ['http://www.kluge.net/']
+ ));
##############################################