You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2004/05/07 06:36:43 UTC
svn commit: rev 10552 - in incubator/spamassassin/trunk: lib/Mail/SpamAssassin t t/data/spam
Author: jm
Date: Thu May 6 21:36:42 2004
New Revision: 10552
Modified:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
incubator/spamassassin/trunk/t/data/spam/009
incubator/spamassassin/trunk/t/uri.t
Log:
spam spotted in wild evading URIBL, so deal with several URI obfuscations: http://0x425c45de/, http://66.92.0x45.221/, http://1113343455/, http://slashdot.org@1113343456/ in get_uri_list
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm Thu May 6 21:36:42 2004
@@ -792,6 +792,40 @@
$nuri =~ s/\&\#0*(3[3-9]|[4-9]\d|1[01]\d|12[0-6]);/sprintf "%c",$1/ge;
$nuri =~ s/\&\#x0*(2[1-9]|[3-6][a-f0-9]|7[0-9a-e]);/sprintf "%c",hex($1)/gei;
+ # deal with weird hostname parts
+ if ($nuri =~ /^(https?:\/\/)([^\/]+)(\.?\/.*)$/i) {
+ my ($proto, $host, $rest) = ($1,$2,$3);
+
+ # remove "www.fakehostname.com@" username part
+ $host =~ s/^[^\@]+\@//gs;
+
+ # deal with 'http://213.172.0x1f.13/'; decode encoded octets
+ if ($host =~ /^([0-9a-fx]*\.)([0-9a-fx]*\.)([0-9a-fx]*\.)([0-9a-fx]*)$/ix)
+ {
+ my (@chunk) = ($1,$2,$3,$4);
+ for my $octet (0 .. 3) {
+ $chunk[$octet] =~ s/^0x([0-9a-f][0-9a-f])/sprintf "%d",hex($1)/gei;
+ }
+ my $parsed = join ('', $proto, @chunk, $rest);
+ if ($parsed ne $nuri) { push(@nuris, $parsed); }
+ }
+
+ # "http://0x7f000001/"
+ if ($host =~ /^0x[0-9a-f]+$/i) {
+ $host =~ s/^0x([0-9a-f]+)/sprintf "%d",hex($1)/gei;
+ $host = decode_ulong_to_ip ($host);
+ my $parsed = join ('', $proto, $host, $rest);
+ push(@nuris, $parsed);
+ }
+
+ # "http://1113343453/"
+ if ($host =~ /^[0-9]+$/) {
+ $host = decode_ulong_to_ip ($host);
+ my $parsed = join ('', $proto, $host, $rest);
+ push(@nuris, $parsed);
+ }
+ }
+
($nuri) = Mail::SpamAssassin::Util::url_encode($nuri);
if ($nuri ne $uri) {
push(@nuris, $nuri);
@@ -810,6 +844,16 @@
my %uris = map { $_ => 1 } @uris, @nuris;
return keys %uris;
+}
+
+sub decode_ulong_to_ip { # Render an integer as dotted-quad text ("a.b.c.d") built from its low four bytes.
+ my ($ulong) = @_; # $ulong: the address packed into a single integer value
+ my @octets = (); # filled from the front, so the final order is highest byte first
+ unshift (@octets, $ulong & 0xff); $ulong >>= 8; # lowest byte; ends up as the last octet
+ unshift (@octets, $ulong & 0xff); $ulong >>= 8; # next byte up
+ unshift (@octets, $ulong & 0xff); $ulong >>= 8; # next byte up
+ unshift (@octets, $ulong & 0xff); # fourth byte; becomes the first octet, any higher bits are ignored
+ return join (".", @octets); # returns the four decimal octets joined with "."
}
###########################################################################
Modified: incubator/spamassassin/trunk/t/data/spam/009
==============================================================================
Binary files. No diff available.
Modified: incubator/spamassassin/trunk/t/uri.t
==============================================================================
--- incubator/spamassassin/trunk/t/uri.t (original)
+++ incubator/spamassassin/trunk/t/uri.t Thu May 6 21:36:42 2004
@@ -21,7 +21,7 @@
use Mail::SpamAssassin::HTML;
use Mail::SpamAssassin::Util;
-plan tests => 57;
+plan tests => 62;
##############################################
@@ -41,7 +41,14 @@
my $msg = Mail::SpamAssassin::PerMsgStatus->new($sa, $mail);
my @uris = $msg->get_uri_list();
-ok((@uris == 1) && ($uris[0] eq 'http://62.16.101.59/livesex.htm'));
+print "got URIs: ".join (", ", @uris)."\n";
+ok (@uris >= 5);
+my %urimap = map { $_ => 1 } @uris;
+ok ($urimap{'http://62.16.101.59/livesex.htm'});
+ok ($urimap{'http://66.92.69.221/'});
+ok ($urimap{'http://66.92.69.222/'});
+ok ($urimap{'http://66.92.69.223/'});
+ok ($urimap{'http://66.92.69.224/'});
##############################################
@@ -54,7 +61,12 @@
return !defined $result;
}
- return $expect eq $result;
+ if ($expect eq $result) {
+ return 1;
+ } else {
+ warn "try_domains: failed! expect: '$expect' got: '$result'\n";
+ return 0;
+ }
}
ok(try_domains('javascript:{some crap}', undef));
Re: svn commit: rev 10552 - in incubator/spamassassin/trunk: lib/Mail/SpamAssassin t t/data/spam
Posted by Theo Van Dinter <fe...@kluge.net>.
On Fri, May 07, 2004 at 11:28:50AM -0700, Justin Mason wrote:
> I'm assuming that, like an encoded URL, it should dupe to both an
> @ version and an undecoded version.
Hrm. Ok. We should probably do full decoding first, then add that to
the queue, then remove the username part and add that to the queue.
My original idea was that we'd have just the raw and "cooked" URIs
returned, and leave everything else up to calling functions. But I can
see the argument about the username bit.
> sure, hack away ;) looks likely to be better alright.
I'll do a few timings and see what falls out. :) It's not like this
function is going to be called a lot, but ... ;)
--
Randomly Generated Tagline:
"Fruit, what's that?
It's a thing you eat when you feel like you need to have fruit."
- Dave McClelland and Chris Smith
Re: svn commit: rev 10552 - in incubator/spamassassin/trunk: lib/Mail/SpamAssassin t t/data/spam
Posted by Theo Van Dinter <fe...@kluge.net>.
On Fri, May 07, 2004 at 04:36:43AM -0000, jm@apache.org wrote:
> + # remove "www.fakehostname.com@" username part
> + $host =~ s/^[^\@]+\@//gs;
IMO, get_uri_list shouldn't remove that; that should be up to whoever's calling.
> + # "http://0x7f000001/"
does http://00000x7f000001/ work? Just curious.
> +sub decode_ulong_to_ip {
> + my ($ulong) = @_;
> + my @octets = ();
> + unshift (@octets, $ulong & 0xff); $ulong >>= 8;
> + unshift (@octets, $ulong & 0xff); $ulong >>= 8;
> + unshift (@octets, $ulong & 0xff); $ulong >>= 8;
> + unshift (@octets, $ulong & 0xff);
> + return join (".", @octets);
I don't know which is more efficient, but I always used (converted for
this function):
@octets = unpack("CCCC",pack("H*", sprintf "%08lx", $ulong));
--
Randomly Generated Tagline:
When I die, I would like to go peacefully, in my sleep, like my
Grandfather did. Not screaming and yelling like the passenger in his car.