You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2004/01/24 19:36:48 UTC

svn commit: rev 6260 - incubator/spamassassin/trunk/lib/Mail/SpamAssassin

Author: felicity
Date: Sat Jan 24 10:36:47 2004
New Revision: 6260

Modified:
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
Log:
Bug 2956: URI tests weren't matching where they should when URIs were encoded improperly.  We now re-encode each URI we find using the correct encoding and check the re-encoded form too.

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm	Sat Jan 24 10:36:47 2004
@@ -1442,6 +1442,8 @@
 sub get_uri_list {
   my ($self) = @_;
 
+  $self->{found_bad_uri_encoding} = 0;
+
   my $textary = $self->get_decoded_body_text_array();
   my ($rulename, $pat, @uris);
   local ($_);
@@ -1491,6 +1493,15 @@
 
       #warn("Got URI: $uri\n");
       push @uris, $uri;
+    }
+  }
+
+  # Make sure we catch bad encoding tricks ...
+  foreach my $uri ( @uris ) {
+    my $nuri = Mail::SpamAssassin::Util::URLEncode($uri);
+    if ( $nuri ne $uri ) {
+      push(@uris, $nuri);
+      $self->{found_bad_uri_encoding} = 1;
     }
   }
 

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm	Sat Jan 24 10:36:47 2004
@@ -584,12 +584,40 @@
   # Get the type out ...
   $ct =~ s/;.*$//;                    # strip everything after first semi-colon
   $ct =~ s@^([^/]+(?:/[^/]*)?).*$@$1@;	# only something/something ...
-  $ct =~ tr!\000-\040\177-\377\042\050\051\054\056\072-\077\100\133-\135!!d;    # strip inappropriate chars
+  $ct =~ tr/\000-\040\177-\377\042\050\051\054\056\072-\077\100\133-\135//d;    # strip inappropriate chars
 
   return wantarray ? ($ct,$boundary) : $ct;
 }
 
 ###########################################################################
+
+sub URLEncode {				# Normalise the %XX escapes in $url; returns the new string.
+    my($url)=@_;
+    my(@characters)=split(/(\%[0-9a-fA-F]{2})/,$url);	# capturing split: each %XX escape becomes its own element
+
+    foreach(@characters) {			# $_ aliases the element, so s/// edits @characters in place
+	if ( /\%[0-9a-fA-F]{2}/ ) {		# Element is a %XX escape ...
+	    # IF it encodes a byte in the range 0x00-0x20 or 0x7f-0xff,
+	    #    or one of  <  >  "  #  %  ;  /  ?  :  @  =  &
+	    #    (hex 22 23 25 26 2f 3a-3f 40),
+	    # THEN keep it escaped; any other escape is decoded to its literal character.
+	    unless ( /(20|7f|[0189a-fA-F][0-9a-fA-F])/i
+		    || /2[2356fF]|3[a-fA-F]|40/i )
+	    {
+		s/\%([2-7][0-9a-fA-F])/sprintf "%c",hex($1)/e;	# e.g. "%41" becomes "A"
+	    }
+	}
+	else {					# Literal text between escapes
+	    # Escape 0x00-0x20, 0x7f-0xff, <, > and " as lowercase %xx; a "%" that reaches here is left as is.
+	    s/([\000-\040\177-\377\074\076\042])
+	     /sprintf "%%%02x",unpack("C",$1)/egx;
+	}
+    }
+    return join("",@characters);		# reassemble the pieces in their original order
+}
+
+###########################################################################
+
 sub dbg { Mail::SpamAssassin::dbg (@_); }
 
 1;