You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2004/01/25 21:06:34 UTC

svn commit: rev 6282 - incubator/spamassassin/trunk/lib/Mail/SpamAssassin

Author: felicity
Date: Sun Jan 25 12:06:33 2004
New Revision: 6282

Modified:
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
Log:
Potentially mark when a bad URI encoding occurs.

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm	Sun Jan 25 12:06:33 2004
@@ -1414,7 +1414,7 @@
 sub get_uri_list {
   my ($self) = @_;
 
-  $self->{found_bad_uri_encoding} = 0;
+  #$self->{found_bad_uri_encoding} = 0;
 
   my $textary = $self->get_decoded_body_text_array();
   my ($rulename, $pat, @uris);
@@ -1470,10 +1470,17 @@
 
   # Make sure we catch bad encoding tricks ...
   foreach my $uri ( @uris ) {
-    my $nuri = Mail::SpamAssassin::Util::URLEncode($uri);
+    next if ( $uri =~ /^mailto:/i );
+
+    my($nuri, $unencoded, $encoded) = Mail::SpamAssassin::Util::URLEncode($uri);
     if ( $nuri ne $uri ) {
       push(@uris, $nuri);
-      $self->{found_bad_uri_encoding} = 1;
+
+      # allow some unencodings to be ok ...
+      # This is essentially HTTP_EXCESSIVE_ESCAPES ...
+      #if ( $unencoded =~ /[a-zA-Z0-9\/]/ ) {
+      #  $self->{found_bad_uri_encoding} = 1;
+      #}
     }
   }
 

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm	Sun Jan 25 12:06:33 2004
@@ -594,6 +594,8 @@
 sub URLEncode {
     my($url)=@_;
     my(@characters)=split(/(\%[0-9a-fA-F]{2})/,$url);
+    my(@unencoded) = ();
+    my(@encoded) = ();
 
     foreach(@characters) {
 	if ( /\%[0-9a-fA-F]{2}/ ) {		# Escaped character set ...
@@ -605,15 +607,21 @@
 		    || /2[2356fF]|3[a-fA-F]|40/i )
 	    {
 		s/\%([2-7][0-9a-fA-F])/sprintf "%c",hex($1)/e;
+		push(@unencoded, $_);
 	    }
 	}
 	else {					# Other stuff
 	    # 0x00-0x20, 0x7f-0xff, <, >, and " ... "
 	    s/([\000-\040\177-\377\074\076\042])
-	     /sprintf "%%%02x",unpack("C",$1)/egx;
+	     /push(@encoded,$1) && sprintf "%%%02x",unpack("C",$1)/egx;
 	}
     }
-    return join("",@characters);
+    if (wantarray) {
+      return(join("",@characters), join("",@unencoded), join("",@encoded));
+    }
+    else {
+      return join("",@characters);
+    }
 }
 
 ###########################################################################