You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2007/02/13 18:05:28 UTC

svn commit: r507102 - in /spamassassin/trunk: lib/Mail/SpamAssassin/Constants.pm lib/Mail/SpamAssassin/HTML.pm lib/Mail/SpamAssassin/PerMsgStatus.pm lib/Mail/SpamAssassin/Plugin/URIEval.pm rules/20_body_tests.cf

Author: felicity
Date: Tue Feb 13 09:05:27 2007
New Revision: 507102

URL: http://svn.apache.org/viewvc?view=rev&rev=507102
Log:
bug 5318: set a maximum internal length for URIs

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIEval.pm
    spamassassin/trunk/rules/20_body_tests.cf

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm?view=diff&rev=507102&r1=507101&r2=507102
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm Tue Feb 13 09:05:27 2007
@@ -38,7 +38,7 @@
 	HARVEST_DNSBL_PRIORITY MBX_SEPARATOR
 	MAX_BODY_LINE_LENGTH MAX_HEADER_KEY_LENGTH MAX_HEADER_VALUE_LENGTH
 	MAX_HEADER_LENGTH ARITH_EXPRESSION_LEXER AI_TIME_UNKNOWN
-	CHARSETS_LIKELY_TO_FP_AS_CAPS
+	CHARSETS_LIKELY_TO_FP_AS_CAPS MAX_URI_LENGTH
 );
 
 %EXPORT_TAGS = (
@@ -282,6 +282,9 @@
 use constant MAX_HEADER_VALUE_LENGTH => 8192;
 # maximum byte length of entire header
 use constant MAX_HEADER_LENGTH => 65536;
+
+# maximum byte length of any given URI
+use constant MAX_URI_LENGTH => 1024;
 
 # used for meta rules and "if" conditionals in Conf::Parser
 use constant ARITH_EXPRESSION_LEXER => qr/(?:

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm?view=diff&rev=507102&r1=507101&r2=507102
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm Tue Feb 13 09:05:27 2007
@@ -25,6 +25,7 @@
 
 use HTML::Parser 3.43 ();
 use Mail::SpamAssassin::Logger;
+use Mail::SpamAssassin::Constants qw(:sa);
 
 use vars qw($re_loose $re_strict $re_other @ISA @EXPORT @EXPORT_OK);
 
@@ -134,6 +135,7 @@
   $self->put_results(anchor => $self->{anchor});
 
   $self->put_results(uri_detail => $self->{uri});
+  $self->put_results(uri_truncated => $self->{uri_truncated});
 
   # final results scalars
   $self->put_results(image_area => $self->{image_area});
@@ -313,9 +315,7 @@
 sub push_uri {
   my ($self, $type, $uri) = @_;
 
-  # URIs don't have leading/trailing whitespace ...
-  $uri =~ s/^\s+//;
-  $uri =~ s/\s+$//;
+  $uri = $self->canon_uri($uri);
 
   my $target = target_uri($self->{base_href} || "", $uri);
 
@@ -325,6 +325,22 @@
   }
 }
 
+sub canon_uri {
+  my ($self, $uri) = @_;
+
+  # URIs don't have leading/trailing whitespace ...
+  $uri =~ s/^\s+//;
+  $uri =~ s/\s+$//;
+
+  # Make sure all the URIs are nice and short
+  if (length $uri > MAX_URI_LENGTH) {
+    $self->{'uri_truncated'} = 1;
+    $uri = substr $uri, 0, MAX_URI_LENGTH;
+  }
+
+  return $uri;
+}
+
 sub html_uri {
   my ($self, $tag, $attr) = @_;
 
@@ -351,6 +367,8 @@
   }
   elsif ($tag eq "base") {
     if (my $uri = $attr->{href}) {
+      $uri = $self->canon_uri($uri);
+
       # use <BASE HREF="URI"> to turn relative links into absolute links
 
       # even if it is a base URI, handle like a normal URI as well
@@ -623,7 +641,7 @@
 
   # special text delimiters - <a> and <title>
   if ($tag eq "a") {
-    $self->{anchor_last} = (exists $attr->{href} ? $attr->{href} : "");
+    $self->{anchor_last} = (exists $attr->{href} ? $self->canon_uri($attr->{href}) : "");
     push(@{$self->{uri}->{$self->{anchor_last}}->{anchor_text}}, '');
     push(@{$self->{anchor}}, '');
   }

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?view=diff&rev=507102&r1=507101&r2=507102
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Tue Feb 13 09:05:27 2007
@@ -1863,6 +1863,7 @@
   # get URIs from HTML parsing
   # use the metadata version since $self->{html} may not be setup
   my $detail = $self->{msg}->{metadata}->{html}->{uri_detail} || { };
+  $self->{'uri_truncated'} = 1 if $self->{msg}->{metadata}->{html}->{uri_truncated};
 
   # don't keep dereferencing ...
   my $redirector_patterns = $self->{conf}->{redirector_patterns};
@@ -2002,6 +2003,14 @@
 
         #warn("uri: got URI: $uri\n");
         push @uris, $uri;
+      }
+    }
+
+    # Make sure all the URIs are nice and short
+    foreach my $uri ( @uris ) {
+      if (length $uri > MAX_URI_LENGTH) {
+        $self->{'uri_truncated'} = 1;
+        $uri = substr $uri, 0, MAX_URI_LENGTH;
       }
     }
 

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIEval.pm?view=diff&rev=507102&r1=507101&r2=507102
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIEval.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIEval.pm Tue Feb 13 09:05:27 2007
@@ -40,6 +40,7 @@
   # the important bit!
   $self->register_eval_rule("check_for_http_redirector");
   $self->register_eval_rule("check_https_ip_mismatch");
+  $self->register_eval_rule("check_uri_truncated");
 
   return $self;
 }
@@ -80,6 +81,14 @@
   }
 
   return 0;
+}
+
+###########################################################################
+
+# eval rule: true if any URI in the message was truncated (is there a better way to do this?)
+sub check_uri_truncated {
+  my ($self, $pms) = @_;
+  return $pms->{'uri_truncated'};
 }
 
 1;

Modified: spamassassin/trunk/rules/20_body_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/20_body_tests.cf?view=diff&rev=507102&r1=507101&r2=507102
==============================================================================
--- spamassassin/trunk/rules/20_body_tests.cf (original)
+++ spamassassin/trunk/rules/20_body_tests.cf Tue Feb 13 09:05:27 2007
@@ -153,8 +153,10 @@
 
 ifplugin Mail::SpamAssassin::Plugin::URIEval
 
-
 body HTTPS_IP_MISMATCH	eval:check_https_ip_mismatch()
 describe HTTPS_IP_MISMATCH	IP to HTTPS link found in HTML
+
+body URI_TRUNCATED	eval:check_uri_truncated()
+describe URI_TRUNCATED	Message contained a URI which was truncated
 
 endif