You are viewing a plain text version of this content. The canonical version is the original message in the mailing list archive for the list named below.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2007/02/13 18:05:28 UTC
svn commit: r507102 - in /spamassassin/trunk:
lib/Mail/SpamAssassin/Constants.pm lib/Mail/SpamAssassin/HTML.pm
lib/Mail/SpamAssassin/PerMsgStatus.pm
lib/Mail/SpamAssassin/Plugin/URIEval.pm rules/20_body_tests.cf
Author: felicity
Date: Tue Feb 13 09:05:27 2007
New Revision: 507102
URL: http://svn.apache.org/viewvc?view=rev&rev=507102
Log:
bug 5318: set a maximum internal length for URIs
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm
spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm
spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIEval.pm
spamassassin/trunk/rules/20_body_tests.cf
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm?view=diff&rev=507102&r1=507101&r2=507102
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm Tue Feb 13 09:05:27 2007
@@ -38,7 +38,7 @@
HARVEST_DNSBL_PRIORITY MBX_SEPARATOR
MAX_BODY_LINE_LENGTH MAX_HEADER_KEY_LENGTH MAX_HEADER_VALUE_LENGTH
MAX_HEADER_LENGTH ARITH_EXPRESSION_LEXER AI_TIME_UNKNOWN
- CHARSETS_LIKELY_TO_FP_AS_CAPS
+ CHARSETS_LIKELY_TO_FP_AS_CAPS MAX_URI_LENGTH
);
%EXPORT_TAGS = (
@@ -282,6 +282,9 @@
use constant MAX_HEADER_VALUE_LENGTH => 8192;
# maximum byte length of entire header
use constant MAX_HEADER_LENGTH => 65536;
+
+# maximum byte length of any given URI
+use constant MAX_URI_LENGTH => 1024;
# used for meta rules and "if" conditionals in Conf::Parser
use constant ARITH_EXPRESSION_LEXER => qr/(?:
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm?view=diff&rev=507102&r1=507101&r2=507102
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm Tue Feb 13 09:05:27 2007
@@ -25,6 +25,7 @@
use HTML::Parser 3.43 ();
use Mail::SpamAssassin::Logger;
+use Mail::SpamAssassin::Constants qw(:sa);
use vars qw($re_loose $re_strict $re_other @ISA @EXPORT @EXPORT_OK);
@@ -134,6 +135,7 @@
$self->put_results(anchor => $self->{anchor});
$self->put_results(uri_detail => $self->{uri});
+ $self->put_results(uri_truncated => $self->{uri_truncated});
# final results scalars
$self->put_results(image_area => $self->{image_area});
@@ -313,9 +315,7 @@
sub push_uri {
my ($self, $type, $uri) = @_;
- # URIs don't have leading/trailing whitespace ...
- $uri =~ s/^\s+//;
- $uri =~ s/\s+$//;
+ $uri = $self->canon_uri($uri);
my $target = target_uri($self->{base_href} || "", $uri);
@@ -325,6 +325,22 @@
}
}
+sub canon_uri {
+ my ($self, $uri) = @_;
+
+ # URIs don't have leading/trailing whitespace ...
+ $uri =~ s/^\s+//;
+ $uri =~ s/\s+$//;
+
+ # Make sure all the URIs are nice and short
+ if (length $uri > MAX_URI_LENGTH) {
+ $self->{'uri_truncated'} = 1;
+ $uri = substr $uri, 0, MAX_URI_LENGTH;
+ }
+
+ return $uri;
+}
+
sub html_uri {
my ($self, $tag, $attr) = @_;
@@ -351,6 +367,8 @@
}
elsif ($tag eq "base") {
if (my $uri = $attr->{href}) {
+ $uri = $self->canon_uri($uri);
+
# use <BASE HREF="URI"> to turn relative links into absolute links
# even if it is a base URI, handle like a normal URI as well
@@ -623,7 +641,7 @@
# special text delimiters - <a> and <title>
if ($tag eq "a") {
- $self->{anchor_last} = (exists $attr->{href} ? $attr->{href} : "");
+ $self->{anchor_last} = (exists $attr->{href} ? $self->canon_uri($attr->{href}) : "");
push(@{$self->{uri}->{$self->{anchor_last}}->{anchor_text}}, '');
push(@{$self->{anchor}}, '');
}
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?view=diff&rev=507102&r1=507101&r2=507102
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Tue Feb 13 09:05:27 2007
@@ -1863,6 +1863,7 @@
# get URIs from HTML parsing
# use the metadata version since $self->{html} may not be setup
my $detail = $self->{msg}->{metadata}->{html}->{uri_detail} || { };
+ $self->{'uri_truncated'} = 1 if $self->{msg}->{metadata}->{html}->{uri_truncated};
# don't keep dereferencing ...
my $redirector_patterns = $self->{conf}->{redirector_patterns};
@@ -2002,6 +2003,14 @@
#warn("uri: got URI: $uri\n");
push @uris, $uri;
+ }
+ }
+
+ # Make sure all the URIs are nice and short
+ foreach my $uri ( @uris ) {
+ if (length $uri > MAX_URI_LENGTH) {
+ $self->{'uri_truncated'} = 1;
+ $uri = substr $uri, 0, MAX_URI_LENGTH;
}
}
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIEval.pm?view=diff&rev=507102&r1=507101&r2=507102
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIEval.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIEval.pm Tue Feb 13 09:05:27 2007
@@ -40,6 +40,7 @@
# the important bit!
$self->register_eval_rule("check_for_http_redirector");
$self->register_eval_rule("check_https_ip_mismatch");
+ $self->register_eval_rule("check_uri_truncated");
return $self;
}
@@ -80,6 +81,14 @@
}
return 0;
+}
+
+###########################################################################
+
+# is there a better way to do this?
+sub check_uri_truncated {
+ my ($self, $pms) = @_;
+ return $pms->{'uri_truncated'};
}
1;
Modified: spamassassin/trunk/rules/20_body_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/trunk/rules/20_body_tests.cf?view=diff&rev=507102&r1=507101&r2=507102
==============================================================================
--- spamassassin/trunk/rules/20_body_tests.cf (original)
+++ spamassassin/trunk/rules/20_body_tests.cf Tue Feb 13 09:05:27 2007
@@ -153,8 +153,10 @@
ifplugin Mail::SpamAssassin::Plugin::URIEval
-
body HTTPS_IP_MISMATCH eval:check_https_ip_mismatch()
describe HTTPS_IP_MISMATCH IP to HTTPS link found in HTML
+
+body URI_TRUNCATED eval:check_uri_truncated()
+describe URI_TRUNCATED Message contained a URI which was truncated
endif