You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2007/02/13 18:04:49 UTC
svn commit: r507101 - in /spamassassin/branches/3.1:
lib/Mail/SpamAssassin/Constants.pm lib/Mail/SpamAssassin/EvalTests.pm
lib/Mail/SpamAssassin/HTML.pm lib/Mail/SpamAssassin/PerMsgStatus.pm
rules/20_body_tests.cf
Author: felicity
Date: Tue Feb 13 09:04:48 2007
New Revision: 507101
URL: http://svn.apache.org/viewvc?view=rev&rev=507101
Log:
bug 5318: set a maximum internal length for URIs
Modified:
spamassassin/branches/3.1/lib/Mail/SpamAssassin/Constants.pm
spamassassin/branches/3.1/lib/Mail/SpamAssassin/EvalTests.pm
spamassassin/branches/3.1/lib/Mail/SpamAssassin/HTML.pm
spamassassin/branches/3.1/lib/Mail/SpamAssassin/PerMsgStatus.pm
spamassassin/branches/3.1/rules/20_body_tests.cf
Modified: spamassassin/branches/3.1/lib/Mail/SpamAssassin/Constants.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/lib/Mail/SpamAssassin/Constants.pm?view=diff&rev=507101&r1=507100&r2=507101
==============================================================================
--- spamassassin/branches/3.1/lib/Mail/SpamAssassin/Constants.pm (original)
+++ spamassassin/branches/3.1/lib/Mail/SpamAssassin/Constants.pm Tue Feb 13 09:04:48 2007
@@ -38,6 +38,7 @@
META_TEST_MIN_PRIORITY HARVEST_DNSBL_PRIORITY MBX_SEPARATOR
MAX_BODY_LINE_LENGTH MAX_HEADER_KEY_LENGTH MAX_HEADER_VALUE_LENGTH
MAX_HEADER_LENGTH ARITH_EXPRESSION_LEXER AI_TIME_UNKNOWN
+ MAX_URI_LENGTH
);
%EXPORT_TAGS = (
@@ -282,6 +283,9 @@
use constant MAX_HEADER_VALUE_LENGTH => 8192;
# maximum byte length of entire header
use constant MAX_HEADER_LENGTH => 65536;
+
+# maximum byte length of any given URI
+use constant MAX_URI_LENGTH => 1024;
# used for meta rules and "if" conditionals in Conf::Parser
use constant ARITH_EXPRESSION_LEXER => qr/(?:
Modified: spamassassin/branches/3.1/lib/Mail/SpamAssassin/EvalTests.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/lib/Mail/SpamAssassin/EvalTests.pm?view=diff&rev=507101&r1=507100&r2=507101
==============================================================================
--- spamassassin/branches/3.1/lib/Mail/SpamAssassin/EvalTests.pm (original)
+++ spamassassin/branches/3.1/lib/Mail/SpamAssassin/EvalTests.pm Tue Feb 13 09:04:48 2007
@@ -3167,4 +3167,9 @@
return 0;
}
+sub check_uri_truncated {
+ my $self = shift;
+ return $self->{'uri_truncated'};
+}
+
1;
Modified: spamassassin/branches/3.1/lib/Mail/SpamAssassin/HTML.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/lib/Mail/SpamAssassin/HTML.pm?view=diff&rev=507101&r1=507100&r2=507101
==============================================================================
--- spamassassin/branches/3.1/lib/Mail/SpamAssassin/HTML.pm (original)
+++ spamassassin/branches/3.1/lib/Mail/SpamAssassin/HTML.pm Tue Feb 13 09:04:48 2007
@@ -26,6 +26,7 @@
use HTML::Parser 3.24 ();
use Mail::SpamAssassin::Logger;
+use Mail::SpamAssassin::Constants qw(:sa);
use vars qw($re_loose $re_strict $re_other @ISA @EXPORT @EXPORT_OK);
@@ -143,6 +144,7 @@
$self->put_results(anchor => $self->{anchor});
$self->put_results(uri_detail => $self->{uri});
+ $self->put_results(uri_truncated => $self->{uri_truncated});
# final results scalars
$self->put_results(image_area => $self->{image_area});
@@ -359,9 +361,7 @@
sub push_uri {
my ($self, $type, $uri) = @_;
- # URIs don't have leading/trailing whitespace ...
- $uri =~ s/^\s+//;
- $uri =~ s/\s+$//;
+ $uri = $self->canon_uri($uri);
my $target = target_uri($self->{base_href} || "", $uri);
@@ -371,6 +371,22 @@
}
}
+sub canon_uri {
+ my ($self, $uri) = @_;
+
+ # URIs don't have leading/trailing whitespace ...
+ $uri =~ s/^\s+//;
+ $uri =~ s/\s+$//;
+
+ # Make sure all the URIs are nice and short
+ if (length $uri > MAX_URI_LENGTH) {
+ $self->{'uri_truncated'} = 1;
+ $uri = substr $uri, 0, MAX_URI_LENGTH;
+ }
+
+ return $uri;
+}
+
sub html_uri {
my ($self, $tag, $attr) = @_;
@@ -397,6 +413,8 @@
}
elsif ($tag eq "base") {
if (my $uri = $attr->{href}) {
+ $uri = $self->canon_uri($uri);
+
# use <BASE HREF="URI"> to turn relative links into absolute links
# even if it is a base URI, handle like a normal URI as well
@@ -690,7 +708,7 @@
# special text delimiters - <a> and <title>
if ($tag eq "a") {
- $self->{anchor_last} = (exists $attr->{href} ? $attr->{href} : "");
+ $self->{anchor_last} = (exists $attr->{href} ? $self->canon_uri($attr->{href}) : "");
push(@{$self->{uri}->{$self->{anchor_last}}->{anchor_text}}, '');
push(@{$self->{anchor}}, '');
}
Modified: spamassassin/branches/3.1/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/lib/Mail/SpamAssassin/PerMsgStatus.pm?view=diff&rev=507101&r1=507100&r2=507101
==============================================================================
--- spamassassin/branches/3.1/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/branches/3.1/lib/Mail/SpamAssassin/PerMsgStatus.pm Tue Feb 13 09:04:48 2007
@@ -2009,6 +2009,7 @@
# get URIs from HTML parsing
# use the metadata version since $self->{html} may not be setup
my $detail = $self->{msg}->{metadata}->{html}->{uri_detail} || { };
+ $self->{'uri_truncated'} = 1 if $self->{msg}->{metadata}->{html}->{uri_truncated};
# don't keep dereferencing ...
my $redirector_patterns = $self->{conf}->{redirector_patterns};
@@ -2143,6 +2144,14 @@
#warn("uri: got URI: $uri\n");
push @uris, $uri;
+ }
+ }
+
+ # Make sure all the URIs are nice and short
+ foreach my $uri ( @uris ) {
+ if (length $uri > MAX_URI_LENGTH) {
+ $self->{'uri_truncated'} = 1;
+ $uri = substr $uri, 0, MAX_URI_LENGTH;
}
}
Modified: spamassassin/branches/3.1/rules/20_body_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/rules/20_body_tests.cf?view=diff&rev=507101&r1=507100&r2=507101
==============================================================================
--- spamassassin/branches/3.1/rules/20_body_tests.cf (original)
+++ spamassassin/branches/3.1/rules/20_body_tests.cf Tue Feb 13 09:04:48 2007
@@ -148,3 +148,6 @@
describe INTERRUPTUS Message looks to contain HTML-interrupted text
body MULTIPART_ALT_NON_TEXT eval:check_ma_non_text()
+
+body URI_TRUNCATED eval:check_uri_truncated()
+describe URI_TRUNCATED Message contained a URI which was truncated