You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2004/08/04 06:13:21 UTC
svn commit: rev 35665 - spamassassin/branches/b2_6_0/lib/Mail/SpamAssassin
Author: felicity
Date: Tue Aug 3 21:13:21 2004
New Revision: 35665
Modified:
spamassassin/branches/b2_6_0/lib/Mail/SpamAssassin/Bayes.pm
spamassassin/branches/b2_6_0/lib/Mail/SpamAssassin/PerMsgStatus.pm
Log:
Patch to truncate overly long headers; this fixes some performance issues on certain messages.
Modified: spamassassin/branches/b2_6_0/lib/Mail/SpamAssassin/Bayes.pm
==============================================================================
--- spamassassin/branches/b2_6_0/lib/Mail/SpamAssassin/Bayes.pm (original)
+++ spamassassin/branches/b2_6_0/lib/Mail/SpamAssassin/Bayes.pm Tue Aug 3 21:13:21 2004
@@ -137,6 +137,15 @@
use constant HDRS_TOKENIZE_LONG_TOKENS_AS_SKIPS => 1;
use constant BODY_TOKENIZE_LONG_TOKENS_AS_SKIPS => 1;
+# maximum byte length of a header key
+use constant MAX_HEADER_KEY_LENGTH => 256;
+
+# maximum byte length of a header value including continued lines
+use constant MAX_HEADER_VALUE_LENGTH => 8192;
+
+# maximum byte length of entire header
+use constant MAX_HEADER_LENGTH => 65536;
+
# We store header-mined tokens in the db with a "HHeaderName:val" format.
# some headers may contain lots of gibberish tokens, so allow a little basic
# compression by mapping the header name at least here. these are the headers
@@ -432,7 +441,28 @@
sub tokenize_headers {
my ($self, $msg) = @_;
- my $hdrs = $msg->get_all_headers();
+ my @hdrs = ();
+ my $length = 0;
+
+ my $hdr;
+ foreach $hdr ($msg->get_all_headers()) {
+ last if ($length + length($hdr) > MAX_HEADER_LENGTH);
+
+ my($key, $value) = split(/:/, $hdr, 2);
+
+ # limit the length of the pairs we store
+ if (length($key) > MAX_HEADER_KEY_LENGTH) {
+ $key = substr($key, 0, MAX_HEADER_KEY_LENGTH);
+ }
+ if (length($value) > MAX_HEADER_VALUE_LENGTH) {
+ $value = substr($value, 0, MAX_HEADER_VALUE_LENGTH);
+ }
+ push(@hdrs, "$key:$value");
+ $length += length "$key:$value";
+ }
+
+ my $hdrs = join('', @hdrs);
+ undef @hdrs;
# jm: do not learn additional metadata (X-Languages, X-Relays-Untrusted)
# until we can generate that while running sa-learn. TODO
Modified: spamassassin/branches/b2_6_0/lib/Mail/SpamAssassin/PerMsgStatus.pm
==============================================================================
--- spamassassin/branches/b2_6_0/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/branches/b2_6_0/lib/Mail/SpamAssassin/PerMsgStatus.pm Tue Aug 3 21:13:21 2004
@@ -48,6 +48,16 @@
use constant MAX_BODY_LINE_LENGTH => 2048;
+# maximum byte length of a header key
+use constant MAX_HEADER_KEY_LENGTH => 256;
+
+# maximum byte length of a header value including continued lines
+use constant MAX_HEADER_VALUE_LENGTH => 8192;
+
+# maximum byte length of entire header
+use constant MAX_HEADER_LENGTH => 65536;
+
+
use vars qw{
@ISA $base64alphabet
};
@@ -1314,7 +1324,26 @@
my $getraw = ($hdrname eq 'ALL' || $hdrname =~ s/:raw$//);
if ($hdrname eq 'ALL') {
- $_ = $self->{msg}->get_all_headers();
+ my @hdrs = ();
+ my $length = 0;
+
+ my $hdr;
+ foreach $hdr ($self->{msg}->get_all_headers()) {
+ last if ($length + length($hdr) > MAX_HEADER_LENGTH);
+
+ my($key, $value) = split(/:/, $hdr, 2);
+ # limit the length of the pairs we store
+ if (length($key) > MAX_HEADER_KEY_LENGTH) {
+ $key = substr($key, 0, MAX_HEADER_KEY_LENGTH);
+ }
+ if (length($value) > MAX_HEADER_VALUE_LENGTH) {
+ $value = substr($value, 0, MAX_HEADER_VALUE_LENGTH);
+ }
+ push(@hdrs, "$key:$value");
+ $length += length "$key:$value";
+ }
+
+ $_ = join('', @hdrs);
}
# ToCc: the combined recipients list
elsif ($hdrname eq 'ToCc') {
@@ -1324,7 +1353,14 @@
$_ .= ", " if /\S/;
}
$_ .= join ("\n", $self->{msg}->get_header ('Cc'));
- undef $_ if $_ eq '';
+ if ($_ eq '') {
+ undef $_;
+ }
+ else {
+ if (length($_) > MAX_HEADER_VALUE_LENGTH) {
+ $_ = substr($_, 0, MAX_HEADER_VALUE_LENGTH);
+ }
+ }
}
# MESSAGEID: handle lists which move the real message-id to another
# header for resending.
@@ -1334,12 +1370,18 @@
$self->{msg}->get_header ('Resent-Message-Id'),
$self->{msg}->get_header ('X-Original-Message-ID'), # bug 2122
$self->{msg}->get_header ('Message-Id'));
+ if (length($_) > MAX_HEADER_VALUE_LENGTH) {
+ $_ = substr($_, 0, MAX_HEADER_VALUE_LENGTH);
+ }
}
# a conventional header
else {
my @hdrs = $self->{msg}->get_header ($hdrname);
if ($#hdrs >= 0) {
$_ = join ("\n", @hdrs);
+ if (length($_) > MAX_HEADER_VALUE_LENGTH) {
+ $_ = substr($_, 0, MAX_HEADER_VALUE_LENGTH);
+ }
}
else {
$_ = undef;