You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2004/10/06 22:13:18 UTC
svn commit: rev 53902 - in spamassassin/branches/3.0: lib/Mail/SpamAssassin t/data/spam
Author: felicity
Date: Wed Oct 6 13:13:17 2004
New Revision: 53902
Modified:
spamassassin/branches/3.0/lib/Mail/SpamAssassin/Message.pm
spamassassin/branches/3.0/lib/Mail/SpamAssassin/Util.pm
spamassassin/branches/3.0/t/data/spam/badmime2.txt
Log:
bug 3801: MUAs allow no blank line between the end of the message header and a MIME boundary, so we should too.
Modified: spamassassin/branches/3.0/lib/Mail/SpamAssassin/Message.pm
==============================================================================
--- spamassassin/branches/3.0/lib/Mail/SpamAssassin/Message.pm (original)
+++ spamassassin/branches/3.0/lib/Mail/SpamAssassin/Message.pm Wed Oct 6 13:13:17 2004
@@ -122,6 +122,7 @@
# Go through all the headers of the message
my $header = '';
+ my $boundary;
while ( my $last = shift @message ) {
if ( $last =~ /^From\s/ ) {
# mbox formated mailbox
@@ -162,8 +163,26 @@
if ( $last =~ /^[ \t]+/ ) { # if its a continuation
if ($header) {
$header .= $last; # fold continuations
+
+ # If we're currently dealing with a content-type header, and there's a
+ # boundary defined, use it. Since there could be multiple
+ # content-type headers in a message, the last one will be the one we
+ # should use, so just keep updating as they come in.
+ if ($header =~ /^content-type:\s*(\S.*)$/is) {
+ my($type,$temp_boundary) = Mail::SpamAssassin::Util::parse_content_type($1);
+ $boundary = $temp_boundary if ($type =~ /^multipart/ && defined $temp_boundary);
+ }
+
+ # Go onto the next header line, unless the next line is a
+ # multipart mime boundary, where we know we're going to stop
+ # below, so drop through for final header processing.
+ next unless (defined $boundary && $message[0] =~ /^--\Q$boundary\E(?:--|\s*$)/);
+ }
+ else {
+ # There was no previous header and this is just "out there"?
+ # Ignore it!
+ next;
}
- next;
}
# Ok, there's a header here, let's go ahead and add it in.
@@ -183,6 +202,15 @@
$self->{'truncated_header'} = 1;
}
$self->header($key, $value);
+
+ # If we're currently dealing with a content-type header, and there's a
+ # boundary defined, use it. Since there could be multiple
+ # content-type headers in a message, the last one will be the one we
+ # should use, so just keep updating as they come in.
+ if (lc $key eq 'content-type') {
+ my($type,$temp_boundary) = Mail::SpamAssassin::Util::parse_content_type($value);
+ $boundary = $temp_boundary if ($type =~ /^multipart/ && defined $temp_boundary);
+ }
}
}
@@ -190,7 +218,11 @@
$header = $last;
# Ok, we found the header/body blank line ...
- last if ( $last =~ /^\r?$/m );
+ last if ($last =~ /^\r?$/m);
+
+ # Alternatively, if a multipart MIME boundary is found in the header area
+ # (i.e. the message is malformed), exit out as well and treat it as part of the body.
+ last if (defined $boundary && $message[0] =~ /^--\Q$boundary\E(?:--|\s*$)/);
}
# Store the pristine body for later -- store as a copy since @message
Modified: spamassassin/branches/3.0/lib/Mail/SpamAssassin/Util.pm
==============================================================================
--- spamassassin/branches/3.0/lib/Mail/SpamAssassin/Util.pm (original)
+++ spamassassin/branches/3.0/lib/Mail/SpamAssassin/Util.pm Wed Oct 6 13:13:17 2004
@@ -642,21 +642,31 @@
# - only an opening double quote seems to be needed
# - non-quoted boundaries should be followed by space, ";", or end of line
# - blank boundaries seem to not work
+ #
my($boundary) = $ct =~ m!\bboundary\s*=\s*("[^"]+|[^\s";]+(?=[\s;]|$))!i;
# remove double-quotes in boundary (should only be at start and end)
+ #
$boundary =~ tr/"//d if defined $boundary;
# Parse out the charset and name, if they exist.
+ #
my($charset) = $ct =~ /\bcharset\s*=\s*["']?(.*?)["']?(?:;|$)/i;
my($name) = $ct =~ /\b(?:file)?name\s*=\s*["']?(.*?)["']?(?:;|$)/i;
- # Get the type out ...
- $ct =~ s/;.*$//; # strip everything after first semi-colon
- $ct =~ s@^([^/]+(?:/[^/]*)?).*$@$1@; # only something/something ...
+ # Get the actual MIME type out ...
+ # Note: the header content may not be whitespace unfolded, so make sure the
+ # REs do /s when appropriate.
+ #
+ $ct =~ s/;.*$//s; # strip everything after first semi-colon
+ $ct =~ s@^([^/]+(?:/[^/]*)?).*$@$1@s; # only something/something ...
$ct =~ tr/\000-\040\177-\377\042\050\051\054\056\072-\077\100\133-\135//d; # strip inappropriate chars
$ct = lc $ct;
+ # Now that the header has been parsed, return the requested information.
+ # In scalar context, return just the MIME type; in list context, return the
+ # four important data parts (type, boundary, charset, and filename).
+ #
return wantarray ? ($ct,$boundary,$charset,$name) : $ct;
}
Modified: spamassassin/branches/3.0/t/data/spam/badmime2.txt
==============================================================================
--- spamassassin/branches/3.0/t/data/spam/badmime2.txt (original)
+++ spamassassin/branches/3.0/t/data/spam/badmime2.txt Wed Oct 6 13:13:17 2004
@@ -13,6 +13,7 @@
Date: Wed, 04 Feb 2004 20:22:38 +0100
MIME-Version: 1.0
X-MimeOLE: Produced By Microsoft MimeOLE V4.72.1962.2
+Content-Type:multipart/related; boundary =bar; other=foo
Content-Type: multipart/alternative;
boundary ="foo"
Message-ID: <2w...@isomedia.com>
@@ -20,7 +21,6 @@
X-Status:
X-Keywords:
X-UID: 19276
-
--foo
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 8bit