You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2004/10/06 22:13:18 UTC

svn commit: rev 53902 - in spamassassin/branches/3.0: lib/Mail/SpamAssassin t/data/spam

Author: felicity
Date: Wed Oct  6 13:13:17 2004
New Revision: 53902

Modified:
   spamassassin/branches/3.0/lib/Mail/SpamAssassin/Message.pm
   spamassassin/branches/3.0/lib/Mail/SpamAssassin/Util.pm
   spamassassin/branches/3.0/t/data/spam/badmime2.txt
Log:
bug 3801: MUAs allow no blank line between the end of the message header and a MIME boundary, so we should too.

Modified: spamassassin/branches/3.0/lib/Mail/SpamAssassin/Message.pm
==============================================================================
--- spamassassin/branches/3.0/lib/Mail/SpamAssassin/Message.pm	(original)
+++ spamassassin/branches/3.0/lib/Mail/SpamAssassin/Message.pm	Wed Oct  6 13:13:17 2004
@@ -122,6 +122,7 @@
 
   # Go through all the headers of the message
   my $header = '';
+  my $boundary;
   while ( my $last = shift @message ) {
     if ( $last =~ /^From\s/ ) {
 	# mbox formated mailbox
@@ -162,8 +163,26 @@
     if ( $last =~ /^[ \t]+/ ) {                    # if its a continuation
       if ($header) {
         $header .= $last;                            # fold continuations
+
+	# If we're currently dealing with a content-type header, and there's a
+	# boundary defined, use it.  Since there could be multiple
+	# content-type headers in a message, the last one will be the one we
+	# should use, so just keep updating as they come in.
+        if ($header =~ /^content-type:\s*(\S.*)$/is) {
+	  my($type,$temp_boundary) = Mail::SpamAssassin::Util::parse_content_type($1);
+	  $boundary = $temp_boundary if ($type =~ /^multipart/ && defined $temp_boundary);
+	}
+
+	# Go on to the next header line, unless the next line is a
+	# multipart MIME boundary; in that case we're going to stop
+	# below, so fall through for final header processing.
+        next unless (defined $boundary && $message[0] =~ /^--\Q$boundary\E(?:--|\s*$)/);
+      }
+      else {
+	# There was no previous header and this is just "out there"?
+	# Ignore it!
+        next;
       }
-      next;
     }
 
     # Ok, there's a header here, let's go ahead and add it in.
@@ -183,6 +202,15 @@
 	  $self->{'truncated_header'} = 1;
 	}
         $self->header($key, $value);
+
+	# If we're currently dealing with a content-type header, and there's a
+	# boundary defined, use it.  Since there could be multiple
+	# content-type headers in a message, the last one will be the one we
+	# should use, so just keep updating as they come in.
+        if (lc $key eq 'content-type') {
+	  my($type,$temp_boundary) = Mail::SpamAssassin::Util::parse_content_type($value);
+	  $boundary = $temp_boundary if ($type =~ /^multipart/ && defined $temp_boundary);
+	}
       }
     }
 
@@ -190,7 +218,11 @@
     $header = $last;
 
     # Ok, we found the header/body blank line ...
-    last if ( $last =~ /^\r?$/m );
+    last if ($last =~ /^\r?$/m);
+
+    # Alternatively, if a multipart MIME boundary is found in the header area
+    # (i.e. the message is malformed), exit as well and treat it as part of the body.
+    last if (defined $boundary && $message[0] =~ /^--\Q$boundary\E(?:--|\s*$)/);
   }
 
   # Store the pristine body for later -- store as a copy since @message

Modified: spamassassin/branches/3.0/lib/Mail/SpamAssassin/Util.pm
==============================================================================
--- spamassassin/branches/3.0/lib/Mail/SpamAssassin/Util.pm	(original)
+++ spamassassin/branches/3.0/lib/Mail/SpamAssassin/Util.pm	Wed Oct  6 13:13:17 2004
@@ -642,21 +642,31 @@
   # - only an opening double quote seems to be needed
   # - non-quoted boundaries should be followed by space, ";", or end of line
   # - blank boundaries seem to not work
+  #
   my($boundary) = $ct =~ m!\bboundary\s*=\s*("[^"]+|[^\s";]+(?=[\s;]|$))!i;
 
   # remove double-quotes in boundary (should only be at start and end)
+  #
   $boundary =~ tr/"//d if defined $boundary;
 
   # Parse out the charset and name, if they exist.
+  #
   my($charset) = $ct =~ /\bcharset\s*=\s*["']?(.*?)["']?(?:;|$)/i;
   my($name) = $ct =~ /\b(?:file)?name\s*=\s*["']?(.*?)["']?(?:;|$)/i;
 
-  # Get the type out ...
-  $ct =~ s/;.*$//;                    # strip everything after first semi-colon
-  $ct =~ s@^([^/]+(?:/[^/]*)?).*$@$1@;	# only something/something ...
+  # Get the actual MIME type out ...
+  # Note: the header content may not be whitespace unfolded, so make sure the
+  # REs do /s when appropriate.
+  #
+  $ct =~ s/;.*$//s;                     # strip everything after first semi-colon
+  $ct =~ s@^([^/]+(?:/[^/]*)?).*$@$1@s;	# only something/something ...
   $ct =~ tr/\000-\040\177-\377\042\050\051\054\056\072-\077\100\133-\135//d;    # strip inappropriate chars
   $ct = lc $ct;
 
+  # Now that the header has been parsed, return the requested information:
+  # in scalar context, just the MIME type; in list context, the four
+  # important data parts (type, boundary, charset, and filename).
+  #
   return wantarray ? ($ct,$boundary,$charset,$name) : $ct;
 }
 

Modified: spamassassin/branches/3.0/t/data/spam/badmime2.txt
==============================================================================
--- spamassassin/branches/3.0/t/data/spam/badmime2.txt	(original)
+++ spamassassin/branches/3.0/t/data/spam/badmime2.txt	Wed Oct  6 13:13:17 2004
@@ -13,6 +13,7 @@
 Date: Wed, 04 Feb 2004 20:22:38 +0100
 MIME-Version: 1.0
 X-MimeOLE: Produced By Microsoft MimeOLE V4.72.1962.2
+Content-Type:multipart/related; boundary =bar; other=foo
 Content-Type: multipart/alternative;
 	boundary ="foo"
 Message-ID: <2w...@isomedia.com>
@@ -20,7 +21,6 @@
 X-Status: 
 X-Keywords:                 
 X-UID: 19276
-
 --foo
 Content-Type: text/plain; charset=us-ascii
 Content-Transfer-Encoding: 8bit