You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2004/09/08 05:18:19 UTC

svn commit: rev 43492 - spamassassin/trunk/lib/Mail/SpamAssassin

Author: felicity
Date: Tue Sep  7 20:18:19 2004
New Revision: 43492

Modified:
   spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
Log:
bug 3749: new version of the last patch -- certain RFC-invalid MIME parts were being ignored in the internal representation of the message.  We now make empty nodes appropriately.  The patch also standardizes the MIME boundary REs and removes some cruft.  Added some extra comments about the process because it's a little complex.

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm	(original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm	Tue Sep  7 20:18:19 2004
@@ -427,12 +427,17 @@
 
   dbg("parsing multipart, got boundary: ".(defined $boundary ? $boundary : ''));
 
+  # NOTE: The MIME boundary REs here are very specific to be mostly RFC 1521
+  # compliant, but also allow possible malformations to still work.  Please
+  # see Bugzilla bug 3749 for more information before making any changes!
+
   # ignore preamble per RFC 1521, unless there's no boundary ...
   if ( defined $boundary ) {
     my $line;
     my $tmp_line = @{$body};
     for ($line=0; $line < $tmp_line; $line++) {
-      if ($body->[$line] =~ /^\-\-\Q$boundary\E$/) {
+      # specifically look for an opening boundary
+      if ($body->[$line] =~ /^--\Q$boundary\E\s*$/) {
 	# Make note that we found the opening boundary
 	$self->{mime_boundary_state}->{$boundary} = 1;
         last;
@@ -454,8 +459,9 @@
 
   my $line_count = @{$body};
   foreach ( @{$body} ) {
-    # if we're on the last body line, or we find a boundary marker, deal with the mime part
-    if ( --$line_count == 0 || (defined $boundary && /^\-\-\Q$boundary\E/) ) {
+    # if we're on the last body line, or we find any boundary marker,
+    # deal with the mime part
+    if ( --$line_count == 0 || (defined $boundary && /^--\Q$boundary\E(?:--|\s*$)/) ) {
       my $line = $_; # remember the last line
 
       # per rfc 1521, the CRLF before the boundary is part of the boundary:
@@ -469,15 +475,23 @@
       if ($part_array) {
         chomp( $part_array->[-1] );  # trim the CRLF that's part of the boundary
         splice @{$part_array}, -1 if ( $part_array->[-1] eq '' ); # blank line for the boundary only ...
-
-        my($p_boundary);
-	($part_msg->{'type'}, $p_boundary) = Mail::SpamAssassin::Util::parse_content_type($part_msg->header('content-type'));
-        $p_boundary ||= $boundary;
-	dbg("found part of type ".$part_msg->{'type'}.", boundary: ".(defined $p_boundary ? $p_boundary : ''));
-        $self->parse_body( $msg, $part_msg, $p_boundary, $part_array, 0 );
+      }
+      else {
+        # Invalid parts can have no body, so fake in a blank body
+	# in that case.
+        $part_array = [];
       }
 
-      if (defined $boundary && $line =~ /^\-\-\Q${boundary}\E\-\-$/) {
+      my($p_boundary);
+      ($part_msg->{'type'}, $p_boundary) = Mail::SpamAssassin::Util::parse_content_type($part_msg->header('content-type'));
+      $p_boundary ||= $boundary;
+      dbg("found part of type ".$part_msg->{'type'}.", boundary: ".(defined $p_boundary ? $p_boundary : ''));
+      $self->parse_body( $msg, $part_msg, $p_boundary, $part_array, 0 );
+
+      # rfc 1521 says /^--boundary--$/ but MUAs have a tendency to just
+      # require /^--boundary--/ due to malformed messages, so that'll work for
+      # us as well.
+      if (defined $boundary && $line =~ /^--\Q${boundary}\E--/) {
 	# Make a note that we've seen the end boundary
 	$self->{mime_boundary_state}->{$boundary}--;
         last;