You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2019/08/08 19:24:31 UTC

svn commit: r1864729 - in /spamassassin/trunk/lib/Mail/SpamAssassin: Message.pm Util.pm

Author: hege
Date: Thu Aug  8 19:24:30 2019
New Revision: 1864729

URL: http://svn.apache.org/viewvc?rev=1864729&view=rev
Log:
Bug 5741 - Parsing code in Message.pm does not handle multipart/digest correctly
- parse_content_type returns new fifth element (missing ct)

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm?rev=1864729&r1=1864728&r2=1864729&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm Thu Aug  8 19:24:30 2019
@@ -942,8 +942,20 @@ sub _parse_multipart {
         $part_array = [];
       }
 
-      my($p_boundary);
-      ($part_msg->{'type'}, $p_boundary) = Mail::SpamAssassin::Util::parse_content_type($part_msg->header('content-type'));
+      ($part_msg->{'type'}, my $p_boundary, undef, undef, my $ct_was_missing) =
+          Mail::SpamAssassin::Util::parse_content_type($part_msg->header('content-type'));
+
+      # bug 5741: if ct was missing and parent == multipart/digest, then
+      # type should be set as message/rfc822
+      if ($ct_was_missing) {
+        if ($msg->{'type'} eq 'multipart/digest') {
+          dbg("message: missing type, setting multipart/digest child as message/rfc822");
+          $part_msg->{'type'} = 'message/rfc822';
+        } else {
+          dbg("message: missing type, setting as default text/plain");
+        }
+      }
+
       $p_boundary ||= $boundary;
       dbg("message: found part of type ".$part_msg->{'type'}.", boundary: ".(defined $p_boundary ? $p_boundary : ''));
 
@@ -1054,12 +1066,18 @@ sub _parse_normal {
 
   dbg("message: parsing normal part");
 
-  # 0: content-type, 1: boundary, 2: charset, 3: filename
+  # 0: content-type, 1: boundary, 2: charset, 3: filename, 4: ct_missing
   my @ct = Mail::SpamAssassin::Util::parse_content_type($msg->header('content-type'));
 
   # multipart sections are required to have a boundary set ...  If this
   # one doesn't, assume it's malformed and revert to text/plain
-  $msg->{'type'} = (index($ct[0], 'multipart/') != 0 || defined $boundary) ? $ct[0] : 'text/plain';
+  # bug 5741: don't overwrite the default type assigned by _parse_multipart()
+  if (!$ct[4]) {
+    $msg->{'type'} = (index($ct[0], 'multipart/') != 0 || defined $boundary) ?
+      $ct[0] : 'text/plain'
+  } else {
+    dbg("message: missing type, setting previous multipart type: %s", $msg->{'type'});
+  }
   $msg->{'charset'} = $ct[2];
 
   # attempt to figure out a name for this attachment if there is one ...

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm?rev=1864729&r1=1864728&r2=1864729&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm Thu Aug  8 19:24:30 2019
@@ -1147,7 +1147,8 @@ sub parse_content_type {
   # but it happens), MUAs seem to take the last one and so that's what we
   # should do here.
   #
-  my $ct = $_[-1] || 'text/plain; charset=us-ascii';
+  my $missing; # flag missing content-type, even though we force it text/plain
+  my $ct = $_[-1] || do { $missing = 1; 'text/plain; charset=us-ascii' };
 
   # This could be made a bit more rigid ...
   # the actual ABNF, BTW (RFC 1521, section 7.2.1):
@@ -1208,6 +1209,7 @@ sub parse_content_type {
   # bug 4298: If at this point we don't have a content-type, assume text/plain;
   # also, bug 5399: if the content-type *starts* with "text", and isn't in a 
   # list of known bad/non-plain formats, do likewise.
+  $missing = 1 if !$ct; # flag missing content-type
   if (!$ct ||
         ($ct =~ /^text\b/ && $ct !~ /^text\/(?:x-vcard|calendar|html)$/))
   {
@@ -1220,8 +1222,10 @@ sub parse_content_type {
   # Now that the header has been parsed, return the requested information.
   # In scalar context, just the MIME type, in array context the
   # four important data parts (type, boundary, charset, and filename).
+  # In list context a fifth element, $missing, is also returned; it is set
+  # when the content-type was missing/invalid and was forced to text/plain.
   #
-  return wantarray ? ($ct,$boundary,$charset,$name) : $ct;
+  return wantarray ? ($ct,$boundary,$charset,$name,$missing) : $ct;
 }
 
 ###########################################################################