You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2019/08/08 19:24:31 UTC
svn commit: r1864729 - in /spamassassin/trunk/lib/Mail/SpamAssassin:
Message.pm Util.pm
Author: hege
Date: Thu Aug 8 19:24:30 2019
New Revision: 1864729
URL: http://svn.apache.org/viewvc?rev=1864729&view=rev
Log:
Bug 5741 - Parsing code in Message.pm does not handle multipart/digest correctly
- parse_content_type now returns a fifth element flagging a missing Content-Type (missing ct)
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm?rev=1864729&r1=1864728&r2=1864729&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm Thu Aug 8 19:24:30 2019
@@ -942,8 +942,20 @@ sub _parse_multipart {
$part_array = [];
}
- my($p_boundary);
- ($part_msg->{'type'}, $p_boundary) = Mail::SpamAssassin::Util::parse_content_type($part_msg->header('content-type'));
+ ($part_msg->{'type'}, my $p_boundary, undef, undef, my $ct_was_missing) =
+ Mail::SpamAssassin::Util::parse_content_type($part_msg->header('content-type'));
+
+ # bug 5741: if ct was missing and parent == multipart/digest, then
+ # type should be set as message/rfc822 (the RFC 2046 default for digest parts)
+ if ($ct_was_missing) {
+ if ($msg->{'type'} eq 'multipart/digest') {
+ dbg("message: missing type, setting multipart/digest child as message/rfc822");
+ $part_msg->{'type'} = 'message/rfc822';
+ } else {
+ dbg("message: missing type, setting as default text/plain");
+ }
+ }
+
$p_boundary ||= $boundary;
dbg("message: found part of type ".$part_msg->{'type'}.", boundary: ".(defined $p_boundary ? $p_boundary : ''));
@@ -1054,12 +1066,18 @@ sub _parse_normal {
dbg("message: parsing normal part");
- # 0: content-type, 1: boundary, 2: charset, 3: filename
+ # 0: content-type, 1: boundary, 2: charset, 3: filename, 4: ct_missing
my @ct = Mail::SpamAssassin::Util::parse_content_type($msg->header('content-type'));
# multipart sections are required to have a boundary set ... If this
# one doesn't, assume it's malformed and revert to text/plain
- $msg->{'type'} = (index($ct[0], 'multipart/') != 0 || defined $boundary) ? $ct[0] : 'text/plain';
+ # bug 5741: don't overwrite the default type assigned by _parse_multipart()
+ if (!$ct[4]) {
+ $msg->{'type'} = (index($ct[0], 'multipart/') != 0 || defined $boundary) ?
+ $ct[0] : 'text/plain'
+ } else {
+ dbg("message: missing type, setting previous multipart type: %s", $msg->{'type'});
+ }
$msg->{'charset'} = $ct[2];
# attempt to figure out a name for this attachment if there is one ...
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm?rev=1864729&r1=1864728&r2=1864729&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm Thu Aug 8 19:24:30 2019
@@ -1147,7 +1147,8 @@ sub parse_content_type {
# but it happens), MUAs seem to take the last one and so that's what we
# should do here.
#
- my $ct = $_[-1] || 'text/plain; charset=us-ascii';
+ my $missing; # flag missing content-type, even though we force it text/plain
+ my $ct = $_[-1] || do { $missing = 1; 'text/plain; charset=us-ascii' };
# This could be made a bit more rigid ...
# the actual ABNF, BTW (RFC 1521, section 7.2.1):
@@ -1208,6 +1209,7 @@ sub parse_content_type {
# bug 4298: If at this point we don't have a content-type, assume text/plain;
# also, bug 5399: if the content-type *starts* with "text", and isn't in a
# list of known bad/non-plain formats, do likewise.
+ $missing = 1 if !$ct; # flag missing content-type
if (!$ct ||
($ct =~ /^text\b/ && $ct !~ /^text\/(?:x-vcard|calendar|html)$/))
{
@@ -1220,8 +1222,10 @@ sub parse_content_type {
# Now that the header has been parsed, return the requested information.
# In scalar context, just the MIME type, in array context the
# four important data parts (type, boundary, charset, and filename).
+ # A fifth array member, $missing, is true when the content-type was
+ # missing/invalid, even though we forced it to text/plain.
#
- return wantarray ? ($ct,$boundary,$charset,$name) : $ct;
+ return wantarray ? ($ct,$boundary,$charset,$name,$missing) : $ct;
}
###########################################################################