You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2004/01/13 19:14:38 UTC
svn commit: rev 6155 - in incubator/spamassassin/trunk/lib/Mail/SpamAssassin: . MIME
Author: felicity
Date: Tue Jan 13 10:14:35 2004
New Revision: 6155
Modified:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME/Parser.pm
Log:
Make sure the content-type is valid, and parse all multipart/* types the same way
(no longer making an exception for multipart/alternative).
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME.pm Tue Jan 13 10:14:35 2004
@@ -138,8 +138,9 @@
my $type = $raw_type;
$type ||= 'text/plain';
- $type =~ s/;.*$//; # strip everything after first semi-colon
- $type =~ s/[^a-zA-Z\/]//g; # strip inappropriate chars
+ $type =~ s/;.*$//; # strip everything after first semi-colon
+ $type =~ s@^([^/]+/[^/]+).*$@$1@; # only something/something ...
+ $type =~ tr!\000-\040\177-\377\042\050\051\054\056\072-\077\100\133-\135!!d; # strip inappropriate chars
my $part = {
type => $type,
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME/Parser.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME/Parser.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME/Parser.pm Tue Jan 13 10:14:35 2004
@@ -89,8 +89,9 @@
unless ( $msg->{'type'} ) {
$msg->{'type'} = $msg->header('content-type');
$msg->{'type'} ||= 'text/plain';
- $msg->{'type'} =~ s/;.*$//; # strip everything after first semi-colon
- $msg->{'type'} =~ s/[^a-zA-Z\/]//g; # strip inappropriate chars
+ $msg->{'type'} =~ s/;.*$//; # strip everything after first semi-colon
+ $msg->{'type'} =~ s@^([^/]+/[^/]+).*$@$1@; # only something/something ...
+ $msg->{'type'} =~ tr!\000-\040\177-\377\042\050\051\054\056\072-\077\100\133-\135!!d; # strip inappropriate chars
}
return $msg;
@@ -116,23 +117,13 @@
dbg("Parse text/html");
$self->_parse_normal( $msg, $_msg, $boundary, $body );
}
- elsif ( $type =~ /^multipart\/alternative/i ) {
- dbg("Parse multipart/alternative");
- if ( $initial ) {
- $self->_parse_multipart_alternate( $msg, $_msg, $boundary, $body );
- }
- else {
- $self->_parse_multipart_alternate( $_msg, $_msg, $boundary, $body );
- $msg->add_body_part( $type, $_msg );
- }
- }
elsif ( $type =~ /^multipart\//i ) {
dbg("Parse $type");
if ( $initial ) {
- $self->_parse_multipart_mixed( $msg, $_msg, $boundary, $body );
+ $self->_parse_multipart( $msg, $_msg, $boundary, $body );
}
else {
- $self->_parse_multipart_mixed( $_msg, $_msg, $boundary, $body );
+ $self->_parse_multipart( $_msg, $_msg, $boundary, $body );
$msg->add_body_part( $type, $_msg );
}
}
@@ -148,97 +139,7 @@
}
}
-sub _parse_multipart_alternate {
- my($self, $msg, $_msg, $boundary, $body ) = @_;
-
- $boundary ||= '';
- dbg("m/a got boundary: $boundary");
-
- # ignore preamble per RFC 1521, unless there's no boundary ...
- if ( $boundary ) {
- my $line;
- my $tmp_line = @{$body};
- for ($line=0; $line < $tmp_line; $line++) {
- last if $body->[$line] =~ /^\-\-\Q$boundary\E$/;
- }
-
- # Found a boundary, ignore the preamble
- if ( $line < $tmp_line ) {
- splice @{$body}, 0, $line+1;
- }
-
- # Else, there's no boundary, so leave the whole part...
- }
-
- my $in_body = 0;
-
- my $header;
- my $part_array;
- my $part_msg = Mail::SpamAssassin::MIME->new();
-
- my $line_count = @{$body};
- foreach ( @{$body} ) {
- if ( --$line_count == 0 || ($boundary && /^\-\-\Q$boundary\E/) ) {
- dbg("m/a got end of section");
-
- # end of part
- my $line = $_;
-
- # per rfc 1521, the CRLF before the boundary is part of the boundary ...
- # NOTE: The CRLF preceding the encapsulation line is conceptually
- # attached to the boundary so that it is possible to have a part
- # that does not end with a CRLF (line break). Body parts that must
- # be considered to end with line breaks, therefore, must have two
- # CRLFs preceding the encapsulation line, the first of which is part
- # of the preceding body part, and the second of which is part of the
- # encapsulation boundary.
- if ($part_array) {
- chomp( $part_array->[ scalar @{$part_array} - 1 ] );
- splice @{$part_array}, -1
- if ( $part_array->[ scalar @{$part_array} - 1 ] eq '' );
-
- $self->_decode_body( $msg, $part_msg, $boundary, $part_array );
- }
-
- last if ($boundary && $line =~ /^\-\-\Q$boundary\E\-\-$/);
- $in_body = 0;
- $part_msg = Mail::SpamAssassin::MIME->new();
- undef $part_array;
- undef $header;
- next;
- }
-
- if ($in_body) {
- push ( @{$part_array}, $_ );
- }
- else {
-
- # chomp($_);
- s/\s+$//;
- if (m/^\S/) {
- if ($header) {
- my ( $key, $value ) = split ( /:\s*/, $header, 2 );
- $part_msg->header( $key, $value );
- }
- $header = $_;
- }
- elsif (/^$/) {
- if ($header) {
- my ( $key, $value ) = split ( /:\s*/, $header, 2 );
- $part_msg->header( $key, $value );
- }
- $in_body = 1;
- }
- else {
- $_ =~ s/^\s*//;
- $header .= $_;
- }
- }
- }
-
-}
-
-sub _parse_multipart_mixed {
+sub _parse_multipart {
my($self, $msg, $_msg, $boundary, $body) = @_;
$boundary ||= '';