You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2005/02/25 00:27:43 UTC
svn commit: r155278 - in spamassassin/trunk: lib/Mail/SpamAssassin/Message/Node.pm rules/70_testing.cf t/mimeparse.t
Author: felicity
Date: Thu Feb 24 15:27:40 2005
New Revision: 155278
URL: http://svn.apache.org/viewcvs?view=rev&rev=155278
Log:
Better handle MIME header decoding per RFC 2047, add a rule to catch invalid MIME header encoding, and add tests to verify that we parse MIME headers in agreement with the RFC.
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
spamassassin/trunk/rules/70_testing.cf
spamassassin/trunk/t/mimeparse.t
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm?view=diff&r1=155277&r2=155278
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm Thu Feb 24 15:27:40 2005
@@ -480,6 +480,10 @@
}
elsif ( $cte eq 'Q' ) {
# quoted printable
+
+ # the RFC states that in the encoded text, "_" is equal to "=20"
+ $data =~ s/_/=20/g;
+
return Mail::SpamAssassin::Util::qp_decode($data);
}
else {
@@ -500,8 +504,13 @@
return $header unless $header =~ /=\?/;
+ # whitespace between adjacent encoded sections must be ignored (RFC 2047).
+ # to avoid possible false positives with (\s+(?==\?))?, match two whole
+ # encoded sections separated by whitespace.
+ 1 while ($header =~ s/(=\?[\w_-]+\?[bqBQ]\?[^?]+\?=)\s+(=\?[\w_-]+\?[bqBQ]\?[^?]+\?=)/$1$2/g);
+
$header =~
- s/=\?([\w_-]+)\?([bqBQ])\?(.*?)\?=/__decode_header($1, uc($2), $3)/ge;
+ s/=\?([\w_-]+)\?([bqBQ])\?([^?]+)\?=/__decode_header($1, uc($2), $3)/ge;
return $header;
}
Modified: spamassassin/trunk/rules/70_testing.cf
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/rules/70_testing.cf?view=diff&r1=155277&r2=155278
==============================================================================
--- spamassassin/trunk/rules/70_testing.cf (original)
+++ spamassassin/trunk/rules/70_testing.cf Thu Feb 24 15:27:40 2005
@@ -476,3 +476,6 @@
replace_rules T_OBFU_VISIT
endif #ifplugin Mail::SpamAssassin::Plugin::ReplaceTags
+
+# catch non-RFC2047 compliant messages
+header T_BAD_ENC_HEADER ALL =~ /=\?[^?\s]+\?[^?\s]\?[^?]*\s/
Modified: spamassassin/trunk/t/mimeparse.t
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/t/mimeparse.t?view=diff&r1=155277&r2=155278
==============================================================================
--- spamassassin/trunk/t/mimeparse.t (original)
+++ spamassassin/trunk/t/mimeparse.t Thu Feb 24 15:27:40 2005
@@ -87,7 +87,17 @@
],
);
-my $numtests = 0;
+# initialize SpamAssassin
+my $sa = Mail::SpamAssassin->new({
+ rules_filename => "$prefix/t/log/test_rules_copy",
+ site_rules_filename => "$prefix/t/log/test_default.cf",
+ userprefs_filename => "$prefix/masses/spamassassin/user_prefs",
+ local_tests_only => 1,
+ debug => 0,
+ dont_copy_prefs => 1,
+});
+
+my $numtests = 5;
while ( my($k,$v) = each %files ) {
$numtests += @{$v};
}
@@ -96,7 +106,7 @@
foreach my $k ( sort keys %files ) {
open(INP, $k) || die "Can't find $k:$!";
- my $mail = Mail::SpamAssassin->parse(\*INP, 1);
+ my $mail = $sa->parse(\*INP, 1);
close(INP);
my $res = join("\n",$mail->content_summary());
@@ -124,4 +134,39 @@
shift @parts;
}
}
+ $mail->finish();
}
+
+my @msg;
+my $subject;
+my $mail;
+
+@msg = ("Subject: =?ISO-8859-1?Q?a?=\n", "\n");
+$mail = $sa->parse(\@msg);
+$subject = $mail->get_header("Subject");
+$mail->finish();
+ok($subject eq "a\n");
+
+@msg = ("Subject: =?ISO-8859-1?Q?a?= b\n", "\n");
+$mail = $sa->parse(\@msg);
+$subject = $mail->get_header("Subject");
+$mail->finish();
+ok($subject eq "a b\n");
+
+@msg = ("Subject: =?ISO-8859-1?Q?a?= \t =?ISO-8859-1?Q?b?=\n", "\n");
+$mail = $sa->parse(\@msg);
+$subject = $mail->get_header("Subject");
+$mail->finish();
+ok($subject eq "ab\n");
+
+@msg = ("Subject: =?ISO-8859-1?Q?a?=\n", " =?ISO-8859-1?Q?_b?=\n", "\n");
+$mail = $sa->parse(\@msg);
+$subject = $mail->get_header("Subject");
+$mail->finish();
+ok($subject eq "a b\n");
+
+@msg = ("Subject: =?ISO-8859-1?Q?a?=\n", " =?ISO-8859-1?Q?_b?= mem_brain =? invalid ?=\n", "\n");
+$mail = $sa->parse(\@msg);
+$subject = $mail->get_header("Subject");
+$mail->finish();
+ok($subject eq "a b mem_brain =? invalid ?=\n");