You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2005/02/25 00:27:43 UTC

svn commit: r155278 - in spamassassin/trunk: lib/Mail/SpamAssassin/Message/Node.pm rules/70_testing.cf t/mimeparse.t

Author: felicity
Date: Thu Feb 24 15:27:40 2005
New Revision: 155278

URL: http://svn.apache.org/viewcvs?view=rev&rev=155278
Log:
Better handle MIME header decoding per RFC 2047, add a rule to catch invalid MIME header encoding, and add tests to verify that we parse MIME headers in agreement with the RFC.

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
    spamassassin/trunk/rules/70_testing.cf
    spamassassin/trunk/t/mimeparse.t

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm?view=diff&r1=155277&r2=155278
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm Thu Feb 24 15:27:40 2005
@@ -480,6 +480,10 @@
   }
   elsif ( $cte eq 'Q' ) {
     # quoted printable
+
+    # the RFC states that in the encoded text, "_" is equal to "=20"
+    $data =~ s/_/=20/g;
+
     return Mail::SpamAssassin::Util::qp_decode($data);
   }
   else {
@@ -500,8 +504,13 @@
 
   return $header unless $header =~ /=\?/;
 
+  # Whitespace between adjacent encoded words must be ignored when decoding.
+  # To avoid the false positives a looser pattern such as (\s+(?==\?))? could
+  # cause, only strip whitespace sitting between two complete encoded words.
+  1 while ($header =~ s/(=\?[\w_-]+\?[bqBQ]\?[^?]+\?=)\s+(=\?[\w_-]+\?[bqBQ]\?[^?]+\?=)/$1$2/g);
+
   $header =~
-    s/=\?([\w_-]+)\?([bqBQ])\?(.*?)\?=/__decode_header($1, uc($2), $3)/ge;
+    s/=\?([\w_-]+)\?([bqBQ])\?([^?]+)\?=/__decode_header($1, uc($2), $3)/ge;
 
   return $header;
 }

Modified: spamassassin/trunk/rules/70_testing.cf
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/rules/70_testing.cf?view=diff&r1=155277&r2=155278
==============================================================================
--- spamassassin/trunk/rules/70_testing.cf (original)
+++ spamassassin/trunk/rules/70_testing.cf Thu Feb 24 15:27:40 2005
@@ -476,3 +476,6 @@
 replace_rules T_OBFU_VISIT
 
 endif   #ifplugin Mail::SpamAssassin::Plugin::ReplaceTags
+
+# catch non-RFC2047 compliant messages
+header T_BAD_ENC_HEADER		ALL =~ /=\?[^?\s]+\?[^?\s]\?[^?]*\s/

Modified: spamassassin/trunk/t/mimeparse.t
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/t/mimeparse.t?view=diff&r1=155277&r2=155278
==============================================================================
--- spamassassin/trunk/t/mimeparse.t (original)
+++ spamassassin/trunk/t/mimeparse.t Thu Feb 24 15:27:40 2005
@@ -87,7 +87,17 @@
 	],
 );
 
-my $numtests = 0;
+# initialize SpamAssassin
+my $sa = Mail::SpamAssassin->new({
+    rules_filename => "$prefix/t/log/test_rules_copy",
+    site_rules_filename => "$prefix/t/log/test_default.cf",
+    userprefs_filename  => "$prefix/masses/spamassassin/user_prefs",
+    local_tests_only    => 1,
+    debug             => 0,
+    dont_copy_prefs   => 1,
+});
+
+my $numtests = 5;
 while ( my($k,$v) = each %files ) {
   $numtests += @{$v};
 }
@@ -96,7 +106,7 @@
 
 foreach my $k ( sort keys %files ) {
   open(INP, $k) || die "Can't find $k:$!";
-  my $mail = Mail::SpamAssassin->parse(\*INP, 1);
+  my $mail = $sa->parse(\*INP, 1);
   close(INP);
 
   my $res = join("\n",$mail->content_summary());
@@ -124,4 +134,39 @@
       shift @parts;
     }
   }
+  $mail->finish();
 }
+
+my @msg;
+my $subject;
+my $mail;
+# A lone Q-encoded word: the test expects it to decode to its plain text.
+@msg = ("Subject: =?ISO-8859-1?Q?a?=\n", "\n");
+$mail = $sa->parse(\@msg);
+$subject = $mail->get_header("Subject");
+$mail->finish();
+ok($subject eq "a\n");
+# An encoded word followed by plain text: the separating space is expected to be kept.
+@msg = ("Subject: =?ISO-8859-1?Q?a?= b\n", "\n");
+$mail = $sa->parse(\@msg);
+$subject = $mail->get_header("Subject");
+$mail->finish();
+ok($subject eq "a b\n");
+# Two encoded words separated only by whitespace: that whitespace is expected to be dropped.
+@msg = ("Subject: =?ISO-8859-1?Q?a?=   \t =?ISO-8859-1?Q?b?=\n", "\n");
+$mail = $sa->parse(\@msg);
+$subject = $mail->get_header("Subject");
+$mail->finish();
+ok($subject eq "ab\n");
+# Encoded words split across a folded header line; "_" in Q-encoded text is expected to decode as a space.
+@msg = ("Subject: =?ISO-8859-1?Q?a?=\n", " =?ISO-8859-1?Q?_b?=\n", "\n");
+$mail = $sa->parse(\@msg);
+$subject = $mail->get_header("Subject");
+$mail->finish();
+ok($subject eq "a b\n");
+# Trailing text that is not a well-formed encoded word is expected to pass through unchanged.
+@msg = ("Subject: =?ISO-8859-1?Q?a?=\n", " =?ISO-8859-1?Q?_b?= mem_brain =?  invalid ?=\n", "\n");
+$mail = $sa->parse(\@msg);
+$subject = $mail->get_header("Subject");
+$mail->finish();
+ok($subject eq "a b mem_brain =?  invalid ?=\n");