You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2021/04/30 18:17:51 UTC

svn commit: r1889337 [2/2] - in /spamassassin/trunk: ./ lib/Mail/ lib/Mail/SpamAssassin/ lib/Mail/SpamAssassin/Plugin/ lib/Mail/SpamAssassin/Util/ t/ t/data/ t/data/nice/ t/data/spam/

Modified: spamassassin/trunk/t/get_headers.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/get_headers.t?rev=1889337&r1=1889336&r2=1889337&view=diff
==============================================================================
--- spamassassin/trunk/t/get_headers.t [iso-8859-1] (original)
+++ spamassassin/trunk/t/get_headers.t [UTF-8] Fri Apr 30 18:17:51 2021
@@ -1,31 +1,79 @@
 #!/usr/bin/perl -w -T
 
+###
+### UTF-8 CONTENT, edit with UTF-8 locale/editor
+###
+
 use strict;
 
 use lib '.'; use lib 't';
 use SATest; sa_t_init("get_headers");
+use Test::More;
+
 use Mail::SpamAssassin;
 
-use Test::More tests => 22;
+use constant HAS_EMAIL_ADDRESS_XS => eval { require Email::Address::XS; };
+
+my $tests = 52;
+$tests *= 2 if (HAS_EMAIL_ADDRESS_XS);
+plan tests => $tests;
 
 ##############################################
 
 # initialize SpamAssassin
-my $sa = create_saobj({'dont_copy_prefs' => 1});
-$sa->init(0);
-my $mail = $sa->parse( get_raw_headers()."\n\nBlah\n" );
-my $msg = Mail::SpamAssassin::PerMsgStatus->new($sa, $mail);
+my ($sa,$mail,$pms);
+sub new_saobj {
+  $pms->finish() if $pms;
+  $mail->finish() if $mail;
+  $sa->finish() if $sa;
+  undef $sa; undef $mail; undef $pms;
+  $sa = create_saobj({'dont_copy_prefs' => 1});
+  $sa->init(0);
+  $mail = $sa->parse( get_raw_headers()."\n\nBlah\n" );
+  $pms = Mail::SpamAssassin::PerMsgStatus->new($sa, $mail);
+}
 
 sub try {
   my ($try, $expect) = @_;
-  my $result = $msg->get($try);
 
-  # undef might be valid in some situations, so deal with it...
+  my $result;
+  my @results = $pms->get($try);
+  if (!@results) {
+    $result = undef;
+  } else {
+    $result = join("\\n", @results);
+  }
+
+  my $parser = $Mail::SpamAssassin::Util::header_address_parser == 1 ?
+    'internal' : 'Email::Address::XS';
+
+  # Whitelist some differences
+  if ($parser eq 'Email::Address::XS') {
+    # try: Email::Address::XS: 'From5:addr' failed! expect: 'noreply@foobar.com\ninfo=foobar.com@mlsend.com' got: 'noreply@foobar.com'
+    return 1 if $try eq 'From5:addr' && $result eq 'noreply@foobar.com';
+    # try: Email::Address::XS: 'From5:name' failed! expect: undef got: '=?UTF-8?Q? Foobar _'
+    return 1 if $try eq 'From5:name' && $result eq '=?UTF-8?Q? Foobar _';
+    # try: Email::Address::XS: 'From9:name' failed! expect: 'Mr\nSpam' got: 'Mr, Spam <spam@blah.com>\nSpam'
+    return 1 if $try eq 'From9:name' && $result eq 'Mr, Spam <spam@blah.com>\nSpam';
+  }
+
   if (!defined $expect) {
-    return !defined $result;
+    if (defined $result) {
+      my $lr=$result;$lr=~s/\t/\\t/gs;$lr =~s/\n/\\n/gs;
+      warn "try: $parser: '$try' failed! expect: undef got: '$lr'\n";
+      return 0;
+    } else {
+      return 1;
+    }
   }
   elsif (!defined $result) {
-    return 0;
+    if (defined $expect) {
+      my $le=$expect;$le=~s/\t/\\t/gs;$le =~s/\n/\\n/gs;
+      warn "try: $parser: '$try' failed! expect: '$le' got: undef\n";
+      return 0;
+    } else {
+      return 1;
+    }
   }
 
   if ($expect eq $result) {
@@ -33,7 +81,7 @@ sub try {
   } else {
     my $le=$expect;$le=~s/\t/\\t/gs;$le =~s/\n/\\n/gs;
     my $lr=$result;$lr=~s/\t/\\t/gs;$lr =~s/\n/\\n/gs;
-    warn "try: '$try' failed! expect: '$le' got: '$lr'\n";
+    warn "try: $parser: '$try' failed! expect: '$le' got: '$lr'\n";
     return 0;
   }
 }
@@ -60,25 +108,55 @@ Hdr1:    foo
   
 To_bug5201_a: =?ISO-2022-JP?B?GyRCQjw+ZRsoQiAbJEI1V0JlGyhC?= <jm@foo>
 To_bug5201_b: =?ISO-2022-JP?B?GyRCNiVHTyM3JSQlcyU1JSQlQCE8PnBKcxsoQg==?= <jm@foo>
-To_bug5201_c: "joe+<blah>@example.com"
+To_bug5201_c: "joe+foobar@example.com"
+From1: Foo Blah
+From2: <jm@foo>, "'Foo Blah'" <jm@bar>, =?utf-8?Q?'Baz Bl=C3=A4h'?= <baz@blaeh>
+From3: =?utf-8?Q?"B=C3=A4z=C3=A4=C3=A4_=28baz=40blah.?= =?utf-8?Q?com=29"?= <jm@foo>
+From4: "Mr., Spam"<spam@(comment)blah.com(comment)>(comment)
+From5: =?UTF-8?Q?"Foobar"_<noreply@foobar.com>?=, =?utf-8?Q?"Foobar"?=<info=foobar.com@mlsend.com>
+X-Note: From6 is really \\\" - escaped perl backslashes..
+From6: "Mr. <Spam> (foo@bar)\\\\\\"" <spam@blah.com> (comment)
+From7: "Mr. <Spam> \(foo\@bar)\\\\\\\\\\"" <spam@blah.com> (comment)
+From8: "Foo Blah \(via Foobar\)" <no-reply@foobar.com>, "Foo Blah (via Foobar)" <no-reply@foobar.com>
+From9: Mr, Spam <spam@blah.com>
 };
 }
 
 ##############################################
 
+
+for (1 .. 2) { ## parser loop
+
+if ($_ == 2 && !HAS_EMAIL_ADDRESS_XS) {
+  warn "Not running Email::Address::XS tests, module missing\n";
+  next;
+}
+
+$Mail::SpamAssassin::Util::header_address_parser = $_;
+new_saobj();
+
 ok(try('To1:addr', 'jm@foo'));
+ok(try('To1:name', undef));
 ok(try('To2:addr', 'jm@foo'));
+ok(try('To2:name', undef));
 ok(try('To3:addr', 'jm@foo'));
-ok(try('To4:addr', 'jm@foo'));
-ok(try('To5:addr', 'jm@foo'));
+ok(try('To3:name', 'Foo Blah'));
+ok(try('To4:addr', 'jm@foo\njm@bar'));
+ok(try('To4:name', undef));
+ok(try('To5:addr', 'jm@foo\njm@bar'));
+ok(try('To5:name', 'Foo Blah'));
 ok(try('To6:addr', 'jm@foo'));
+ok(try('To6:name', 'Foo Blah'));
 ok(try('To7:addr', 'jm@foo'));
+ok(try('To7:name', 'Foo Blah'));
 ok(try('To8:addr', 'jm@foo'));
+ok(try('To8:name', 'Foo Blah'));
 ok(try('To9:addr', 'jm@foo'));
+ok(try('To9:name', '_$B!z8=6b$=$N>l$GEv$?$j!*!zEv_(B_$B$?$k!*!)$/$8!z7|>^%%s%P!<!z_(B'));
 ok(try('To10:addr', '"Another User"@foo'));
 ok(try('To10:name', 'Some User'));
 ok(try('To11:addr', '"Some User"@foo'));
-ok(try('To11:name', ''));
+ok(try('To11:name', undef));
 ok(try('To12:addr', 'jm@foo'));
 ok(try('To12:name', 'Some User <jm@bar>'));
 ok(try('To13:addr', 'jm@foo'));
@@ -86,6 +164,33 @@ ok(try('To13:name', 'Some User <"Some Us
 ok(try('Hdr1', "foo   bar baz\n"));
 ok(try('Hdr1:raw', "    foo  \n  bar\n\tbaz \n  \n"));
 ok(try('To_bug5201_a:addr', 'jm@foo'));
+ok(try('To_bug5201_a:name', '村上 久代'));
 ok(try('To_bug5201_b:addr', 'jm@foo'));
-ok(try('To_bug5201_c:addr', '"joe+<blah>@example.com"'));
+ok(try('To_bug5201_b:name', '競馬7インサイダー情報'));
+ok(try('To_bug5201_c:addr', 'joe+foobar@example.com'));
+ok(try('To_bug5201_c:name', undef));
+ok(try('From1:addr', undef));
+ok(try('From1:name', 'Foo Blah'));
+ok(try('From2:addr', 'jm@foo\njm@bar\nbaz@blaeh'));
+ok(try('From2:name', 'Foo Blah\nBaz Bläh'));
+ok(try('From3:addr', 'jm@foo'));
+ok(try('From3:name', 'Bäzää (baz@blah.com)'));
+ok(try('From4:addr', 'spam@blah.com'));
+ok(try('From4:name', 'Mr., Spam'));
+ok(try('From5:addr', 'noreply@foobar.com\ninfo=foobar.com@mlsend.com'));
+ok(try('From5:name', undef));
+ok(try('From6:addr', 'spam@blah.com'));
+ok(try('From6:name', 'Mr. <Spam> (foo@bar)"'));
+ok(try('From7:addr', 'spam@blah.com'));
+ok(try('From7:name', 'Mr. <Spam> (foo@bar)"'));
+ok(try('From8:addr', 'no-reply@foobar.com\nno-reply@foobar.com'));
+ok(try('From8:name', 'Foo Blah (via Foobar)\nFoo Blah (via Foobar)'));
+ok(try('From9:addr', 'spam@blah.com'));
+ok(try('From9:name', 'Mr\nSpam'));
+
+} ## end parser loop
+
+$pms->finish() if $pms;
+$mail->finish() if $mail;
+$sa->finish() if $sa;
 

Propchange: spamassassin/trunk/t/get_headers.t
------------------------------------------------------------------------------
    svn:mime-type = text/plain; charset=UTF-8

Modified: spamassassin/trunk/t/header_utf8.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/header_utf8.t?rev=1889337&r1=1889336&r2=1889337&view=diff
==============================================================================
--- spamassassin/trunk/t/header_utf8.t [iso-8859-1] (original)
+++ spamassassin/trunk/t/header_utf8.t [UTF-8] Fri Apr 30 18:17:51 2021
@@ -1,8 +1,14 @@
 #!/usr/bin/perl -T
 
+###
+### UTF-8 CONTENT, edit with UTF-8 locale/editor
+###
+
 use lib '.'; use lib 't';
 use SATest; sa_t_init("header_utf8.t");
 
+use constant HAS_EMAIL_ADDRESS_XS => eval { require Email::Address::XS; };
+
 my $have_libidn;
 BEGIN {
   eval { require Net::LibIDN } and do { $have_libidn = 1 };
@@ -14,7 +20,10 @@ if (!$have_libidn) {
 
 use Test::More;
 plan skip_all => "Test requires Perl 5.8" unless $] > 5.008; # TODO: SA already doesn't support anything below 5.8.1
-plan tests => 156;
+
+my $tests = 156;
+$tests = 305 if (HAS_EMAIL_ADDRESS_XS);
+plan tests => $tests;
 
 # ---------------------------------------------------------------------------
 
@@ -42,7 +51,7 @@ plan tests => 156;
 );
 
 %mypatterns_utf8 = (  # as it appears in a report body
-  q{/(?m)^ 0\.0 LT_ANY_CHARS \s*En-tête contient caractères$/} => 'LT_ANY_CHARS utf8',
+  q{/(?m)^ 0\.0 LT_ANY_CHARS \s*En-tête contient caractères$/} => 'LT_ANY_CHARS utf8',
 );
 
 %mypatterns_mime_qp = (  # as it appears in a mail header section
@@ -65,42 +74,42 @@ plan tests => 156;
 
 my $myrules = <<'END';
   add_header all  AuthorDomain _AUTHORDOMAIN_
-  blacklist_from  Marilù.Gioffré@esempio-università.it
+  blacklist_from  Marilù.Gioffré@esempio-università.it
   header LT_UTF8SMTP_ANY  Received =~ /\bwith\s*UTF8SMTPS?A?\b/mi
   score  LT_UTF8SMTP_ANY  -0.1
-  header LT_RPATH   Return-Path:addr =~ /^Marilù\.Gioffré\@esempio-università\.it\z/
+  header LT_RPATH   Return-Path:addr =~ /^Marilù\.Gioffré\@esempio-università\.it\z/
   score  LT_RPATH     0.01
-  header LT_ENVFROM EnvelopeFrom =~ /^Marilù\.Gioffré\@esempio-università\.it\z/
+  header LT_ENVFROM EnvelopeFrom =~ /^Marilù\.Gioffré\@esempio-università\.it\z/
   score  LT_ENVFROM   0.01
-  header LT_FROM      From =~ /^Marilù Gioffré ♥ <Marilù\.Gioffré\@esempio-università\.it>$/m
+  header LT_FROM      From =~ /^Marilù Gioffré ♥ <Marilù\.Gioffré\@esempio-università\.it>$/m
   score  LT_FROM      0.01
-  header LT_FROM_ADDR From:addr =~ /^Marilù\.Gioffré\@esempio-università\.it\z/
+  header LT_FROM_ADDR From:addr =~ /^Marilù\.Gioffré\@esempio-università\.it\z/
   score  LT_FROM_ADDR 0.01
-  header LT_FROM_NAME From:name =~ /^Marilù Gioffré ♥\z/
+  header LT_FROM_NAME From:name =~ /^Marilù Gioffré ♥\z/
   score  LT_FROM_NAME 0.01
-  header LT_FROM_RAW  From:raw  =~ /^\s*=\?ISO-8859-1\?Q\?Maril=F9\?= Gioffré ♥ <Marilù\.Gioffré\@esempio-università\.it>$/m
+  header LT_FROM_RAW  From:raw  =~ /^\s*=\?ISO-8859-1\?Q\?Maril=F9\?= Gioffré ♥ <Marilù\.Gioffré\@esempio-università\.it>$/m
   score  LT_FROM_RAW  0.01
   header LT_AUTH_DOM  X-AuthorDomain =~ /^xn--esempio-universit-4ob\.it\z/
   score  LT_AUTH_DOM  0.01
-  header LT_TO_ADDR   To:addr =~ /^Dörte\@Sörensen\.example\.com\z/
+  header LT_TO_ADDR   To:addr =~ /^Dörte\@Sörensen\.example\.com\z/
   score  LT_TO_ADDR   0.01
-  header LT_TO_NAME   To:name =~ /^Dörte Å\. Sörensen, Jr\./
+  header LT_TO_NAME   To:name =~ /^Dörte Å\. Sörensen, Jr\./
   score  LT_TO_NAME   0.01
-  header LT_CC_ADDR   Cc:addr =~ /^θσερ\@εχαμπλε\.ψομ\z/
+  header LT_CC_ADDR   Cc:addr =~ /^θσερ\@εχαμπλε\.ψομ\z/
   score  LT_CC_ADDR   0.01
-  header LT_SUBJ      Subject =~ /^Domače omrežje$/m
+  header LT_SUBJ      Subject =~ /^Domače omrežje$/m
   score  LT_SUBJ      0.01
   header LT_SUBJ_RAW  Subject:raw  =~ /^\s*=\?iso-8859-2\*sl\?Q\?Doma=e8e\?=\s+=\?utf-8\*sl\?Q\?_omre=C5\?=/m
   score  LT_SUBJ_RAW  0.01
-  header LT_SUBJ2     Subject =~ /^【重要訊息】台電105年3月電費,委託金融機構扣繳成功電子繳費憑證\(電號07487616730\)$/m
+  header LT_SUBJ2     Subject =~ /^【重要訊息】台電105年3月電費,委託金融機構扣繳成功電子繳費憑證\(電號07487616730\)$/m
   score  LT_SUBJ2     0.01
   header LT_SUBJ2_RAW Subject:raw  =~ /^\s*=\?UTF-8\?B\?44CQ6YeN6KaB6KiK5oGv44CR5Y\+w6Zu7MTA15bm0\?=\s*=\?UTF-8\?B\?M\+aciOmbu\+iyu\+\+8jOWnlOiol\+mHkeiejeapn\+ani\+aJow==\?=\s*=\?UTF-8\?B\?57mz5oiQ5Yqf6Zu75a2Q57mz6LK75oaR6K2JKOmbu\+iZnw==\?=\s*=\?UTF-8\?B\?MDc0ODc2MTY3MzAp\?=$/m
   score  LT_SUBJ2_RAW 0.01
-  header LT_MSGID     Message-ID =~ /^<b497e6c2\@example\.срб>$/m
+  header LT_MSGID     Message-ID =~ /^<b497e6c2\@example\.срб>$/m
   score  LT_MSGID     0.01
-  header LT_MESSAGEID MESSAGEID  =~ /^<b497e6c2\@example\.срб>$/m
+  header LT_MESSAGEID MESSAGEID  =~ /^<b497e6c2\@example\.срб>$/m
   score  LT_MESSAGEID 0.01
-  header LT_CT        Content-Type =~ /документы для отдела кадров\.pdf/
+  header LT_CT        Content-Type =~ /документы для отдела кадров\.pdf/
   score  LT_CT        0.01
   header LT_CT_RAW    Content-Type:raw =~ /=\?utf-8\?B\?tdC70LAg0LrQsNC00YDQvtCyLnBkZg==\?="/
   score  LT_CT_RAW    0.01
@@ -111,17 +120,32 @@ my $myrules = <<'END';
   header LT_ANY_CHARS From =~ /./
   score  LT_ANY_CHARS 0.001
   describe         LT_ANY_CHARS  Header contains characters
-  lang fr describe LT_ANY_CHARS  En-tête contient caractères
+  lang fr describe LT_ANY_CHARS  En-tête contient caractères
   # sorry, Google translate:
-  lang zh describe LT_ANY_CHARS  字符被包含在消息报头部分
+  lang zh describe LT_ANY_CHARS  字符被包含在消息报头部分
 END
 
 if (!$have_libidn) {
   # temporary fudge to prevent a test failing
   # until the Net::LibIDN becomes a mandatory module
   $myrules =~ s{^(\s*header LT_AUTH_DOM\s+X-AuthorDomain =~)\s*(/.*/)$}
-               {$1 /esempio-università\.it/}m
+               {$1 /esempio-università\.it/}m
+}
+
+
+
+###
+### Test internal and Email::Address::XS parsers
+###
+
+for (1 .. 2) { ## parser loop
+
+if ($_ == 2 && !HAS_EMAIL_ADDRESS_XS) {
+  warn "Not running Email::Address::XS tests, module missing\n";
+  next;
 }
+$ENV{'SA_HEADER_ADDRESS_PARSER'} = $_;
+
 
 $ENV{PERL_BADLANG} = 0;  # suppresses Perl warning about failed locale setting
 # see Mail::SpamAssassin::Conf::Parser::parse(), also Bug 6992
@@ -207,3 +231,6 @@ tstprefs ($myrules . '
 sarun ("-L < data/nice/unicode2", \&patterns_run_cb);
 ok_all_patterns();
 
+
+} ## end parser loop
+

Propchange: spamassassin/trunk/t/header_utf8.t
------------------------------------------------------------------------------
    svn:mime-type = text/plain; charset=UTF-8