You are viewing a plain text version of this content. The canonical link to the original was not preserved in this plain-text copy.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2022/03/09 14:15:21 UTC

svn commit: r1898789 - in /spamassassin/trunk/t: data/spam/unicode1 utf8.t

Author: hege
Date: Wed Mar  9 14:15:20 2022
New Revision: 1898789

URL: http://svn.apache.org/viewvc?rev=1898789&view=rev
Log:
Add some utf8 body tests

Added:
    spamassassin/trunk/t/data/spam/unicode1
Modified:
    spamassassin/trunk/t/utf8.t

Added: spamassassin/trunk/t/data/spam/unicode1
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/data/spam/unicode1?rev=1898789&view=auto
==============================================================================
--- spamassassin/trunk/t/data/spam/unicode1 (added)
+++ spamassassin/trunk/t/data/spam/unicode1 Wed Mar  9 14:15:20 2022
@@ -0,0 +1,11 @@
+From pertand@email.mondolink.com  Fri Aug 31 13:39:16 2001
+To: jenny33436@netscape.net
+Subject: foo
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+Message-Id: <78...@yahoo.com>
+From: renterr989@hotmail.com
+Date: Wed, 29 Aug 2001 04:20:43 -0800
+Sender: pertand@email.mondolink.com
+
+【重要訊息】台電105年3月電費,委託金融機構扣繳成功電子繳費憑證

Modified: spamassassin/trunk/t/utf8.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/utf8.t?rev=1898789&r1=1898788&r2=1898789&view=diff
==============================================================================
--- spamassassin/trunk/t/utf8.t (original)
+++ spamassassin/trunk/t/utf8.t Wed Mar  9 14:15:20 2022
@@ -1,8 +1,12 @@
 #!/usr/bin/perl -T
 
+###
+### UTF-8 CONTENT, edit with UTF-8 locale/editor
+###
+
 use lib '.'; use lib 't';
 use SATest; sa_t_init("utf8");
-use Test::More tests => 4;
+use Test::More tests => 14;
 
 # ---------------------------------------------------------------------------
 
@@ -11,7 +15,41 @@ use Test::More tests => 4;
   q{ X-Spam-Flag: YES}, 'flag',
   q{ X-Spam-Level: ****}, 'stars',
 );
+%anti_patterns = ();
 
 ok (sarun ("-L -t < data/spam/009", \&patterns_run_cb));
 ok_all_patterns();
 
+# ---------------------------------------------------------------------------
+
+my $rules = '
+  body FOO1 /金融機/
+  body FOO2 /金融(?:xyz|機)/
+  body FOO3 /\xe9\x87\x91\xe8\x9e\x8d\xe6\xa9\x9f/
+  body FOO4 /.\x87(?:\x91|\x00)[\xe8\x00]\x9e\x8d\xe6\xa9\x9f/
+';
+
+%patterns = (
+  q{ 1.0 FOO1 }, '',
+  q{ 1.0 FOO2 }, '',
+  q{ 1.0 FOO3 }, '',
+  q{ 1.0 FOO4 }, '',
+);
+%anti_patterns = ();
+
+# normalize_charset 1
+tstprefs("
+  $rules
+  normalize_charset 1
+");
+ok (sarun ("-L -t < data/spam/unicode1", \&patterns_run_cb));
+ok_all_patterns();
+
+# normalize_charset 0
+tstprefs("
+  $rules
+  normalize_charset 0
+");
+ok (sarun ("-L -t < data/spam/unicode1", \&patterns_run_cb));
+ok_all_patterns();
+