You are viewing a plain-text version of this content. (The canonical link was a hyperlink in the original page and is not preserved in this plain-text copy.)
Posted to commits@spamassassin.apache.org by he...@apache.org on 2023/05/12 04:47:57 UTC

svn commit: r1909764 - in /spamassassin/trunk: MANIFEST lib/Mail/SpamAssassin/Message.pm t/data/spam/unicode2 t/utf8.t

Author: hege
Date: Fri May 12 04:47:57 2023
New Revision: 1909764

URL: http://svn.apache.org/viewvc?rev=1909764&view=rev
Log:
Bug 8129 - Subject gets UTF-8 encoded twice in some circumstances

Added:
    spamassassin/trunk/t/data/spam/unicode2
Modified:
    spamassassin/trunk/MANIFEST
    spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
    spamassassin/trunk/t/utf8.t

Modified: spamassassin/trunk/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/trunk/MANIFEST?rev=1909764&r1=1909763&r2=1909764&view=diff
==============================================================================
--- spamassassin/trunk/MANIFEST (original)
+++ spamassassin/trunk/MANIFEST Fri May 12 04:47:57 2023
@@ -460,6 +460,7 @@ t/data/spam/spf1
 t/data/spam/spf2
 t/data/spam/spf3
 t/data/spam/unicode1
+t/data/spam/unicode2
 t/data/spam/urilocalbl_net.eml
 t/data/spamc_blank.cf
 t/data/taintcheckplugin.pm

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm?rev=1909764&r1=1909763&r2=1909764&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm Fri May 12 04:47:57 2023
@@ -1262,10 +1262,11 @@ sub get_body_text_array_common {
   # already been done.
   my $html_needs_setting = !exists $self->{metadata}->{html};
 
-  my $text = $method_name eq 'invisible_rendered' ? ''
+  my $subject = $method_name eq 'invisible_rendered' ? ''
                : ($self->get_header('subject') || "\n");
 
   # Go through each part
+  my $text = '';
   for (my $pt = 0 ; $pt <= $#parts ; $pt++ ) {
     my $p = $parts[$pt];
 
@@ -1317,7 +1318,8 @@ sub get_body_text_array_common {
   $text =~ tr/\x00/\n/;			# null => newline
 
   utf8::encode($text) if utf8::is_utf8($text);
-  my @textary = split_into_array_of_short_lines($text);
+  utf8::encode($subject) if utf8::is_utf8($subject);
+  my @textary = split_into_array_of_short_lines($subject.$text);
   $self->{$key} = \@textary;
 
   return $self->{$key};

Added: spamassassin/trunk/t/data/spam/unicode2
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/data/spam/unicode2?rev=1909764&view=auto
==============================================================================
--- spamassassin/trunk/t/data/spam/unicode2 (added)
+++ spamassassin/trunk/t/data/spam/unicode2 Fri May 12 04:47:57 2023
@@ -0,0 +1,79 @@
+Received: from in02.mxguardian.net ([172.16.2.249])
+	by localhost (cf03.mxguardian.net [172.16.3.11]) (MXG, port 10024)
+	with ESMTP id ckhtgSd84tD6 for <re...@example.com>;
+	Tue, 25 Apr 2023 02:17:54 +0000 (UTC)
+Received: from nj080.top (unknown [103.158.37.49])
+	by in02.mxguardian.net (Postfix) with ESMTP id 7C102886E02
+	for <re...@example.com>; Tue, 25 Apr 2023 02:17:50 +0000 (UTC)
+Received: from MZZTQKTVAKJHJCZ (unknown [163.125.18.151])
+	by nj080.top (Postfix) with ESMTPA id 253435234A8
+	for <re...@example.com>; Tue, 25 Apr 2023 02:15:34 +0000 (UTC)
+From: ts4@nj080.top
+Reply-To: 3163745494@qq.com
+To: "REDACTED" <re...@example.com>
+Subject: =?UTF-8?Q?=E5=A4=96=E8=B4=B8=E5=AE=A2=E6=88=B7=E5=BC=80?=
+	=?UTF-8?Q?=E5=8F=91=EF=BC=8C=E5=8A=BF=E5=9C=A8=E5=BF=85?=
+	=?UTF-8?Q?=E8=A1=8C?=
+Date: Tue, 25 Apr 2023 10:15:31 +0800
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="Mark=_746317328373158664230"
+X-Mailer: Foxmail 7, 2,53,  24[cn]
+Message-ID: <2C...@MZZTQKTVAKJHJCZ>
+
+This is a multi-part message in MIME format.
+
+--Mark=_746317328373158664230
+Content-Type: multipart/related; type="multipart/alternative";
+	boundary="Mark=_746317328373158664864"
+
+
+--Mark=_746317328373158664864
+Content-Type: multipart/alternative; boundary="Mark=_746317328373158664391"
+
+
+--Mark=_746317328373158664391
+Content-Type: text/html; charset="UTF-8"
+Content-Transfer-Encoding: quoted-printable
+
+<table style=3D"width: 99.8%; "><tbody><tr><td style=3D"; height:650px; ver=
+tical-align:top; padding: 20px;"><span style=3D"font-size: 18px;"><span sty=
+le=3D"font-family: =E5=B9=BC=E5=9C=86;">WhatsApp=E7=9B=AE=E5=89=8D=E5=B7=B2=
+=E7=BB=8F=E6=98=AF=E5=9B=BD=E5=A4=96=E6=9C=80=E7=81=AB=E7=9A=84=E5=8D=B3=E6=
+=97=B6=E8=81=8A=E5=A4=A9=E9=80=9A=E8=AE=AF=E8=BD=AF=E4=BB=B6=EF=BC=8C=E9=82=
+=A3=E4=B9=88=E5=A4=96=E8=B4=B8=E4=BA=BA=E5=A6=82=E4=BD=95=E9=80=9A=E8=BF=87=
+WhatsApp=E6=9D=A5=E5=BC=80=E5=8F=91=E5=88=B0=E5=AE=A2=E6=88=B7=E5=91=A2=E3=
+=80=82<br />
+<br />
+1=EF=BC=8C=E6=94=B6=E9=9B=86=E7=9B=AE=E6=A0=87=E5=AE=A2=E6=88=B7=E6=89=8B=
+=E6=9C=BA=E5=8F=B7=EF=BC=8C=E5=AE=9A=E4=BD=8D=E6=82=A8=E6=83=B3=E8=A6=81=E6=
+=8C=96=E6=8E=98=E7=9A=84=E5=9B=BD=E5=AE=B6=E5=8C=BA=E5=9F=9F=EF=BC=8C=E6=89=
+=B9=E9=87=8F=E8=8E=B7=E5=8F=96=E7=9B=AE=E6=A0=87=E5=AE=A2=E6=88=B7=E7=9A=84=
+=E6=89=8B=E6=9C=BA=E5=8F=B7=EF=BC=8C=E9=82=AE=E7=AE=B1=E7=AD=89=E3=80=82<br=
+ />
+2=EF=BC=8C=E8=87=AA=E5=8A=A8=E6=8F=90=E5=8F=96=E6=89=80=E5=B1=9E=E8=A1=8C=
+=E4=B8=9A=E7=BE=A4=E7=BB=84=E6=88=90=E5=91=98=E6=89=8B=E6=9C=BA=E5=8F=B7=EF=
+=BC=8C=E4=B8=80=E9=94=AE=E6=93=8D=E4=BD=9C=E3=80=82<br />
+3=EF=BC=8C=E6=89=B9=E9=87=8F=E9=AA=8C=E8=AF=81=E8=B4=A6=E6=88=B7=EF=BC=8C=
+=E6=8F=90=E9=AB=98=E5=BC=80=E5=8F=91=E6=9C=89=E6=95=88=E7=8E=87=E3=80=82<br=
+ />
+4=EF=BC=8C=E8=87=AA=E5=8A=A8=E7=BE=A4=E5=8F=91=EF=BC=8C=E5=BF=AB=E9=80=9F=
+=E7=A7=AF=E7=B4=AF=E5=AE=A2=E6=88=B7=E7=BE=A4=E4=BD=93=EF=BC=8C=E5=AE=A2=E6=
+=88=B7=E4=B8=80=E7=BD=91=E6=89=93=E5=B0=BD=E3=80=82<br />
+5=EF=BC=8C=E8=B4=A6=E6=88=B7=E5=A4=9A=E5=BC=80=EF=BC=8C=E5=AE=9E=E7=8E=B0=
+=E5=A4=9A=E8=B4=A6=E6=88=B7=E9=AB=98=E6=95=88=E7=AE=A1=E7=90=86=E3=80=82<br=
+ />
+<br />
+=E6=93=8D=E4=BD=9C=E4=B8=80=E4=BD=93=E5=8C=96=EF=BC=8C=E7=AE=80=E5=8D=95=E6=
+=98=93=E4=B8=8A=E6=89=8B=EF=BC=8C=E6=9B=B4=E6=9C=89=E5=A4=9A=E7=A7=8D=E6=96=
+=B0=E5=8A=9F=E8=83=BD=E5=B8=AE=E5=8A=A9=E6=82=A8=E5=A4=9A=E6=A8=A1=E5=BC=8F=
+=E5=BC=80=E5=8F=91=E5=AE=A2=E6=88=B7=EF=BC=8C=E6=9C=89=E5=85=B4=E8=B6=A3=E7=
+=9A=84=E6=AC=A2=E8=BF=8E=E5=89=8D=E6=9D=A5=E5=92=A8=E8=AF=A2=E4=BD=93=E9=AA=
+=8C=E3=80=82<br />
+<span style=3D"color: rgb(231, 76, 60);">QQ=EF=BC=9A3163745494<br />
+wehat=EF=BC=9A19924468330</span></span></span></td></tr></tbody></table>
+
+--Mark=_746317328373158664391--
+
+--Mark=_746317328373158664864--
+
+--Mark=_746317328373158664230--

Modified: spamassassin/trunk/t/utf8.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/utf8.t?rev=1909764&r1=1909763&r2=1909764&view=diff
==============================================================================
--- spamassassin/trunk/t/utf8.t (original)
+++ spamassassin/trunk/t/utf8.t Fri May 12 04:47:57 2023
@@ -6,7 +6,7 @@
 
 use lib '.'; use lib 't';
 use SATest; sa_t_init("utf8");
-use Test::More tests => 14;
+use Test::More tests => 20;
 
 # ---------------------------------------------------------------------------
 
@@ -53,3 +53,31 @@ tstprefs("
 ok (sarun ("-L -t < data/spam/unicode1", \&patterns_run_cb));
 ok_all_patterns();
 
+### Bug 8129
+
+$rules = '
+  header SUBJ_TEST Subject =~ /外贸客户开发/
+  body   BODY_TEST /外贸客户开发/
+';
+
+%patterns = (
+    q{ 1.0 SUBJ_TEST }, '',
+    q{ 1.0 BODY_TEST }, '',
+);
+
+# normalize_charset 1
+tstprefs("
+  $rules
+  normalize_charset 1
+");
+ok (sarun ("-L -t < data/spam/unicode2", \&patterns_run_cb));
+ok_all_patterns();
+
+# normalize_charset 0
+tstprefs("
+  $rules
+  normalize_charset 0
+");
+ok (sarun ("-L -t < data/spam/unicode2", \&patterns_run_cb));
+ok_all_patterns();
+