You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jh...@apache.org on 2021/01/19 19:01:27 UTC

svn commit: r1885696 - in /spamassassin/trunk: lib/Mail/SpamAssassin/Message/Node.pm t/data/nice/unicode2

Author: jhardin
Date: Tue Jan 19 19:01:27 2021
New Revision: 1885696

URL: http://svn.apache.org/viewvc?rev=1885696&view=rev
Log:
Bug 7880 - fix undefined-reference weakness in the UTF-16 branch of _normalize(): the debug message dereferenced $decoder even when detect_utf16() returned undef for some UTF-16 data; modify test to cover that condition; modify detect_utf16() to skip the data scan if a BOM is present (for efficiency, as Perl's UTF-16 decoder will figure out the endianness from the BOM)

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
    spamassassin/trunk/t/data/nice/unicode2

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm?rev=1885696&r1=1885695&r2=1885696&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm Tue Jan 19 19:01:27 2021
@@ -388,6 +388,12 @@ sub detect_utf16 {
 	my $sum_l_o = 0;
 	my $decoder = undef;
 
+	# avoid scan if BOM present
+	if( $data =~ /^(?:\xff\xfe|\xfe\xff)/ ) {
+		dbg( "message: detect_utf16: found BOM" );
+		return undef;	# let perl figure it out from the BOM
+	}
+	
 	my @msg_h = unpack 'H' x length( $data ), $data;
 	my @msg_l = unpack 'h' x length( $data ), $data;
 
@@ -518,20 +524,22 @@ sub _normalize {
     # https://bz.apache.org/SpamAssassin/show_bug.cgi?id=7252
 
     my $decoder = detect_utf16( $_[0] );
-    if (eval { $rv = $decoder->decode($_[0], 1|8); defined $rv }) {
-      dbg("message: decoded as charset %s, declared %s",
-        $decoder->name, $charset_declared);
-      return $_[0]  if !$return_decoded;
-      $rv .= $data_taint;  # carry taintedness over, avoid Encode bug
-      return $rv;  # decoded
-    } else {
-      my $err = '';
-      if ($@) {
-        $err = $@; $err =~ s/\s+/ /gs; $err =~ s/(.*) at .*/$1/;
-        $err = " ($err)";
+    if (defined $decoder) {
+      if (eval { $rv = $decoder->decode($_[0], 1|8); defined $rv }) {
+        dbg("message: decoded as charset %s, declared %s",
+          $decoder->name, $charset_declared);
+        return $_[0]  if !$return_decoded;
+        $rv .= $data_taint;  # carry taintedness over, avoid Encode bug
+        return $rv;  # decoded
+      } else {
+        my $err = '';
+        if ($@) {
+          $err = $@; $err =~ s/\s+/ /gs; $err =~ s/(.*) at .*/$1/;
+          $err = " ($err)";
+        }
+        dbg("message: failed decoding as charset %s, declared %s%s",
+          $decoder->name, $charset_declared, $err);
       }
-      dbg("message: failed decoding as charset %s, declared %s%s",
-        $decoder->name, $charset_declared, $err);
     };
   } else {
     # try decoding as a declared character set

Modified: spamassassin/trunk/t/data/nice/unicode2
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/data/nice/unicode2?rev=1885696&r1=1885695&r2=1885696&view=diff
==============================================================================
--- spamassassin/trunk/t/data/nice/unicode2 (original)
+++ spamassassin/trunk/t/data/nice/unicode2 Tue Jan 19 19:01:27 2021
@@ -1,4 +1,4 @@
-From: test
+From: =?UTF-16?B?//492Enc?= test
 To: test
 Message-ID: <12...@test.example.com>
 Date: Thu, 16 Jun 2016 00:41:19 (UTC)