You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by mm...@apache.org on 2015/04/14 16:24:02 UTC

svn commit: r1673434 - /spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm

Author: mmartinec
Date: Tue Apr 14 14:24:02 2015
New Revision: 1673434

URL: http://svn.apache.org/r1673434
Log:
normalize_charset: decode as GBK if a decoder for GB18030 is unavailable (applies to both declared and detected charsets)

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm?rev=1673434&r1=1673433&r2=1673434&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm Tue Apr 14 14:24:02 2015
@@ -486,6 +486,11 @@ sub _normalize {
       $chset = 'Windows-1252'; $decoder = $enc_w1252;
     } else {
       $chset = $charset_declared; $decoder = Encode::find_encoding($chset);
+      if (!$decoder && $chset =~ /^GB[ -]?18030(?:-20\d\d)?\z/i) {
+        $decoder = Encode::find_encoding('GBK');  # a subset of GB18030
+        dbg("message: no decoder for a declared charset %s, using GBK",
+            $chset)  if $decoder;
+      }
     }
     if (!$decoder) {
       dbg("message: failed decoding, no decoder for a declared charset %s",
@@ -539,6 +544,11 @@ sub _normalize {
     my $charset_detected = Encode::Detect::Detector::detect($_[1]);
     if ($charset_detected && lc $charset_detected ne lc $charset_declared) {
       my $decoder = Encode::find_encoding($charset_detected);
+      if (!$decoder && $charset_detected =~ /^GB[ -]?18030(?:-20\d\d)?\z/i) {
+        $decoder = Encode::find_encoding('GBK');  # a subset of GB18030
+        dbg("message: no decoder for a detected charset %s, using GBK",
+            $charset_detected)  if $decoder;
+      }
       if (!$decoder) {
         dbg("message: failed decoding, no decoder for a detected charset %s",
             $charset_detected);