You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by mm...@apache.org on 2015/04/14 16:24:02 UTC
svn commit: r1673434 - /spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
Author: mmartinec
Date: Tue Apr 14 14:24:02 2015
New Revision: 1673434
URL: http://svn.apache.org/r1673434
Log:
normalize_charset: decode as GBK if decoder for GB18030 is unavailable
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm?rev=1673434&r1=1673433&r2=1673434&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm Tue Apr 14 14:24:02 2015
@@ -486,6 +486,11 @@ sub _normalize {
$chset = 'Windows-1252'; $decoder = $enc_w1252;
} else {
$chset = $charset_declared; $decoder = Encode::find_encoding($chset);
+ if (!$decoder && $chset =~ /^GB[ -]?18030(?:-20\d\d)?\z/i) {
+ $decoder = Encode::find_encoding('GBK'); # a subset of GB18030
+ dbg("message: no decoder for a declared charset %s, using GBK",
+ $chset) if $decoder;
+ }
}
if (!$decoder) {
dbg("message: failed decoding, no decoder for a declared charset %s",
@@ -539,6 +544,11 @@ sub _normalize {
my $charset_detected = Encode::Detect::Detector::detect($_[1]);
if ($charset_detected && lc $charset_detected ne lc $charset_declared) {
my $decoder = Encode::find_encoding($charset_detected);
+ if (!$decoder && $charset_detected =~ /^GB[ -]?18030(?:-20\d\d)?\z/i) {
+ $decoder = Encode::find_encoding('GBK'); # a subset of GB18030
+ dbg("message: no decoder for a detected charset %s, using GBK",
+ $charset_detected) if $decoder;
+ }
if (!$decoder) {
dbg("message: failed decoding, no decoder for a detected charset %s",
$charset_detected);