You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2018/11/17 14:40:10 UTC

svn commit: r1846805 - in /spamassassin: branches/3.4/lib/Mail/SpamAssassin/Message/Node.pm trunk/lib/Mail/SpamAssassin/Message/Node.pm

Author: hege
Date: Sat Nov 17 14:40:10 2018
New Revision: 1846805

URL: http://svn.apache.org/viewvc?rev=1846805&view=rev
Log:
Fix Windows-1252 autodetection with normalize_charset (Bug 7656)

Modified:
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message/Node.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message/Node.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message/Node.pm?rev=1846805&r1=1846804&r2=1846805&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message/Node.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message/Node.pm Sat Nov 17 14:40:10 2018
@@ -579,8 +579,11 @@ sub _normalize {
     return $_[0];  # is all-ASCII, no need for decoding
 
   } elsif (!defined $rv && $enc_w1252 &&
-      #             ASCII  NBSP (c) SHY  '   "  ...   '".-   TM
-      $_[0] !~ tr/\x00-\x7F\xA0\xA9\xAD\x82\x84\x85\x91-\x97\x99//c)
+     #             ASCII  NBSP (c) SHY  '   "  ...   '".-   TM
+     #$_[0] !~ tr/\x00-\x7F\xA0\xA9\xAD\x82\x84\x85\x91-\x97\x99//c)
+     # Bug 7656: Include Latin-1 diacritic letters in Windows-1252 autodetection;
+     # otherwise Encode::Detect::Detector might identify them as Windows-1255 (Hebrew!)
+      $_[0] !~ tr/\x00-\x7f\xa0\xa9\xad\x82\x84\x85\x91-\x97\x99\xc0-\xd6\xd8-\xde\xe0-\xf6\xf8-\xfe//c)
   { # ASCII + NBSP + SHY + some punctuation characters
     # NBSP (A0) and SHY (AD) are at the same position in ISO-8859-* too
     # consider also: AE (r), 80 Euro

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm?rev=1846805&r1=1846804&r2=1846805&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm Sat Nov 17 14:40:10 2018
@@ -574,8 +574,11 @@ sub _normalize {
     return $_[0];  # is all-ASCII, no need for decoding
 
   } elsif (!defined $rv && $enc_w1252 &&
-      #             ASCII  NBSP (c) SHY  '   "  ...   '".-   TM
-      $_[0] !~ tr/\x00-\x7F\xA0\xA9\xAD\x82\x84\x85\x91-\x97\x99//c)
+     #             ASCII  NBSP (c) SHY  '   "  ...   '".-   TM
+     #$_[0] !~ tr/\x00-\x7F\xA0\xA9\xAD\x82\x84\x85\x91-\x97\x99//c)
+     # Bug 7656: Include Latin-1 diacritic letters in Windows-1252 autodetection;
+     # otherwise Encode::Detect::Detector might identify them as Windows-1255 (Hebrew!)
+      $_[0] !~ tr/\x00-\x7f\xa0\xa9\xad\x82\x84\x85\x91-\x97\x99\xc0-\xd6\xd8-\xde\xe0-\xf6\xf8-\xfe//c)
   { # ASCII + NBSP + SHY + some punctuation characters
     # NBSP (A0) and SHY (AD) are at the same position in ISO-8859-* too
     # consider also: AE (r), 80 Euro