You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ms...@apache.org on 2016/11/01 21:45:38 UTC

svn commit: r1767585 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox: pdfparser/BaseParser.java util/Charsets.java

Author: msahyoun
Date: Tue Nov  1 21:45:38 2016
New Revision: 1767585

URL: http://svn.apache.org/viewvc?rev=1767585&view=rev
Log:
PDFBOX-3519: use Windows-1252 encoding when parsing COSName values that are not UTF-8

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Charsets.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1767585&r1=1767584&r2=1767585&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java Tue Nov  1 21:45:38 2016
@@ -789,7 +789,7 @@ public abstract class BaseParser
         else
         {
             // some malformed PDFs don't use UTF-8 see PDFBOX-3347
-            string = new String(buffer.toByteArray(), Charsets.ISO_8859_1);
+            string = new String(buffer.toByteArray(), Charsets.WINDOWS_1252);
         }
         return COSName.getPDFName(string);
     }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Charsets.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Charsets.java?rev=1767585&r1=1767584&r2=1767585&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Charsets.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Charsets.java Tue Nov  1 21:45:38 2016
@@ -39,7 +39,10 @@ public final class Charsets
     
     /*** ISO-8859-1 charset */
     public static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
-    
+
+    /*** Windows-1252 charset */
+    public static final Charset WINDOWS_1252 = Charset.forName("Windows-1252");
+
     /*** UTF-8 charset */
     public static final Charset UTF_8 = Charset.forName("UTF-8");
 }