You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2014/11/29 19:23:52 UTC

svn commit: r1642460 - in /pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font: PDFont.java PDType1CFont.java

Author: lehmi
Date: Sat Nov 29 18:23:52 2014
New Revision: 1642460

URL: http://svn.apache.org/r1642460
Log:
PDFBOX-2377: use the internal mapping of a type1C font only if a charset is present within the font descriptor

Modified:
    pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
    pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java

Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java?rev=1642460&r1=1642459&r2=1642460&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java Sat Nov 29 18:23:52 2014
@@ -593,8 +593,15 @@ public abstract class PDFont implements 
             }
         }
     }
-
-    private static String getStringFromArray( byte[] c, int offset, int length ) throws IOException
+    /**
+     * Map the given byte(s) to a string.
+     *  
+     * @param c the byte array 
+     * @param offset the offset of the byte(s)
+     * @param length the number of bytes, usually 1 or 2
+     * @return the mapped string, or null if the given length is not supported
+     */
+    protected static String getStringFromArray( byte[] c, int offset, int length )
     {
         String retval = null;
         if( length == 1 )
@@ -605,10 +612,6 @@ public abstract class PDFont implements 
         {
             retval = DOUBLE_CHAR_STRING[(c[offset]+256)%256][(c[offset+1]+256)%256];
         }
-        else
-        {
-            throw new IOException( "Error:Unknown character length:" + length );
-        }
         return retval;
     }
 

Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java?rev=1642460&r1=1642459&r2=1642460&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java Sat Nov 29 18:23:52 2014
@@ -92,8 +92,6 @@ public class PDType1CFont extends PDSimp
 
     private static final byte[] SPACE_BYTES = {(byte)32};
 
-    private final int charOffset;
-    
     /**
      * Constructor.
      * @param fontDictionary the corresponding dictionary
@@ -101,7 +99,6 @@ public class PDType1CFont extends PDSimp
     public PDType1CFont( COSDictionary fontDictionary ) throws IOException
     {
         super( fontDictionary );
-        charOffset = getFirstChar() > -1 ? getFirstChar() - 1 : 0; 
         load();
     }
 
@@ -122,19 +119,26 @@ public class PDType1CFont extends PDSimp
 
     private String getCharacter(byte[] bytes, int offset, int length)
     {
-        int code = getCodeFromArray(bytes, offset, length);
         String character = null;
-        if (charOffset > 0)
-        {
-            code -= charOffset;
-        }
-        else if (codeToSID.containsKey(code))
+        // The PDF doesn't provide any encoding or ToUnicode mapping.
+        // We can use the font's internal mapping if a charset is defined;
+        // otherwise the internal mapping may produce only garbage.
+        if (getFontDescriptor().getCharSet() != null)
         {
-            code = codeToSID.get(code);
+            int code = getCodeFromArray(bytes, offset, length);
+            if (codeToSID.containsKey(code))
+            {
+                code = codeToSID.get(code);
+            }
+            if (sidToCharacter.containsKey(code))
+            {
+                character = sidToCharacter.get(code);
+            }
         }
-        if (sidToCharacter.containsKey(code))
+        else
         {
-            character = sidToCharacter.get(code);
+            // map the byte code to a character
+            character = getStringFromArray(bytes, offset, length);
         }
         return character;
     }