You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2014/09/26 17:34:41 UTC

svn commit: r1627810 - in /pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font: PDFont.java PDType1CFont.java PDType1Font.java

Author: lehmi
Date: Fri Sep 26 15:34:41 2014
New Revision: 1627810

URL: http://svn.apache.org/r1627810
Log:
PDFBOX-2377: overhaul the text extraction for Type1C fonts

Modified:
    pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
    pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java
    pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java

Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java?rev=1627810&r1=1627809&r2=1627810&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java Fri Sep 26 15:34:41 2014
@@ -570,14 +570,18 @@ public abstract class PDFont implements 
     {
         for( int i=0; i<256; i++ )
         {
-            try
-            {
-                SINGLE_CHAR_STRING[i] = new String( new byte[] {(byte)i}, "ISO-8859-1" );
-            }
-            catch (UnsupportedEncodingException e)
+            // ISO-8859-1 doesn't support the whole range
+            if (i >= 32 && (i < 127 || i > 159))
             {
-                // Nothing should happen here
-                LOG.error(e,e);
+                try
+                {
+                    SINGLE_CHAR_STRING[i] = new String( new byte[] {(byte)i}, "ISO-8859-1" );
+                }
+                catch (UnsupportedEncodingException e)
+                {
+                    // Nothing should happen here
+                    LOG.error(e,e);
+                }
             }
             for( int j=0; j<256; j++ )
             {
@@ -594,7 +598,7 @@ public abstract class PDFont implements 
         }
     }
 
-    private static String getStringFromArray( byte[] c, int offset, int length ) throws IOException
+    protected String getStringFromArray( byte[] c, int offset, int length ) throws IOException
     {
         String retval = null;
         if( length == 1 )

Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java?rev=1627810&r1=1627809&r2=1627810&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java Fri Sep 26 15:34:41 2014
@@ -92,8 +92,6 @@ public class PDType1CFont extends PDSimp
 
     private static final byte[] SPACE_BYTES = {(byte)32};
 
-    private final int charOffset;
-    
     /**
      * Constructor.
      * @param fontDictionary the corresponding dictionary
@@ -101,7 +99,6 @@ public class PDType1CFont extends PDSimp
     public PDType1CFont( COSDictionary fontDictionary ) throws IOException
     {
         super( fontDictionary );
-        charOffset = getFirstChar() > -1 ? getFirstChar() - 1 : 0; 
         load();
     }
 
@@ -124,11 +121,7 @@ public class PDType1CFont extends PDSimp
     {
         int code = getCodeFromArray(bytes, offset, length);
         String character = null;
-        if (charOffset > 0)
-        {
-            code -= charOffset;
-        }
-        else if (codeToSID.containsKey(code))
+        if (codeToSID.containsKey(code))
         {
             code = codeToSID.get(code);
         }

Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java?rev=1627810&r1=1627809&r2=1627810&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java Fri Sep 26 15:34:41 2014
@@ -463,11 +463,13 @@ public class PDType1Font extends PDSimpl
     {
         if (type1CFont != null && getFontEncoding() == null)
         {
-            String character = type1CFont.encode(c, offset, length);
-            if (character != null)
+            // check for ASCII values >= 32
+            if (length == 1 && c[offset] >= 32)
             {
-                return character;
+                return getStringFromArray( c, offset, length );
             }
+            // handle values < 32 and negative byte values (int > 127)
+            return type1CFont.encode(c, offset, length);
         }
         return super.encode(c, offset, length);
     }