You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2010/09/05 13:48:42 UTC

svn commit: r992763 - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/pdmodel/font/PDFont.java test/resources/input/allah2.pdf-sorted.txt test/resources/input/allah2.pdf.txt

Author: lehmi
Date: Sun Sep  5 11:48:41 2010
New Revision: 992763

URL: http://svn.apache.org/viewvc?rev=992763&view=rev
Log:
PDFBOX-805: fixed the character mapping

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
    pdfbox/trunk/pdfbox/src/test/resources/input/allah2.pdf-sorted.txt
    pdfbox/trunk/pdfbox/src/test/resources/input/allah2.pdf.txt

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java?rev=992763&r1=992762&r2=992763&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java Sun Sep  5 11:48:41 2010
@@ -53,6 +53,7 @@ import java.io.BufferedReader;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.IOException;
+import java.io.UnsupportedEncodingException;
 
 import java.util.Collections;
 import java.util.HashMap;
@@ -177,7 +178,7 @@ public abstract class PDFont implements 
         determineEncoding();
     }
 
-    private void determineEncoding()
+   	private void determineEncoding()
     {
         String cmapName = null;
         COSName encodingName = null;
@@ -289,10 +290,7 @@ public abstract class PDFont implements 
                 log.error("Error: Could not find predefined CMAP file for '" + cmapName + "'" );
             }
         }
-//        if (fontEncoding == null)
-//        {
-            getEncodingFromFont();
-//        }
+        getEncodingFromFont();
     }
 
     /**
@@ -513,14 +511,14 @@ public abstract class PDFont implements 
         }
         
         // there is no cmap but probably an encoding with a suitable mapping
-        if( retval == null && length == 1)
+        if( retval == null )
         {
             Encoding encoding = getEncoding();
             if( encoding != null )
             {
                 retval = encoding.getCharacter( getCodeFromArray( c, offset, length ) );
             }
-            if( retval == null && cmap == null)
+            if( retval == null && (cmap == null || length == 2))
             {
                 retval = getStringFromArray( c, offset, length );
             }
@@ -537,7 +535,15 @@ public abstract class PDFont implements 
             SINGLE_CHAR_STRING[i] = new String( new byte[] {(byte)i} );
             for( int j=0; j<256; j++ )
             {
-                DOUBLE_CHAR_STRING[i][j] = new String( new byte[] {(byte)i, (byte)j} );
+                try
+                {
+                    DOUBLE_CHAR_STRING[i][j] = new String( new byte[] {(byte)i, (byte)j}, "UTF-16BE" );
+                }
+                catch (UnsupportedEncodingException e)
+                {
+                    // Nothing should happen here
+                    e.printStackTrace();
+                }
             }
         }
     }

Modified: pdfbox/trunk/pdfbox/src/test/resources/input/allah2.pdf-sorted.txt
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/input/allah2.pdf-sorted.txt?rev=992763&r1=992762&r2=992763&view=diff
==============================================================================
Binary files - no diff available.

Modified: pdfbox/trunk/pdfbox/src/test/resources/input/allah2.pdf.txt
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/input/allah2.pdf.txt?rev=992763&r1=992762&r2=992763&view=diff
==============================================================================
Binary files - no diff available.