You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@pdfbox.apache.org by le...@apache.org on 2020/04/26 12:50:25 UTC

svn commit: r1877021 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java

Author: lehmi
Date: Sun Apr 26 12:50:25 2020
New Revision: 1877021

URL: http://svn.apache.org/viewvc?rev=1877021&view=rev
Log:
PDFBOX-4749: pass the original byte length to get a proper ToUnicode mapping

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java?rev=1877021&r1=1877020&r2=1877021&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java Sun Apr 26 12:50:25 2020
@@ -20,6 +20,7 @@ import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.math.BigInteger;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
@@ -465,7 +466,21 @@ public abstract class PDFont implements
             }
             else
             {
-                // proceed as normal
+                if (code < 256 && !(this instanceof PDType0Font))
+                {
+                    COSBase encoding = dict.getDictionaryObject(COSName.ENCODING);
+                    boolean isIdentity = encoding instanceof COSName
+                            && ((COSName) encoding).getName().startsWith("Identity");
+                    if (encoding != null && !isIdentity)
+                    {
+                        // Due to the conversion to an int, it is no longer possible to determine
+                        // whether the code is based on a one- or two-byte value. We should consider
+                        // refactoring that part of the code.
+                        // However, simple fonts with an encoding use one-byte codes, so we can
+                        // limit the CMap mappings to one-byte codes by passing the original length.
+                        return toUnicodeCMap.toUnicode(code, 1);
+                    }
+                }
                 return toUnicodeCMap.toUnicode(code);
             }
         }