You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2021/11/18 19:32:32 UTC

svn commit: r1895159 - /pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java

Author: tilman
Date: Thu Nov 18 19:32:32 2021
New Revision: 1895159

URL: http://svn.apache.org/viewvc?rev=1895159&view=rev
Log:
PDFBOX-5324: Get unicode from TrueTypeFont cmap, as suggested by Qiang Liu

Modified:
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java?rev=1895159&r1=1895158&r2=1895159&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java Thu Nov 18 19:32:32 2021
@@ -21,10 +21,12 @@ import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Set;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.fontbox.cmap.CMap;
+import org.apache.fontbox.ttf.CmapLookup;
 import org.apache.fontbox.ttf.TTFParser;
 import org.apache.fontbox.ttf.TrueTypeFont;
 import org.apache.fontbox.util.BoundingBox;
@@ -499,18 +501,42 @@ public class PDType0Font extends PDFont
             // e) Map the CID according to the CMap from step d), producing a Unicode value
             return cMapUCS2.toUnicode(cid);
         }
-        else
+
+        // PDFBOX-5324: try to get unicode from font cmap
+        if (descendantFont instanceof PDCIDFontType2 && descendantFont.isEmbedded())
         {
-            if (LOG.isWarnEnabled() && !noUnicode.contains(code))
+            TrueTypeFont font = ((PDCIDFontType2) descendantFont).getTrueTypeFont();
+            if (font != null)
             {
-                // if no value has been produced, there is no way to obtain Unicode for the character.
-                String cid = "CID+" + codeToCID(code);
-                LOG.warn("No Unicode mapping for " + cid + " (" + code + ") in font " + getName());
-                // we keep track of which warnings have been issued, so we don't log multiple times
-                noUnicode.add(code);
+                try
+                {
+                    CmapLookup cmap = font.getUnicodeCmapLookup(false);
+                    if (cmap != null)
+                    {
+                        int gid = descendantFont.codeToGID(code);
+                        List<Integer> codes = cmap.getCharCodes(gid);
+                        if (codes != null && codes.size() > 0)
+                        {
+                            return Character.toString((char) (int) codes.get(0));
+                        }
+                    }
+                }
+                catch (IOException e)
+                {
+                    LOG.warn("get unicode from font cmap fail", e);
+                }
             }
-            return null;
         }
+
+        if (LOG.isWarnEnabled() && !noUnicode.contains(code))
+        {
+            // if no value has been produced, there is no way to obtain Unicode for the character.
+            String cid = "CID+" + codeToCID(code);
+            LOG.warn("No Unicode mapping for " + cid + " (" + code + ") in font " + getName());
+            // we keep track of which warnings have been issued, so we don't log multiple times
+            noUnicode.add(code);
+        }
+        return null;
     }
 
     @Override