You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2021/11/18 19:32:32 UTC
svn commit: r1895159 - /pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java
Author: tilman
Date: Thu Nov 18 19:32:32 2021
New Revision: 1895159
URL: http://svn.apache.org/viewvc?rev=1895159&view=rev
Log:
PDFBOX-5324: Get unicode from TrueTypeFont cmap, as suggested by Qiang Liu
Modified:
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java
Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java?rev=1895159&r1=1895158&r2=1895159&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java Thu Nov 18 19:32:32 2021
@@ -21,10 +21,12 @@ import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
+import java.util.List;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.cmap.CMap;
+import org.apache.fontbox.ttf.CmapLookup;
import org.apache.fontbox.ttf.TTFParser;
import org.apache.fontbox.ttf.TrueTypeFont;
import org.apache.fontbox.util.BoundingBox;
@@ -499,18 +501,42 @@ public class PDType0Font extends PDFont
// e) Map the CID according to the CMap from step d), producing a Unicode value
return cMapUCS2.toUnicode(cid);
}
- else
+
+ // PDFBOX-5324: try to get unicode from font cmap
+ if (descendantFont instanceof PDCIDFontType2 && descendantFont.isEmbedded())
{
- if (LOG.isWarnEnabled() && !noUnicode.contains(code))
+ TrueTypeFont font = ((PDCIDFontType2) descendantFont).getTrueTypeFont();
+ if (font != null)
{
- // if no value has been produced, there is no way to obtain Unicode for the character.
- String cid = "CID+" + codeToCID(code);
- LOG.warn("No Unicode mapping for " + cid + " (" + code + ") in font " + getName());
- // we keep track of which warnings have been issued, so we don't log multiple times
- noUnicode.add(code);
+ try
+ {
+ CmapLookup cmap = font.getUnicodeCmapLookup(false);
+ if (cmap != null)
+ {
+ int gid = descendantFont.codeToGID(code);
+ List<Integer> codes = cmap.getCharCodes(gid);
+ if (codes != null && codes.size() > 0)
+ {
+ return Character.toString((char) (int) codes.get(0));
+ }
+ }
+ }
+ catch (IOException e)
+ {
+ LOG.warn("get unicode from font cmap fail", e);
+ }
}
- return null;
}
+
+ if (LOG.isWarnEnabled() && !noUnicode.contains(code))
+ {
+ // if no value has been produced, there is no way to obtain Unicode for the character.
+ String cid = "CID+" + codeToCID(code);
+ LOG.warn("No Unicode mapping for " + cid + " (" + code + ") in font " + getName());
+ // we keep track of which warnings have been issued, so we don't log multiple times
+ noUnicode.add(code);
+ }
+ return null;
}
@Override