You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2013/09/10 18:39:50 UTC

svn commit: r1521544 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfviewer: PageDrawer.java font/TTFGlyph2D.java

Author: lehmi
Date: Tue Sep 10 16:39:50 2013
New Revision: 1521544

URL: http://svn.apache.org/r1521544
Log:
PDFBOX-1672: implemented the character encoding according to the PDF specification

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfviewer/PageDrawer.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfviewer/font/TTFGlyph2D.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfviewer/PageDrawer.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfviewer/PageDrawer.java?rev=1521544&r1=1521543&r2=1521544&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfviewer/PageDrawer.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfviewer/PageDrawer.java Tue Sep 10 16:39:50 2013
@@ -43,7 +43,6 @@ import java.util.Map;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.fontbox.cff.CFFFont;
-import org.apache.fontbox.cmap.CMap;
 import org.apache.fontbox.ttf.TrueTypeFont;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSStream;
@@ -567,17 +566,11 @@ public class PageDrawer extends PDFStrea
             if (font instanceof PDTrueTypeFont)
             {
                 PDTrueTypeFont ttfFont = (PDTrueTypeFont) font;
-                // does the font have an optional toUnicode mapping
-                CMap toUnicodeCMap = null;
-                if (ttfFont.hasToUnicode())
-                {
-                    toUnicodeCMap = ttfFont.getToUnicodeCMap();
-                }
                 // get the true type font raw data
                 TrueTypeFont ttf = ttfFont.getTTFFont();
                 if (ttf != null)
                 {
-                    glyph2D = new TTFGlyph2D(ttf, font.getBaseFont(), ttfFont.isSymbolicFont(), toUnicodeCMap);
+                    glyph2D = new TTFGlyph2D(ttf, font.getBaseFont(), ttfFont.isSymbolicFont(), font.getFontEncoding());
                 }
             }
             else if (font instanceof PDType1Font)
@@ -597,11 +590,6 @@ public class PageDrawer extends PDFStrea
             else if (font instanceof PDType0Font)
             {
                 PDType0Font type0Font = (PDType0Font) font;
-                CMap toUnicodeCMap = null;
-                if (type0Font.hasToUnicode())
-                {
-                    toUnicodeCMap = type0Font.getToUnicodeCMap();
-                }
                 if (type0Font.getDescendantFont() instanceof PDCIDFontType2Font)
                 {
                     // a CIDFontType2Font contains TTF font
@@ -613,12 +601,12 @@ public class PageDrawer extends PDFStrea
                         if (cidType2Font.hasCIDToGIDMap())
                         {
                             glyph2D = new TTFGlyph2D(ttf, font.getBaseFont(), cidType2Font.isSymbolicFont(),
-                                    toUnicodeCMap, cidType2Font.getCID2GID());
+                                    font.getFontEncoding(), cidType2Font.getCID2GID());
                         }
                         else
                         {
                             glyph2D = new TTFGlyph2D(ttf, font.getBaseFont(), cidType2Font.isSymbolicFont(),
-                                    toUnicodeCMap);
+                                    font.getFontEncoding(), type0Font.getCMap());
                         }
                     }
                 }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfviewer/font/TTFGlyph2D.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfviewer/font/TTFGlyph2D.java?rev=1521544&r1=1521543&r2=1521544&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfviewer/font/TTFGlyph2D.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfviewer/font/TTFGlyph2D.java Tue Sep 10 16:39:50 2013
@@ -21,6 +21,7 @@ package org.apache.pdfbox.pdfviewer.font
 import java.awt.geom.AffineTransform;
 import java.awt.geom.GeneralPath;
 import java.awt.geom.Point2D;
+import java.io.IOException;
 import java.util.HashMap;
 
 import org.apache.commons.logging.Log;
@@ -33,6 +34,8 @@ import org.apache.fontbox.ttf.GlyphData;
 import org.apache.fontbox.ttf.GlyphDescription;
 import org.apache.fontbox.ttf.HeaderTable;
 import org.apache.fontbox.ttf.TrueTypeFont;
+import org.apache.pdfbox.encoding.Encoding;
+import org.apache.pdfbox.encoding.MacOSRomanEncoding;
 
 /**
  * This class provides a glyph to GeneralPath conversion for true type fonts.
@@ -51,14 +54,14 @@ public class TTFGlyph2D implements Glyph
     private TrueTypeFont font;
     private String name;
     private float scale = 0.001f;
-    private CMAPEncodingEntry cmapMiscUnicode = null;
     private CMAPEncodingEntry cmapWinUnicode = null;
     private CMAPEncodingEntry cmapWinSymbol = null;
     private CMAPEncodingEntry cmapMacintoshSymbol = null;
     private boolean isSymbol = false;
     private HashMap<Integer, GeneralPath> glyphs = new HashMap<Integer, GeneralPath>();
-    private CMap toUnicode = null;
-    private int unicodeByteMapping = 1;
+    private Encoding fontEncoding = null;
+    private CMap fontCMap = null;
+    private boolean hasTwoByteMappings = false;
     private int[] cid2gid = null;
 
     /**
@@ -67,11 +70,12 @@ public class TTFGlyph2D implements Glyph
      * @param trueTypeFont the true type font containing the glyphs
      * @param fontname the name of the given font
      * @param symbolFont indicates if the font is a symbolic font
+     * @param encoding the encoding of the font
      * 
      */
-    public TTFGlyph2D(TrueTypeFont trueTypeFont, String fontname, boolean symbolFont)
+    public TTFGlyph2D(TrueTypeFont trueTypeFont, String fontname, boolean symbolFont, Encoding encoding)
     {
-        this(trueTypeFont, fontname, symbolFont, null);
+        this(trueTypeFont, fontname, symbolFont, encoding, null, null);
     }
 
     /**
@@ -80,12 +84,13 @@ public class TTFGlyph2D implements Glyph
      * @param trueTypeFont the true type font containing the glyphs
      * @param fontname the name of the given font
      * @param symbolFont indicates if the font is a symbolic font
-     * @param toUnicodeCMap an optional toUnicode mapping
-     * 
+     * @param encoding the encoding of the font
+     * @param cid2gidMapping an optional CID2GID mapping
      */
-    public TTFGlyph2D(TrueTypeFont trueTypeFont, String fontname, boolean symbolFont, CMap toUnicodeCMap)
+    public TTFGlyph2D(TrueTypeFont trueTypeFont, String fontname, boolean symbolFont, Encoding encoding,
+            int[] cid2gidMapping)
     {
-        this(trueTypeFont, fontname, symbolFont, toUnicodeCMap, null);
+        this(trueTypeFont, fontname, symbolFont, encoding, cid2gidMapping, null);
     }
 
     /**
@@ -94,22 +99,37 @@ public class TTFGlyph2D implements Glyph
      * @param trueTypeFont the true type font containing the glyphs
      * @param fontname the name of the given font
      * @param symbolFont indicates if the font is a symbolic font
-     * @param toUnicodeCMap an optional toUnicode mapping
-     * @param cid2gidMapping an optional CID2GIC mapping
+     * @param encoding the encoding of the font
+     * @param cMap an optional CMap
+     */
+    public TTFGlyph2D(TrueTypeFont trueTypeFont, String fontname, boolean symbolFont, Encoding encoding, CMap cMap)
+    {
+        this(trueTypeFont, fontname, symbolFont, encoding, null, cMap);
+    }
+
+    /**
+     * Constructor.
      * 
+     * @param trueTypeFont the true type font containing the glyphs
+     * @param fontname the name of the given font
+     * @param symbolFont indicates if the font is a symbolic font
+     * @param encoding the encoding of the font
+     * @param cid2gidMapping an optional CID2GID mapping
+     * @param cMap an optional CMap
      */
-    public TTFGlyph2D(TrueTypeFont trueTypeFont, String fontname, boolean symbolFont, CMap toUnicodeCMap,
-            int[] cid2gidMapping)
+    public TTFGlyph2D(TrueTypeFont trueTypeFont, String fontname, boolean symbolFont, Encoding encoding,
+            int[] cid2gidMapping, CMap cMap)
     {
         font = trueTypeFont;
         isSymbol = symbolFont;
         name = fontname;
-        toUnicode = toUnicodeCMap;
-        if (toUnicode != null && toUnicode.hasTwoByteMappings())
+        fontEncoding = encoding;
+        cid2gid = cid2gidMapping;
+        fontCMap = cMap;
+        if (fontCMap != null)
         {
-            unicodeByteMapping = 2;
+            hasTwoByteMappings = fontCMap.hasTwoByteMappings();
         }
-        cid2gid = cid2gidMapping;
         // get units per em, which is used as scaling factor
         HeaderTable header = font.getHeader();
         if (header != null)
@@ -141,13 +161,6 @@ public class TTFGlyph2D implements Glyph
                         cmapMacintoshSymbol = cmaps[i];
                     }
                 }
-                else if (CMAPTable.PLATFORM_MISC == cmaps[i].getPlatformId())
-                {
-                    if (CMAPTable.ENCODING_UNICODE == cmaps[i].getPlatformEncodingId())
-                    {
-                        cmapMiscUnicode = cmaps[i];
-                    }
-                }
             }
         }
     }
@@ -204,68 +217,95 @@ public class TTFGlyph2D implements Glyph
         return glyphPath != null ? (GeneralPath) glyphPath.clone() : null;
     }
 
-    /**
-     * {@inheritDoc}
+    /*
+     * Try to map the given code to the corresponding glyph-ID
      */
-    @Override
-    public GeneralPath getPathForCharactercode(int code)
+    private int getGlyphcode(int code)
     {
-        if (isSymbol)
+        int result = 0;
+        if (fontEncoding != null && !isSymbol)
         {
-            int glyphId = 0;
-            // symbol fonts
-            if (cmapWinSymbol != null)
+            try
             {
-                glyphId = cmapWinSymbol.getGlyphId(code);
-                // microsoft sometimes uses PUA unicode values for symbol fonts
-                // the range 0x0020 - 0x00FF maps to 0xF020 - 0xF0FF
-                if (glyphId == 0 && code >= 0x0020 && code <= 0x00FF)
+                String charactername = fontEncoding.getName(code);
+                if (charactername != null)
                 {
-                    glyphId = cmapWinSymbol.getGlyphId(code + 0xF000);
-                }
-                if (glyphId != 0)
-                {
-                    return getPathForGlyphId(glyphId);
+                    if (cmapWinUnicode != null)
+                    {
+                        String unicode = Encoding.getCharacterForName(charactername);
+                        if (unicode != null)
+                        {
+                            code = unicode.codePointAt(0);
+                        }
+                        result = cmapWinUnicode.getGlyphId(code);
+                    }
+                    else if (cmapMacintoshSymbol != null)
+                    {
+                        code = MacOSRomanEncoding.INSTANCE.getCode(charactername);
+                        result = cmapMacintoshSymbol.getGlyphId(code);
+                    }
                 }
             }
-            // use a mac related mapping
-            if (cmapMacintoshSymbol != null)
-            {
-                glyphId = cmapMacintoshSymbol.getGlyphId(code);
-            }
-            if (glyphId != 0)
+            catch (IOException exception)
             {
-                return getPathForGlyphId(glyphId);
+                LOG.error("Caught an exception getGlyhcode: " + exception);
             }
         }
-        else
+        if (fontEncoding == null || isSymbol)
         {
-            int unicode = code;
-            // map the given code to a valid unicode value, if necessary
-            if (toUnicode != null)
+            if (cmapWinSymbol != null)
             {
-                String unicodeStr = toUnicode.lookup(code, unicodeByteMapping);
-                if (unicodeStr != null)
+                result = cmapWinSymbol.getGlyphId(code);
+                if (code >= 0 && code <= 0xFF)
                 {
-                    unicode = unicodeStr.codePointAt(0);
+                    // the CMap may use one of the following code ranges,
+                    // so that we have to add the high byte to get the
+                    // mapped value
+                    if (result == 0)
+                    {
+                        // F000 - F0FF
+                        result = cmapWinSymbol.getGlyphId(code + 0xF000);
+                    }
+                    if (result == 0)
+                    {
+                        // F100 - F1FF
+                        result = cmapWinSymbol.getGlyphId(code + 0xF100);
+                    }
+                    if (result == 0)
+                    {
+                        // F200 - F2FF
+                        result = cmapWinSymbol.getGlyphId(code + 0xF200);
+                    }
                 }
             }
-            // non symbol fonts
-            // Unicode mapping
-            if (cmapWinUnicode != null)
-            {
-                return getPathForGlyphId(cmapWinUnicode.getGlyphId(unicode));
-            }
-            // some fonts provide a custom CMap
-            if (cmapMiscUnicode != null)
+            else if (cmapMacintoshSymbol != null)
             {
-                return getPathForGlyphId(cmapMiscUnicode.getGlyphId(unicode));
+                result = cmapMacintoshSymbol.getGlyphId(code);
             }
-            // use a mac related mapping
-            // Is this possible for non symbol fonts?
-            if (cmapMacintoshSymbol != null)
+        }
+        return result;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public GeneralPath getPathForCharactercode(int code)
+    {
+
+        int glyphId = getGlyphcode(code);
+
+        if (glyphId > 0)
+        {
+            return getPathForGlyphId(glyphId);
+        }
+        // there isn't any mapping, but probably an optional CMap
+        if (fontCMap != null)
+        {
+            String string = fontCMap.lookup(code, hasTwoByteMappings ? 2 : 1);
+            if (string != null)
             {
-                return getPathForGlyphId(cmapMacintoshSymbol.getGlyphId(unicode));
+                code = string.codePointAt(0);
             }
         }
         // there isn't any mapping, but probably an optional CID2GID mapping
@@ -440,11 +480,10 @@ public class TTFGlyph2D implements Glyph
     {
         cid2gid = null;
         cmapMacintoshSymbol = null;
-        cmapMiscUnicode = null;
         cmapWinSymbol = null;
         cmapWinUnicode = null;
         font = null;
-        toUnicode = null;
+        fontCMap = null;
         if (glyphs != null)
         {
             glyphs.clear();