You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ja...@apache.org on 2014/08/23 04:34:37 UTC
svn commit: r1619956 [2/3] - in /pdfbox/branches/no-awt: examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ fontbox/src/main/java/org/apache/fontbox/cff/ fontbox/src/main/java/org/apache/fontbox/cmap/ fontbox/src/main/java/org/apache/fontbox/tt...

Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java Sat Aug 23 02:34:35 2014
@@ -21,12 +21,16 @@ import java.io.InputStream;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.fontbox.ttf.CMAPEncodingEntry;
+import org.apache.fontbox.ttf.CMAPTable;
 import org.apache.fontbox.ttf.TTFParser;
 import org.apache.fontbox.ttf.TrueTypeFont;
 import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.encoding.Encoding;
+import org.apache.pdfbox.encoding.StandardEncoding;
 import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.pdmodel.common.PDStream;
 
@@ -40,9 +44,8 @@ public class PDCIDFontType2 extends PDCI
     private static final Log LOG = LogFactory.getLog(PDCIDFontType2.class);
 
     private final TrueTypeFont ttf;
-    private Boolean hasCIDToGIDMap = null;
-    private Boolean hasIdentityCIDToGIDMap = null;
-    private int[] cid2gid = null;
+    private final int[] cid2gid;
+    private final boolean isEmbedded;
 
     /**
      * Constructor.
@@ -61,6 +64,7 @@ public class PDCIDFontType2 extends PDCI
             // embedded
             TTFParser ttfParser = new TTFParser(true);
             ttf = ttfParser.parseTTF(ff2Stream.createInputStream());
+            isEmbedded = true;
         }
         else
         {
@@ -76,132 +80,211 @@ public class PDCIDFontType2 extends PDCI
                 LOG.warn("Using fallback font for " + getBaseFont());
                 ttf = ExternalFonts.getFallbackFont();
             }
+            isEmbedded = false;
         }
+
+        cid2gid = readCIDToGIDMap();
     }
 
-    /**
-     * Indicates if this font has a CIDToGIDMap.
-     * 
-     * @return returns true if the font has a CIDToGIDMap.
-     */
-    public boolean hasCIDToGIDMap()
+    private int[] readCIDToGIDMap()
     {
-        if (hasCIDToGIDMap == null)
+        int[] cid2gid = null;
+        COSBase map = dict.getDictionaryObject(COSName.CID_TO_GID_MAP);
+        if (map instanceof COSStream)
         {
-            COSBase map = dict.getDictionaryObject(COSName.CID_TO_GID_MAP);
-            if (map != null && map instanceof COSStream)
+            COSStream stream = (COSStream) map;
+            try
             {
-                hasCIDToGIDMap = Boolean.TRUE;
+                InputStream is = stream.getUnfilteredStream();
+                byte[] mapAsBytes = IOUtils.toByteArray(is);
+                IOUtils.closeQuietly(is);
+                int numberOfInts = mapAsBytes.length / 2;
+                cid2gid = new int[numberOfInts];
+                int offset = 0;
+                for (int index = 0; index < numberOfInts; index++)
+                {
+                    int gid = (mapAsBytes[offset] & 0xff) << 8 | mapAsBytes[offset + 1] & 0xff;
+                    cid2gid[index] = gid;
+                    offset += 2;
+                }
             }
-            else
+            catch (IOException exception)
             {
-                hasCIDToGIDMap = Boolean.FALSE;
+                LOG.error("Can't read the CIDToGIDMap", exception);
             }
         }
-        return hasCIDToGIDMap;
+        return cid2gid;
+    }
+
+    @Override
+    public int codeToCID(int code)
+    {
+        // TTF has no native notion of a CID
+        return code;
     }
 
     /**
-     * Indicates if this font has an identity CIDToGIDMap.
-     * 
-     * @return returns true if the font has an identity CIDToGIDMap.
+     * Returns the GID for the given character code.
+     *
+     * @param code character code
+     * @return GID
      */
-    public boolean hasIdentityCIDToGIDMap()
+    public int codeToGID(int code) throws IOException
     {
-        if (hasIdentityCIDToGIDMap == null)
-        {
-            COSBase map = dict.getDictionaryObject(COSName.CID_TO_GID_MAP);
-            if (map != null && map instanceof COSName)
-            {
-                hasIdentityCIDToGIDMap = Boolean.TRUE;
-            }
-            else
-            {
-                hasIdentityCIDToGIDMap = Boolean.FALSE;
-            }
-        }
-        return hasIdentityCIDToGIDMap;
+        int cid = codeToCID(code);
+        return cidToGID(cid);
     }
 
     /**
-     * Maps the given CID to the correspondent GID.
+     * Returns the GID for the given CID.
      * 
-     * @param cid the given CID
-     * @return the mapped GID, or -1 if something went wrong.
+     * @param cid the given CID (for TTF this is the same as the character code)
+     * @return the mapped GID
      */
-    public int mapCIDToGID(int cid)
+    public int cidToGID(int cid) throws IOException
     {
-        if (hasCIDToGIDMap())
+        if (!isEmbedded)
         {
-            if (cid2gid == null)
+            // The conforming reader shall select glyphs by translating characters from the
+            // encoding specified by the predefined CMap to one of the encodings in the TrueType
+            // font's 'cmap' table. The means by which this is accomplished are implementation-
+            // dependent.
+
+            CMAPEncodingEntry cmap = getUnicodeCmap(ttf.getCMAP());
+            String unicode;
+
+            // non-symbolic behaviour for Type2 TTFs isn't well documented, test with PDFBOX-1422
+            if (!parent.isSymbolic()) // todo: but this descendant font has its own flags?
+            {
+                // if the font descriptor's Nonsymbolic flag is set, the conforming reader shall
+                // create a table that maps from character codes to glyph names
+                String name = null;
+
+                // If the Encoding entry is one of the names MacRomanEncoding, WinAnsiEncoding,
+                // or a dictionary, then the table is initialized as normal
+                // todo: Encoding is not allowed though, right? So this never happens?
+                /*if (getFontEncoding() != null)
+                {
+                    name = getFontEncoding().getName(cid);
+                }*/
+
+                // Any undefined entries in the table shall be filled using StandardEncoding
+                if (name == null)
+                {
+                    name = StandardEncoding.INSTANCE.getName(cid); // code = CID for TTF
+                }
+
+                // map to a Unicode value using the Adobe Glyph List
+                unicode = Encoding.getCharacterForName(name);
+            }
+            else
+            {
+                unicode = parent.toUnicode(cid); // code = CID for TTF
+            }
+
+            if (unicode == null)
             {
-                readCIDToGIDMapping();
+                return 0;
             }
-            if (cid2gid != null && cid < cid2gid.length)
+            else if (unicode.length() > 1)
             {
-                return cid2gid[cid];
+                LOG.warn("trying to map a multi-byte character using 'cmap', result will be poor");
             }
-            return -1;
+            return cmap.getGlyphId(unicode.codePointAt(0));
         }
         else
         {
-            // identity is the default value
-            return cid;
-        }
-    }
+            // If the TrueType font program is embedded, the Type 2 CIDFont dictionary shall contain
+            // a CIDToGIDMap entry that maps CIDs to the glyph indices for the appropriate glyph
+            // descriptions in that font program.
 
-    private void readCIDToGIDMapping()
-    {
-        COSBase map = dict.getDictionaryObject(COSName.CID_TO_GID_MAP);
-        if (map instanceof COSStream)
-        {
-            COSStream stream = (COSStream) map;
-            try
+            if (cid2gid != null)
             {
-                InputStream is = stream.getUnfilteredStream();
-                byte[] mapAsBytes = IOUtils.toByteArray(is);
-                IOUtils.closeQuietly(is);
-                int numberOfInts = mapAsBytes.length / 2;
-                cid2gid = new int[numberOfInts];
-                int offset = 0;
-                for (int index = 0; index < numberOfInts; index++)
+                // use CIDToGIDMap
+                if (cid < cid2gid.length)
                 {
-                    cid2gid[index] = getCodeFromArray(mapAsBytes, offset, 2);
-                    offset += 2;
+                    return cid2gid[cid];
+                }
+                else
+                {
+                    return 0;
                 }
             }
-            catch (IOException exception)
+            else
             {
-                LOG.error("Can't read the CIDToGIDMap", exception);
+                // "Identity" is the default CIDToGIDMap
+                return cid;
             }
         }
     }
 
     /**
-     * Returns the embedded or substituted TrueType font.
+     * Returns the best Unicode from the font (the most general). The PDF spec says that "The means
+     * by which this is accomplished are implementation-dependent."
      */
-    public TrueTypeFont getTrueTypeFont()
+    private CMAPEncodingEntry getUnicodeCmap(CMAPTable cmapTable)
     {
-        return ttf;
+        CMAPEncodingEntry cmap = cmapTable.getSubtable(CMAPTable.PLATFORM_UNICODE,
+                                                  CMAPTable.ENCODING_UNICODE_2_0_FULL);
+        if (cmap == null)
+        {
+            cmap = cmapTable.getSubtable(CMAPTable.PLATFORM_UNICODE,
+                                         CMAPTable.ENCODING_UNICODE_2_0_BMP);
+        }
+        if (cmap == null)
+        {
+            cmap = cmapTable.getSubtable(CMAPTable.PLATFORM_WINDOWS,
+                                         CMAPTable.ENCODING_WIN_UNICODE);
+        }
+        if (cmap == null)
+        {
+            // Microsoft's "Recommendations for OpenType Fonts" says that "Symbol" encoding
+            // actually means "Unicode, non-standard character set"
+            cmap = cmapTable.getSubtable(CMAPTable.PLATFORM_WINDOWS,
+                                         CMAPTable.ENCODING_WIN_SYMBOL);
+        }
+        if (cmap == null)
+        {
+            // fallback to the first cmap (may not be Unicode, so may produce poor results)
+            LOG.warn("Used fallback cmap for font " + getBaseFont());
+            cmap = cmapTable.getCmaps()[0];
+        }
+        return cmap;
     }
 
     @Override
-    public float getFontWidth(byte[] c, int offset, int length)
+    public float getHeight(int code) throws IOException
     {
-        // a suitable mapping is needed to address the correct width value
-        int code = getCodeFromArray(c, offset, length);
-        if (hasIdentityCIDToGIDMap() || hasCIDToGIDMap())
-        {
-            return getFontWidth(code);
-        }
-        else if (getParent().getCMap() != null)
+        // todo: really we want the BBox, (for text extraction:)
+        return (ttf.getHorizontalHeader().getAscender() + -ttf.getHorizontalHeader().getDescender())
+                / ttf.getUnitsPerEm(); // todo: shouldn't this be the yMax/yMin?
+    }
+
+    @Override
+    protected float getWidthFromFont(int code) throws IOException
+    {
+        int cid = codeToCID(code);
+        int gid = cidToGID(cid);
+        int width = ttf.getAdvanceWidth(gid);
+        int unitsPerEM = ttf.getUnitsPerEm();
+        if (unitsPerEM != 1000)
         {
-            String mappedString = getParent().getCMap().lookup(code, length);
-            if (mappedString != null)
-            {
-                return getFontWidth(mappedString.codePointAt(0));
-            }
+            width *= 1000f / unitsPerEM;
         }
-        return super.getFontWidth(c, offset, length);
+        return width;
+    }
+
+    @Override
+    protected boolean isEmbedded()
+    {
+        return isEmbedded;
+    }
+
+    /**
+     * Returns the embedded or substituted TrueType font.
+     */
+    public TrueTypeFont getTrueTypeFont()
+    {
+        return ttf;
     }
 }

Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java Sat Aug 23 02:34:35 2014
@@ -18,16 +18,12 @@ package org.apache.pdfbox.pdmodel.font;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.UnsupportedEncodingException;
 import java.util.Collections;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.fontbox.cmap.CMap;
-import org.apache.fontbox.cmap.CMapParser;
 import org.apache.pdfbox.cos.COSArray;
 import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSDictionary;
@@ -36,16 +32,11 @@ import org.apache.pdfbox.cos.COSInteger;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSNumber;
 import org.apache.pdfbox.cos.COSStream;
-import org.apache.pdfbox.encoding.DictionaryEncoding;
-import org.apache.pdfbox.encoding.Encoding;
-import org.apache.pdfbox.encoding.MacRomanEncoding;
-import org.apache.pdfbox.encoding.WinAnsiEncoding;
 import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.pdmodel.common.COSArrayList;
 import org.apache.pdfbox.pdmodel.common.COSObjectable;
 import org.apache.pdfbox.pdmodel.common.PDMatrix;
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
-import org.apache.pdfbox.util.ResourceLoader;
 
 /**
  * This is the base class for all PDF fonts.
@@ -55,130 +46,59 @@ import org.apache.pdfbox.util.ResourceLo
 public abstract class PDFont implements COSObjectable
 {
     private static final Log LOG = LogFactory.getLog(PDFont.class);
-    private static final byte[] SPACE_BYTES = { (byte) 32 }; // formerly in PDSimpleFont
 
-    protected static final String resourceRootCMAP = "org/apache/pdfbox/resources/cmap/";
-    protected static Map<String, CMap> cmapObjects =
-            Collections.synchronizedMap(new HashMap<String, CMap>()); // todo: why synchronized?
-
-    private static final String[] SINGLE_CHAR_STRING = new String[256];
-    private static final String[][] DOUBLE_CHAR_STRING = new String[256][256];
+    private static final PDMatrix FONT_MATRIX_1000;
     static
     {
-        for (int i = 0; i < 256; i++)
-        {
-            try
-            {
-                SINGLE_CHAR_STRING[i] = new String(new byte[] { (byte) i }, "ISO-8859-1");
-            }
-            catch (UnsupportedEncodingException e)
-            {
-                // Nothing should happen here
-                LOG.error(e,e);
-            }
-            for (int j = 0; j < 256; j++)
-            {
-                try
-                {
-                    DOUBLE_CHAR_STRING[i][j] = new String(new byte[] { (byte) i, (byte) j },
-                            "UTF-16BE");
-                }
-                catch (UnsupportedEncodingException e)
-                {
-                    // Nothing should happen here
-                    LOG.error(e, e);
-                }
-            }
-        }
-    }
-
-    private static String getStringFromArray(byte[] c, int offset, int length) throws IOException
-    {
-        String retval;
-        if (length == 1)
-        {
-            retval = SINGLE_CHAR_STRING[(c[offset] + 256) % 256];
-        }
-        else if (length == 2)
-        {
-            retval = DOUBLE_CHAR_STRING[(c[offset] + 256) % 256][(c[offset + 1] + 256) % 256];
-        }
-        else
-        {
-            throw new IOException("Error:Unknown character length:" + length);
-        }
-        return retval;
-    }
-
-    /**
-     * The Font dictionary.
-     */
-    protected COSDictionary dict;
-
-    /**
-     * The font matrix.
-     */
-    protected PDMatrix fontMatrix = null;
+        COSArray array = new COSArray();
+        array.add(new COSFloat(0.001f));
+        array.add(COSInteger.ZERO);
+        array.add(COSInteger.ZERO);
+        array.add(new COSFloat(0.001f));
+        array.add(COSInteger.ZERO);
+        array.add(COSInteger.ZERO);
+        FONT_MATRIX_1000 = new PDMatrix(array);
+    }
+
+    protected final COSDictionary dict;
+    private final CMap toUnicodeCMap;
+    protected PDFontDescriptor fontDescriptor;
 
-    // CMap / Encoding
-    protected CMap cmap = null; // only used when this is a Type0 font with a CMap
-    protected Encoding fontEncoding = null; // only used when this font has an encoding
-
-    // the CMap holding the ToUnicode mapping
-    private CMap toUnicodeCmap = null;
-    private boolean hasToUnicode = false;
-
-    private List<Integer> widths = null;
-
-    protected PDFontDescriptor fontDescriptor = null;
-    private boolean widthsAreMissing = false;
-
-    // formerly in PDSimpleFont
-    private final HashMap<Integer, Float> fontSizes = new HashMap<Integer, Float>(128);
-    private float avgFontWidth = 0.0f;
-    private float avgFontHeight = 0.0f;
+    private List<Integer> widths;
+    private float avgFontWidth;
     private float fontWidthOfSpace = -1f;
 
     /**
-     * This will clear AFM resources that are stored statically. This is usually not a problem
-     * unless you want to reclaim resources for a long running process.
-     *
-     * SPECIAL NOTE: The font calculations are currently in COSObject, which is where they will
-     * reside until PDFont is mature enough to take them over. PDFont is the appropriate place for
-     * them and not in COSObject but we need font calculations for text extraction. THIS METHOD WILL
-     * BE MOVED OR REMOVED TO ANOTHER LOCATION IN A FUTURE VERSION OF PDFBOX.
-     *
-     * @deprecated This method will be removed in a future version of PDFBox.
-     */
-    @Deprecated
-    public static void clearResources()
-    {
-        cmapObjects.clear();
-    }
-
-    /**
      * Constructor.
      */
     protected PDFont()
     {
         dict = new COSDictionary();
         dict.setItem(COSName.TYPE, COSName.FONT);
+        toUnicodeCMap = null;
     }
 
     /**
      * Constructor.
-     * 
-     * @param fontDictionary The font dictionary according to the PDF specification.
+     *
+     * @param fontDictionary Font dictionary.
      */
-    protected PDFont(COSDictionary fontDictionary)
+    protected PDFont(COSDictionary fontDictionary) throws IOException
     {
         dict = fontDictionary;
+        COSBase toUnicode = dict.getDictionaryObject(COSName.TO_UNICODE);
+        if (toUnicode != null)
+        {
+            toUnicodeCMap = readCMap(toUnicode);
+        }
+        else
+        {
+            toUnicodeCMap = null;
+        }
     }
 
     /**
-     * This will get the font descriptor for this font.
-     * 
-     * @return The font descriptor for this font.
+     * Returns the font descriptor, may be null.
      */
     public PDFontDescriptor getFontDescriptor()
     {
@@ -189,133 +109,112 @@ public abstract class PDFont implements 
             {
                 fontDescriptor = new PDFontDescriptorDictionary(fd);
             }
-            // todo: NOTE: null return value here if fine, because we override this method
         }
         return fontDescriptor;
     }
 
     /**
-     * Determines the encoding for the font. This method as to be overwritten, as there are
-     * different possibilities to define a mapping.
+     * Reads a CMap given a COS Stream or Name. May return null if a predefined CMap does not exist.
+     *
+     * @param base COSName or COSStream
      */
-    protected void determineEncoding()
+    protected final CMap readCMap(COSBase base) throws IOException
     {
-        COSBase encoding = dict.getDictionaryObject(COSName.ENCODING);
-        Encoding fontEncoding = null;
-        if (encoding != null)
+        if (base instanceof COSName)
+        {
+            // predefined CMap
+            String name = ((COSName)base).getName();
+            return CMapManager.getPredefinedCMap(name);
+        }
+        else if (base instanceof COSStream)
         {
-            if (encoding instanceof COSName)
+            // embedded CMap
+            InputStream input = null;
+            try
             {
-                COSName encodingName = (COSName)encoding;
-                try
-                {
-                    fontEncoding = Encoding.getInstance(encodingName);
-                }
-                catch (IOException exception)
-                {
-                    LOG.warn("Debug: Could not find encoding for " + encodingName);
-                }
+                input = ((COSStream)base).getUnfilteredStream();
+                return CMapManager.parseCMap(input);
             }
-            else if (encoding instanceof COSDictionary)
+            finally
             {
-                try
-                {
-                    fontEncoding = new DictionaryEncoding((COSDictionary) encoding);
-                }
-                catch (IOException exception)
-                {
-                    LOG.error("Error: Could not create the DictionaryEncoding");
-                }
+                IOUtils.closeQuietly(input);
             }
         }
-        this.fontEncoding = fontEncoding;
-        extractToUnicodeEncoding();
+        else
+        {
+            throw new IOException("Expected Name or Stream");
+        }
     }
 
-    protected final void extractToUnicodeEncoding()
+    @Override
+    public COSDictionary getCOSObject()
     {
-        COSName encodingName;
-        String cmapName;
-        COSBase toUnicode = dict.getDictionaryObject(COSName.TO_UNICODE);
-        if (toUnicode != null)
+        return dict;
+    }
+
+    /**
+     * Returns the width of the given character.
+     *
+     * @param code character code
+     */
+    public float getWidth(int code) throws IOException
+    {
+        if (!isEmbedded())
         {
-            hasToUnicode = true;
-            if (toUnicode instanceof COSStream)
+            // "If the font program is not embedded, Acrobat overrides the widths in the font
+            // program on the conforming reader's system with the widths specified in the font
+            // dictionary." (Adobe Supplement to the ISO 32000)
+
+            // Type1, Type1C, Type3
+            int firstChar = dict.getInt(COSName.FIRST_CHAR, -1);
+            int lastChar = dict.getInt(COSName.LAST_CHAR, -1);
+            if (getWidths().size() > 0 && code >= firstChar && code <= lastChar)
             {
-                try
-                {
-                    InputStream is = ((COSStream) toUnicode).getUnfilteredStream();
-                    toUnicodeCmap = parseCmap(resourceRootCMAP, is);
-                    IOUtils.closeQuietly(is);
-                }
-                catch (IOException exception)
-                {
-                    LOG.error("Error: Could not load embedded ToUnicode CMap");
-                }
+                return getWidths().get(code - firstChar).floatValue();
             }
-            else if (toUnicode instanceof COSName)
+            else
             {
-                encodingName = (COSName) toUnicode;
-                toUnicodeCmap = cmapObjects.get(encodingName.getName());
-                if (toUnicodeCmap == null)
+                PDFontDescriptor fd = getFontDescriptor();
+                if (fd instanceof PDFontDescriptorDictionary)
                 {
-                    cmapName = encodingName.getName();
-                    String resourceName = resourceRootCMAP + cmapName;
-                    try
-                    {
-                        toUnicodeCmap = parseCmap(resourceRootCMAP,
-                                ResourceLoader.loadResource(resourceName));
-                    }
-                    catch (IOException exception)
-                    {
-                        LOG.error("Error: Could not find predefined ToUnicode CMap file for '" +
-                                cmapName + "'");
-                    }
-                    if (toUnicodeCmap == null)
-                    {
-                        LOG.error("Error: Could not parse predefined ToUnicode CMap file for '" +
-                                cmapName + "'");
-                    }
+                    return fd.getMissingWidth();
+                }
+                else
+                {
+                    // if there's nothing to override with, then obviously we fall back to the font
+                    return getWidthFromFont(code);
                 }
             }
         }
+        else
+        {
+            // otherwise the fonts widths should exactly match the widths in the font dictionary
+            return getWidthFromFont(code);
+        }
     }
 
-    @Override
-    public COSBase getCOSObject()
-    {
-        return dict;
-    }
+    /**
+     * Returns the width of a glyph in the embedded font file.
+     *
+     * @param code character code
+     * @return width in glyph space
+     * @throws IOException if the font could not be read
+     */
+    protected abstract float getWidthFromFont(int code) throws IOException;
 
     /**
-     * This will get the font width for a character.
-     * 
-     * @param c The character code to get the width for.
-     * @param offset The offset into the array.
-     * @param length The length of the data.
-     * @return The width is in 1000 unit of text space, ie 333 or 777
+     * Returns true if the font file is embedded in the PDF.
      */
-    public float getFontWidth(byte[] c, int offset, int length) throws IOException
-    {
-        int code = getCodeFromArray(c, offset, length);
-        Float fontWidth = fontSizes.get(code);
-        if (fontWidth == null)
-        {
-            fontWidth = getFontWidth(code);
-            fontSizes.put(code, fontWidth);
-        }
-        return fontWidth;
-    }
+    protected abstract boolean isEmbedded();
 
     /**
      * This will get the font height for a character.
      * 
-     * @param c The character code to get the height for.
-     * @param offset The offset into the array.
-     * @param length The length of the data.
+     * @param code character code
      * @return The height is in 1000 unit of text space, ie 333 or 777
      */
-    public float getFontHeight(byte[] c, int offset, int length)
+    // todo: this is not the glyph height at all! this method is *supposed* to get the y-advance
+    public float getHeight(int code) throws IOException
     {
         // maybe there is already a precalculated value
         PDFontDescriptor desc = getFontDescriptor();
@@ -346,7 +245,6 @@ public abstract class PDFont implements 
                     retval -= desc.getDescent();
                 }
             }
-            avgFontHeight = retval;
             return retval;
         }
         return 0;
@@ -361,11 +259,11 @@ public abstract class PDFont implements 
      */
     public float getStringWidth(String string) throws IOException
     {
-        byte[] data = string.getBytes("ISO-8859-1");
+        byte[] data = string.getBytes("ISO-8859-1"); // todo: *no*, these are *not* character codes
         float totalWidth = 0;
         for (int i = 0; i < data.length; i++)
         {
-            totalWidth += getFontWidth(data, i, 1);
+            totalWidth += getWidth(data[i]);
         }
         return totalWidth;
     }
@@ -415,139 +313,42 @@ public abstract class PDFont implements 
     }
 
     /**
-     * Used for multibyte encodings.
-     * 
-     * @param data The array of data.
-     * @param offset The offset into the array.
-     * @param length The number of bytes to use.
-     * @return The int value of data from the array.
-     */
-    public int getCodeFromArray(byte[] data, int offset, int length)
-    {
-        int code = 0;
-        for (int i = 0; i < length; i++)
-        {
-            code <<= 8;
-            code |= (data[offset + i] + 256) % 256;
-        }
-        return code;
-    }
-
-    /**
-     * Encode the given value using the CMap of the font.
-     * 
-     * @param code the code to encode.
-     * @param length the byte length of the given code.
-     * @param isCIDFont indicates that the used font is a CID font.
-     * 
-     * @return The value of the encoded character.
-     * @throws IOException if something went wrong
-     */
-    protected final String cmapEncoding(int code, int length, boolean isCIDFont, CMap sourceCmap)
-            throws IOException
-    {
-        String retval = null;
-        // there is not sourceCmap if this is a descendant font
-        if (sourceCmap == null)
-        {
-            sourceCmap = cmap;
-        }
-        if (sourceCmap != null)
-        {
-            retval = sourceCmap.lookup(code, length);
-            if (retval == null && isCIDFont)
-            {
-                retval = sourceCmap.lookupCID(code);
-            }
-        }
-        return retval;
-    }
-
-    /**
-     * Returns the Unicode character(s) for a given character code.
-     * 
-     * @param c The character to encode.
-     * @param offset The offset into the array to get the data
-     * @param length The number of bytes to read.
-     * @return The value of the encoded character.
-     * @throws IOException If there is an error during the encoding.
+     * Reads a character code from a content stream string. Codes may be up to 4 bytes long.
+     *
+     * @param in string stream
+     * @return character code
+     * @throws IOException if the CMap or stream cannot be read
      */
-    public String encode(byte[] c, int offset, int length) throws IOException
-    {
-        String retval = null;
-        int code = getCodeFromArray(c, offset, length);
-        if (toUnicodeCmap != null)
-        {
-            retval = cmapEncoding(code, length, false, toUnicodeCmap);
-        }
-        if (retval == null && cmap != null)
-        {
-            retval = cmapEncoding(code, length, false, cmap);
-        }
-
-        // there is no cmap but probably an encoding with a suitable mapping
-        if (retval == null)
-        {
-            if (fontEncoding != null)
-            {
-                retval = fontEncoding.getCharacter(code);
-            }
-            if (retval == null && (cmap == null || length == 2))
-            {
-                retval = getStringFromArray(c, offset, length);
-            }
-        }
-        return retval;
-    }
-
-    public int encodeToCID(byte[] c, int offset, int length) throws IOException
-    {
-        int code = -1;
-        if (encode(c, offset, length) != null)
-        {
-            code = getCodeFromArray(c, offset, length);
-        }
-        return code;
-    }
+    public abstract int readCode(InputStream in) throws IOException;
 
     /**
-     * Parse the given CMap.
-     * 
-     * @param cmapRoot the root path pointing to the provided CMaps
-     * @param cmapStream the CMap to be read
-     * @return the parsed CMap
+     * Returns the Unicode character sequence which corresponds to the given character code.
+     *
+     * @param code character code
+     * @return Unicode character(s)
      */
-    protected final CMap parseCmap(String cmapRoot, InputStream cmapStream)
+    public String toUnicode(int code)
     {
-        CMap targetCmap = null;
-        if (cmapStream != null)
+        // if the font dictionary contains a ToUnicode CMap, use that CMap
+        if (toUnicodeCMap != null)
         {
-            CMapParser parser = new CMapParser();
-            try
+            if (toUnicodeCMap.getName() != null && toUnicodeCMap.getName().startsWith("Identity-"))
             {
-                targetCmap = parser.parse(cmapRoot, cmapStream);
-                // limit the cache to external CMaps
-                if (cmapRoot != null)
-                {
-                    cmapObjects.put(targetCmap.getName(), targetCmap);
-                }
+                // handle the undocumented case of using Identity-H/V as a ToUnicode CMap, this
+                // isn't actually valid as the Identity-x CMaps are code->CID maps, not
+                // code->Unicode maps. See sample_fonts_solidconvertor.pdf for an example.
+                return new String(new char[] { (char) code });
             }
-            catch (IOException exception)
+            else
             {
-                LOG.error("An error occurs while reading a CMap", exception);
+                // proceed as normal
+                return toUnicodeCMap.toUnicode(code);
             }
         }
-        return targetCmap;
-    }
 
-    /**
-     * This will get or create the encoder.
-     * 
-     * @return The encoding to use.
-     */
-    public Encoding getFontEncoding()
-    {
-        return fontEncoding;
+        // if no value has been produced, there is no way to obtain Unicode for the character.
+        // this behaviour can be overridden in subclasses, but this method *must* return null here
+        return null;
     }
 
     /**
@@ -562,8 +363,6 @@ public abstract class PDFont implements 
 
     /**
      * This will get the subtype of font.
-     * 
-     * @return The type of font that this is.
      */
     public String getSubType()
     {
@@ -571,70 +370,21 @@ public abstract class PDFont implements 
     }
 
     /**
-     * Determines if the font is a type 1 font.
-     * 
-     * @return returns true if the font is a type 1 font
-     */
-    public boolean isType1Font()
-    {
-        return "Type1".equals(getSubType());
-    }
-
-    /**
-     * Determines if the font is a type 3 font.
-     * 
-     * @return returns true if the font is a type 3 font
-     */
-    public boolean isType3Font()
-    {
-        return "Type3".equals(getSubType());
-    }
-
-    /**
-     * Determines if the font is a type 0 font.
-     * 
-     * @return returns true if the font is a type 0 font
-     */
-    public boolean isType0Font()
-    {
-        return "Type0".equals(getSubType());
-    }
-
-    /**
-     * Determines if the font is a true type font.
-     * 
-     * @return returns true if the font is a true type font
-     */
-    public boolean isTrueTypeFont()
-    {
-        return "TrueType".equals(getSubType());
-    }
-
-    /**
-     * Determines if the font is a symbolic font.
-     * 
-     * @return returns true if the font is a symbolic font
+     * Returns true if the font is symbolic (that is, it does not use the Adobe Standard Roman
+     * character set).
      */
-    public boolean isSymbolicFont()
+    public boolean isSymbolic()
     {
         if (getFontDescriptor() != null)
         {
             // fixme: isSymbolic() defaults to false if the flag is missing so we can't trust this
             return getFontDescriptor().isSymbolic();
         }
-
-        // fixme: this heuristic is a starting point only
-        if (fontEncoding instanceof MacRomanEncoding || fontEncoding instanceof WinAnsiEncoding)
-        {
-            return false;
-        }
         return true;
     }
 
     /**
-     * The PostScript name of the font.
-     * 
-     * @return The postscript name of the font.
+     * Returns the PostScript name of the font.
      */
     public String getBaseFont()
     {
@@ -642,33 +392,13 @@ public abstract class PDFont implements 
     }
 
     /**
-     * The code for the first char or -1 if there is none.
-     * 
-     * @return The code for the first character.
-     */
-    public int getFirstChar()
-    {
-        return dict.getInt(COSName.FIRST_CHAR, -1);
-    }
-
-    /**
-     * The code for the last char or -1 if there is none.
-     * 
-     * @return The code for the last character.
-     */
-    public int getLastChar()
-    {
-        return dict.getInt(COSName.LAST_CHAR, -1);
-    }
-
-    /**
      * The widths of the characters. This will be null for the standard 14 fonts.
-     * 
+     *
      * @return The widths of the characters.
      */
-    public List<Integer> getWidths()
+    private List<Integer> getWidths()
     {
-        if (widths == null && !widthsAreMissing)
+        if (widths == null)
         {
             COSArray array = (COSArray) dict.getDictionaryObject(COSName.WIDTHS);
             if (array != null)
@@ -677,83 +407,18 @@ public abstract class PDFont implements 
             }
             else
             {
-                widthsAreMissing = true;
+                widths = Collections.emptyList();
             }
         }
         return widths;
     }
 
     /**
-     * This will get the matrix that is used to transform glyph space to text space. By default
-     * there are 1000 glyph units to 1 text space unit, but type3 fonts can use any value.
-     * 
-     * Note: If this is a type3 font then it can be modified via the PDType3Font.setFontMatrix,
-     * otherwise this is a read-only property.
-     * 
-     * @return The matrix to transform from glyph space to text space.
+     * Returns the font matrix, which represents the transformation from glyph space to text space.
      */
     public PDMatrix getFontMatrix()
     {
-        if (fontMatrix == null)
-        {
-            COSArray array = (COSArray) dict.getDictionaryObject(COSName.FONT_MATRIX);
-            if (array == null)
-            {
-                array = new COSArray();
-                array.add(new COSFloat(0.001f));
-                array.add(COSInteger.ZERO);
-                array.add(COSInteger.ZERO);
-                array.add(new COSFloat(0.001f));
-                array.add(COSInteger.ZERO);
-                array.add(COSInteger.ZERO);
-            }
-            fontMatrix = new PDMatrix(array);
-        }
-        return fontMatrix;
-    }
-
-    /**
-     * Determines the width of the given character.
-     * 
-     * @param charCode the code of the given character
-     * @return the width of the character
-     */
-    public float getFontWidth(int charCode) throws IOException
-    {
-        float width = -1;
-        int firstChar = getFirstChar();
-        int lastChar = getLastChar();
-        if (charCode >= firstChar && charCode <= lastChar)
-        {
-            // maybe the font doesn't provide any widths
-            if (!widthsAreMissing)
-            {
-                getWidths();
-                if (widths != null)
-                {
-                    width = widths.get(charCode - firstChar).floatValue();
-                }
-            }
-        }
-        else
-        {
-            PDFontDescriptor fd = getFontDescriptor();
-            if (fd instanceof PDFontDescriptorDictionary)
-            {
-                width = fd.getMissingWidth();
-            }
-        }
-        return width;
-    }
-
-    /**
-     * Determines if a font as a ToUnicode entry.
-     * 
-     * @return true if the font has a ToUnicode entry
-     */
-    public boolean hasToUnicode()
-    {
-        return hasToUnicode;
+        return FONT_MATRIX_1000;
     }
 
     /**
@@ -770,15 +435,15 @@ public abstract class PDFont implements 
             {
                 if (toUnicode != null)
                 {
-                    int spaceMapping = toUnicodeCmap.getSpaceMapping();
+                    int spaceMapping = toUnicodeCMap.getSpaceMapping();
                     if (spaceMapping > -1)
                     {
-                        fontWidthOfSpace = getFontWidth(spaceMapping);
+                        fontWidthOfSpace = getWidth(spaceMapping);
                     }
                 }
                 else
                 {
-                    fontWidthOfSpace = getFontWidth(SPACE_BYTES, 0, 1);
+                    fontWidthOfSpace = getWidth(32);
                 }
                 // use the average font width as fall back
                 if (fontWidthOfSpace <= 0)
@@ -796,26 +461,6 @@ public abstract class PDFont implements 
     }
 
     /**
-     * Returns the toUnicode mapping if present.
-     *
-     * @return the CMap representing the toUnicode mapping
-     */
-    public CMap getToUnicodeCMap()
-    {
-        return toUnicodeCmap;
-    }
-
-    /**
-     * Returns the CMap if present.
-     *
-     * @return the CMap representing the character encoding
-     */
-    public CMap getCMap()
-    {
-        return cmap;
-    }
-
-    /**
      * Calling this will release all cached information.
      */
     public void clear()

Added: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java?rev=1619956&view=auto
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java (added)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java Sat Aug 23 02:34:35 2014
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdmodel.font;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.encoding.DictionaryEncoding;
+import org.apache.pdfbox.encoding.Encoding;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * A simple font. Simple fonts use a PostScript encoding vector.
+ *
+ * @author John Hewson
+ */
+public abstract class PDSimpleFont extends PDFont
+{
+    private static final Log LOG = LogFactory.getLog(PDSimpleFont.class);
+
+    protected Encoding fontEncoding;
+    private final Set<Integer> noUnicode = new HashSet<Integer>();
+
+    /**
+     * Constructor.
+     */
+    protected PDSimpleFont()
+    {
+        super();
+    }
+
+    /**
+     * Constructor.
+     *
+     * @param fontDictionary Font dictionary.
+     */
+    protected PDSimpleFont(COSDictionary fontDictionary) throws IOException
+    {
+        super(fontDictionary);
+    }
+
+    /**
+     * Reads the Encoding from the Font dictionary or the embedded or substituted font file.
+     * Must be called at the end of any subclass constructors.
+     *
+     * @throws IOException if the font file could not be read
+     */
+    protected final void readEncoding() throws IOException
+    {
+        COSBase encoding = dict.getDictionaryObject(COSName.ENCODING);
+        if (encoding != null)
+        {
+            if (encoding instanceof COSName)
+            {
+                COSName encodingName = (COSName)encoding;
+                fontEncoding = Encoding.getInstance(encodingName);
+                if (fontEncoding == null)
+                {
+                    LOG.warn("Unknown encoding: " + encodingName);
+                    fontEncoding = readEncodingFromFont(); // fallback
+                }
+            }
+            else if (encoding instanceof COSDictionary)
+            {
+                fontEncoding = new DictionaryEncoding((COSDictionary) encoding);
+            }
+        }
+        else
+        {
+            fontEncoding = readEncodingFromFont();
+        }
+    }
+
+    /**
+     * Called by readEncoding() if the encoding needs to be extracted from the font file.
+     *
+     * @throws IOException if the font file could not be read
+     */
+    protected abstract Encoding readEncodingFromFont() throws IOException;
+
+    /**
+     * Returns the Encoding vector.
+     */
+    public Encoding getEncoding()
+    {
+        return fontEncoding;
+    }
+
+    @Override
+    public String toUnicode(int code)
+    {
+        // first try to use a ToUnicode CMap
+        String unicode = super.toUnicode(code);
+        if (unicode != null)
+        {
+            return unicode;
+        }
+
+        // if the font is a "simple font" and uses MacRoman/MacExpert/WinAnsi[Encoding]
+        // or has Differences with names from only Adobe Standard and/or Symbol, then:
+        //
+        //    a) Map the character codes to names
+        //    b) Look up the name in the Adobe Glyph List to obtain the Unicode value
+
+        String name = null;
+        if (getEncoding() != null)
+        {
+            name = fontEncoding.getName(code);
+            unicode = Encoding.getCharacterForName(name);
+            if (unicode != null)
+            {
+                return unicode;
+            }
+        }
+
+        // if no value has been produced, there is no way to obtain Unicode for the character.
+        if (LOG.isWarnEnabled() && !noUnicode.contains(code))
+        {
+            // we keep track of which warnings have been issued, so we don't log multiple times
+            noUnicode.add(code);
+            if (name != null)
+            {
+                LOG.warn("No Unicode mapping for " + name + " (" + code + ") in font " +
+                        getBaseFont());
+            }
+            else
+            {
+                LOG.warn("No Unicode mapping for character code " + code + " in font " +
+                        getBaseFont());
+            }
+        }
+
+        return null;
+    }
+}

Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java Sat Aug 23 02:34:35 2014
@@ -20,7 +20,6 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.HashMap;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -39,7 +38,7 @@ import org.apache.pdfbox.pdmodel.common.
  * 
  * @author Ben Litchfield
  */
-public class PDTrueTypeFont extends PDFont
+public class PDTrueTypeFont extends PDSimpleFont
 {
     private static final Log LOG = LogFactory.getLog(PDTrueTypeFont.class);
 
@@ -66,7 +65,7 @@ public class PDTrueTypeFont extends PDFo
     private boolean cmapInitialized = false;
 
     private final TrueTypeFont ttf;
-    private final HashMap<Integer, Float> advanceWidths = new HashMap<Integer, Float> ();
+    private final boolean isEmbedded;
 
     /**
      * Creates a new TrueType font from a Font dictionary.
@@ -89,6 +88,7 @@ public class PDTrueTypeFont extends PDFo
                 ttfFont = ttfParser.parseTTF(ff2Stream.createInputStream());
             }
         }
+        isEmbedded = ttfFont != null;
 
         // substitute
         if (ttfFont == null)
@@ -102,10 +102,14 @@ public class PDTrueTypeFont extends PDFo
                 ttfFont = ExternalFonts.getFallbackFont();
             }
         }
-
         ttf = ttfFont;
+        readEncoding();
+    }
 
-        determineEncoding();
+    @Override
+    protected Encoding readEncodingFromFont() throws IOException
+    {
+        return null;
     }
 
     /**
@@ -116,17 +120,13 @@ public class PDTrueTypeFont extends PDFo
         PDTrueTypeFontEmbedder embedder = new PDTrueTypeFontEmbedder(document, dict, ttfStream);
         fontEncoding = embedder.getFontEncoding();
         ttf = embedder.getTrueTypeFont();
+        isEmbedded = true;
     }
 
     @Override
-    public PDFontDescriptor getFontDescriptor()
+    public int readCode(InputStream in) throws IOException
     {
-        if (super.getFontDescriptor() == null)
-        {
-            // todo: this is an experiment: we now allow this to be null (i.e. we no longer synthesise)
-            //fontDescriptor = makeFontDescriptor(ttf);
-        }
-        return fontDescriptor;
+        return in.read();
     }
 
     /**
@@ -138,46 +138,39 @@ public class PDTrueTypeFont extends PDFo
     }
 
     @Override
-    public float getFontWidth(int charCode) throws IOException
+    protected float getWidthFromFont(int code) throws IOException
     {
-        float width = super.getFontWidth(charCode);
-        if (width <= 0)
+        int gid = codeToGID(code);
+        int width = ttf.getAdvanceWidth(gid);
+        int unitsPerEM = ttf.getUnitsPerEm();
+        if (unitsPerEM != 1000)
         {
-            if (advanceWidths.containsKey(charCode))
-            {
-                width = advanceWidths.get(charCode);
-            }
-            else
-            {
-                int code = getGIDForCharacterCode(charCode);
-                width = ttf.getAdvanceWidth(code);
-                int unitsPerEM = ttf.getUnitsPerEm();
-                // do we have to scale the width
-                if (unitsPerEM != 1000)
-                {
-                    width *= 1000f / unitsPerEM;
-                }
-                advanceWidths.put(charCode, width);
-            }
+            width *= 1000f / unitsPerEM;
         }
         return width;
     }
 
+    @Override
+    protected boolean isEmbedded()
+    {
+        return isEmbedded;
+    }
+
     /**
      * Returns the GID for the given character code.
      *
      * @param code character code
      * @return GID (glyph index)
      */
-    public int getGIDForCharacterCode(int code) throws IOException
+    public int codeToGID(int code) throws IOException
     {
         extractCmapTable();
         int result = 0;
-        if (getFontEncoding() != null && !isSymbolicFont())
+        if (getEncoding() != null && !isSymbolic())
         {
             try
             {
-                String characterName = getFontEncoding().getName(code);
+                String characterName = getEncoding().getName(code);
                 if (characterName != null)
                 {
                     if (cmapWinUnicode != null)
@@ -219,7 +212,7 @@ public class PDTrueTypeFont extends PDFo
             }
         }
 
-        if (getFontEncoding() == null || isSymbolicFont())
+        if (getEncoding() == null || isSymbolic())
         {
             if (cmapWinSymbol != null)
             {
@@ -308,6 +301,5 @@ public class PDTrueTypeFont extends PDFo
         cmapWinSymbol = null;
         cmapMacintoshSymbol = null;
         cmapInitialized = false;
-        advanceWidths.clear();
     }
 }

Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java Sat Aug 23 02:34:35 2014
@@ -217,25 +217,23 @@ class PDTrueTypeFontEmbedder
         fd.setStemV(fd.getFontBoundingBox().getWidth() * .13f);
 
         CMAPTable cmapTable = ttf.getCMAP();
-        CMAPEncodingEntry[] cmaps = cmapTable.getCmaps();
-
-        CMAPEncodingEntry uniMap = getCmapSubtable(cmaps, CMAPTable.PLATFORM_UNICODE,
+        CMAPEncodingEntry uniMap = cmapTable.getSubtable(CMAPTable.PLATFORM_UNICODE,
                 CMAPTable.ENCODING_UNICODE_2_0_FULL);
         if (uniMap == null)
         {
-            uniMap = getCmapSubtable(cmaps, CMAPTable.PLATFORM_UNICODE,
+            uniMap = cmapTable.getSubtable(CMAPTable.PLATFORM_UNICODE,
                     CMAPTable.ENCODING_UNICODE_2_0_BMP);
         }
         if (uniMap == null)
         {
-            uniMap = getCmapSubtable(cmaps, CMAPTable.PLATFORM_WINDOWS,
+            uniMap = cmapTable.getSubtable(CMAPTable.PLATFORM_WINDOWS,
                     CMAPTable.ENCODING_WIN_UNICODE);
         }
         if (uniMap == null)
         {
             // Microsoft's "Recommendations for OpenType Fonts" says that "Symbol" encoding
             // actually means "Unicode, non-standard character set"
-            uniMap = getCmapSubtable(cmaps, CMAPTable.PLATFORM_WINDOWS,
+            uniMap = cmapTable.getSubtable(CMAPTable.PLATFORM_WINDOWS,
                     CMAPTable.ENCODING_WIN_SYMBOL);
         }
         if (uniMap == null)
@@ -244,7 +242,8 @@ class PDTrueTypeFontEmbedder
             // to find one. Furthermore, if we loaded the font from disk then we should've checked
             // first to see that it had a suitable cmap before calling createFontDescriptor
             throw new IllegalArgumentException("ttf: no suitable cmap for font '" +
-                    ttf.getNaming().getFontFamily() + "', found: " + Arrays.toString(cmaps));
+                    ttf.getNaming().getFontFamily() + "', found: " +
+                    Arrays.toString(cmapTable.getCmaps()));
         }
 
         if (this.getFontEncoding() == null)
@@ -315,23 +314,6 @@ class PDTrueTypeFontEmbedder
     }
 
     /**
-     * Returns the "cmap" subtable for the given platform and encoding, or null.
-     */
-    private CMAPEncodingEntry getCmapSubtable(CMAPEncodingEntry[] cmaps,
-                                              int platformId, int platformEncodingId)
-    {
-        for (CMAPEncodingEntry cmap : cmaps)
-        {
-            if (cmap.getPlatformId() == platformId &&
-                    cmap.getPlatformEncodingId() == platformEncodingId)
-            {
-                return cmap;
-            }
-        }
-        return null;
-    }
-
-    /**
      * Returns the font's encoding.
      */
     public Encoding getFontEncoding()

Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java Sat Aug 23 02:34:35 2014
@@ -19,30 +19,23 @@ package org.apache.pdfbox.pdmodel.font;
 import java.io.IOException;
 import java.io.InputStream;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
+import org.apache.fontbox.cmap.CMap;
 import org.apache.pdfbox.cos.COSArray;
 import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.cos.COSStream;
-import org.apache.pdfbox.encoding.DictionaryEncoding;
-import org.apache.pdfbox.encoding.Encoding;
-import org.apache.pdfbox.io.IOUtils;
-import org.apache.pdfbox.pdmodel.common.PDRectangle;
-import org.apache.pdfbox.util.ResourceLoader;
 
 /**
- * A Type 0 (composite) font.
- * 
+ * A Composite (Type 0) font.
+ *
  * @author Ben Litchfield
  */
 public class PDType0Font extends PDFont
 {
-    private static final Log LOG = LogFactory.getLog(PDType0Font.class);
-
     private PDCIDFont descendantFont;
     private COSDictionary descendantFontDictionary;
+    private CMap cMap, cMapUCS2;
+    private boolean isCMapPredefined;
 
     /**
      * Constructor.
@@ -59,8 +52,78 @@ public class PDType0Font extends PDFont
         {
             throw new IOException("Missing descendant font dictionary");
         }
+
+        readEncoding();
+        fetchCMapUCS2();
         descendantFont = PDFontFactory.createDescendantFont(descendantFontDictionary, this);
-        determineEncoding();
+    }
+
+    /**
+     * Reads the font's Encoding entry, which should be a CMap name/stream.
+     */
+    private void readEncoding() throws IOException
+    {
+        COSBase encoding = dict.getDictionaryObject(COSName.ENCODING);
+        if (encoding != null)
+        {
+            if (encoding instanceof COSName)
+            {
+                // predefined CMap
+                COSName encodingName = (COSName)encoding;
+                cMap = CMapManager.getPredefinedCMap(encodingName.getName());
+                if (cMap != null)
+                {
+                    isCMapPredefined = true;
+                    return;
+                }
+            }
+            else
+            {
+                cMap = readCMap(encoding);
+            }
+        }
+    }
+
+    /**
+     * Fetches the corresponding UCS2 CMap if the font's CMap is predefined.
+     */
+    private void fetchCMapUCS2() throws IOException
+    {
+        // if the font is composite and uses a predefined cmap (excluding Identity-H/V),
+        // or if its descendant font uses Adobe-GB1/CNS1/Japan1/Korea1, then:
+        if (isCMapPredefined)
+        {
+            // a) Map the character code to a CID using the font's CMap
+            // b) Obtain the ROS from the font's CIDSystemInfo
+            // c) Construct a second CMap name by concatenating the ROS in the format "R-O-UCS2"
+            // d) Obtain the CMap with the constructed name
+            // e) Map the CID according to the CMap from step d), producing a Unicode value
+
+            String cMapName = null;
+
+            // get the encoding CMap
+            COSBase encoding = dict.getDictionaryObject(COSName.ENCODING);
+            if (encoding != null && encoding instanceof COSName)
+            {
+                cMapName = ((COSName)encoding).getName();
+            }
+
+            // try to find the corresponding Unicode (UCS2) CMap
+            if (cMapName != null && !cMapName.equals("Identity-H") &&
+                                    !cMapName.equals("Identity-V"))
+            {
+                CMap cMap = CMapManager.getPredefinedCMap(cMapName);
+                if (cMap != null)
+                {
+                    String ucs2Name = cMap.getRegistry() + "-" + cMap.getOrdering() + "-UCS2";
+                    CMap ucs2CMap = CMapManager.getPredefinedCMap(ucs2Name);
+                    if (ucs2CMap != null)
+                    {
+                        cMapUCS2 = ucs2CMap;
+                    }
+                }
+            }
+        }
     }
 
     /**
@@ -71,6 +134,14 @@ public class PDType0Font extends PDFont
         return descendantFont;
     }
 
+    /**
+     * Returns the font's CMap.
+     */
+    public CMap getCMap()
+    {
+        return cMap;
+    }
+
     @Override
     public PDFontDescriptor getFontDescriptor()
     {
@@ -78,148 +149,85 @@ public class PDType0Font extends PDFont
     }
 
     @Override
-    public float getFontWidth(byte[] c, int offset, int length)
+    public float getHeight(int code) throws IOException
     {
-        return descendantFont.getFontWidth(c, offset, length);
+        return descendantFont.getHeight(code);
     }
 
     @Override
-    public float getFontHeight(byte[] c, int offset, int length)
+    public float getAverageFontWidth()
     {
-        return descendantFont.getFontHeight(c, offset, length);
+        return descendantFont.getAverageFontWidth();
     }
 
     @Override
-    public float getAverageFontWidth()
+    public float getWidth(int code) throws IOException
     {
-        return descendantFont.getAverageFontWidth();
+        return descendantFont.getWidth(code);
     }
 
     @Override
-    public float getFontWidth(int charCode)
+    protected float getWidthFromFont(int code) throws IOException
     {
-        return descendantFont.getFontWidth(charCode);
+        return descendantFont.getWidthFromFont(code);
     }
 
-    // todo: copied from PDSimpleFont and modified
-    // todo: for a Type 0 font this can only be "The name of a predefined CMap, or a stream containing a
-    // CMap that maps character codes to font numbers and CIDs", so I should adjust this accordingly
     @Override
-    protected void determineEncoding()
+    protected boolean isEmbedded()
     {
-        String cmapName = null;
-        COSName encodingName = null;
-        COSBase encoding = dict.getDictionaryObject(COSName.ENCODING);
-        Encoding fontEncoding = null;
-        if (encoding != null)
+        return descendantFont.isEmbedded();
+    }
+
+    @Override
+    public String toUnicode(int code)
+    {
+        // try to use a ToUnicode CMap
+        String unicode = super.toUnicode(code);
+        if (unicode != null)
         {
-            if (encoding instanceof COSName)
-            {
-                if (cmap == null)
-                {
-                    encodingName = (COSName) encoding;
-                    cmap = cmapObjects.get(encodingName.getName());
-                    if (cmap == null)
-                    {
-                        cmapName = encodingName.getName();
-                    }
-                }
-                // todo: disabled because a Type0 font cannot have a simple Encoding.
-                /*if (cmap == null && cmapName != null)
-                {
-                    try
-                    {
-                        fontEncoding = Encoding.getInstance(encodingName);
-                    }
-                    catch (IOException exception)
-                    {
-                        LOG.warn("Debug: Could not find encoding for " + encodingName);
-                    }
-                }*/
-            }
-            else if (encoding instanceof COSStream)
-            {
-                if (cmap == null)
-                {
-                    COSStream encodingStream = (COSStream) encoding;
-                    try
-                    {
-                        InputStream is = encodingStream.getUnfilteredStream();
-                        cmap = parseCmap(null, is);
-                        IOUtils.closeQuietly(is);
-                    }
-                    catch (IOException exception)
-                    {
-                        LOG.error("Error: Could not parse the embedded CMAP");
-                    }
-                }
-            }
-            else if (encoding instanceof COSDictionary)
-            {
-                try
-                {
-                    fontEncoding = new DictionaryEncoding((COSDictionary) encoding);
-                }
-                catch (IOException exception)
-                {
-                    LOG.error("Error: Could not create the DictionaryEncoding");
-                }
-            }
+            return unicode;
         }
-        this.fontEncoding = fontEncoding;
-        extractToUnicodeEncoding(); // todo: IMPORTANT!
 
-        if (cmap == null && cmapName != null)
+        // if the font is composite and uses a predefined cmap (excluding Identity-H/V),
+        // or if its descendant font uses Adobe-GB1/CNS1/Japan1/Korea1, then:
+        if (isCMapPredefined && cMapUCS2 != null)
         {
-            InputStream cmapStream = null;
-            try
-            {
-                // look for a predefined CMap with the given name
-                cmapStream = ResourceLoader.loadResource(resourceRootCMAP + cmapName);
-                if (cmapStream != null)
-                {
-                    cmap = parseCmap(resourceRootCMAP, cmapStream);
-                    if (cmap == null && encodingName == null)
-                    {
-                        LOG.error("Error: Could not parse predefined CMAP file for '" +
-                                cmapName + "'");
-                    }
-                }
-                else
-                {
-                    LOG.warn("Debug: '" + cmapName + "' isn't a predefined map, most likely it's" +
-                            "embedded in the pdf itself.");
-                }
-            }
-            catch (IOException exception)
-            {
-                LOG.error("Error: Could not find predefined CMAP file for '" + cmapName + "'");
-            }
-            finally
-            {
-                IOUtils.closeQuietly(cmapStream);
-            }
+            // e) Map the CID according to the CMap from step d), producing a Unicode value
+            return cMapUCS2.toUnicode(code);
+        }
+        else
+        {
+            // if no value has been produced, there is no way to obtain Unicode for the character.
+            return null;
         }
     }
 
     @Override
-    public String encode(byte[] c, int offset, int length) throws IOException
+    public int readCode(InputStream in) throws IOException
     {
-        String retval = null;
-        if (hasToUnicode())
-        {
-            retval = super.encode(c, offset, length);
-        }
+        return cMap.readCode(in);
+    }
 
-        if (retval == null)
-        {
-            int result = cmap.lookupCID(c, offset, length);
-            if (result != -1)
-            {
-                retval = descendantFont.cmapEncoding(result, 2, true, null);
-            }
-        }
-        return retval;
+    /**
+     * Returns the CID for the given character code. If not found then CID 0 is returned.
+     *
+     * @param code character code
+     * @return CID
+     */
+    public int codeToCID(int code)
+    {
+        return descendantFont.codeToCID(code);
+    }
+
+    /**
+     * Returns the GID for the given character code.
+     *
+     * @param code character code
+     * @return GID
+     */
+    public int codeToGID(int code) throws IOException
+    {
+        return descendantFont.codeToGID(code);
     }
 
     @Override
@@ -237,7 +245,11 @@ public class PDType0Font extends PDFont
     @Override
     public String toString()
     {
-        return getClass().getSimpleName() + "/" + getDescendantFont()
-              .getClass().getSimpleName() + " " + getBaseFont();
+        String descendant = null;
+        if (getDescendantFont() != null)
+        {
+            descendant = getDescendantFont().getClass().getSimpleName();
+        }
+        return getClass().getSimpleName() + "/" + descendant + " " + getBaseFont();
     }
 }

Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java Sat Aug 23 02:34:35 2014
@@ -19,24 +19,19 @@ package org.apache.pdfbox.pdmodel.font;
 
 import java.awt.geom.GeneralPath;
 import java.io.IOException;
-import java.util.Arrays;
+import java.io.InputStream;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.fontbox.cff.CFFParser;
 import org.apache.fontbox.cff.CFFType1Font;
-import org.apache.fontbox.cff.Type1CharString;
 import org.apache.fontbox.ttf.Type1Equivalent;
-import org.apache.pdfbox.cos.COSArray;
 import org.apache.pdfbox.cos.COSDictionary;
-import org.apache.pdfbox.cos.COSFloat;
 import org.apache.pdfbox.encoding.Encoding;
 import org.apache.pdfbox.encoding.Type1Encoding;
 import org.apache.pdfbox.io.IOUtils;
-import org.apache.pdfbox.pdmodel.common.PDMatrix;
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
 import org.apache.pdfbox.pdmodel.common.PDStream;
 
@@ -46,19 +41,17 @@ import org.apache.pdfbox.pdmodel.common.
  * @author Villu Ruusmann
  * @author John Hewson
  */
-public class PDType1CFont extends PDFont implements PDType1Equivalent
+public class PDType1CFont extends PDSimpleFont implements PDType1Equivalent
 {
     private static final Log LOG = LogFactory.getLog(PDType1CFont.class);
-    private static final byte[] SPACE_BYTES = { (byte) 32 };
 
-    private String fontName = null;
-    private Map<String, Float> glyphWidths = new HashMap<String, Float>();
     private Map<String, Float> glyphHeights = new HashMap<String, Float>();
     private Float avgWidth = null;
     private PDRectangle fontBBox = null;
 
-    private CFFType1Font cffFont; // embedded font
+    private final CFFType1Font cffFont; // embedded font
     private final Type1Equivalent type1Equivalent; // embedded or system font for rendering
+    private final boolean isEmbedded;
 
     /**
      * Constructor.
@@ -87,6 +80,7 @@ public class PDType1CFont extends PDFont
         if (cffFont != null)
         {
             type1Equivalent = cffFont;
+            isEmbedded = true;
         }
         else
         {
@@ -100,12 +94,9 @@ public class PDType1CFont extends PDFont
                 LOG.warn("Using fallback font for " + getBaseFont());
                 type1Equivalent = ExternalFonts.getFallbackFont();
             }
+            isEmbedded = false;
         }
-
-        // cache the font name
-        fontName = cffFont.getName();
-
-        determineEncoding();
+        readEncoding();
     }
 
     /**
@@ -139,7 +130,7 @@ public class PDType1CFont extends PDFont
     @Override
     public String codeToName(int code)
     {
-        String name = getFontEncoding().getName(code);
+        String name = getEncoding().getName(code);
         if (name != null)
         {
             return name;
@@ -155,93 +146,47 @@ public class PDType1CFont extends PDFont
     {
         return ".notdef".equals(name);
     }
-
     @Override
-    protected void determineEncoding()
+    protected Encoding readEncodingFromFont() throws IOException
     {
-        super.determineEncoding();
-        Encoding fontEncoding = getFontEncoding();
-        if (fontEncoding == null)
-        {
-            // extract from CFF/substitute
-            this.fontEncoding = new Type1Encoding(type1Equivalent.getEncoding());
-        }
+        return Type1Encoding.fromFontBox(type1Equivalent.getEncoding());
     }
 
     @Override
-    public String encode(byte[] bytes, int offset, int length) throws IOException
+    public int readCode(InputStream in) throws IOException
     {
-        String character = getUnicode(bytes, offset, length);
-        if (character == null)
-        {
-            // todo: message is for debugging, remove in long term
-            LOG.warn("No character for code " + (bytes[offset] & 0xff) + " in " + fontName);
-            return null;
-        }
-        return character;
+        return in.read();
     }
 
-    /*@Override
-    public int encodeToCID(byte[] bytes, int offset, int length)
-    {
-        if (length > 2)
-        {
-            return -1;
-        }
-        int code = bytes[offset] & 0xff;
-        if (length == 2)
-        {
-            code = code * 256 + bytes[offset + 1] & 0xff;
-        }
-        return code;
-    }*/
-
-    // helper
-    private String getUnicode(byte[] bytes, int offset, int length) throws IOException
+    @Override
+    protected float getWidthFromFont(int code) throws IOException
     {
-        int code = getCodeFromArray(bytes, offset, length);
-        String character = getFontEncoding().getCharacter(code);
-        if (character == null)
-        {
-            // todo: message is for debugging, remove in long term
-            LOG.warn("Could not get character " + code);
-        }
-        return character;
+        String name = codeToName(code);
+        return cffFont.getType1CharString(name).getWidth();
     }
 
     @Override
-    public float getFontWidth(byte[] bytes, int offset, int length)
+    protected boolean isEmbedded()
     {
-        int code = bytes[offset] & 0xff;
-        String name = codeToName(code);
-
-        Float width = glyphWidths.get(name);
-        if (width == null)
-        {
-            width = getCharacterWidth(name);
-            glyphWidths.put(name, width);
-        }
-
-        return width;
+        return isEmbedded;
     }
 
     @Override
-    public float getFontHeight(byte[] bytes, int offset, int length)
+    public float getHeight(int code) throws IOException
     {
-        int code = bytes[offset] & 0xff;
         String name = codeToName(code);
 
         if (isNotDef(name))
         {
             // todo: message is for debugging, remove in long term
-            LOG.warn("No name for code " + (bytes[offset] & 0xff) + " in " + fontName);
+            LOG.warn("No name for code " + code + " in " + cffFont.getName());
             return 0;
         }
 
         float height = 0;
         if (!glyphHeights.containsKey(name))
         {
-            height = getCharacterHeight(name);
+            height = (float)cffFont.getType1CharString(name).getBounds().getHeight(); // todo: cffFont could be null
             glyphHeights.put(name, height);
         }
         return height;
@@ -254,14 +199,13 @@ public class PDType1CFont extends PDFont
         for (int i = 0; i < string.length(); i++)
         {
             String character = string.substring(i, i + 1);
-            String name = getFontEncoding().getNameForCharacter(character.charAt(0));
+            String name = getEncoding().getNameForCharacter(character.charAt(0));
             if (isNotDef(name))
             {
                 // todo: message is for debugging, remove in long term
                 LOG.warn("No code for character " + character);
-                return 0;
             }
-            width += getCharacterWidth(name);
+            width += cffFont.getType1CharString(name).getWidth();
         }
         return width;
     }
@@ -276,30 +220,6 @@ public class PDType1CFont extends PDFont
         return avgWidth;
     }
 
-    @Override
-    public PDMatrix getFontMatrix()
-    {
-        if (fontMatrix == null)
-        {
-            List<Number> numbers = cffFont.getFontMatrix(); // todo: cffFont could be null
-            if (numbers != null && numbers.size() == 6)
-            {
-                COSArray array = new COSArray();
-                for (Number number : numbers)
-                {
-                    array.add(new COSFloat(number.floatValue()));
-                }
-                fontMatrix = new PDMatrix(array);
-            }
-            else
-            {
-                // todo: the font should always have a Matrix, so why fallback?
-                super.getFontMatrix();
-            }
-        }
-        return fontMatrix;
-    }
-
     /**
      * Returns the embedded Type 1-equivalent CFF font.
      * 
@@ -311,45 +231,6 @@ public class PDType1CFont extends PDFont
     }
 
     // todo: this is a replacement for FontMetrics method
-    private float getCharacterWidth(String name)
-    {
-        try
-        {
-            // todo: for debugging we check for .notdef
-            Type1CharString notdef = cffFont.getType1CharString(".notdef"); // todo: cffFont could be null
-            Type1CharString charstring = cffFont.getType1CharString(name);
-            if (charstring == notdef)
-            {
-                // todo: message is for debugging, remove in long term
-                LOG.warn("No width for character " + name + ", using .notdef");
-            }
-            return charstring.getWidth();
-        }
-        catch (IOException e)
-        {
-            // todo: HACK
-            LOG.error(e);
-        }
-        return 0;
-    }
-
-    // todo: this is a replacement for FontMetrics method
-    // todo: but in FontMetrics this method actually gets the advance-y for vertical mode
-    private float getCharacterHeight(String name)
-    {
-        try
-        {
-            return (float)cffFont.getType1CharString(name).getBounds().getHeight(); // todo: cffFont could be null
-        }
-        catch (IOException e)
-        {
-            // todo: HACK
-            LOG.error(e);
-            return 0;
-        }
-    }
-
-    // todo: this is a replacement for FontMetrics method
     private float getAverageCharacterWidth()
     {
         // todo: not implemented, highly suspect
@@ -360,17 +241,11 @@ public class PDType1CFont extends PDFont
     public void clear()
     {
         super.clear();
-        cffFont = null;
         fontBBox = null;
         if (glyphHeights != null)
         {
             glyphHeights.clear();
             glyphHeights = null;
         }
-        if (glyphWidths != null)
-        {
-            glyphWidths.clear();
-            glyphWidths = null;
-        }
     }
 }

Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java Sat Aug 23 02:34:35 2014
@@ -21,7 +21,6 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.util.Arrays;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
 
 import org.apache.commons.logging.Log;
@@ -30,18 +29,13 @@ import org.apache.fontbox.afm.AFMParser;
 import org.apache.fontbox.afm.FontMetrics;
 import org.apache.fontbox.ttf.Type1Equivalent;
 import org.apache.fontbox.type1.Type1Font;
-import org.apache.pdfbox.cos.COSArray;
 import org.apache.pdfbox.cos.COSDictionary;
-import org.apache.pdfbox.cos.COSFloat;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.encoding.Encoding;
-import org.apache.pdfbox.encoding.StandardEncoding;
 import org.apache.pdfbox.encoding.Type1Encoding;
 import org.apache.pdfbox.encoding.WinAnsiEncoding;
 import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.pdmodel.common.PDMatrix;
-import org.apache.pdfbox.pdmodel.common.PDRectangle;
 import org.apache.pdfbox.pdmodel.common.PDStream;
 import org.apache.pdfbox.util.ResourceLoader;
 
@@ -50,7 +44,7 @@ import org.apache.pdfbox.util.ResourceLo
  *
  * @author Ben Litchfield
  */
-public class PDType1Font extends PDFont implements PDType1Equivalent
+public class PDType1Font extends PDSimpleFont implements PDType1Equivalent
 {
     private static final Log LOG = LogFactory.getLog(PDType1Font.class);
 
@@ -123,6 +117,7 @@ public class PDType1Font extends PDFont 
     private final FontMetrics afm; // for standard 14 fonts
     private final Type1Font type1font; // embedded font
     private final Type1Equivalent type1Equivalent; // embedded or system font for rendering
+    private final boolean isEmbedded;
 
     /**
      * Creates a Type 1 standard 14 font for embedding.
@@ -145,6 +140,7 @@ public class PDType1Font extends PDFont 
         // todo: could load the PFB font here if we wanted to support Standard 14 embedding
         type1font = null;
         type1Equivalent = null;
+        isEmbedded = false;
     }
 
     /**
@@ -162,6 +158,7 @@ public class PDType1Font extends PDFont 
         afm = null; // only used for standard 14 fonts, not AFM fonts as we already have the PFB
         type1font = embedder.getType1Font();
         type1Equivalent = embedder.getType1Font();
+        isEmbedded = true;
     }
 
     /**
@@ -206,6 +203,7 @@ public class PDType1Font extends PDFont 
                 }
             }
         }
+        isEmbedded = t1 != null;
 
         // try to find a suitable .pfb font to substitute
         if (t1 == null)
@@ -237,8 +235,7 @@ public class PDType1Font extends PDFont 
         // todo: for standard 14 only. todo: move this to a subclass "PDStandardType1Font" ?
         afm = getAFMFromBaseFont(getBaseFont()); // may be null (it usually is)
 
-        determineEncoding();
-        getEncodingFromFont();
+        readEncoding();
     }
 
     // todo: move this to a subclass?
@@ -255,72 +252,6 @@ public class PDType1Font extends PDFont 
         return null;
     }
 
-    /**
-     * Extracts the encoding from the font, if there is no Encoding given in the Font dictionary.
-     */
-    private void getEncodingFromFont()
-    {
-        if (getFontEncoding() == null)
-        {
-            // todo: this doesn't work properly for TTFs because they fake StandardEncoding currently
-            //       it seems that they should look for a MacRoman cmap instead and claim to use that
-            org.apache.fontbox.encoding.Encoding encoding = type1Equivalent.getEncoding();
-            if (encoding instanceof org.apache.fontbox.encoding.StandardEncoding)
-            {
-                this.fontEncoding = StandardEncoding.INSTANCE;
-            }
-            else if (encoding instanceof org.apache.fontbox.encoding.CustomEncoding)
-            {
-                Map<Integer,String> codeToName = encoding.getCodeToNameMap();
-                Type1Encoding type1Encoding = new Type1Encoding(codeToName.size());
-                for (Integer code : codeToName.keySet())
-                {
-                    type1Encoding.addCharacterEncoding(code, codeToName.get(code));
-                }
-                this.fontEncoding = type1Encoding;
-            }
-        }
-    }
-
-    @Override
-    public PDMatrix getFontMatrix()
-    {
-        if (fontMatrix == null)
-        {
-            // todo: this is an experimental implementation: just use the standard PostScript matrix
-            // todo: don't all PostScript fonts use a 1000upem matrix anyway?
-            if (type1font == null)
-            {
-                COSArray a = new COSArray();
-                a.add(new COSFloat(0.001f));
-                a.add(new COSFloat(0));
-                a.add(new COSFloat(0));
-                a.add(new COSFloat(0.001f));
-                a.add(new COSFloat(0));
-                a.add(new COSFloat(0));
-                fontMatrix = new PDMatrix(a);
-                return fontMatrix;
-            }
-
-            List<Number> numbers = type1font.getFontMatrix();
-            if (numbers != null && numbers.size() == 6)
-            {
-                COSArray array = new COSArray();
-                for (Number number : numbers)
-                {
-                    array.add(new COSFloat(number.floatValue()));
-                }
-                fontMatrix = new PDMatrix(array);
-            }
-            else
-            {
-                // todo: the font should always have a Matrix, so why fallback?
-                super.getFontMatrix();
-            }
-        }
-        return fontMatrix;
-    }
-
     @Override
     public PDFontDescriptor getFontDescriptor()
     {
@@ -328,48 +259,45 @@ public class PDType1Font extends PDFont 
         {
             if (afm != null)
             {
-                fontDescriptor = new PDFontDescriptorAFM(afm); // todo: wait, isn't this for embedding?
+                // this is for embedding fonts into PDFs, rather than for reading, though it works.
+                fontDescriptor = new PDFontDescriptorAFM(afm);
             }
-            // todo: else: then what? (no FD means no embedded font, plus we have no AFM: so fallback)
         }
         return fontDescriptor;
     }
 
     @Override
-    public float getFontHeight(byte[] c, int offset, int length)
+    public float getHeight(int code) throws IOException
     {
         if (afm != null)
         {
-            int code = getCodeFromArray(c, offset, length);
-            Encoding encoding = getFontEncoding();
-            String characterName = encoding.getName(code);
-            return afm.getCharacterHeight(characterName);
+            String characterName = getEncoding().getName(code);
+            return afm.getCharacterHeight(characterName); // todo: isn't this the y-advance, not the height?
         }
-        return super.getFontHeight(c, offset, length);
+        return super.getHeight(code);
     }
 
     @Override
-    public float getFontWidth(int charCode) throws IOException
+    protected float getWidthFromFont(int code) throws IOException
     {
-        float width = super.getFontWidth(charCode);
-        if (width <= 0)
+        String name = codeToName(code);
+        if (afm != null)
         {
-            // get width from AFM
-            float retval = 0;
-            if (afm != null)
-            {
-                String characterName = fontEncoding.getName(charCode);
-                retval = afm.getCharacterWidth(characterName);
-            }
-            return retval;
+            return afm.getCharacterWidth(name);
         }
         else
         {
-            return width;
+            return type1Equivalent.getWidth(name);
         }
     }
 
     @Override
+    protected boolean isEmbedded()
+    {
+        return isEmbedded;
+    }
+
+    @Override
     public float getAverageFontWidth()
     {
         if (afm != null)
@@ -383,18 +311,23 @@ public class PDType1Font extends PDFont 
     }
 
     @Override
-    protected void determineEncoding()
+    public int readCode(InputStream in) throws IOException
+    {
+        return in.read();
+    }
+
+    @Override
+    protected Encoding readEncodingFromFont() throws IOException
     {
-        super.determineEncoding();
-        Encoding fontEncoding = getFontEncoding();
-        if (fontEncoding == null)
+        if (afm != null)
         {
-            if (afm != null)
-            {
-                fontEncoding = new Type1Encoding(afm);
-            }
-            // todo: get encoding from font if still null
-            this.fontEncoding = fontEncoding;
+            // read from AFM
+            return new Type1Encoding(afm);
+        }
+        else
+        {
+            // extract from Type1 font/substitute
+            return Type1Encoding.fromFontBox(type1Equivalent.getEncoding());
         }
     }
 
@@ -431,7 +364,7 @@ public class PDType1Font extends PDFont 
     @Override
     public String codeToName(int code)
     {
-        String name = getFontEncoding().getName(code);
+        String name = getEncoding().getName(code);
         if (name != null)
         {
             return name;