You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ja...@apache.org on 2014/08/23 04:34:37 UTC
svn commit: r1619956 [2/3] - in /pdfbox/branches/no-awt:
examples/src/main/java/org/apache/pdfbox/examples/pdmodel/
fontbox/src/main/java/org/apache/fontbox/cff/
fontbox/src/main/java/org/apache/fontbox/cmap/
fontbox/src/main/java/org/apache/fontbox/tt...
Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java Sat Aug 23 02:34:35 2014
@@ -21,12 +21,16 @@ import java.io.InputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.fontbox.ttf.CMAPEncodingEntry;
+import org.apache.fontbox.ttf.CMAPTable;
import org.apache.fontbox.ttf.TTFParser;
import org.apache.fontbox.ttf.TrueTypeFont;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.encoding.Encoding;
+import org.apache.pdfbox.encoding.StandardEncoding;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.pdmodel.common.PDStream;
@@ -40,9 +44,8 @@ public class PDCIDFontType2 extends PDCI
private static final Log LOG = LogFactory.getLog(PDCIDFontType2.class);
private final TrueTypeFont ttf;
- private Boolean hasCIDToGIDMap = null;
- private Boolean hasIdentityCIDToGIDMap = null;
- private int[] cid2gid = null;
+ private final int[] cid2gid;
+ private final boolean isEmbedded;
/**
* Constructor.
@@ -61,6 +64,7 @@ public class PDCIDFontType2 extends PDCI
// embedded
TTFParser ttfParser = new TTFParser(true);
ttf = ttfParser.parseTTF(ff2Stream.createInputStream());
+ isEmbedded = true;
}
else
{
@@ -76,132 +80,211 @@ public class PDCIDFontType2 extends PDCI
LOG.warn("Using fallback font for " + getBaseFont());
ttf = ExternalFonts.getFallbackFont();
}
+ isEmbedded = false;
}
+
+ cid2gid = readCIDToGIDMap();
}
- /**
- * Indicates if this font has a CIDToGIDMap.
- *
- * @return returns true if the font has a CIDToGIDMap.
- */
- public boolean hasCIDToGIDMap()
+ private int[] readCIDToGIDMap()
{
- if (hasCIDToGIDMap == null)
+ int[] cid2gid = null;
+ COSBase map = dict.getDictionaryObject(COSName.CID_TO_GID_MAP);
+ if (map instanceof COSStream)
{
- COSBase map = dict.getDictionaryObject(COSName.CID_TO_GID_MAP);
- if (map != null && map instanceof COSStream)
+ COSStream stream = (COSStream) map;
+ try
{
- hasCIDToGIDMap = Boolean.TRUE;
+ InputStream is = stream.getUnfilteredStream();
+ byte[] mapAsBytes = IOUtils.toByteArray(is);
+ IOUtils.closeQuietly(is);
+ int numberOfInts = mapAsBytes.length / 2;
+ cid2gid = new int[numberOfInts];
+ int offset = 0;
+ for (int index = 0; index < numberOfInts; index++)
+ {
+ int gid = (mapAsBytes[offset] & 0xff) << 8 | mapAsBytes[offset + 1] & 0xff;
+ cid2gid[index] = gid;
+ offset += 2;
+ }
}
- else
+ catch (IOException exception)
{
- hasCIDToGIDMap = Boolean.FALSE;
+ LOG.error("Can't read the CIDToGIDMap", exception);
}
}
- return hasCIDToGIDMap;
+ return cid2gid;
+ }
+
+ @Override
+ public int codeToCID(int code)
+ {
+ // TTF has no native notion of a CID
+ return code;
}
/**
- * Indicates if this font has an identity CIDToGIDMap.
- *
- * @return returns true if the font has an identity CIDToGIDMap.
+ * Returns the GID for the given character code.
+ *
+ * @param code character code
+ * @return GID
*/
- public boolean hasIdentityCIDToGIDMap()
+ public int codeToGID(int code) throws IOException
{
- if (hasIdentityCIDToGIDMap == null)
- {
- COSBase map = dict.getDictionaryObject(COSName.CID_TO_GID_MAP);
- if (map != null && map instanceof COSName)
- {
- hasIdentityCIDToGIDMap = Boolean.TRUE;
- }
- else
- {
- hasIdentityCIDToGIDMap = Boolean.FALSE;
- }
- }
- return hasIdentityCIDToGIDMap;
+ int cid = codeToCID(code);
+ return cidToGID(cid);
}
/**
- * Maps the given CID to the correspondent GID.
+ * Returns the GID for the given CID.
*
- * @param cid the given CID
- * @return the mapped GID, or -1 if something went wrong.
+ * @param cid the given CID (for TTF this is the same as the character code)
+ * @return the mapped GID
*/
- public int mapCIDToGID(int cid)
+ public int cidToGID(int cid) throws IOException
{
- if (hasCIDToGIDMap())
+ if (!isEmbedded)
{
- if (cid2gid == null)
+ // The conforming reader shall select glyphs by translating characters from the
+ // encoding specified by the predefined CMap to one of the encodings in the TrueType
+ // font's 'cmap' table. The means by which this is accomplished are implementation-
+ // dependent.
+
+ CMAPEncodingEntry cmap = getUnicodeCmap(ttf.getCMAP());
+ String unicode;
+
+ // non-symbolic behaviour for Type2 TTFs isn't well documented, test with PDFBOX-1422
+ if (!parent.isSymbolic()) // todo: but this descendant font has its own flags?
+ {
+ // if the font descriptor's Nonsymbolic flag is set, the conforming reader shall
+ // create a table that maps from character codes to glyph names
+ String name = null;
+
+ // If the Encoding entry is one of the names MacRomanEncoding, WinAnsiEncoding,
+ // or a dictionary, then the table is initialized as normal
+ // todo: Encoding is not allowed though, right? So this never happens?
+ /*if (getFontEncoding() != null)
+ {
+ name = getFontEncoding().getName(cid);
+ }*/
+
+ // Any undefined entries in the table shall be filled using StandardEncoding
+ if (name == null)
+ {
+ name = StandardEncoding.INSTANCE.getName(cid); // code = CID for TTF
+ }
+
+ // map to a Unicode value using the Adobe Glyph List
+ unicode = Encoding.getCharacterForName(name);
+ }
+ else
+ {
+ unicode = parent.toUnicode(cid); // code = CID for TTF
+ }
+
+ if (unicode == null)
{
- readCIDToGIDMapping();
+ return 0;
}
- if (cid2gid != null && cid < cid2gid.length)
+ else if (unicode.length() > 1)
{
- return cid2gid[cid];
+ LOG.warn("trying to map a multi-byte character using 'cmap', result will be poor");
}
- return -1;
+ return cmap.getGlyphId(unicode.codePointAt(0));
}
else
{
- // identity is the default value
- return cid;
- }
- }
+ // If the TrueType font program is embedded, the Type 2 CIDFont dictionary shall contain
+ // a CIDToGIDMap entry that maps CIDs to the glyph indices for the appropriate glyph
+ // descriptions in that font program.
- private void readCIDToGIDMapping()
- {
- COSBase map = dict.getDictionaryObject(COSName.CID_TO_GID_MAP);
- if (map instanceof COSStream)
- {
- COSStream stream = (COSStream) map;
- try
+ if (cid2gid != null)
{
- InputStream is = stream.getUnfilteredStream();
- byte[] mapAsBytes = IOUtils.toByteArray(is);
- IOUtils.closeQuietly(is);
- int numberOfInts = mapAsBytes.length / 2;
- cid2gid = new int[numberOfInts];
- int offset = 0;
- for (int index = 0; index < numberOfInts; index++)
+ // use CIDToGIDMap
+ if (cid < cid2gid.length)
{
- cid2gid[index] = getCodeFromArray(mapAsBytes, offset, 2);
- offset += 2;
+ return cid2gid[cid];
+ }
+ else
+ {
+ return 0;
}
}
- catch (IOException exception)
+ else
{
- LOG.error("Can't read the CIDToGIDMap", exception);
+ // "Identity" is the default CIDToGIDMap
+ return cid;
}
}
}
/**
- * Returns the embedded or substituted TrueType font.
+ * Returns the best Unicode from the font (the most general). The PDF spec says that "The means
+ * by which this is accomplished are implementation-dependent."
*/
- public TrueTypeFont getTrueTypeFont()
+ private CMAPEncodingEntry getUnicodeCmap(CMAPTable cmapTable)
{
- return ttf;
+ CMAPEncodingEntry cmap = cmapTable.getSubtable(CMAPTable.PLATFORM_UNICODE,
+ CMAPTable.ENCODING_UNICODE_2_0_FULL);
+ if (cmap == null)
+ {
+ cmap = cmapTable.getSubtable(CMAPTable.PLATFORM_UNICODE,
+ CMAPTable.ENCODING_UNICODE_2_0_BMP);
+ }
+ if (cmap == null)
+ {
+ cmap = cmapTable.getSubtable(CMAPTable.PLATFORM_WINDOWS,
+ CMAPTable.ENCODING_WIN_UNICODE);
+ }
+ if (cmap == null)
+ {
+ // Microsoft's "Recommendations for OpenType Fonts" says that "Symbol" encoding
+ // actually means "Unicode, non-standard character set"
+ cmap = cmapTable.getSubtable(CMAPTable.PLATFORM_WINDOWS,
+ CMAPTable.ENCODING_WIN_SYMBOL);
+ }
+ if (cmap == null)
+ {
+ // fallback to the first cmap (may not be Unicode, so may produce poor results)
+ LOG.warn("Used fallback cmap for font " + getBaseFont());
+ cmap = cmapTable.getCmaps()[0];
+ }
+ return cmap;
}
@Override
- public float getFontWidth(byte[] c, int offset, int length)
+ public float getHeight(int code) throws IOException
{
- // a suitable mapping is needed to address the correct width value
- int code = getCodeFromArray(c, offset, length);
- if (hasIdentityCIDToGIDMap() || hasCIDToGIDMap())
- {
- return getFontWidth(code);
- }
- else if (getParent().getCMap() != null)
+ // todo: really we want the BBox, (for text extraction:)
+ return (ttf.getHorizontalHeader().getAscender() + -ttf.getHorizontalHeader().getDescender())
+ / ttf.getUnitsPerEm(); // todo: shouldn't this be the yMax/yMin?
+ }
+
+ @Override
+ protected float getWidthFromFont(int code) throws IOException
+ {
+ int cid = codeToCID(code);
+ int gid = cidToGID(cid);
+ int width = ttf.getAdvanceWidth(gid);
+ int unitsPerEM = ttf.getUnitsPerEm();
+ if (unitsPerEM != 1000)
{
- String mappedString = getParent().getCMap().lookup(code, length);
- if (mappedString != null)
- {
- return getFontWidth(mappedString.codePointAt(0));
- }
+ width *= 1000f / unitsPerEM;
}
- return super.getFontWidth(c, offset, length);
+ return width;
+ }
+
+ @Override
+ protected boolean isEmbedded()
+ {
+ return isEmbedded;
+ }
+
+ /**
+ * Returns the embedded or substituted TrueType font.
+ */
+ public TrueTypeFont getTrueTypeFont()
+ {
+ return ttf;
}
}
Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java Sat Aug 23 02:34:35 2014
@@ -18,16 +18,12 @@ package org.apache.pdfbox.pdmodel.font;
import java.io.IOException;
import java.io.InputStream;
-import java.io.UnsupportedEncodingException;
import java.util.Collections;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.cmap.CMap;
-import org.apache.fontbox.cmap.CMapParser;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
@@ -36,16 +32,11 @@ import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSStream;
-import org.apache.pdfbox.encoding.DictionaryEncoding;
-import org.apache.pdfbox.encoding.Encoding;
-import org.apache.pdfbox.encoding.MacRomanEncoding;
-import org.apache.pdfbox.encoding.WinAnsiEncoding;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.pdmodel.common.COSArrayList;
import org.apache.pdfbox.pdmodel.common.COSObjectable;
import org.apache.pdfbox.pdmodel.common.PDMatrix;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
-import org.apache.pdfbox.util.ResourceLoader;
/**
* This is the base class for all PDF fonts.
@@ -55,130 +46,59 @@ import org.apache.pdfbox.util.ResourceLo
public abstract class PDFont implements COSObjectable
{
private static final Log LOG = LogFactory.getLog(PDFont.class);
- private static final byte[] SPACE_BYTES = { (byte) 32 }; // formerly in PDSimpleFont
- protected static final String resourceRootCMAP = "org/apache/pdfbox/resources/cmap/";
- protected static Map<String, CMap> cmapObjects =
- Collections.synchronizedMap(new HashMap<String, CMap>()); // todo: why synchronized?
-
- private static final String[] SINGLE_CHAR_STRING = new String[256];
- private static final String[][] DOUBLE_CHAR_STRING = new String[256][256];
+ private static final PDMatrix FONT_MATRIX_1000;
static
{
- for (int i = 0; i < 256; i++)
- {
- try
- {
- SINGLE_CHAR_STRING[i] = new String(new byte[] { (byte) i }, "ISO-8859-1");
- }
- catch (UnsupportedEncodingException e)
- {
- // Nothing should happen here
- LOG.error(e,e);
- }
- for (int j = 0; j < 256; j++)
- {
- try
- {
- DOUBLE_CHAR_STRING[i][j] = new String(new byte[] { (byte) i, (byte) j },
- "UTF-16BE");
- }
- catch (UnsupportedEncodingException e)
- {
- // Nothing should happen here
- LOG.error(e, e);
- }
- }
- }
- }
-
- private static String getStringFromArray(byte[] c, int offset, int length) throws IOException
- {
- String retval;
- if (length == 1)
- {
- retval = SINGLE_CHAR_STRING[(c[offset] + 256) % 256];
- }
- else if (length == 2)
- {
- retval = DOUBLE_CHAR_STRING[(c[offset] + 256) % 256][(c[offset + 1] + 256) % 256];
- }
- else
- {
- throw new IOException("Error:Unknown character length:" + length);
- }
- return retval;
- }
-
- /**
- * The Font dictionary.
- */
- protected COSDictionary dict;
-
- /**
- * The font matrix.
- */
- protected PDMatrix fontMatrix = null;
+ COSArray array = new COSArray();
+ array.add(new COSFloat(0.001f));
+ array.add(COSInteger.ZERO);
+ array.add(COSInteger.ZERO);
+ array.add(new COSFloat(0.001f));
+ array.add(COSInteger.ZERO);
+ array.add(COSInteger.ZERO);
+ FONT_MATRIX_1000 = new PDMatrix(array);
+ }
+
+ protected final COSDictionary dict;
+ private final CMap toUnicodeCMap;
+ protected PDFontDescriptor fontDescriptor;
- // CMap / Encoding
- protected CMap cmap = null; // only used when this is a Type0 font with a CMap
- protected Encoding fontEncoding = null; // only used when this font has an encoding
-
- // the CMap holding the ToUnicode mapping
- private CMap toUnicodeCmap = null;
- private boolean hasToUnicode = false;
-
- private List<Integer> widths = null;
-
- protected PDFontDescriptor fontDescriptor = null;
- private boolean widthsAreMissing = false;
-
- // formerly in PDSimpleFont
- private final HashMap<Integer, Float> fontSizes = new HashMap<Integer, Float>(128);
- private float avgFontWidth = 0.0f;
- private float avgFontHeight = 0.0f;
+ private List<Integer> widths;
+ private float avgFontWidth;
private float fontWidthOfSpace = -1f;
/**
- * This will clear AFM resources that are stored statically. This is usually not a problem
- * unless you want to reclaim resources for a long running process.
- *
- * SPECIAL NOTE: The font calculations are currently in COSObject, which is where they will
- * reside until PDFont is mature enough to take them over. PDFont is the appropriate place for
- * them and not in COSObject but we need font calculations for text extraction. THIS METHOD WILL
- * BE MOVED OR REMOVED TO ANOTHER LOCATION IN A FUTURE VERSION OF PDFBOX.
- *
- * @deprecated This method will be removed in a future version of PDFBox.
- */
- @Deprecated
- public static void clearResources()
- {
- cmapObjects.clear();
- }
-
- /**
* Constructor.
*/
protected PDFont()
{
dict = new COSDictionary();
dict.setItem(COSName.TYPE, COSName.FONT);
+ toUnicodeCMap = null;
}
/**
* Constructor.
- *
- * @param fontDictionary The font dictionary according to the PDF specification.
+ *
+ * @param fontDictionary Font dictionary.
*/
- protected PDFont(COSDictionary fontDictionary)
+ protected PDFont(COSDictionary fontDictionary) throws IOException
{
dict = fontDictionary;
+ COSBase toUnicode = dict.getDictionaryObject(COSName.TO_UNICODE);
+ if (toUnicode != null)
+ {
+ toUnicodeCMap = readCMap(toUnicode);
+ }
+ else
+ {
+ toUnicodeCMap = null;
+ }
}
/**
- * This will get the font descriptor for this font.
- *
- * @return The font descriptor for this font.
+ * Returns the font descriptor, may be null.
*/
public PDFontDescriptor getFontDescriptor()
{
@@ -189,133 +109,112 @@ public abstract class PDFont implements
{
fontDescriptor = new PDFontDescriptorDictionary(fd);
}
- // todo: NOTE: null return value here if fine, because we override this method
}
return fontDescriptor;
}
/**
- * Determines the encoding for the font. This method as to be overwritten, as there are
- * different possibilities to define a mapping.
+ * Reads a CMap given a COS Stream or Name. May return null if a predefined CMap does not exist.
+ *
+ * @param base COSName or COSStream
*/
- protected void determineEncoding()
+ protected final CMap readCMap(COSBase base) throws IOException
{
- COSBase encoding = dict.getDictionaryObject(COSName.ENCODING);
- Encoding fontEncoding = null;
- if (encoding != null)
+ if (base instanceof COSName)
+ {
+ // predefined CMap
+ String name = ((COSName)base).getName();
+ return CMapManager.getPredefinedCMap(name);
+ }
+ else if (base instanceof COSStream)
{
- if (encoding instanceof COSName)
+ // embedded CMap
+ InputStream input = null;
+ try
{
- COSName encodingName = (COSName)encoding;
- try
- {
- fontEncoding = Encoding.getInstance(encodingName);
- }
- catch (IOException exception)
- {
- LOG.warn("Debug: Could not find encoding for " + encodingName);
- }
+ input = ((COSStream)base).getUnfilteredStream();
+ return CMapManager.parseCMap(input);
}
- else if (encoding instanceof COSDictionary)
+ finally
{
- try
- {
- fontEncoding = new DictionaryEncoding((COSDictionary) encoding);
- }
- catch (IOException exception)
- {
- LOG.error("Error: Could not create the DictionaryEncoding");
- }
+ IOUtils.closeQuietly(input);
}
}
- this.fontEncoding = fontEncoding;
- extractToUnicodeEncoding();
+ else
+ {
+ throw new IOException("Expected Name or Stream");
+ }
}
- protected final void extractToUnicodeEncoding()
+ @Override
+ public COSDictionary getCOSObject()
{
- COSName encodingName;
- String cmapName;
- COSBase toUnicode = dict.getDictionaryObject(COSName.TO_UNICODE);
- if (toUnicode != null)
+ return dict;
+ }
+
+ /**
+ * Returns the width of the given character.
+ *
+ * @param code character code
+ */
+ public float getWidth(int code) throws IOException
+ {
+ if (!isEmbedded())
{
- hasToUnicode = true;
- if (toUnicode instanceof COSStream)
+ // "If the font program is not embedded, Acrobat overrides the widths in the font
+ // program on the conforming reader's system with the widths specified in the font
+ // dictionary." (Adobe Supplement to the ISO 32000)
+
+ // Type1, Type1C, Type3
+ int firstChar = dict.getInt(COSName.FIRST_CHAR, -1);
+ int lastChar = dict.getInt(COSName.LAST_CHAR, -1);
+ if (getWidths().size() > 0 && code >= firstChar && code <= lastChar)
{
- try
- {
- InputStream is = ((COSStream) toUnicode).getUnfilteredStream();
- toUnicodeCmap = parseCmap(resourceRootCMAP, is);
- IOUtils.closeQuietly(is);
- }
- catch (IOException exception)
- {
- LOG.error("Error: Could not load embedded ToUnicode CMap");
- }
+ return getWidths().get(code - firstChar).floatValue();
}
- else if (toUnicode instanceof COSName)
+ else
{
- encodingName = (COSName) toUnicode;
- toUnicodeCmap = cmapObjects.get(encodingName.getName());
- if (toUnicodeCmap == null)
+ PDFontDescriptor fd = getFontDescriptor();
+ if (fd instanceof PDFontDescriptorDictionary)
{
- cmapName = encodingName.getName();
- String resourceName = resourceRootCMAP + cmapName;
- try
- {
- toUnicodeCmap = parseCmap(resourceRootCMAP,
- ResourceLoader.loadResource(resourceName));
- }
- catch (IOException exception)
- {
- LOG.error("Error: Could not find predefined ToUnicode CMap file for '" +
- cmapName + "'");
- }
- if (toUnicodeCmap == null)
- {
- LOG.error("Error: Could not parse predefined ToUnicode CMap file for '" +
- cmapName + "'");
- }
+ return fd.getMissingWidth();
+ }
+ else
+ {
+ // if there's nothing to override with, then obviously we fall back to the font
+ return getWidthFromFont(code);
}
}
}
+ else
+ {
+ // otherwise the fonts widths should exactly match the widths in the font dictionary
+ return getWidthFromFont(code);
+ }
}
- @Override
- public COSBase getCOSObject()
- {
- return dict;
- }
+ /**
+ * Returns the width of a glyph in the embedded font file.
+ *
+ * @param code character code
+ * @return width in glyph space
+ * @throws IOException if the font could not be read
+ */
+ protected abstract float getWidthFromFont(int code) throws IOException;
/**
- * This will get the font width for a character.
- *
- * @param c The character code to get the width for.
- * @param offset The offset into the array.
- * @param length The length of the data.
- * @return The width is in 1000 unit of text space, ie 333 or 777
+ * Returns true if the font file is embedded in the PDF.
*/
- public float getFontWidth(byte[] c, int offset, int length) throws IOException
- {
- int code = getCodeFromArray(c, offset, length);
- Float fontWidth = fontSizes.get(code);
- if (fontWidth == null)
- {
- fontWidth = getFontWidth(code);
- fontSizes.put(code, fontWidth);
- }
- return fontWidth;
- }
+ protected abstract boolean isEmbedded();
/**
* This will get the font height for a character.
*
- * @param c The character code to get the height for.
- * @param offset The offset into the array.
- * @param length The length of the data.
+ * @param code character code
* @return The height is in 1000 unit of text space, ie 333 or 777
*/
- public float getFontHeight(byte[] c, int offset, int length)
+ // todo: this is not the glyph height at all! this method is *supposed* to get the y-advance
+ public float getHeight(int code) throws IOException
{
// maybe there is already a precalculated value
PDFontDescriptor desc = getFontDescriptor();
@@ -346,7 +245,6 @@ public abstract class PDFont implements
retval -= desc.getDescent();
}
}
- avgFontHeight = retval;
return retval;
}
return 0;
@@ -361,11 +259,11 @@ public abstract class PDFont implements
*/
public float getStringWidth(String string) throws IOException
{
- byte[] data = string.getBytes("ISO-8859-1");
+ byte[] data = string.getBytes("ISO-8859-1"); // todo: *no*, these are *not* character codes
float totalWidth = 0;
for (int i = 0; i < data.length; i++)
{
- totalWidth += getFontWidth(data, i, 1);
+ totalWidth += getWidth(data[i]);
}
return totalWidth;
}
@@ -415,139 +313,42 @@ public abstract class PDFont implements
}
/**
- * Used for multibyte encodings.
- *
- * @param data The array of data.
- * @param offset The offset into the array.
- * @param length The number of bytes to use.
- * @return The int value of data from the array.
- */
- public int getCodeFromArray(byte[] data, int offset, int length)
- {
- int code = 0;
- for (int i = 0; i < length; i++)
- {
- code <<= 8;
- code |= (data[offset + i] + 256) % 256;
- }
- return code;
- }
-
- /**
- * Encode the given value using the CMap of the font.
- *
- * @param code the code to encode.
- * @param length the byte length of the given code.
- * @param isCIDFont indicates that the used font is a CID font.
- *
- * @return The value of the encoded character.
- * @throws IOException if something went wrong
- */
- protected final String cmapEncoding(int code, int length, boolean isCIDFont, CMap sourceCmap)
- throws IOException
- {
- String retval = null;
- // there is not sourceCmap if this is a descendant font
- if (sourceCmap == null)
- {
- sourceCmap = cmap;
- }
- if (sourceCmap != null)
- {
- retval = sourceCmap.lookup(code, length);
- if (retval == null && isCIDFont)
- {
- retval = sourceCmap.lookupCID(code);
- }
- }
- return retval;
- }
-
- /**
- * Returns the Unicode character(s) for a given character code.
- *
- * @param c The character to encode.
- * @param offset The offset into the array to get the data
- * @param length The number of bytes to read.
- * @return The value of the encoded character.
- * @throws IOException If there is an error during the encoding.
+ * Reads a character code from a content stream string. Codes may be up to 4 bytes long.
+ *
+ * @param in string stream
+ * @return character code
+ * @throws IOException if the CMap or stream cannot be read
*/
- public String encode(byte[] c, int offset, int length) throws IOException
- {
- String retval = null;
- int code = getCodeFromArray(c, offset, length);
- if (toUnicodeCmap != null)
- {
- retval = cmapEncoding(code, length, false, toUnicodeCmap);
- }
- if (retval == null && cmap != null)
- {
- retval = cmapEncoding(code, length, false, cmap);
- }
-
- // there is no cmap but probably an encoding with a suitable mapping
- if (retval == null)
- {
- if (fontEncoding != null)
- {
- retval = fontEncoding.getCharacter(code);
- }
- if (retval == null && (cmap == null || length == 2))
- {
- retval = getStringFromArray(c, offset, length);
- }
- }
- return retval;
- }
-
- public int encodeToCID(byte[] c, int offset, int length) throws IOException
- {
- int code = -1;
- if (encode(c, offset, length) != null)
- {
- code = getCodeFromArray(c, offset, length);
- }
- return code;
- }
+ public abstract int readCode(InputStream in) throws IOException;
/**
- * Parse the given CMap.
- *
- * @param cmapRoot the root path pointing to the provided CMaps
- * @param cmapStream the CMap to be read
- * @return the parsed CMap
+ * Returns the Unicode character sequence which corresponds to the given character code.
+ *
+ * @param code character code
+ * @return Unicode character(s)
*/
- protected final CMap parseCmap(String cmapRoot, InputStream cmapStream)
+ public String toUnicode(int code)
{
- CMap targetCmap = null;
- if (cmapStream != null)
+ // if the font dictionary contains a ToUnicode CMap, use that CMap
+ if (toUnicodeCMap != null)
{
- CMapParser parser = new CMapParser();
- try
+ if (toUnicodeCMap.getName() != null && toUnicodeCMap.getName().startsWith("Identity-"))
{
- targetCmap = parser.parse(cmapRoot, cmapStream);
- // limit the cache to external CMaps
- if (cmapRoot != null)
- {
- cmapObjects.put(targetCmap.getName(), targetCmap);
- }
+ // handle the undocumented case of using Identity-H/V as a ToUnicode CMap, this
+ // isn't actually valid as the Identity-x CMaps are code->CID maps, not
+ // code->Unicode maps. See sample_fonts_solidconvertor.pdf for an example.
+ return new String(new char[] { (char) code });
}
- catch (IOException exception)
+ else
{
- LOG.error("An error occurs while reading a CMap", exception);
+ // proceed as normal
+ return toUnicodeCMap.toUnicode(code);
}
}
- return targetCmap;
- }
- /**
- * This will get or create the encoder.
- *
- * @return The encoding to use.
- */
- public Encoding getFontEncoding()
- {
- return fontEncoding;
+ // if no value has been produced, there is no way to obtain Unicode for the character.
+ // this behaviour can be overridden in subclasses, but this method *must* return null here
+ return null;
}
/**
@@ -562,8 +363,6 @@ public abstract class PDFont implements
/**
* This will get the subtype of font.
- *
- * @return The type of font that this is.
*/
public String getSubType()
{
@@ -571,70 +370,21 @@ public abstract class PDFont implements
}
/**
- * Determines if the font is a type 1 font.
- *
- * @return returns true if the font is a type 1 font
- */
- public boolean isType1Font()
- {
- return "Type1".equals(getSubType());
- }
-
- /**
- * Determines if the font is a type 3 font.
- *
- * @return returns true if the font is a type 3 font
- */
- public boolean isType3Font()
- {
- return "Type3".equals(getSubType());
- }
-
- /**
- * Determines if the font is a type 0 font.
- *
- * @return returns true if the font is a type 0 font
- */
- public boolean isType0Font()
- {
- return "Type0".equals(getSubType());
- }
-
- /**
- * Determines if the font is a true type font.
- *
- * @return returns true if the font is a true type font
- */
- public boolean isTrueTypeFont()
- {
- return "TrueType".equals(getSubType());
- }
-
- /**
- * Determines if the font is a symbolic font.
- *
- * @return returns true if the font is a symbolic font
+ * Returns true if the font is symbolic (that is, it does not use the Adobe Standard Roman
+ * character set).
*/
- public boolean isSymbolicFont()
+ public boolean isSymbolic()
{
if (getFontDescriptor() != null)
{
// fixme: isSymbolic() defaults to false if the flag is missing so we can't trust this
return getFontDescriptor().isSymbolic();
}
-
- // fixme: this heuristic is a starting point only
- if (fontEncoding instanceof MacRomanEncoding || fontEncoding instanceof WinAnsiEncoding)
- {
- return false;
- }
return true;
}
/**
- * The PostScript name of the font.
- *
- * @return The postscript name of the font.
+ * Returns the PostScript name of the font.
*/
public String getBaseFont()
{
@@ -642,33 +392,13 @@ public abstract class PDFont implements
}
/**
- * The code for the first char or -1 if there is none.
- *
- * @return The code for the first character.
- */
- public int getFirstChar()
- {
- return dict.getInt(COSName.FIRST_CHAR, -1);
- }
-
- /**
- * The code for the last char or -1 if there is none.
- *
- * @return The code for the last character.
- */
- public int getLastChar()
- {
- return dict.getInt(COSName.LAST_CHAR, -1);
- }
-
- /**
* The widths of the characters. This will be null for the standard 14 fonts.
- *
+ *
* @return The widths of the characters.
*/
- public List<Integer> getWidths()
+ private List<Integer> getWidths()
{
- if (widths == null && !widthsAreMissing)
+ if (widths == null)
{
COSArray array = (COSArray) dict.getDictionaryObject(COSName.WIDTHS);
if (array != null)
@@ -677,83 +407,18 @@ public abstract class PDFont implements
}
else
{
- widthsAreMissing = true;
+ widths = Collections.emptyList();
}
}
return widths;
}
/**
- * This will get the matrix that is used to transform glyph space to text space. By default
- * there are 1000 glyph units to 1 text space unit, but type3 fonts can use any value.
- *
- * Note: If this is a type3 font then it can be modified via the PDType3Font.setFontMatrix,
- * otherwise this is a read-only property.
- *
- * @return The matrix to transform from glyph space to text space.
+ * Returns the font matrix, which represents the transformation from glyph space to text space.
*/
public PDMatrix getFontMatrix()
{
- if (fontMatrix == null)
- {
- COSArray array = (COSArray) dict.getDictionaryObject(COSName.FONT_MATRIX);
- if (array == null)
- {
- array = new COSArray();
- array.add(new COSFloat(0.001f));
- array.add(COSInteger.ZERO);
- array.add(COSInteger.ZERO);
- array.add(new COSFloat(0.001f));
- array.add(COSInteger.ZERO);
- array.add(COSInteger.ZERO);
- }
- fontMatrix = new PDMatrix(array);
- }
- return fontMatrix;
- }
-
- /**
- * Determines the width of the given character.
- *
- * @param charCode the code of the given character
- * @return the width of the character
- */
- public float getFontWidth(int charCode) throws IOException
- {
- float width = -1;
- int firstChar = getFirstChar();
- int lastChar = getLastChar();
- if (charCode >= firstChar && charCode <= lastChar)
- {
- // maybe the font doesn't provide any widths
- if (!widthsAreMissing)
- {
- getWidths();
- if (widths != null)
- {
- width = widths.get(charCode - firstChar).floatValue();
- }
- }
- }
- else
- {
- PDFontDescriptor fd = getFontDescriptor();
- if (fd instanceof PDFontDescriptorDictionary)
- {
- width = fd.getMissingWidth();
- }
- }
- return width;
- }
-
- /**
- * Determines if a font as a ToUnicode entry.
- *
- * @return true if the font has a ToUnicode entry
- */
- public boolean hasToUnicode()
- {
- return hasToUnicode;
+ return FONT_MATRIX_1000;
}
/**
@@ -770,15 +435,15 @@ public abstract class PDFont implements
{
if (toUnicode != null)
{
- int spaceMapping = toUnicodeCmap.getSpaceMapping();
+ int spaceMapping = toUnicodeCMap.getSpaceMapping();
if (spaceMapping > -1)
{
- fontWidthOfSpace = getFontWidth(spaceMapping);
+ fontWidthOfSpace = getWidth(spaceMapping);
}
}
else
{
- fontWidthOfSpace = getFontWidth(SPACE_BYTES, 0, 1);
+ fontWidthOfSpace = getWidth(32);
}
// use the average font width as fall back
if (fontWidthOfSpace <= 0)
@@ -796,26 +461,6 @@ public abstract class PDFont implements
}
/**
- * Returns the toUnicode mapping if present.
- *
- * @return the CMap representing the toUnicode mapping
- */
- public CMap getToUnicodeCMap()
- {
- return toUnicodeCmap;
- }
-
- /**
- * Returns the CMap if present.
- *
- * @return the CMap representing the character encoding
- */
- public CMap getCMap()
- {
- return cmap;
- }
-
- /**
* Calling this will release all cached information.
*/
public void clear()
Added: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java?rev=1619956&view=auto
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java (added)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java Sat Aug 23 02:34:35 2014
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdmodel.font;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.encoding.DictionaryEncoding;
+import org.apache.pdfbox.encoding.Encoding;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * A simple font. Simple fonts use a PostScript encoding vector.
+ *
+ * @author John Hewson
+ */
+public abstract class PDSimpleFont extends PDFont
+{
+ private static final Log LOG = LogFactory.getLog(PDSimpleFont.class);
+
+ protected Encoding fontEncoding;
+ private final Set<Integer> noUnicode = new HashSet<Integer>();
+
+ /**
+ * Constructor.
+ */
+ protected PDSimpleFont()
+ {
+ super();
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param fontDictionary Font dictionary.
+ */
+ protected PDSimpleFont(COSDictionary fontDictionary) throws IOException
+ {
+ super(fontDictionary);
+ }
+
+ /**
+ * Reads the Encoding from the Font dictionary or the embedded or substituted font file.
+ * Must be called at the end of any subclass constructors.
+ *
+ * @throws IOException if the font file could not be read
+ */
+ protected final void readEncoding() throws IOException
+ {
+ COSBase encoding = dict.getDictionaryObject(COSName.ENCODING);
+ if (encoding != null)
+ {
+ if (encoding instanceof COSName)
+ {
+ COSName encodingName = (COSName)encoding;
+ fontEncoding = Encoding.getInstance(encodingName);
+ if (fontEncoding == null)
+ {
+ LOG.warn("Unknown encoding: " + encodingName);
+ fontEncoding = readEncodingFromFont(); // fallback
+ }
+ }
+ else if (encoding instanceof COSDictionary)
+ {
+ fontEncoding = new DictionaryEncoding((COSDictionary) encoding);
+ }
+ }
+ else
+ {
+ fontEncoding = readEncodingFromFont();
+ }
+ }
+
+ /**
+ * Called by readEncoding() if the encoding needs to be extracted from the font file.
+ *
+ * @throws IOException if the font file could not be read
+ */
+ protected abstract Encoding readEncodingFromFont() throws IOException;
+
+ /**
+ * Returns the Encoding vector.
+ */
+ public Encoding getEncoding()
+ {
+ return fontEncoding;
+ }
+
+ @Override
+ public String toUnicode(int code)
+ {
+ // first try to use a ToUnicode CMap
+ String unicode = super.toUnicode(code);
+ if (unicode != null)
+ {
+ return unicode;
+ }
+
+ // if the font is a "simple font" and uses MacRoman/MacExpert/WinAnsi[Encoding]
+ // or has Differences with names from only Adobe Standard and/or Symbol, then:
+ //
+ // a) Map the character codes to names
+ // b) Look up the name in the Adobe Glyph List to obtain the Unicode value
+
+ String name = null;
+ if (getEncoding() != null)
+ {
+ name = fontEncoding.getName(code);
+ unicode = Encoding.getCharacterForName(name);
+ if (unicode != null)
+ {
+ return unicode;
+ }
+ }
+
+ // if no value has been produced, there is no way to obtain Unicode for the character.
+ if (LOG.isWarnEnabled() && !noUnicode.contains(code))
+ {
+ // we keep track of which warnings have been issued, so we don't log multiple times
+ noUnicode.add(code);
+ if (name != null)
+ {
+ LOG.warn("No Unicode mapping for " + name + " (" + code + ") in font " +
+ getBaseFont());
+ }
+ else
+ {
+ LOG.warn("No Unicode mapping for character code " + code + " in font " +
+ getBaseFont());
+ }
+ }
+
+ return null;
+ }
+}
Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java Sat Aug 23 02:34:35 2014
@@ -20,7 +20,6 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.util.HashMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -39,7 +38,7 @@ import org.apache.pdfbox.pdmodel.common.
*
* @author Ben Litchfield
*/
-public class PDTrueTypeFont extends PDFont
+public class PDTrueTypeFont extends PDSimpleFont
{
private static final Log LOG = LogFactory.getLog(PDTrueTypeFont.class);
@@ -66,7 +65,7 @@ public class PDTrueTypeFont extends PDFo
private boolean cmapInitialized = false;
private final TrueTypeFont ttf;
- private final HashMap<Integer, Float> advanceWidths = new HashMap<Integer, Float> ();
+ private final boolean isEmbedded;
/**
* Creates a new TrueType font from a Font dictionary.
@@ -89,6 +88,7 @@ public class PDTrueTypeFont extends PDFo
ttfFont = ttfParser.parseTTF(ff2Stream.createInputStream());
}
}
+ isEmbedded = ttfFont != null;
// substitute
if (ttfFont == null)
@@ -102,10 +102,14 @@ public class PDTrueTypeFont extends PDFo
ttfFont = ExternalFonts.getFallbackFont();
}
}
-
ttf = ttfFont;
+ readEncoding();
+ }
- determineEncoding();
+ @Override
+ protected Encoding readEncodingFromFont() throws IOException
+ {
+ return null;
}
/**
@@ -116,17 +120,13 @@ public class PDTrueTypeFont extends PDFo
PDTrueTypeFontEmbedder embedder = new PDTrueTypeFontEmbedder(document, dict, ttfStream);
fontEncoding = embedder.getFontEncoding();
ttf = embedder.getTrueTypeFont();
+ isEmbedded = true;
}
@Override
- public PDFontDescriptor getFontDescriptor()
+ public int readCode(InputStream in) throws IOException
{
- if (super.getFontDescriptor() == null)
- {
- // todo: this is an experiment: we now allow this to be null (i.e. we no longer synthesise)
- //fontDescriptor = makeFontDescriptor(ttf);
- }
- return fontDescriptor;
+ return in.read();
}
/**
@@ -138,46 +138,39 @@ public class PDTrueTypeFont extends PDFo
}
@Override
- public float getFontWidth(int charCode) throws IOException
+ protected float getWidthFromFont(int code) throws IOException
{
- float width = super.getFontWidth(charCode);
- if (width <= 0)
+ int gid = codeToGID(code);
+ int width = ttf.getAdvanceWidth(gid);
+ int unitsPerEM = ttf.getUnitsPerEm();
+ if (unitsPerEM != 1000)
{
- if (advanceWidths.containsKey(charCode))
- {
- width = advanceWidths.get(charCode);
- }
- else
- {
- int code = getGIDForCharacterCode(charCode);
- width = ttf.getAdvanceWidth(code);
- int unitsPerEM = ttf.getUnitsPerEm();
- // do we have to scale the width
- if (unitsPerEM != 1000)
- {
- width *= 1000f / unitsPerEM;
- }
- advanceWidths.put(charCode, width);
- }
+ width *= 1000f / unitsPerEM;
}
return width;
}
+ @Override
+ protected boolean isEmbedded()
+ {
+ return isEmbedded;
+ }
+
/**
* Returns the GID for the given character code.
*
* @param code character code
* @return GID (glyph index)
*/
- public int getGIDForCharacterCode(int code) throws IOException
+ public int codeToGID(int code) throws IOException
{
extractCmapTable();
int result = 0;
- if (getFontEncoding() != null && !isSymbolicFont())
+ if (getEncoding() != null && !isSymbolic())
{
try
{
- String characterName = getFontEncoding().getName(code);
+ String characterName = getEncoding().getName(code);
if (characterName != null)
{
if (cmapWinUnicode != null)
@@ -219,7 +212,7 @@ public class PDTrueTypeFont extends PDFo
}
}
- if (getFontEncoding() == null || isSymbolicFont())
+ if (getEncoding() == null || isSymbolic())
{
if (cmapWinSymbol != null)
{
@@ -308,6 +301,5 @@ public class PDTrueTypeFont extends PDFo
cmapWinSymbol = null;
cmapMacintoshSymbol = null;
cmapInitialized = false;
- advanceWidths.clear();
}
}
Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java Sat Aug 23 02:34:35 2014
@@ -217,25 +217,23 @@ class PDTrueTypeFontEmbedder
fd.setStemV(fd.getFontBoundingBox().getWidth() * .13f);
CMAPTable cmapTable = ttf.getCMAP();
- CMAPEncodingEntry[] cmaps = cmapTable.getCmaps();
-
- CMAPEncodingEntry uniMap = getCmapSubtable(cmaps, CMAPTable.PLATFORM_UNICODE,
+ CMAPEncodingEntry uniMap = cmapTable.getSubtable(CMAPTable.PLATFORM_UNICODE,
CMAPTable.ENCODING_UNICODE_2_0_FULL);
if (uniMap == null)
{
- uniMap = getCmapSubtable(cmaps, CMAPTable.PLATFORM_UNICODE,
+ uniMap = cmapTable.getSubtable(CMAPTable.PLATFORM_UNICODE,
CMAPTable.ENCODING_UNICODE_2_0_BMP);
}
if (uniMap == null)
{
- uniMap = getCmapSubtable(cmaps, CMAPTable.PLATFORM_WINDOWS,
+ uniMap = cmapTable.getSubtable(CMAPTable.PLATFORM_WINDOWS,
CMAPTable.ENCODING_WIN_UNICODE);
}
if (uniMap == null)
{
// Microsoft's "Recommendations for OpenType Fonts" says that "Symbol" encoding
// actually means "Unicode, non-standard character set"
- uniMap = getCmapSubtable(cmaps, CMAPTable.PLATFORM_WINDOWS,
+ uniMap = cmapTable.getSubtable(CMAPTable.PLATFORM_WINDOWS,
CMAPTable.ENCODING_WIN_SYMBOL);
}
if (uniMap == null)
@@ -244,7 +242,8 @@ class PDTrueTypeFontEmbedder
// to find one. Furthermore, if we loaded the font from disk then we should've checked
// first to see that it had a suitable cmap before calling createFontDescriptor
throw new IllegalArgumentException("ttf: no suitable cmap for font '" +
- ttf.getNaming().getFontFamily() + "', found: " + Arrays.toString(cmaps));
+ ttf.getNaming().getFontFamily() + "', found: " +
+ Arrays.toString(cmapTable.getCmaps()));
}
if (this.getFontEncoding() == null)
@@ -315,23 +314,6 @@ class PDTrueTypeFontEmbedder
}
/**
- * Returns the "cmap" subtable for the given platform and encoding, or null.
- */
- private CMAPEncodingEntry getCmapSubtable(CMAPEncodingEntry[] cmaps,
- int platformId, int platformEncodingId)
- {
- for (CMAPEncodingEntry cmap : cmaps)
- {
- if (cmap.getPlatformId() == platformId &&
- cmap.getPlatformEncodingId() == platformEncodingId)
- {
- return cmap;
- }
- }
- return null;
- }
-
- /**
* Returns the font's encoding.
*/
public Encoding getFontEncoding()
Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java Sat Aug 23 02:34:35 2014
@@ -19,30 +19,23 @@ package org.apache.pdfbox.pdmodel.font;
import java.io.IOException;
import java.io.InputStream;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
+import org.apache.fontbox.cmap.CMap;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.cos.COSStream;
-import org.apache.pdfbox.encoding.DictionaryEncoding;
-import org.apache.pdfbox.encoding.Encoding;
-import org.apache.pdfbox.io.IOUtils;
-import org.apache.pdfbox.pdmodel.common.PDRectangle;
-import org.apache.pdfbox.util.ResourceLoader;
/**
- * A Type 0 (composite) font.
- *
+ * A Composite (Type 0) font.
+ *
* @author Ben Litchfield
*/
public class PDType0Font extends PDFont
{
- private static final Log LOG = LogFactory.getLog(PDType0Font.class);
-
private PDCIDFont descendantFont;
private COSDictionary descendantFontDictionary;
+ private CMap cMap, cMapUCS2;
+ private boolean isCMapPredefined;
/**
* Constructor.
@@ -59,8 +52,78 @@ public class PDType0Font extends PDFont
{
throw new IOException("Missing descendant font dictionary");
}
+
+ readEncoding();
+ fetchCMapUCS2();
descendantFont = PDFontFactory.createDescendantFont(descendantFontDictionary, this);
- determineEncoding();
+ }
+
+ /**
+ * Reads the font's Encoding entry, which should be a CMap name/stream.
+ */
+ private void readEncoding() throws IOException
+ {
+ COSBase encoding = dict.getDictionaryObject(COSName.ENCODING);
+ if (encoding != null)
+ {
+ if (encoding instanceof COSName)
+ {
+ // predefined CMap
+ COSName encodingName = (COSName)encoding;
+ cMap = CMapManager.getPredefinedCMap(encodingName.getName());
+ if (cMap != null)
+ {
+ isCMapPredefined = true;
+ return;
+ }
+ }
+ else
+ {
+ cMap = readCMap(encoding);
+ }
+ }
+ }
+
+ /**
+ * Fetches the corresponding UCS2 CMap if the font's CMap is predefined.
+ */
+ private void fetchCMapUCS2() throws IOException
+ {
+ // if the font is composite and uses a predefined cmap (excluding Identity-H/V),
+ // or if its descendant font uses Adobe-GB1/CNS1/Japan1/Korea1, then:
+ if (isCMapPredefined)
+ {
+ // a) Map the character code to a CID using the font's CMap
+ // b) Obtain the ROS from the font's CIDSystemInfo
+ // c) Construct a second CMap name by concatenating the ROS in the format "R-O-UCS2"
+ // d) Obtain the CMap with the constructed name
+ // e) Map the CID according to the CMap from step d), producing a Unicode value
+
+ String cMapName = null;
+
+ // get the encoding CMap
+ COSBase encoding = dict.getDictionaryObject(COSName.ENCODING);
+ if (encoding != null && encoding instanceof COSName)
+ {
+ cMapName = ((COSName)encoding).getName();
+ }
+
+ // try to find the corresponding Unicode (UCS2) CMap
+ if (cMapName != null && !cMapName.equals("Identity-H") &&
+ !cMapName.equals("Identity-V"))
+ {
+ CMap cMap = CMapManager.getPredefinedCMap(cMapName);
+ if (cMap != null)
+ {
+ String ucs2Name = cMap.getRegistry() + "-" + cMap.getOrdering() + "-UCS2";
+ CMap ucs2CMap = CMapManager.getPredefinedCMap(ucs2Name);
+ if (ucs2CMap != null)
+ {
+ cMapUCS2 = ucs2CMap;
+ }
+ }
+ }
+ }
}
/**
@@ -71,6 +134,14 @@ public class PDType0Font extends PDFont
return descendantFont;
}
+ /**
+ * Returns the font's CMap.
+ */
+ public CMap getCMap()
+ {
+ return cMap;
+ }
+
@Override
public PDFontDescriptor getFontDescriptor()
{
@@ -78,148 +149,85 @@ public class PDType0Font extends PDFont
}
@Override
- public float getFontWidth(byte[] c, int offset, int length)
+ public float getHeight(int code) throws IOException
{
- return descendantFont.getFontWidth(c, offset, length);
+ return descendantFont.getHeight(code);
}
@Override
- public float getFontHeight(byte[] c, int offset, int length)
+ public float getAverageFontWidth()
{
- return descendantFont.getFontHeight(c, offset, length);
+ return descendantFont.getAverageFontWidth();
}
@Override
- public float getAverageFontWidth()
+ public float getWidth(int code) throws IOException
{
- return descendantFont.getAverageFontWidth();
+ return descendantFont.getWidth(code);
}
@Override
- public float getFontWidth(int charCode)
+ protected float getWidthFromFont(int code) throws IOException
{
- return descendantFont.getFontWidth(charCode);
+ return descendantFont.getWidthFromFont(code);
}
- // todo: copied from PDSimpleFont and modified
- // todo: for a Type 0 font this can only be "The name of a predefined CMap, or a stream containing a
- // CMap that maps character codes to font numbers and CIDs", so I should adjust this accordingly
@Override
- protected void determineEncoding()
+ protected boolean isEmbedded()
{
- String cmapName = null;
- COSName encodingName = null;
- COSBase encoding = dict.getDictionaryObject(COSName.ENCODING);
- Encoding fontEncoding = null;
- if (encoding != null)
+ return descendantFont.isEmbedded();
+ }
+
+ @Override
+ public String toUnicode(int code)
+ {
+ // try to use a ToUnicode CMap
+ String unicode = super.toUnicode(code);
+ if (unicode != null)
{
- if (encoding instanceof COSName)
- {
- if (cmap == null)
- {
- encodingName = (COSName) encoding;
- cmap = cmapObjects.get(encodingName.getName());
- if (cmap == null)
- {
- cmapName = encodingName.getName();
- }
- }
- // todo: disabled because a Type0 font cannot have a simple Encoding.
- /*if (cmap == null && cmapName != null)
- {
- try
- {
- fontEncoding = Encoding.getInstance(encodingName);
- }
- catch (IOException exception)
- {
- LOG.warn("Debug: Could not find encoding for " + encodingName);
- }
- }*/
- }
- else if (encoding instanceof COSStream)
- {
- if (cmap == null)
- {
- COSStream encodingStream = (COSStream) encoding;
- try
- {
- InputStream is = encodingStream.getUnfilteredStream();
- cmap = parseCmap(null, is);
- IOUtils.closeQuietly(is);
- }
- catch (IOException exception)
- {
- LOG.error("Error: Could not parse the embedded CMAP");
- }
- }
- }
- else if (encoding instanceof COSDictionary)
- {
- try
- {
- fontEncoding = new DictionaryEncoding((COSDictionary) encoding);
- }
- catch (IOException exception)
- {
- LOG.error("Error: Could not create the DictionaryEncoding");
- }
- }
+ return unicode;
}
- this.fontEncoding = fontEncoding;
- extractToUnicodeEncoding(); // todo: IMPORTANT!
- if (cmap == null && cmapName != null)
+ // if the font is composite and uses a predefined cmap (excluding Identity-H/V),
+ // or if its descendant font uses Adobe-GB1/CNS1/Japan1/Korea1, then:
+ if (isCMapPredefined && cMapUCS2 != null)
{
- InputStream cmapStream = null;
- try
- {
- // look for a predefined CMap with the given name
- cmapStream = ResourceLoader.loadResource(resourceRootCMAP + cmapName);
- if (cmapStream != null)
- {
- cmap = parseCmap(resourceRootCMAP, cmapStream);
- if (cmap == null && encodingName == null)
- {
- LOG.error("Error: Could not parse predefined CMAP file for '" +
- cmapName + "'");
- }
- }
- else
- {
- LOG.warn("Debug: '" + cmapName + "' isn't a predefined map, most likely it's" +
- "embedded in the pdf itself.");
- }
- }
- catch (IOException exception)
- {
- LOG.error("Error: Could not find predefined CMAP file for '" + cmapName + "'");
- }
- finally
- {
- IOUtils.closeQuietly(cmapStream);
- }
+ // e) Map the CID according to the CMap from step d), producing a Unicode value
+ return cMapUCS2.toUnicode(code);
+ }
+ else
+ {
+ // if no value has been produced, there is no way to obtain Unicode for the character.
+ return null;
}
}
@Override
- public String encode(byte[] c, int offset, int length) throws IOException
+ public int readCode(InputStream in) throws IOException
{
- String retval = null;
- if (hasToUnicode())
- {
- retval = super.encode(c, offset, length);
- }
+ return cMap.readCode(in);
+ }
- if (retval == null)
- {
- int result = cmap.lookupCID(c, offset, length);
- if (result != -1)
- {
- retval = descendantFont.cmapEncoding(result, 2, true, null);
- }
- }
- return retval;
+ /**
+ * Returns the CID for the given character code. If not found then CID 0 is returned.
+ *
+ * @param code character code
+ * @return CID
+ */
+ public int codeToCID(int code)
+ {
+ return descendantFont.codeToCID(code);
+ }
+
+ /**
+ * Returns the GID for the given character code.
+ *
+ * @param code character code
+ * @return GID
+ */
+ public int codeToGID(int code) throws IOException
+ {
+ return descendantFont.codeToGID(code);
}
@Override
@@ -237,7 +245,11 @@ public class PDType0Font extends PDFont
@Override
public String toString()
{
- return getClass().getSimpleName() + "/" + getDescendantFont()
- .getClass().getSimpleName() + " " + getBaseFont();
+ String descendant = null;
+ if (getDescendantFont() != null)
+ {
+ descendant = getDescendantFont().getClass().getSimpleName();
+ }
+ return getClass().getSimpleName() + "/" + descendant + " " + getBaseFont();
}
}
Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java Sat Aug 23 02:34:35 2014
@@ -19,24 +19,19 @@ package org.apache.pdfbox.pdmodel.font;
import java.awt.geom.GeneralPath;
import java.io.IOException;
-import java.util.Arrays;
+import java.io.InputStream;
import java.util.HashMap;
-import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.cff.CFFParser;
import org.apache.fontbox.cff.CFFType1Font;
-import org.apache.fontbox.cff.Type1CharString;
import org.apache.fontbox.ttf.Type1Equivalent;
-import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSDictionary;
-import org.apache.pdfbox.cos.COSFloat;
import org.apache.pdfbox.encoding.Encoding;
import org.apache.pdfbox.encoding.Type1Encoding;
import org.apache.pdfbox.io.IOUtils;
-import org.apache.pdfbox.pdmodel.common.PDMatrix;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.PDStream;
@@ -46,19 +41,17 @@ import org.apache.pdfbox.pdmodel.common.
* @author Villu Ruusmann
* @author John Hewson
*/
-public class PDType1CFont extends PDFont implements PDType1Equivalent
+public class PDType1CFont extends PDSimpleFont implements PDType1Equivalent
{
private static final Log LOG = LogFactory.getLog(PDType1CFont.class);
- private static final byte[] SPACE_BYTES = { (byte) 32 };
- private String fontName = null;
- private Map<String, Float> glyphWidths = new HashMap<String, Float>();
private Map<String, Float> glyphHeights = new HashMap<String, Float>();
private Float avgWidth = null;
private PDRectangle fontBBox = null;
- private CFFType1Font cffFont; // embedded font
+ private final CFFType1Font cffFont; // embedded font
private final Type1Equivalent type1Equivalent; // embedded or system font for rendering
+ private final boolean isEmbedded;
/**
* Constructor.
@@ -87,6 +80,7 @@ public class PDType1CFont extends PDFont
if (cffFont != null)
{
type1Equivalent = cffFont;
+ isEmbedded = true;
}
else
{
@@ -100,12 +94,9 @@ public class PDType1CFont extends PDFont
LOG.warn("Using fallback font for " + getBaseFont());
type1Equivalent = ExternalFonts.getFallbackFont();
}
+ isEmbedded = false;
}
-
- // cache the font name
- fontName = cffFont.getName();
-
- determineEncoding();
+ readEncoding();
}
/**
@@ -139,7 +130,7 @@ public class PDType1CFont extends PDFont
@Override
public String codeToName(int code)
{
- String name = getFontEncoding().getName(code);
+ String name = getEncoding().getName(code);
if (name != null)
{
return name;
@@ -155,93 +146,47 @@ public class PDType1CFont extends PDFont
{
return ".notdef".equals(name);
}
-
@Override
- protected void determineEncoding()
+ protected Encoding readEncodingFromFont() throws IOException
{
- super.determineEncoding();
- Encoding fontEncoding = getFontEncoding();
- if (fontEncoding == null)
- {
- // extract from CFF/substitute
- this.fontEncoding = new Type1Encoding(type1Equivalent.getEncoding());
- }
+ return Type1Encoding.fromFontBox(type1Equivalent.getEncoding());
}
@Override
- public String encode(byte[] bytes, int offset, int length) throws IOException
+ public int readCode(InputStream in) throws IOException
{
- String character = getUnicode(bytes, offset, length);
- if (character == null)
- {
- // todo: message is for debugging, remove in long term
- LOG.warn("No character for code " + (bytes[offset] & 0xff) + " in " + fontName);
- return null;
- }
- return character;
+ return in.read();
}
- /*@Override
- public int encodeToCID(byte[] bytes, int offset, int length)
- {
- if (length > 2)
- {
- return -1;
- }
- int code = bytes[offset] & 0xff;
- if (length == 2)
- {
- code = code * 256 + bytes[offset + 1] & 0xff;
- }
- return code;
- }*/
-
- // helper
- private String getUnicode(byte[] bytes, int offset, int length) throws IOException
+ @Override
+ protected float getWidthFromFont(int code) throws IOException
{
- int code = getCodeFromArray(bytes, offset, length);
- String character = getFontEncoding().getCharacter(code);
- if (character == null)
- {
- // todo: message is for debugging, remove in long term
- LOG.warn("Could not get character " + code);
- }
- return character;
+ String name = codeToName(code);
+ return cffFont.getType1CharString(name).getWidth();
}
@Override
- public float getFontWidth(byte[] bytes, int offset, int length)
+ protected boolean isEmbedded()
{
- int code = bytes[offset] & 0xff;
- String name = codeToName(code);
-
- Float width = glyphWidths.get(name);
- if (width == null)
- {
- width = getCharacterWidth(name);
- glyphWidths.put(name, width);
- }
-
- return width;
+ return isEmbedded;
}
@Override
- public float getFontHeight(byte[] bytes, int offset, int length)
+ public float getHeight(int code) throws IOException
{
- int code = bytes[offset] & 0xff;
String name = codeToName(code);
if (isNotDef(name))
{
// todo: message is for debugging, remove in long term
- LOG.warn("No name for code " + (bytes[offset] & 0xff) + " in " + fontName);
+ LOG.warn("No name for code " + code + " in " + cffFont.getName());
return 0;
}
float height = 0;
if (!glyphHeights.containsKey(name))
{
- height = getCharacterHeight(name);
+ height = (float)cffFont.getType1CharString(name).getBounds().getHeight(); // todo: cffFont could be null
glyphHeights.put(name, height);
}
return height;
@@ -254,14 +199,13 @@ public class PDType1CFont extends PDFont
for (int i = 0; i < string.length(); i++)
{
String character = string.substring(i, i + 1);
- String name = getFontEncoding().getNameForCharacter(character.charAt(0));
+ String name = getEncoding().getNameForCharacter(character.charAt(0));
if (isNotDef(name))
{
// todo: message is for debugging, remove in long term
LOG.warn("No code for character " + character);
- return 0;
}
- width += getCharacterWidth(name);
+ width += cffFont.getType1CharString(name).getWidth();
}
return width;
}
@@ -276,30 +220,6 @@ public class PDType1CFont extends PDFont
return avgWidth;
}
- @Override
- public PDMatrix getFontMatrix()
- {
- if (fontMatrix == null)
- {
- List<Number> numbers = cffFont.getFontMatrix(); // todo: cffFont could be null
- if (numbers != null && numbers.size() == 6)
- {
- COSArray array = new COSArray();
- for (Number number : numbers)
- {
- array.add(new COSFloat(number.floatValue()));
- }
- fontMatrix = new PDMatrix(array);
- }
- else
- {
- // todo: the font should always have a Matrix, so why fallback?
- super.getFontMatrix();
- }
- }
- return fontMatrix;
- }
-
/**
* Returns the embedded Type 1-equivalent CFF font.
*
@@ -311,45 +231,6 @@ public class PDType1CFont extends PDFont
}
// todo: this is a replacement for FontMetrics method
- private float getCharacterWidth(String name)
- {
- try
- {
- // todo: for debugging we check for .notdef
- Type1CharString notdef = cffFont.getType1CharString(".notdef"); // todo: cffFont could be null
- Type1CharString charstring = cffFont.getType1CharString(name);
- if (charstring == notdef)
- {
- // todo: message is for debugging, remove in long term
- LOG.warn("No width for character " + name + ", using .notdef");
- }
- return charstring.getWidth();
- }
- catch (IOException e)
- {
- // todo: HACK
- LOG.error(e);
- }
- return 0;
- }
-
- // todo: this is a replacement for FontMetrics method
- // todo: but in FontMetrics this method actually gets the advance-y for vertical mode
- private float getCharacterHeight(String name)
- {
- try
- {
- return (float)cffFont.getType1CharString(name).getBounds().getHeight(); // todo: cffFont could be null
- }
- catch (IOException e)
- {
- // todo: HACK
- LOG.error(e);
- return 0;
- }
- }
-
- // todo: this is a replacement for FontMetrics method
private float getAverageCharacterWidth()
{
// todo: not implemented, highly suspect
@@ -360,17 +241,11 @@ public class PDType1CFont extends PDFont
public void clear()
{
super.clear();
- cffFont = null;
fontBBox = null;
if (glyphHeights != null)
{
glyphHeights.clear();
glyphHeights = null;
}
- if (glyphWidths != null)
- {
- glyphWidths.clear();
- glyphWidths = null;
- }
}
}
Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java Sat Aug 23 02:34:35 2014
@@ -21,7 +21,6 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.HashMap;
-import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
@@ -30,18 +29,13 @@ import org.apache.fontbox.afm.AFMParser;
import org.apache.fontbox.afm.FontMetrics;
import org.apache.fontbox.ttf.Type1Equivalent;
import org.apache.fontbox.type1.Type1Font;
-import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSDictionary;
-import org.apache.pdfbox.cos.COSFloat;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.encoding.Encoding;
-import org.apache.pdfbox.encoding.StandardEncoding;
import org.apache.pdfbox.encoding.Type1Encoding;
import org.apache.pdfbox.encoding.WinAnsiEncoding;
import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.pdmodel.common.PDMatrix;
-import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.util.ResourceLoader;
@@ -50,7 +44,7 @@ import org.apache.pdfbox.util.ResourceLo
*
* @author Ben Litchfield
*/
-public class PDType1Font extends PDFont implements PDType1Equivalent
+public class PDType1Font extends PDSimpleFont implements PDType1Equivalent
{
private static final Log LOG = LogFactory.getLog(PDType1Font.class);
@@ -123,6 +117,7 @@ public class PDType1Font extends PDFont
private final FontMetrics afm; // for standard 14 fonts
private final Type1Font type1font; // embedded font
private final Type1Equivalent type1Equivalent; // embedded or system font for rendering
+ private final boolean isEmbedded;
/**
* Creates a Type 1 standard 14 font for embedding.
@@ -145,6 +140,7 @@ public class PDType1Font extends PDFont
// todo: could load the PFB font here if we wanted to support Standard 14 embedding
type1font = null;
type1Equivalent = null;
+ isEmbedded = false;
}
/**
@@ -162,6 +158,7 @@ public class PDType1Font extends PDFont
afm = null; // only used for standard 14 fonts, not AFM fonts as we already have the PFB
type1font = embedder.getType1Font();
type1Equivalent = embedder.getType1Font();
+ isEmbedded = true;
}
/**
@@ -206,6 +203,7 @@ public class PDType1Font extends PDFont
}
}
}
+ isEmbedded = t1 != null;
// try to find a suitable .pfb font to substitute
if (t1 == null)
@@ -237,8 +235,7 @@ public class PDType1Font extends PDFont
// todo: for standard 14 only. todo: move this to a subclass "PDStandardType1Font" ?
afm = getAFMFromBaseFont(getBaseFont()); // may be null (it usually is)
- determineEncoding();
- getEncodingFromFont();
+ readEncoding();
}
// todo: move this to a subclass?
@@ -255,72 +252,6 @@ public class PDType1Font extends PDFont
return null;
}
- /**
- * Extracts the encoding from the font, if there is no Encoding given in the Font dictionary.
- */
- private void getEncodingFromFont()
- {
- if (getFontEncoding() == null)
- {
- // todo: this doesn't work properly for TTFs because they fake StandardEncoding currently
- // it seems that they should look for a MacRoman cmap instead and claim to use that
- org.apache.fontbox.encoding.Encoding encoding = type1Equivalent.getEncoding();
- if (encoding instanceof org.apache.fontbox.encoding.StandardEncoding)
- {
- this.fontEncoding = StandardEncoding.INSTANCE;
- }
- else if (encoding instanceof org.apache.fontbox.encoding.CustomEncoding)
- {
- Map<Integer,String> codeToName = encoding.getCodeToNameMap();
- Type1Encoding type1Encoding = new Type1Encoding(codeToName.size());
- for (Integer code : codeToName.keySet())
- {
- type1Encoding.addCharacterEncoding(code, codeToName.get(code));
- }
- this.fontEncoding = type1Encoding;
- }
- }
- }
-
- @Override
- public PDMatrix getFontMatrix()
- {
- if (fontMatrix == null)
- {
- // todo: this is an experimental implementation: just use the standard PostScript matrix
- // todo: don't all PostScript fonts use a 1000upem matrix anyway?
- if (type1font == null)
- {
- COSArray a = new COSArray();
- a.add(new COSFloat(0.001f));
- a.add(new COSFloat(0));
- a.add(new COSFloat(0));
- a.add(new COSFloat(0.001f));
- a.add(new COSFloat(0));
- a.add(new COSFloat(0));
- fontMatrix = new PDMatrix(a);
- return fontMatrix;
- }
-
- List<Number> numbers = type1font.getFontMatrix();
- if (numbers != null && numbers.size() == 6)
- {
- COSArray array = new COSArray();
- for (Number number : numbers)
- {
- array.add(new COSFloat(number.floatValue()));
- }
- fontMatrix = new PDMatrix(array);
- }
- else
- {
- // todo: the font should always have a Matrix, so why fallback?
- super.getFontMatrix();
- }
- }
- return fontMatrix;
- }
-
@Override
public PDFontDescriptor getFontDescriptor()
{
@@ -328,48 +259,45 @@ public class PDType1Font extends PDFont
{
if (afm != null)
{
- fontDescriptor = new PDFontDescriptorAFM(afm); // todo: wait, isn't this for embedding?
+ // this is for embedding fonts into PDFs, rather than for reading, though it works.
+ fontDescriptor = new PDFontDescriptorAFM(afm);
}
- // todo: else: then what? (no FD means no embedded font, plus we have no AFM: so fallback)
}
return fontDescriptor;
}
@Override
- public float getFontHeight(byte[] c, int offset, int length)
+ public float getHeight(int code) throws IOException
{
if (afm != null)
{
- int code = getCodeFromArray(c, offset, length);
- Encoding encoding = getFontEncoding();
- String characterName = encoding.getName(code);
- return afm.getCharacterHeight(characterName);
+ String characterName = getEncoding().getName(code);
+ return afm.getCharacterHeight(characterName); // todo: isn't this the y-advance, not the height?
}
- return super.getFontHeight(c, offset, length);
+ return super.getHeight(code);
}
@Override
- public float getFontWidth(int charCode) throws IOException
+ protected float getWidthFromFont(int code) throws IOException
{
- float width = super.getFontWidth(charCode);
- if (width <= 0)
+ String name = codeToName(code);
+ if (afm != null)
{
- // get width from AFM
- float retval = 0;
- if (afm != null)
- {
- String characterName = fontEncoding.getName(charCode);
- retval = afm.getCharacterWidth(characterName);
- }
- return retval;
+ return afm.getCharacterWidth(name);
}
else
{
- return width;
+ return type1Equivalent.getWidth(name);
}
}
@Override
+ protected boolean isEmbedded()
+ {
+ return isEmbedded;
+ }
+
+ @Override
public float getAverageFontWidth()
{
if (afm != null)
@@ -383,18 +311,23 @@ public class PDType1Font extends PDFont
}
@Override
- protected void determineEncoding()
+ public int readCode(InputStream in) throws IOException
+ {
+ return in.read();
+ }
+
+ @Override
+ protected Encoding readEncodingFromFont() throws IOException
{
- super.determineEncoding();
- Encoding fontEncoding = getFontEncoding();
- if (fontEncoding == null)
+ if (afm != null)
{
- if (afm != null)
- {
- fontEncoding = new Type1Encoding(afm);
- }
- // todo: get encoding from font if still null
- this.fontEncoding = fontEncoding;
+ // read from AFM
+ return new Type1Encoding(afm);
+ }
+ else
+ {
+ // extract from Type1 font/substitute
+ return Type1Encoding.fromFontBox(type1Equivalent.getEncoding());
}
}
@@ -431,7 +364,7 @@ public class PDType1Font extends PDFont
@Override
public String codeToName(int code)
{
- String name = getFontEncoding().getName(code);
+ String name = getEncoding().getName(code);
if (name != null)
{
return name;