You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ja...@apache.org on 2014/09/26 00:34:21 UTC
svn commit: r1627677 - in /pdfbox/trunk:
fontbox/src/main/java/org/apache/fontbox/ttf/ pdfbox/
pdfbox/src/main/java/org/apache/pdfbox/encoding/
pdfbox/src/main/java/org/apache/pdfbox/pdmodel/
pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ pdfbox/...
Author: jahewson
Date: Thu Sep 25 22:34:20 2014
New Revision: 1627677
URL: http://svn.apache.org/r1627677
Log:
PDFBOX-2380: Refactor glyph list loading and lookup
Removed:
pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/glyphlist/additional_glyphlist.properties
pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/glyphlist/glyphlist.properties
pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/glyphlist/zapf_dingbats.properties
Modified:
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TTFSubsetter.java
pdfbox/trunk/pdfbox/pom.xml
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/GlyphList.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDMMType1Font.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/font/container/TrueTypeContainer.java
Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java Thu Sep 25 22:34:20 2014
@@ -497,22 +497,6 @@ public class CmapSubtable
}
/**
- * @return Returns the glyphIdToCharacterCode.
- */
- public int[] getGlyphIdToCharacterCode()
- {
- return glyphIdToCharacterCode;
- }
-
- /**
- * @param glyphIdToCharacterCodeValue The glyphIdToCharacterCode to set.
- */
- public void setGlyphIdToCharacterCode(int[] glyphIdToCharacterCodeValue)
- {
- glyphIdToCharacterCode = glyphIdToCharacterCodeValue;
- }
-
- /**
* @return Returns the platformEncodingId.
*/
public int getPlatformEncodingId()
@@ -556,6 +540,21 @@ public class CmapSubtable
return glyphId == null ? 0 : glyphId;
}
+ /**
+ * Returns the character code for the given GID.
+ *
+ * @param gid glyph id
+ * @return character code
+ */
+ public int getCharacterCode(int gid)
+ {
+ if (gid < 0 || gid >= glyphIdToCharacterCode.length)
+ {
+ return 0;
+ }
+ return glyphIdToCharacterCode[gid];
+ }
+
@Override
public String toString()
{
Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TTFSubsetter.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TTFSubsetter.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TTFSubsetter.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TTFSubsetter.java Thu Sep 25 22:34:20 2014
@@ -28,7 +28,6 @@ import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
-import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
@@ -37,9 +36,6 @@ import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
-import org.apache.fontbox.encoding.Encoding;
-import org.apache.fontbox.encoding.MacRomanEncoding;
-
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -959,57 +955,17 @@ public class TTFSubsetter
List<String> additionalNames = new ArrayList<String>();
Map<String,Integer> additionalNamesIndices = new HashMap<String,Integer>();
- if (glyphNames == null)
+ if (glyphNames != null)
{
- Encoding enc = MacRomanEncoding.INSTANCE;
- int[] gidToUC = this.baseCmap.getGlyphIdToCharacterCode();
- for (Integer glyphId : this.glyphIds)
- {
- int uc = gidToUC[glyphId];
- String name = null;
- if (uc < 0x8000)
- {
- try
- {
- name = enc.getNameFromCharacter((char)uc);
- }
- catch (IOException e)
- {
- // TODO
- }
- }
- if (name == null)
- {
- name = String.format(Locale.ENGLISH,"uni%04X",uc);
- }
- Integer macId = WGL4Names.MAC_GLYPH_NAMES_INDICES.get(name);
- if (macId == null)
- {
- Integer idx = additionalNamesIndices.get(name);
- if (idx == null)
- {
- idx = additionalNames.size();
- additionalNames.add(name);
- additionalNamesIndices.put(name,idx);
- }
- writeUint16(dos,idx+258);
- }
- else
- {
- writeUint16(dos, macId);
- }
- }
- }
- else
- {
- for (Integer glyphId : this.glyphIds)
+ for (Integer glyphId : this.glyphIds)
{
String name = glyphNames[glyphId];
+
Integer macId = WGL4Names.MAC_GLYPH_NAMES_INDICES.get(name);
- if (macId == null)
+ if (macId == null)
{
Integer idx = additionalNamesIndices.get(name);
- if (idx == null)
+ if (idx == null)
{
idx = additionalNames.size();
additionalNames.add(name);
@@ -1017,7 +973,7 @@ public class TTFSubsetter
}
writeUint16(dos,idx+258);
}
- else
+ else
{
writeUint16(dos, macId);
}
Modified: pdfbox/trunk/pdfbox/pom.xml
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/pom.xml?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/pom.xml (original)
+++ pdfbox/trunk/pdfbox/pom.xml Thu Sep 25 22:34:20 2014
@@ -137,6 +137,8 @@
<excludes>
<exclude>src/main/resources/org/apache/pdfbox/resources/afm/*</exclude>
<exclude>src/main/resources/org/apache/pdfbox/resources/icc/*</exclude>
+ <exclude>src/main/resources/org/apache/pdfbox/resources/glyphlist/glyphlist.txt</exclude>
+ <exclude>src/main/resources/org/apache/pdfbox/resources/glyphlist/zapfdingbats.txt</exclude>
<exclude>src/main/resources/META-INF/services/*</exclude>
<exclude>src/test/resources/input/rendering/*.ai</exclude>
<exclude>src/test/resources/output/*</exclude>
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/GlyphList.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/GlyphList.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/GlyphList.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/GlyphList.java Thu Sep 25 22:34:20 2014
@@ -16,60 +16,69 @@
*/
package org.apache.pdfbox.encoding;
-import java.net.URL;
+import java.io.BufferedReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import java.io.File;
import java.io.IOException;
-import java.util.Collections;
-import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
-import java.util.MissingResourceException;
-import java.util.Properties;
-import java.util.StringTokenizer;
/**
- * PostScript glyph list, maps glyph names to Unicode characters.
+ * PostScript glyph list, maps glyph names to sequences of Unicode characters.
+ * Instances of GlyphList are immutable.
*/
-public class GlyphList
+public final class GlyphList
{
private static final Log LOG = LogFactory.getLog(GlyphList.class);
- public static final GlyphList DEFAULT;
- public static final GlyphList ZAPF_DINGBATS;
+ private static final GlyphList DEFAULT;
+ private static final GlyphList ZAPF_DINGBATS;
+
+ /**
+ * Returns the Adobe Glyph List (AGL).
+ */
+ public static GlyphList getAdobeGlyphList()
+ {
+ return DEFAULT;
+ }
+
+ /**
+ * Returns the Zapf Dingbats glyph list.
+ */
+ public static GlyphList getZapfDingbats()
+ {
+ return ZAPF_DINGBATS;
+ }
static
{
try
{
- DEFAULT = new GlyphList();
+ ClassLoader loader = GlyphList.class.getClassLoader();
+ String path = "org/apache/pdfbox/resources/glyphlist/";
- // Loads the official glyph List based on adobes glyph list
- DEFAULT.loadGlyphs("org/apache/pdfbox/resources/glyphlist/glyphlist.properties");
+ // Adobe Glyph List (AGL)
+ DEFAULT = new GlyphList(loader.getResourceAsStream(path + "glyphlist.txt"));
- // Loads some additional glyph mappings
- DEFAULT.loadGlyphs("org/apache/pdfbox/resources/glyphlist/additional_glyphlist.properties");
+ // Zapf Dingbats has its own glyph list
+ ZAPF_DINGBATS = new GlyphList(loader.getResourceAsStream(path + "zapfdingbats.txt"));
- // Load an external glyph list file that user can give as JVM property
+ // glyphlist_ext is not supported in PDFBox 2.0; an exception is thrown if it is set, see PDFBOX-2379
try
{
String location = System.getProperty("glyphlist_ext");
if (location != null)
{
- // not supported in 2.0, see PDFBOX-2379
- throw new UnsupportedOperationException("glyphlist_ext is no longer supported, " +
- "use GlyphList.DEFAULT.addGlyphs(Properties) instead");
+ throw new UnsupportedOperationException("glyphlist_ext is no longer supported, "
+ + "use GlyphList.DEFAULT.addGlyphs(Properties) instead");
}
}
catch (SecurityException e) // can occur on System.getProperty
{
// PDFBOX-1946 ignore and continue
}
-
- // Zapf Dingbats has its own glyph list
- ZAPF_DINGBATS = new GlyphList();
- ZAPF_DINGBATS.loadGlyphs("org/apache/pdfbox/resources/glyphlist/zapf_dingbats.properties");
}
catch (IOException e)
{
@@ -77,76 +86,111 @@ public class GlyphList
}
}
- private final Map<String, String> nameToUnicode = new HashMap<String, String>();
- private final Map<String, String> unicodeToName = new HashMap<String, String>();
+ private final Map<String, String> nameToUnicode;
+ private final Map<String, String> unicodeToName;
- private GlyphList()
+ /**
+ * Creates a new GlyphList from a glyph list file.
+ *
+ * @param input glyph list in Adobe format
+ * @throws IOException if the glyph list could not be read
+ */
+ public GlyphList(InputStream input) throws IOException
+ {
+ nameToUnicode = new HashMap<String, String>();
+ unicodeToName = new HashMap<String, String>();
+ loadList(input);
+ }
+
+ /**
+ * Creates a new GlyphList from multiple glyph list files.
+ *
+ * @param glyphList an existing glyph list to be copied
+ * @param input glyph list in Adobe format
+ * @throws IOException if the glyph list could not be read
+ */
+ public GlyphList(GlyphList glyphList, InputStream input) throws IOException
{
+ nameToUnicode = new HashMap<String, String>(glyphList.nameToUnicode);
+ unicodeToName = new HashMap<String, String>(glyphList.unicodeToName);
+ loadList(input);
}
- private void loadGlyphs(String resourceName) throws IOException
+ private void loadList(InputStream input) throws IOException
{
- URL url = GlyphList.class.getClassLoader().getResource(resourceName);
- if (url == null)
+ BufferedReader in = new BufferedReader(new InputStreamReader(input));
+ try
{
- throw new MissingResourceException("Glyphlist not found: " + resourceName,
- GlyphList.class.getName(), resourceName);
- }
+ while (in.ready())
+ {
+ String line = in.readLine();
+ if (!line.startsWith("#"))
+ {
+ String[] parts = line.split(";");
+ if (parts.length < 2)
+ {
+ throw new IOException("Invalid glyph list entry: " + line);
+ }
+
+ String name = parts[0];
+ String[] unicodeList = parts[1].split(" ");
- Properties properties = new Properties();
- properties.load(url.openStream());
- addGlyphs(properties);
+ if (nameToUnicode.containsKey(name))
+ {
+ LOG.warn("duplicate value for " + name + " -> " + parts[1] + " " +
+ nameToUnicode.get(name));
+ }
+
+ int[] codePoints = new int[unicodeList.length];
+ int index = 0;
+ for (String hex : unicodeList)
+ {
+ codePoints[index++] = Integer.parseInt(hex, 16);
+ }
+ String string = new String(codePoints, 0 , codePoints.length);
+
+ // forward mapping
+ nameToUnicode.put(name, string);
+
+ // reverse mapping
+ if (!unicodeToName.containsKey(string))
+ {
+ unicodeToName.put(string, name);
+ }
+ }
+ }
+ }
+ finally
+ {
+ in.close();
+ }
}
/**
- * Adds a glyph list stored in a .properties file to this GlyphList.
+ * Returns the name for the given Unicode code point.
*
- * @param properties Glyphlist in the form Name=XXXX where X is Unicode hex.
- * @throws IOException if the properties could not be read
+ * @param codePoint Unicode code point
+ * @return PostScript glyph name, or ".notdef"
*/
- public synchronized void addGlyphs(Properties properties) throws IOException
+ public String codePointToName(int codePoint)
{
- Enumeration<?> names = properties.propertyNames();
- for (Object name : Collections.list(names))
+ String name = unicodeToName.get(new String(new int[] { codePoint }, 0 , 1));
+ if (name == null)
{
- String glyphName = name.toString();
- String unicodeValue = properties.getProperty(glyphName);
- StringTokenizer tokenizer = new StringTokenizer(unicodeValue, " ", false);
- StringBuilder value = new StringBuilder();
- while (tokenizer.hasMoreTokens())
- {
- int characterCode = Integer.parseInt(tokenizer.nextToken(), 16);
- value.append((char) characterCode);
- }
- String unicode = value.toString();
-
- if (nameToUnicode.containsKey(glyphName))
- {
- LOG.warn("duplicate value for " + glyphName + " -> " + unicode + " " +
- nameToUnicode.get(glyphName));
- }
- else
- {
- nameToUnicode.put(glyphName, unicode);
- }
-
- // reverse mapping
- if (!unicodeToName.containsKey(unicode))
- {
- unicodeToName.put(unicode, glyphName);
- }
+ return ".notdef";
}
+ return name;
}
/**
- * This will take a character code and get the name from the code.
+ * Returns the name for a given sequence of Unicode characters.
*
- * @param c Unicode character
+ * @param unicodeSequence sequence of Unicode characters
* @return PostScript glyph name, or ".notdef"
*/
- public String unicodeToName(char c)
+ public String sequenceToName(String unicodeSequence)
{
- String name = unicodeToName.get(Character.toString(c));
+ String name = unicodeToName.get(unicodeSequence);
if (name == null)
{
return ".notdef";
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java Thu Sep 25 22:34:20 2014
@@ -27,6 +27,7 @@ import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.encoding.GlyphList;
import org.apache.pdfbox.pdmodel.common.COSDictionaryMap;
import org.apache.pdfbox.pdmodel.common.COSObjectable;
import org.apache.pdfbox.pdmodel.font.PDFont;
@@ -134,11 +135,22 @@ public class PDResources implements COSO
/**
* This will get the map of fonts. This will never return null.
- *
+ *
* @return The map of fonts.
*/
public Map<String, PDFont> getFonts() throws IOException
{
+ return getFonts((GlyphList) null);
+ }
+
+ /**
+ * This will get the map of fonts. This will never return null.
+ *
+ * @param glyphList A custom glyph list for Unicode mapping.
+ * @return The map of fonts.
+ */
+ public Map<String, PDFont> getFonts(GlyphList glyphList) throws IOException
+ {
if (fonts == null)
{
// at least an empty map will be returned
@@ -168,7 +180,7 @@ public class PDResources implements COSO
}
else
{
- PDFont newFont = PDFontFactory.createFont((COSDictionary) font);
+ PDFont newFont = PDFontFactory.createFont((COSDictionary)font, glyphList);
fonts.put(fontName.getName(), newFont);
seenFonts.put((COSDictionary) font, newFont);
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java Thu Sep 25 22:34:20 2014
@@ -261,7 +261,7 @@ public class PDCIDFontType2 extends PDCI
}
// map to a Unicode value using the Adobe Glyph List
- unicode = GlyphList.DEFAULT.toUnicode(name);
+ unicode = GlyphList.getAdobeGlyphList().toUnicode(name);
}
else
{
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java Thu Sep 25 22:34:20 2014
@@ -23,6 +23,7 @@ import org.apache.pdfbox.cos.COSDictiona
import org.apache.pdfbox.cos.COSName;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.pdfbox.encoding.GlyphList;
/**
* Creates the appropriate font subtype based on information in the dictionary.
@@ -45,6 +46,20 @@ public class PDFontFactory
*/
public static PDFont createFont(COSDictionary dictionary) throws IOException
{
+ return createFont(dictionary, null);
+ }
+
+ /**
+ * Creates a new PDFont instance with the appropriate subclass.
+ *
+ * @param dictionary a font dictionary
+ * @param glyphList the default glyph list to use for Unicode mapping
+ * @return a PDFont instance, based on the SubType entry of the dictionary
+ * @throws IOException
+ */
+ public static PDFont createFont(COSDictionary dictionary,
+ GlyphList glyphList) throws IOException
+ {
COSName type = dictionary.getCOSName(COSName.TYPE, COSName.FONT);
if (!COSName.FONT.equals(type))
{
@@ -59,10 +74,10 @@ public class PDFontFactory
{
if (((COSDictionary)fd).containsKey(COSName.FONT_FILE3))
{
- return new PDType1CFont(dictionary);
+ return new PDType1CFont(dictionary, glyphList);
}
}
- return new PDType1Font(dictionary);
+ return new PDType1Font(dictionary, glyphList);
}
else if (COSName.MM_TYPE1.equals(subType))
{
@@ -71,18 +86,18 @@ public class PDFontFactory
{
if (((COSDictionary)fd).containsKey(COSName.FONT_FILE3))
{
- return new PDType1CFont(dictionary);
+ return new PDType1CFont(dictionary, glyphList);
}
}
- return new PDMMType1Font(dictionary);
+ return new PDMMType1Font(dictionary, glyphList);
}
else if (COSName.TRUE_TYPE.equals(subType))
{
- return new PDTrueTypeFont(dictionary);
+ return new PDTrueTypeFont(dictionary, glyphList);
}
else if (COSName.TYPE3.equals(subType))
{
- return new PDType3Font(dictionary);
+ return new PDType3Font(dictionary, glyphList);
}
else if (COSName.TYPE0.equals(subType))
{
@@ -101,7 +116,7 @@ public class PDFontFactory
// assuming Type 1 font (see PDFBOX-1988) because it seems that Adobe Reader does this
// however, we may need more sophisticated logic perhaps looking at the FontFile
LOG.warn("Invalid font subtype '" + subType + "'");
- return new PDType1Font(dictionary);
+ return new PDType1Font(dictionary, glyphList);
}
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDMMType1Font.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDMMType1Font.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDMMType1Font.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDMMType1Font.java Thu Sep 25 22:34:20 2014
@@ -19,6 +19,7 @@ package org.apache.pdfbox.pdmodel.font;
import org.apache.pdfbox.cos.COSDictionary;
import java.io.IOException;
+import org.apache.pdfbox.encoding.GlyphList;
/**
* Type 1 Multiple Master Font.
@@ -31,9 +32,10 @@ public class PDMMType1Font extends PDTyp
* Creates an MMType1Font from a Font dictionary in a PDF.
*
* @param fontDictionary font dictionary
+ * @param glyphList a custom glyph list for Unicode mapping
*/
- public PDMMType1Font(COSDictionary fontDictionary) throws IOException
+ public PDMMType1Font(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
{
- super(fontDictionary);
+ super(fontDictionary, glyphList);
}
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java Thu Sep 25 22:34:20 2014
@@ -43,6 +43,7 @@ public abstract class PDSimpleFont exten
protected Encoding encoding;
protected GlyphList glyphList;
+ private final GlyphList defaultGlyphList;
private final Set<Integer> noUnicode = new HashSet<Integer>(); // for logging
/**
@@ -51,16 +52,26 @@ public abstract class PDSimpleFont exten
protected PDSimpleFont()
{
super();
+ defaultGlyphList = GlyphList.getAdobeGlyphList();
}
/**
* Constructor.
*
* @param fontDictionary Font dictionary.
+ * @param glyphList a custom glyph list for Unicode mapping
*/
- protected PDSimpleFont(COSDictionary fontDictionary) throws IOException
+ protected PDSimpleFont(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
{
super(fontDictionary);
+ if (glyphList == null)
+ {
+ defaultGlyphList = GlyphList.getAdobeGlyphList();
+ }
+ else
+ {
+ defaultGlyphList = glyphList;
+ }
}
/**
@@ -132,11 +143,11 @@ public abstract class PDSimpleFont exten
// assign the glyph list based on the font
if ("ZapfDingbats".equals(getName()))
{
- glyphList = GlyphList.ZAPF_DINGBATS;
+ glyphList = GlyphList.getZapfDingbats();
}
else
{
- glyphList = GlyphList.DEFAULT;
+ glyphList = defaultGlyphList; // by default this is the AGL, but it can be overridden
}
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java Thu Sep 25 22:34:20 2014
@@ -103,10 +103,11 @@ public class PDTrueTypeFont extends PDSi
* Creates a new TrueType font from a Font dictionary.
*
* @param fontDictionary The font dictionary according to the PDF specification.
+ * @param glyphList A custom glyph list for Unicode mapping
*/
- public PDTrueTypeFont(COSDictionary fontDictionary) throws IOException
+ public PDTrueTypeFont(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
{
- super(fontDictionary);
+ super(fontDictionary, glyphList);
TrueTypeFont ttfFont = null;
if (getFontDescriptor() != null)
@@ -261,7 +262,7 @@ public class PDTrueTypeFont extends PDSi
// (3, 1) - (Windows, Unicode)
if (cmapWinUnicode != null)
{
- String unicode = GlyphList.DEFAULT.toUnicode(name);
+ String unicode = GlyphList.getAdobeGlyphList().toUnicode(name);
if (unicode != null)
{
int uni = unicode.codePointAt(0);
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java Thu Sep 25 22:34:20 2014
@@ -287,7 +287,7 @@ class PDTrueTypeFontEmbedder
// pdf code to unicode by glyph list.
if (!name.equals(".notdef"))
{
- String c = GlyphList.DEFAULT.toUnicode(name);
+ String c = GlyphList.getAdobeGlyphList().toUnicode(name); // todo: we're supposed to use the 'provided font encoding'
int charCode = c.codePointAt(0);
int gid = uniMap.getGlyphId(charCode);
if (gid != 0)
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java Thu Sep 25 22:34:20 2014
@@ -259,7 +259,7 @@ public class PDType0Font extends PDFont
// this nonsymbolic behaviour isn't well documented, test with PDFBOX-1422,
// also see PDCIDFontType2#cidToGID()
String name = StandardEncoding.INSTANCE.getName(code);
- return GlyphList.DEFAULT.toUnicode(name);
+ return GlyphList.getAdobeGlyphList().toUnicode(name);
}
else if (isCMapPredefined && cMapUCS2 != null)
{
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java Thu Sep 25 22:34:20 2014
@@ -35,6 +35,7 @@ import org.apache.fontbox.util.BoundingB
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.encoding.Encoding;
+import org.apache.pdfbox.encoding.GlyphList;
import org.apache.pdfbox.encoding.Type1Encoding;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
@@ -65,11 +66,12 @@ public class PDType1CFont extends PDSimp
* Constructor.
*
* @param fontDictionary the corresponding dictionary
+ * @param glyphList a custom glyph list for Unicode mapping
* @throws IOException if something went wrong
*/
- public PDType1CFont(COSDictionary fontDictionary) throws IOException
+ public PDType1CFont(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
{
- super(fontDictionary);
+ super(fontDictionary, glyphList);
PDFontDescriptor fd = getFontDescriptor();
byte[] bytes = null;
@@ -229,8 +231,8 @@ public class PDType1CFont extends PDSimp
float width = 0;
for (int i = 0; i < string.length(); i++)
{
- String character = string.substring(i, i + 1);
- String name = getGlyphList().unicodeToName(character.charAt(0));
+ int codePoint = string.codePointAt(i);
+ String name = getGlyphList().codePointToName(codePoint);
width += cffFont.getType1CharString(name).getWidth();
}
return width;
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java Thu Sep 25 22:34:20 2014
@@ -34,6 +34,7 @@ import org.apache.pdfbox.cos.COSDictiona
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.encoding.Encoding;
+import org.apache.pdfbox.encoding.GlyphList;
import org.apache.pdfbox.encoding.StandardEncoding;
import org.apache.pdfbox.encoding.Type1Encoding;
import org.apache.pdfbox.encoding.WinAnsiEncoding;
@@ -125,10 +126,12 @@ public class PDType1Font extends PDSimpl
* Creates a Type 1 font from a Font dictionary in a PDF.
*
* @param fontDictionary font dictionary
+ * @param glyphList A custom glyph list for Unicode mapping
*/
- public PDType1Font(COSDictionary fontDictionary) throws IOException
+ public PDType1Font(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
{
- super(fontDictionary);
+ super(fontDictionary, glyphList);
+
PDFontDescriptor fd = getFontDescriptor();
Type1Font t1 = null;
if (fd != null)
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java Thu Sep 25 22:34:20 2014
@@ -27,6 +27,7 @@ import org.apache.pdfbox.cos.COSDictiona
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.encoding.Encoding;
+import org.apache.pdfbox.encoding.GlyphList;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.util.Matrix;
@@ -49,10 +50,11 @@ public class PDType3Font extends PDSimpl
* Constructor.
*
* @param fontDictionary The font dictionary according to the PDF specification.
+ * @param glyphList a custom glyph list for Unicode mapping
*/
- public PDType3Font(COSDictionary fontDictionary) throws IOException
+ public PDType3Font(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
{
- super(fontDictionary);
+ super(fontDictionary, glyphList);
readEncoding();
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java Thu Sep 25 22:34:20 2014
@@ -37,6 +37,7 @@ import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.cos.COSString;
+import org.apache.pdfbox.encoding.GlyphList;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
@@ -332,7 +333,7 @@ public class PDFStreamEngine
* @param tx x-translation
* @param ty y-translation
*/
- protected void applyTextAdjustment(float tx, float ty)
+ protected void applyTextAdjustment(float tx, float ty) throws IOException
{
// update the text matrix
textMatrix.concatenate(Matrix.getTranslatingInstance(tx, ty));
@@ -514,7 +515,16 @@ public class PDFStreamEngine
return Collections.emptyMap();
}
- return streamResourcesStack.peek().getFonts();
+ return streamResourcesStack.peek().getFonts(getGlyphList());
+ }
+
+ /**
+ * Returns the glyph list for Unicode mapping; the default is the Adobe Glyph List.
+ * @throws IOException if the glyph list could not be loaded
+ */
+ protected GlyphList getGlyphList() throws IOException
+ {
+ return GlyphList.getAdobeGlyphList();
}
/**
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java Thu Sep 25 22:34:20 2014
@@ -16,9 +16,11 @@
*/
package org.apache.pdfbox.util;
+import java.io.InputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.encoding.GlyphList;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDFont;
@@ -64,6 +66,7 @@ public class PDFTextStreamEngine extends
private int pageRotation;
private PDRectangle pageSize;
+ private GlyphList glyphList;
/**
* Constructor.
@@ -234,4 +237,17 @@ public class PDFTextStreamEngine extends
{
// subclasses can override to provide specific functionality
}
+
+ @Override
+ protected GlyphList getGlyphList() throws IOException
+ {
+ if (glyphList == null)
+ {
+ // load additional glyph list for Unicode mapping
+ String path = "org/apache/pdfbox/resources/glyphlist/additional.txt";
+ InputStream input = GlyphList.class.getClassLoader().getResourceAsStream(path);
+ glyphList = new GlyphList(GlyphList.getAdobeGlyphList(), input);
+ }
+ return glyphList;
+ }
}
Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java Thu Sep 25 22:34:20 2014
@@ -52,25 +52,25 @@ public class TestTTFParser
TrueTypeFont arial = parser.parse(arialIs);
- CmapTable cmap = arial.getCmap();
- Assert.assertNotNull(cmap);
+ CmapTable cmapTable = arial.getCmap();
+ Assert.assertNotNull(cmapTable);
- CmapSubtable[] cmaps = cmap.getCmaps();
+ CmapSubtable[] cmaps = cmapTable.getCmaps();
Assert.assertNotNull(cmaps);
- CmapSubtable uc = null;
+ CmapSubtable cmap = null;
for (CmapSubtable e : cmaps)
{
if (e.getPlatformId() == NameRecord.PLATFORM_WINDOWS
&& e.getPlatformEncodingId() == NameRecord.ENCODING_WINDOWS_UNICODE_BMP)
{
- uc = e;
+ cmap = e;
break;
}
}
- Assert.assertNotNull(uc);
+ Assert.assertNotNull(cmap);
PostScriptTable post = arial.getPostScript();
Assert.assertNotNull(post);
@@ -78,49 +78,12 @@ public class TestTTFParser
String[] glyphNames = arial.getPostScript().getGlyphNames();
Assert.assertNotNull(glyphNames);
- Encoding enc = new WinAnsiEncoding();
-
- int[] charCodes = uc.getGlyphIdToCharacterCode();
- Assert.assertNotNull(charCodes);
-
- for (int gid = 0; gid < charCodes.length; ++gid)
- {
- int charCode = charCodes[gid];
- String name = glyphNames[gid];
- if (charCode < 0x8000 && charCode >= 32)
- {
- if ("space".equals(name) || "slash".equals(name) || "bracketleft".equals(name)
- || "bracketright".equals(name) || "braceleft".equals(name) || "braceright".equals(name)
- || "product".equals(name) || "integral".equals(name) || "Omega".equals(name)
- || "radical".equals(name) || "tilde".equals(name))
- {
- Assert.assertTrue(GlyphList.DEFAULT.unicodeToName((char) charCode).startsWith(name));
- }
- else if ("bar".equals(name))
- {
- Assert.assertTrue(GlyphList.DEFAULT.unicodeToName((char) charCode).endsWith(name));
- }
- else if ("sfthyphen".equals(name))
- {
- Assert.assertEquals("softhyphen", GlyphList.DEFAULT.unicodeToName((char) charCode));
- }
- else if ("periodcentered".equals(name) && !GlyphList.DEFAULT.unicodeToName((char) charCode).equals(name))
- {
- Assert.assertEquals("bulletoperator", GlyphList.DEFAULT.unicodeToName((char) charCode));
- }
- else if ("fraction".equals(name))
- {
- Assert.assertEquals("divisionslash", GlyphList.DEFAULT.unicodeToName((char) charCode));
- }
- else if ("mu".equals(name))
- {
- Assert.assertEquals("mu1", GlyphList.DEFAULT.unicodeToName((char) charCode));
- }
- else if ("pi".equals(name))
- {
- Assert.assertEquals(0x03c0, charCode);
- }
- }
- }
+ // test a WGL4 (Macintosh standard) name
+ int gid = cmap.getGlyphId(0x2122); // TRADE MARK SIGN
+ Assert.assertEquals("trademark", glyphNames[gid]);
+
+ // test an additional name
+ gid = cmap.getGlyphId(0x20AC); // EURO SIGN
+ Assert.assertEquals("Euro", glyphNames[gid]);
}
}
Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/font/container/TrueTypeContainer.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/font/container/TrueTypeContainer.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/font/container/TrueTypeContainer.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/font/container/TrueTypeContainer.java Thu Sep 25 22:34:20 2014
@@ -163,7 +163,7 @@ public class TrueTypeContainer extends F
{
Encoding fontEncoding = this.trueTypeFont.getEncoding();
String name = fontEncoding.getName(cid);
- String character = GlyphList.DEFAULT.toUnicode(name);
+ String character = GlyphList.getAdobeGlyphList().toUnicode(name);
if (character == null)
{
return notFoundGlyphID;