You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ja...@apache.org on 2014/09/26 00:34:21 UTC

svn commit: r1627677 - in /pdfbox/trunk: fontbox/src/main/java/org/apache/fontbox/ttf/ pdfbox/ pdfbox/src/main/java/org/apache/pdfbox/encoding/ pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ pdfbox/...

Author: jahewson
Date: Thu Sep 25 22:34:20 2014
New Revision: 1627677

URL: http://svn.apache.org/r1627677
Log:
PDFBOX-2380: Refactor glyph list loading and lookup

Removed:
    pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/glyphlist/additional_glyphlist.properties
    pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/glyphlist/glyphlist.properties
    pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/glyphlist/zapf_dingbats.properties
Modified:
    pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java
    pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TTFSubsetter.java
    pdfbox/trunk/pdfbox/pom.xml
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/GlyphList.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDMMType1Font.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java
    pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/font/container/TrueTypeContainer.java

Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java Thu Sep 25 22:34:20 2014
@@ -497,22 +497,6 @@ public class CmapSubtable
     }
 
     /**
-     * @return Returns the glyphIdToCharacterCode.
-     */
-    public int[] getGlyphIdToCharacterCode()
-    {
-        return glyphIdToCharacterCode;
-    }
-
-    /**
-     * @param glyphIdToCharacterCodeValue The glyphIdToCharacterCode to set.
-     */
-    public void setGlyphIdToCharacterCode(int[] glyphIdToCharacterCodeValue)
-    {
-        glyphIdToCharacterCode = glyphIdToCharacterCodeValue;
-    }
-
-    /**
      * @return Returns the platformEncodingId.
      */
     public int getPlatformEncodingId()
@@ -556,6 +540,21 @@ public class CmapSubtable
         return glyphId == null ? 0 : glyphId;
     }
 
+    /**
+     * Returns the character code for the given GID.
+     *
+     * @param gid glyph id
+     * @return character code, or 0 if the GID is out of range
+     */
+    public int getCharacterCode(int gid)
+    {
+        if (gid < 0 || gid >= glyphIdToCharacterCode.length)
+        {
+            return 0;
+        }
+        return glyphIdToCharacterCode[gid];
+    }
+
     @Override
     public String toString()
     {

Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TTFSubsetter.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TTFSubsetter.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TTFSubsetter.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TTFSubsetter.java Thu Sep 25 22:34:20 2014
@@ -28,7 +28,6 @@ import java.util.GregorianCalendar;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
-import java.util.Locale;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
@@ -37,9 +36,6 @@ import java.util.SortedSet;
 import java.util.TreeMap;
 import java.util.TreeSet;
 
-import org.apache.fontbox.encoding.Encoding;
-import org.apache.fontbox.encoding.MacRomanEncoding;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 
@@ -959,57 +955,17 @@ public class TTFSubsetter
         List<String> additionalNames = new ArrayList<String>();
         Map<String,Integer> additionalNamesIndices = new HashMap<String,Integer>();
         
-        if (glyphNames == null) 
+        if (glyphNames != null)
         {
-            Encoding enc = MacRomanEncoding.INSTANCE;
-            int[] gidToUC = this.baseCmap.getGlyphIdToCharacterCode();
-            for (Integer glyphId : this.glyphIds) 
-            {
-                int uc = gidToUC[glyphId];
-                String name = null;
-                if (uc < 0x8000) 
-                {
-                    try 
-                    {
-                        name = enc.getNameFromCharacter((char)uc);
-                    }
-                    catch (IOException e) 
-                    {
-                        // TODO
-                    }
-                }
-                if (name == null) 
-                {
-                    name = String.format(Locale.ENGLISH,"uni%04X",uc);
-                }
-                Integer macId = WGL4Names.MAC_GLYPH_NAMES_INDICES.get(name);
-                if (macId == null) 
-                {
-                    Integer idx = additionalNamesIndices.get(name);
-                    if (idx == null) 
-                    {
-                        idx = additionalNames.size();
-                        additionalNames.add(name);
-                        additionalNamesIndices.put(name,idx);
-                    }
-                    writeUint16(dos,idx+258);
-                }
-                else 
-                {
-                    writeUint16(dos, macId);
-                }
-            }
-        }
-        else 
-        { 
-            for (Integer glyphId : this.glyphIds) 
+            for (Integer glyphId : this.glyphIds)
             {
                 String name = glyphNames[glyphId];
+
                 Integer macId = WGL4Names.MAC_GLYPH_NAMES_INDICES.get(name);
-                if (macId == null) 
+                if (macId == null)
                 {
                     Integer idx = additionalNamesIndices.get(name);
-                    if (idx == null) 
+                    if (idx == null)
                     {
                         idx = additionalNames.size();
                         additionalNames.add(name);
@@ -1017,7 +973,7 @@ public class TTFSubsetter
                     }
                     writeUint16(dos,idx+258);
                 }
-                else 
+                else
                 {
                     writeUint16(dos, macId);
                 }

Modified: pdfbox/trunk/pdfbox/pom.xml
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/pom.xml?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/pom.xml (original)
+++ pdfbox/trunk/pdfbox/pom.xml Thu Sep 25 22:34:20 2014
@@ -137,6 +137,8 @@
                     <excludes>
                         <exclude>src/main/resources/org/apache/pdfbox/resources/afm/*</exclude>
                         <exclude>src/main/resources/org/apache/pdfbox/resources/icc/*</exclude>
+                        <exclude>src/main/resources/org/apache/pdfbox/resources/glyphlist/glyphlist.txt</exclude>
+                        <exclude>src/main/resources/org/apache/pdfbox/resources/glyphlist/zapfdingbats.txt</exclude>
                         <exclude>src/main/resources/META-INF/services/*</exclude>
                         <exclude>src/test/resources/input/rendering/*.ai</exclude>
                         <exclude>src/test/resources/output/*</exclude>

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/GlyphList.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/GlyphList.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/GlyphList.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/GlyphList.java Thu Sep 25 22:34:20 2014
@@ -16,60 +16,69 @@
  */
 package org.apache.pdfbox.encoding;
 
-import java.net.URL;
+import java.io.BufferedReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 
-import java.io.File;
 import java.io.IOException;
-import java.util.Collections;
-import java.util.Enumeration;
 import java.util.HashMap;
 import java.util.Map;
-import java.util.MissingResourceException;
-import java.util.Properties;
-import java.util.StringTokenizer;
 
 /**
- * PostScript glyph list, maps glyph names to Unicode characters.
+ * PostScript glyph list, maps glyph names to sequences of Unicode characters.
+ * Instances of GlyphList are immutable.
  */
-public class GlyphList
+public final class GlyphList
 {
     private static final Log LOG = LogFactory.getLog(GlyphList.class);
-    public static final GlyphList DEFAULT;
-    public static final GlyphList ZAPF_DINGBATS;
+    private static final GlyphList DEFAULT;
+    private static final GlyphList ZAPF_DINGBATS;
+
+    /**
+     * Returns the Adobe Glyph List (AGL).
+     */
+    public static GlyphList getAdobeGlyphList()
+    {
+        return DEFAULT;
+    }
+
+    /**
+     * Returns the Zapf Dingbats glyph list.
+     */
+    public static GlyphList getZapfDingbats()
+    {
+        return ZAPF_DINGBATS;
+    }
 
     static
     {
         try
         {
-            DEFAULT = new GlyphList();
+            ClassLoader loader = GlyphList.class.getClassLoader();
+            String path = "org/apache/pdfbox/resources/glyphlist/";
 
-            // Loads the official glyph List based on adobes glyph list
-            DEFAULT.loadGlyphs("org/apache/pdfbox/resources/glyphlist/glyphlist.properties");
+            // Adobe Glyph List (AGL)
+            DEFAULT = new GlyphList(loader.getResourceAsStream(path + "glyphlist.txt"));
 
-            // Loads some additional glyph mappings
-            DEFAULT.loadGlyphs("org/apache/pdfbox/resources/glyphlist/additional_glyphlist.properties");
+            // Zapf Dingbats has its own glyph list
+            ZAPF_DINGBATS = new GlyphList(loader.getResourceAsStream(path + "zapfdingbats.txt"));
 
-            // Load an external glyph list file that user can give as JVM property
+            // glyphlist_ext is not supported in PDFBox 2.0, so we throw an exception, see PDFBOX-2379
             try
             {
                 String location = System.getProperty("glyphlist_ext");
                 if (location != null)
                 {
-                    // not supported in 2.0, see PDFBOX-2379
-                    throw new UnsupportedOperationException("glyphlist_ext is no longer supported, " +
-                      "use GlyphList.DEFAULT.addGlyphs(Properties) instead");
+                    throw new UnsupportedOperationException("glyphlist_ext is no longer supported, "
+                            + "use GlyphList.DEFAULT.addGlyphs(Properties) instead");
                 }
             }
             catch (SecurityException e)  // can occur on System.getProperty
             {
                 // PDFBOX-1946 ignore and continue
             }
-
-            // Zapf Dingbats has its own glyph list
-            ZAPF_DINGBATS = new GlyphList();
-            ZAPF_DINGBATS.loadGlyphs("org/apache/pdfbox/resources/glyphlist/zapf_dingbats.properties");
         }
         catch (IOException e)
         {
@@ -77,76 +86,111 @@ public class GlyphList
         }
     }
 
-    private final Map<String, String> nameToUnicode = new HashMap<String, String>();
-    private final Map<String, String> unicodeToName = new HashMap<String, String>();
+    private final Map<String, String> nameToUnicode;
+    private final Map<String, String> unicodeToName;
 
-    private GlyphList()
+    /**
+     * Creates a new GlyphList from a glyph list file.
+     *
+     * @param input glyph list in Adobe format
+     * @throws IOException if the glyph list could not be read
+     */
+    public GlyphList(InputStream input) throws IOException
+    {
+        nameToUnicode = new HashMap<String, String>();
+        unicodeToName = new HashMap<String, String>();
+        loadList(input);
+    }
+
+    /**
+     * Creates a new GlyphList from multiple glyph list files.
+     *
+     * @param glyphList an existing glyph list to be copied
+     * @param input glyph list in Adobe format
+     * @throws IOException if the glyph list could not be read
+     */
+    public GlyphList(GlyphList glyphList, InputStream input) throws IOException
     {
+        nameToUnicode = new HashMap<String, String>(glyphList.nameToUnicode);
+        unicodeToName = new HashMap<String, String>(glyphList.unicodeToName);
+        loadList(input);
     }
 
-    private void loadGlyphs(String resourceName) throws IOException
+    private void loadList(InputStream input) throws IOException
     {
-        URL url = GlyphList.class.getClassLoader().getResource(resourceName);
-        if (url == null)
+        BufferedReader in = new BufferedReader(new InputStreamReader(input));
+        try
         {
-            throw new MissingResourceException("Glyphlist not found: " + resourceName,
-                    GlyphList.class.getName(), resourceName);
-        }
+            while (in.ready())
+            {
+                String line = in.readLine();
+                if (!line.startsWith("#"))
+                {
+                    String[] parts = line.split(";");
+                    if (parts.length < 2)
+                    {
+                        throw new IOException("Invalid glyph list entry: " + line);
+                    }
+
+                    String name = parts[0];
+                    String[] unicodeList = parts[1].split(" ");
 
-        Properties properties = new Properties();
-        properties.load(url.openStream());
-        addGlyphs(properties);
+                    if (nameToUnicode.containsKey(name))
+                    {
+                        LOG.warn("duplicate value for " + name + " -> " + parts[1] + " " +
+                                 nameToUnicode.get(name));
+                    }
+
+                    int[] codePoints = new int[unicodeList.length];
+                    int index = 0;
+                    for (String hex : unicodeList)
+                    {
+                        codePoints[index++] = Integer.parseInt(hex, 16);
+                    }
+                    String string = new String(codePoints, 0 , codePoints.length);
+
+                    // forward mapping
+                    nameToUnicode.put(name, string);
+
+                    // reverse mapping
+                    if (!unicodeToName.containsKey(string))
+                    {
+                        unicodeToName.put(string, name);
+                    }
+                }
+            }
+        }
+        finally
+        {
+            in.close();
+        }
     }
 
     /**
-     * Adds a glyph list stored in a .properties file to this GlyphList.
+     * Returns the name for the given Unicode code point.
      *
-     * @param properties Glyphlist in the form Name=XXXX where X is Unicode hex.
-     * @throws IOException if the properties could not be read
+     * @param codePoint Unicode code point
+     * @return PostScript glyph name, or ".notdef"
      */
-    public synchronized void addGlyphs(Properties properties) throws IOException
+    public String codePointToName(int codePoint)
     {
-        Enumeration<?> names = properties.propertyNames();
-        for (Object name : Collections.list(names))
+        String name = unicodeToName.get(new String(new int[] { codePoint }, 0 , 1));
+        if (name == null)
         {
-            String glyphName = name.toString();
-            String unicodeValue = properties.getProperty(glyphName);
-            StringTokenizer tokenizer = new StringTokenizer(unicodeValue, " ", false);
-            StringBuilder value = new StringBuilder();
-            while (tokenizer.hasMoreTokens())
-            {
-                int characterCode = Integer.parseInt(tokenizer.nextToken(), 16);
-                value.append((char) characterCode);
-            }
-            String unicode = value.toString();
-
-            if (nameToUnicode.containsKey(glyphName))
-            {
-                LOG.warn("duplicate value for " + glyphName + " -> " + unicode + " " +
-                        nameToUnicode.get(glyphName));
-            }
-            else
-            {
-                nameToUnicode.put(glyphName, unicode);
-            }
-
-            // reverse mapping
-            if (!unicodeToName.containsKey(unicode))
-            {
-                unicodeToName.put(unicode, glyphName);
-            }
+            return ".notdef";
         }
+        return name;
     }
 
     /**
-     * This will take a character code and get the name from the code.
+     * Returns the name for a given sequence of Unicode characters.
      *
-     * @param c Unicode character
+     * @param unicodeSequence sequence of Unicode characters
      * @return PostScript glyph name, or ".notdef"
      */
-    public String unicodeToName(char c)
+    public String sequenceToName(String unicodeSequence)
     {
-        String name = unicodeToName.get(Character.toString(c));
+        String name = unicodeToName.get(unicodeSequence);
         if (name == null)
         {
             return ".notdef";

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java Thu Sep 25 22:34:20 2014
@@ -27,6 +27,7 @@ import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.encoding.GlyphList;
 import org.apache.pdfbox.pdmodel.common.COSDictionaryMap;
 import org.apache.pdfbox.pdmodel.common.COSObjectable;
 import org.apache.pdfbox.pdmodel.font.PDFont;
@@ -134,11 +135,22 @@ public class PDResources implements COSO
 
     /**
      * This will get the map of fonts. This will never return null.
-     * 
+     *
      * @return The map of fonts.
      */
     public Map<String, PDFont> getFonts() throws IOException
     {
+        return getFonts((GlyphList) null);
+    }
+
+    /**
+     * This will get the map of fonts. This will never return null.
+     *
+     * @param glyphList A custom glyph list for Unicode mapping.
+     * @return The map of fonts.
+     */
+    public Map<String, PDFont> getFonts(GlyphList glyphList) throws IOException
+    {
         if (fonts == null)
         {
             // at least an empty map will be returned
@@ -168,7 +180,7 @@ public class PDResources implements COSO
                         }
                         else
                         {
-                            PDFont newFont = PDFontFactory.createFont((COSDictionary) font);
+                            PDFont newFont = PDFontFactory.createFont((COSDictionary)font, glyphList);
                             fonts.put(fontName.getName(), newFont);
                             seenFonts.put((COSDictionary) font, newFont);
                         }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java Thu Sep 25 22:34:20 2014
@@ -261,7 +261,7 @@ public class PDCIDFontType2 extends PDCI
                 }
 
                 // map to a Unicode value using the Adobe Glyph List
-                unicode = GlyphList.DEFAULT.toUnicode(name);
+                unicode = GlyphList.getAdobeGlyphList().toUnicode(name);
             }
             else
             {

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java Thu Sep 25 22:34:20 2014
@@ -23,6 +23,7 @@ import org.apache.pdfbox.cos.COSDictiona
 import org.apache.pdfbox.cos.COSName;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.pdfbox.encoding.GlyphList;
 
 /**
  * Creates the appropriate font subtype based on information in the dictionary.
@@ -45,6 +46,20 @@ public class PDFontFactory
      */
     public static PDFont createFont(COSDictionary dictionary) throws IOException
     {
+        return createFont(dictionary, null);
+    }
+
+    /**
+     * Creates a new PDFont instance with the appropriate subclass.
+     *
+     * @param dictionary a font dictionary
+     * @param glyphList the default glyph list to use for Unicode mapping
+     * @return a PDFont instance, based on the SubType entry of the dictionary
+     * @throws IOException
+     */
+    public static PDFont createFont(COSDictionary dictionary,
+                                    GlyphList glyphList) throws IOException
+    {
         COSName type = dictionary.getCOSName(COSName.TYPE, COSName.FONT);
         if (!COSName.FONT.equals(type))
         {
@@ -59,10 +74,10 @@ public class PDFontFactory
             {
                 if (((COSDictionary)fd).containsKey(COSName.FONT_FILE3))
                 {
-                    return new PDType1CFont(dictionary);
+                    return new PDType1CFont(dictionary, glyphList);
                 }
             }
-            return new PDType1Font(dictionary);
+            return new PDType1Font(dictionary, glyphList);
         }
         else if (COSName.MM_TYPE1.equals(subType))
         {
@@ -71,18 +86,18 @@ public class PDFontFactory
             {
                 if (((COSDictionary)fd).containsKey(COSName.FONT_FILE3))
                 {
-                    return new PDType1CFont(dictionary);
+                    return new PDType1CFont(dictionary, glyphList);
                 }
             }
-            return new PDMMType1Font(dictionary);
+            return new PDMMType1Font(dictionary, glyphList);
         }
         else if (COSName.TRUE_TYPE.equals(subType))
         {
-            return new PDTrueTypeFont(dictionary);
+            return new PDTrueTypeFont(dictionary, glyphList);
         }
         else if (COSName.TYPE3.equals(subType))
         {
-            return new PDType3Font(dictionary);
+            return new PDType3Font(dictionary, glyphList);
         }
         else if (COSName.TYPE0.equals(subType))
         {
@@ -101,7 +116,7 @@ public class PDFontFactory
             // assuming Type 1 font (see PDFBOX-1988) because it seems that Adobe Reader does this
             // however, we may need more sophisticated logic perhaps looking at the FontFile
             LOG.warn("Invalid font subtype '" + subType + "'");
-            return new PDType1Font(dictionary);
+            return new PDType1Font(dictionary, glyphList);
         }
     }
 

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDMMType1Font.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDMMType1Font.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDMMType1Font.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDMMType1Font.java Thu Sep 25 22:34:20 2014
@@ -19,6 +19,7 @@ package org.apache.pdfbox.pdmodel.font;
 import org.apache.pdfbox.cos.COSDictionary;
 
 import java.io.IOException;
+import org.apache.pdfbox.encoding.GlyphList;
 
 /**
  * Type 1 Multiple Master Font.
@@ -31,9 +32,10 @@ public class PDMMType1Font extends PDTyp
      * Creates an MMType1Font from a Font dictionary in a PDF.
      *
      * @param fontDictionary font dictionary
+     * @param glyphList a custom glyph list for Unicode mapping
      */
-    public PDMMType1Font(COSDictionary fontDictionary) throws IOException
+    public PDMMType1Font(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
     {
-        super(fontDictionary);
+        super(fontDictionary, glyphList);
     }
 }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java Thu Sep 25 22:34:20 2014
@@ -43,6 +43,7 @@ public abstract class PDSimpleFont exten
 
     protected Encoding encoding;
     protected GlyphList glyphList;
+    private final GlyphList defaultGlyphList;
     private final Set<Integer> noUnicode = new HashSet<Integer>(); // for logging
 
     /**
@@ -51,16 +52,26 @@ public abstract class PDSimpleFont exten
     protected PDSimpleFont()
     {
         super();
+        defaultGlyphList = GlyphList.getAdobeGlyphList();
     }
 
     /**
      * Constructor.
      *
      * @param fontDictionary Font dictionary.
+     * @param glyphList a custom glyph list for Unicode mapping
      */
-    protected PDSimpleFont(COSDictionary fontDictionary) throws IOException
+    protected PDSimpleFont(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
     {
         super(fontDictionary);
+        if (glyphList == null)
+        {
+            defaultGlyphList = GlyphList.getAdobeGlyphList();
+        }
+        else
+        {
+            defaultGlyphList = glyphList;
+        }
     }
 
     /**
@@ -132,11 +143,11 @@ public abstract class PDSimpleFont exten
         // assign the glyph list based on the font
         if ("ZapfDingbats".equals(getName()))
         {
-            glyphList = GlyphList.ZAPF_DINGBATS;
+            glyphList = GlyphList.getZapfDingbats();
         }
         else
         {
-            glyphList = GlyphList.DEFAULT;
+            glyphList = defaultGlyphList; // by default this is the AGL, but it can be overridden
         }
     }
 

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java Thu Sep 25 22:34:20 2014
@@ -103,10 +103,11 @@ public class PDTrueTypeFont extends PDSi
      * Creates a new TrueType font from a Font dictionary.
      *
      * @param fontDictionary The font dictionary according to the PDF specification.
+     * @param glyphList A custom glyph list for Unicode mapping
      */
-    public PDTrueTypeFont(COSDictionary fontDictionary) throws IOException
+    public PDTrueTypeFont(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
     {
-        super(fontDictionary);
+        super(fontDictionary, glyphList);
 
         TrueTypeFont ttfFont = null;
         if (getFontDescriptor() != null)
@@ -261,7 +262,7 @@ public class PDTrueTypeFont extends PDSi
                 // (3, 1) - (Windows, Unicode)
                 if (cmapWinUnicode != null)
                 {
-                    String unicode = GlyphList.DEFAULT.toUnicode(name);
+                    String unicode = GlyphList.getAdobeGlyphList().toUnicode(name);
                     if (unicode != null)
                     {
                         int uni = unicode.codePointAt(0);

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java Thu Sep 25 22:34:20 2014
@@ -287,7 +287,7 @@ class PDTrueTypeFontEmbedder
             // pdf code to unicode by glyph list.
             if (!name.equals(".notdef"))
             {
-                String c = GlyphList.DEFAULT.toUnicode(name);
+                String c = GlyphList.getAdobeGlyphList().toUnicode(name); // todo: we're supposed to use the 'provided font encoding'
                 int charCode = c.codePointAt(0);
                 int gid = uniMap.getGlyphId(charCode);
                 if (gid != 0)

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java Thu Sep 25 22:34:20 2014
@@ -259,7 +259,7 @@ public class PDType0Font extends PDFont
             // this nonsymbolic behaviour isn't well documented, test with PDFBOX-1422,
             // also see PDCIDFontType2#cidToGID()
             String name = StandardEncoding.INSTANCE.getName(code);
-            return GlyphList.DEFAULT.toUnicode(name);
+            return GlyphList.getAdobeGlyphList().toUnicode(name);
         }
         else if (isCMapPredefined && cMapUCS2 != null)
         {

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java Thu Sep 25 22:34:20 2014
@@ -35,6 +35,7 @@ import org.apache.fontbox.util.BoundingB
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.encoding.Encoding;
+import org.apache.pdfbox.encoding.GlyphList;
 import org.apache.pdfbox.encoding.Type1Encoding;
 import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
@@ -65,11 +66,12 @@ public class PDType1CFont extends PDSimp
      * Constructor.
      * 
      * @param fontDictionary the corresponding dictionary
+     * @param glyphList a custom glyph list for Unicode mapping
-     * @throws IOException it something went wrong
+     * @throws IOException if something went wrong
      */
-    public PDType1CFont(COSDictionary fontDictionary) throws IOException
+    public PDType1CFont(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
     {
-        super(fontDictionary);
+        super(fontDictionary, glyphList);
 
         PDFontDescriptor fd = getFontDescriptor();
         byte[] bytes = null;
@@ -229,8 +231,11 @@ public class PDType1CFont extends PDSimp
         float width = 0;
-        for (int i = 0; i < string.length(); i++)
+        for (int i = 0; i < string.length(); )
         {
-            String character = string.substring(i, i + 1);
-            String name = getGlyphList().unicodeToName(character.charAt(0));
+            // read a whole code point; at a surrogate pair this covers two chars
+            int codePoint = string.codePointAt(i);
+            String name = getGlyphList().codePointToName(codePoint);
             width += cffFont.getType1CharString(name).getWidth();
+            // step over every char of this code point so a low surrogate is never looked up on its own
+            i += Character.charCount(codePoint);
         }
         return width;

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java Thu Sep 25 22:34:20 2014
@@ -34,6 +34,7 @@ import org.apache.pdfbox.cos.COSDictiona
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.encoding.Encoding;
+import org.apache.pdfbox.encoding.GlyphList;
 import org.apache.pdfbox.encoding.StandardEncoding;
 import org.apache.pdfbox.encoding.Type1Encoding;
 import org.apache.pdfbox.encoding.WinAnsiEncoding;
@@ -125,10 +126,12 @@ public class PDType1Font extends PDSimpl
      * Creates a Type 1 font from a Font dictionary in a PDF.
      * 
      * @param fontDictionary font dictionary
+     * @param glyphList A custom glyph list for Unicode mapping
      */
-    public PDType1Font(COSDictionary fontDictionary) throws IOException
+    public PDType1Font(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
     {
-        super(fontDictionary);
+        super(fontDictionary, glyphList);
+
         PDFontDescriptor fd = getFontDescriptor();
         Type1Font t1 = null;
         if (fd != null)

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java Thu Sep 25 22:34:20 2014
@@ -27,6 +27,7 @@ import org.apache.pdfbox.cos.COSDictiona
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.encoding.Encoding;
+import org.apache.pdfbox.encoding.GlyphList;
 import org.apache.pdfbox.pdmodel.PDResources;
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
 import org.apache.pdfbox.util.Matrix;
@@ -49,10 +50,11 @@ public class PDType3Font extends PDSimpl
      * Constructor.
      *
      * @param fontDictionary The font dictionary according to the PDF specification.
+     * @param glyphList a custom glyph list for Unicode mapping
      */
-    public PDType3Font(COSDictionary fontDictionary) throws IOException
+    public PDType3Font(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
     {
-        super(fontDictionary);
+        super(fontDictionary, glyphList);
         readEncoding();
     }
 

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java Thu Sep 25 22:34:20 2014
@@ -37,6 +37,7 @@ import org.apache.pdfbox.cos.COSNumber;
 import org.apache.pdfbox.cos.COSObject;
 import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.cos.COSString;
+import org.apache.pdfbox.encoding.GlyphList;
 import org.apache.pdfbox.pdfparser.PDFStreamParser;
 import org.apache.pdfbox.pdmodel.PDResources;
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
@@ -332,7 +333,7 @@ public class PDFStreamEngine
      * @param tx x-translation
      * @param ty y-translation
      */
-    protected void applyTextAdjustment(float tx, float ty)
+    protected void applyTextAdjustment(float tx, float ty) throws IOException
     {
         // update the text matrix
         textMatrix.concatenate(Matrix.getTranslatingInstance(tx, ty));
@@ -514,7 +515,16 @@ public class PDFStreamEngine
             return Collections.emptyMap();
         }
 
-        return streamResourcesStack.peek().getFonts();
+        return streamResourcesStack.peek().getFonts(getGlyphList());
+    }
+
+    /**
+     * Returns the glyph list used for Unicode mapping; this implementation returns the Adobe Glyph List.
+     * @throws IOException if the glyph list could not be loaded
+     */
+    protected GlyphList getGlyphList() throws IOException
+    {
+        return GlyphList.getAdobeGlyphList();
+    }
 
     /**

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java Thu Sep 25 22:34:20 2014
@@ -16,9 +16,11 @@
  */
 package org.apache.pdfbox.util;
 
+import java.io.InputStream;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.encoding.GlyphList;
 import org.apache.pdfbox.pdmodel.PDResources;
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
 import org.apache.pdfbox.pdmodel.font.PDFont;
@@ -64,6 +66,7 @@ public class PDFTextStreamEngine extends
 
     private int pageRotation;
     private PDRectangle pageSize;
+    private GlyphList glyphList;
 
     /**
      * Constructor.
@@ -234,4 +237,36 @@ public class PDFTextStreamEngine extends
     {
         // subclasses can override to provide specific functionality
     }
+
+    /**
+     * Returns a glyph list built from the Adobe Glyph List and the bundled additional.txt
+     * resource. It is created on the first call and cached for later calls.
+     *
+     * @throws IOException if the resource is missing or the glyph list could not be loaded
+     */
+    @Override
+    protected GlyphList getGlyphList() throws IOException
+    {
+        if (glyphList == null)
+        {
+            // load additional glyph list for Unicode mapping
+            String path = "org/apache/pdfbox/resources/glyphlist/additional.txt";
+            InputStream input = GlyphList.class.getClassLoader().getResourceAsStream(path);
+            if (input == null)
+            {
+                // getResourceAsStream reports a missing resource with null, not an exception
+                throw new IOException("Glyph list resource not found: " + path);
+            }
+            try
+            {
+                glyphList = new GlyphList(GlyphList.getAdobeGlyphList(), input);
+            }
+            finally
+            {
+                // release the stream whether or not loading succeeded
+                input.close();
+            }
+        }
+        return glyphList;
+    }
 }

Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java Thu Sep 25 22:34:20 2014
@@ -52,25 +52,25 @@ public class TestTTFParser
 
         TrueTypeFont arial = parser.parse(arialIs);
 
-        CmapTable cmap = arial.getCmap();
-        Assert.assertNotNull(cmap);
+        CmapTable cmapTable = arial.getCmap();
+        Assert.assertNotNull(cmapTable);
 
-        CmapSubtable[] cmaps = cmap.getCmaps();
+        CmapSubtable[] cmaps = cmapTable.getCmaps();
         Assert.assertNotNull(cmaps);
 
-        CmapSubtable uc = null;
+        CmapSubtable cmap = null;
 
         for (CmapSubtable e : cmaps)
         {
             if (e.getPlatformId() == NameRecord.PLATFORM_WINDOWS
                     && e.getPlatformEncodingId() == NameRecord.ENCODING_WINDOWS_UNICODE_BMP)
             {
-                uc = e;
+                cmap = e;
                 break;
             }
         }
 
-        Assert.assertNotNull(uc);
+        Assert.assertNotNull(cmap);
 
         PostScriptTable post = arial.getPostScript();
         Assert.assertNotNull(post);
@@ -78,49 +78,12 @@ public class TestTTFParser
         String[] glyphNames = arial.getPostScript().getGlyphNames();
         Assert.assertNotNull(glyphNames);
 
-        Encoding enc = new WinAnsiEncoding();
-
-        int[] charCodes = uc.getGlyphIdToCharacterCode();
-        Assert.assertNotNull(charCodes);
-
-        for (int gid = 0; gid < charCodes.length; ++gid)
-        {
-            int charCode = charCodes[gid];
-            String name = glyphNames[gid];
-            if (charCode < 0x8000 && charCode >= 32)
-            {
-                if ("space".equals(name) || "slash".equals(name) || "bracketleft".equals(name)
-                        || "bracketright".equals(name) || "braceleft".equals(name) || "braceright".equals(name)
-                        || "product".equals(name) || "integral".equals(name) || "Omega".equals(name)
-                        || "radical".equals(name) || "tilde".equals(name))
-                {
-                    Assert.assertTrue(GlyphList.DEFAULT.unicodeToName((char) charCode).startsWith(name));
-                }
-                else if ("bar".equals(name))
-                {
-                    Assert.assertTrue(GlyphList.DEFAULT.unicodeToName((char) charCode).endsWith(name));
-                }
-                else if ("sfthyphen".equals(name))
-                {
-                    Assert.assertEquals("softhyphen", GlyphList.DEFAULT.unicodeToName((char) charCode));
-                }
-                else if ("periodcentered".equals(name) && !GlyphList.DEFAULT.unicodeToName((char) charCode).equals(name))
-                {
-                    Assert.assertEquals("bulletoperator", GlyphList.DEFAULT.unicodeToName((char) charCode));
-                }
-                else if ("fraction".equals(name))
-                {
-                    Assert.assertEquals("divisionslash", GlyphList.DEFAULT.unicodeToName((char) charCode));
-                }
-                else if ("mu".equals(name))
-                {
-                    Assert.assertEquals("mu1", GlyphList.DEFAULT.unicodeToName((char) charCode));
-                }
-                else if ("pi".equals(name))
-                {
-                    Assert.assertEquals(0x03c0, charCode);
-                }
-            }
-        }
+        // test a WGL4 (Macintosh standard) name
+        int gid = cmap.getGlyphId(0x2122); // TRADE MARK SIGN
+        Assert.assertEquals("trademark", glyphNames[gid]);
+
+        // test an additional name
+        gid = cmap.getGlyphId(0x20AC); // EURO SIGN
+        Assert.assertEquals("Euro", glyphNames[gid]);
     }
 }

Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/font/container/TrueTypeContainer.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/font/container/TrueTypeContainer.java?rev=1627677&r1=1627676&r2=1627677&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/font/container/TrueTypeContainer.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/font/container/TrueTypeContainer.java Thu Sep 25 22:34:20 2014
@@ -163,7 +163,7 @@ public class TrueTypeContainer extends F
         {
             Encoding fontEncoding = this.trueTypeFont.getEncoding();
             String name = fontEncoding.getName(cid);
-            String character = GlyphList.DEFAULT.toUnicode(name);
+            String character = GlyphList.getAdobeGlyphList().toUnicode(name);
             if (character == null)
             {
                 return notFoundGlyphID;