You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ja...@apache.org on 2014/09/26 04:25:22 UTC

svn commit: r1627702 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox: pdmodel/ pdmodel/font/ util/

Author: jahewson
Date: Fri Sep 26 02:25:22 2014
New Revision: 1627702

URL: http://svn.apache.org/r1627702
Log:
PDFBOX-2380: Simplify custom GlyphList use for toUnicode

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDMMType1Font.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java Fri Sep 26 02:25:22 2014
@@ -140,17 +140,6 @@ public class PDResources implements COSO
      */
     public Map<String, PDFont> getFonts() throws IOException
     {
-        return getFonts((GlyphList) null);
-    }
-
-    /**
-     * This will get the map of fonts. This will never return null.
-     *
-     * @param glyphList A custom glyph list for Unicode mapping.
-     * @return The map of fonts.
-     */
-    public Map<String, PDFont> getFonts(GlyphList glyphList) throws IOException
-    {
         if (fonts == null)
         {
             // at least an empty map will be returned
@@ -180,7 +169,7 @@ public class PDResources implements COSO
                         }
                         else
                         {
-                            PDFont newFont = PDFontFactory.createFont((COSDictionary)font, glyphList);
+                            PDFont newFont = PDFontFactory.createFont((COSDictionary)font);
                             fonts.put(fontName.getName(), newFont);
                             seenFonts.put((COSDictionary) font, newFont);
                         }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java Fri Sep 26 02:25:22 2014
@@ -32,6 +32,7 @@ import org.apache.pdfbox.cos.COSDictiona
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSNumber;
 import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.encoding.GlyphList;
 import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.pdmodel.common.COSArrayList;
 import org.apache.pdfbox.pdmodel.common.COSObjectable;
@@ -314,6 +315,18 @@ public abstract class PDFont implements 
      * Returns the Unicode character sequence which corresponds to the given character code.
      *
      * @param code character code
+     * @param customGlyphList a custom glyph list to use instead of the Adobe Glyph List
+     * @return Unicode character(s)
+     */
+    public String toUnicode(int code, GlyphList customGlyphList) throws IOException
+    {
+        return toUnicode(code);
+    }
+
+    /**
+     * Returns the Unicode character sequence which corresponds to the given character code.
+     *
+     * @param code character code
      * @return Unicode character(s)
      */
     public String toUnicode(int code) throws IOException

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java Fri Sep 26 02:25:22 2014
@@ -46,20 +46,6 @@ public class PDFontFactory
      */
     public static PDFont createFont(COSDictionary dictionary) throws IOException
     {
-        return createFont(dictionary, null);
-    }
-
-    /**
-     * Creates a new PDFont instance with the appropriate subclass.
-     *
-     * @param dictionary a font dictionary
-     * @param glyphList the default glyph list to use for Unicode mapping
-     * @return a PDFont instance, based on the SubType entry of the dictionary
-     * @throws IOException
-     */
-    public static PDFont createFont(COSDictionary dictionary,
-                                    GlyphList glyphList) throws IOException
-    {
         COSName type = dictionary.getCOSName(COSName.TYPE, COSName.FONT);
         if (!COSName.FONT.equals(type))
         {
@@ -74,10 +60,10 @@ public class PDFontFactory
             {
                 if (((COSDictionary)fd).containsKey(COSName.FONT_FILE3))
                 {
-                    return new PDType1CFont(dictionary, glyphList);
+                    return new PDType1CFont(dictionary);
                 }
             }
-            return new PDType1Font(dictionary, glyphList);
+            return new PDType1Font(dictionary);
         }
         else if (COSName.MM_TYPE1.equals(subType))
         {
@@ -86,18 +72,18 @@ public class PDFontFactory
             {
                 if (((COSDictionary)fd).containsKey(COSName.FONT_FILE3))
                 {
-                    return new PDType1CFont(dictionary, glyphList);
+                    return new PDType1CFont(dictionary);
                 }
             }
-            return new PDMMType1Font(dictionary, glyphList);
+            return new PDMMType1Font(dictionary);
         }
         else if (COSName.TRUE_TYPE.equals(subType))
         {
-            return new PDTrueTypeFont(dictionary, glyphList);
+            return new PDTrueTypeFont(dictionary);
         }
         else if (COSName.TYPE3.equals(subType))
         {
-            return new PDType3Font(dictionary, glyphList);
+            return new PDType3Font(dictionary);
         }
         else if (COSName.TYPE0.equals(subType))
         {
@@ -116,7 +102,7 @@ public class PDFontFactory
             // assuming Type 1 font (see PDFBOX-1988) because it seems that Adobe Reader does this
             // however, we may need more sophisticated logic perhaps looking at the FontFile
             LOG.warn("Invalid font subtype '" + subType + "'");
-            return new PDType1Font(dictionary, glyphList);
+            return new PDType1Font(dictionary);
         }
     }
 

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDMMType1Font.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDMMType1Font.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDMMType1Font.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDMMType1Font.java Fri Sep 26 02:25:22 2014
@@ -32,10 +32,9 @@ public class PDMMType1Font extends PDTyp
      * Creates an MMType1Font from a Font dictionary in a PDF.
      *
      * @param fontDictionary font dictionary
-     * @param glyphList a custom glyph list for Unicode mapping
      */
-    public PDMMType1Font(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
+    public PDMMType1Font(COSDictionary fontDictionary) throws IOException
     {
-        super(fontDictionary, glyphList);
+        super(fontDictionary);
     }
 }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java Fri Sep 26 02:25:22 2014
@@ -43,7 +43,6 @@ public abstract class PDSimpleFont exten
 
     protected Encoding encoding;
     protected GlyphList glyphList;
-    private final GlyphList defaultGlyphList;
     private final Set<Integer> noUnicode = new HashSet<Integer>(); // for logging
 
     /**
@@ -52,26 +51,16 @@ public abstract class PDSimpleFont exten
     protected PDSimpleFont()
     {
         super();
-        defaultGlyphList = GlyphList.getAdobeGlyphList();
     }
 
     /**
      * Constructor.
      *
      * @param fontDictionary Font dictionary.
-     * @param glyphList a custom glyph list for Unicode mapping
      */
-    protected PDSimpleFont(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
+    protected PDSimpleFont(COSDictionary fontDictionary) throws IOException
     {
         super(fontDictionary);
-        if (glyphList == null)
-        {
-            defaultGlyphList = GlyphList.getAdobeGlyphList();
-        }
-        else
-        {
-            defaultGlyphList = glyphList;
-        }
     }
 
     /**
@@ -147,7 +136,7 @@ public abstract class PDSimpleFont exten
         }
         else
         {
-            glyphList = defaultGlyphList; // by default this is the AGL, but it can be overridden
+            glyphList = GlyphList.getAdobeGlyphList();
         }
     }
 
@@ -235,6 +224,24 @@ public abstract class PDSimpleFont exten
     @Override
     public String toUnicode(int code) throws IOException
     {
+        return toUnicode(code, GlyphList.getAdobeGlyphList());
+    }
+
+    @Override
+    public String toUnicode(int code, GlyphList customGlyphList) throws IOException
+    {
+        // allow the glyph list to be overridden for the purpose of extracting Unicode
+        // we only do this when the font's glyph list is the AGL, to avoid breaking Zapf Dingbats
+        GlyphList unicodeGlyphList;
+        if (this.glyphList == GlyphList.getAdobeGlyphList())
+        {
+            unicodeGlyphList = customGlyphList;
+        }
+        else
+        {
+            unicodeGlyphList = this.glyphList;
+        }
+
         // first try to use a ToUnicode CMap
         String unicode = super.toUnicode(code);
         if (unicode != null)
@@ -252,7 +259,7 @@ public abstract class PDSimpleFont exten
         if (encoding != null)
         {
             name = encoding.getName(code);
-            unicode = glyphList.toUnicode(name);
+            unicode = unicodeGlyphList.toUnicode(name);
             if (unicode != null)
             {
                 return unicode;

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java Fri Sep 26 02:25:22 2014
@@ -103,11 +103,10 @@ public class PDTrueTypeFont extends PDSi
      * Creates a new TrueType font from a Font dictionary.
      *
      * @param fontDictionary The font dictionary according to the PDF specification.
-     * @param glyphList A custom glyph list for Unicode mapping
      */
-    public PDTrueTypeFont(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
+    public PDTrueTypeFont(COSDictionary fontDictionary) throws IOException
     {
-        super(fontDictionary, glyphList);
+        super(fontDictionary);
 
         TrueTypeFont ttfFont = null;
         if (getFontDescriptor() != null)

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java Fri Sep 26 02:25:22 2014
@@ -66,12 +66,11 @@ public class PDType1CFont extends PDSimp
      * Constructor.
      * 
      * @param fontDictionary the corresponding dictionary
-     * @param glyphList a custom glyph list for Unicode mapping
      * @throws IOException it something went wrong
      */
-    public PDType1CFont(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
+    public PDType1CFont(COSDictionary fontDictionary) throws IOException
     {
-        super(fontDictionary, glyphList);
+        super(fontDictionary);
 
         PDFontDescriptor fd = getFontDescriptor();
         byte[] bytes = null;

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java Fri Sep 26 02:25:22 2014
@@ -126,11 +126,10 @@ public class PDType1Font extends PDSimpl
      * Creates a Type 1 font from a Font dictionary in a PDF.
      * 
      * @param fontDictionary font dictionary
-     * @param glyphList A custom glyph list for Unicode mapping
      */
-    public PDType1Font(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
+    public PDType1Font(COSDictionary fontDictionary) throws IOException
     {
-        super(fontDictionary, glyphList);
+        super(fontDictionary);
 
         PDFontDescriptor fd = getFontDescriptor();
         Type1Font t1 = null;

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java Fri Sep 26 02:25:22 2014
@@ -50,11 +50,10 @@ public class PDType3Font extends PDSimpl
      * Constructor.
      *
      * @param fontDictionary The font dictionary according to the PDF specification.
-     * @param glyphList a custom glyph list for Unicode mapping
      */
-    public PDType3Font(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
+    public PDType3Font(COSDictionary fontDictionary) throws IOException
     {
-        super(fontDictionary, glyphList);
+        super(fontDictionary);
         readEncoding();
     }
 

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java Fri Sep 26 02:25:22 2014
@@ -16,6 +16,7 @@
  */
 package org.apache.pdfbox.util;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -59,7 +60,7 @@ public class PDFMarkedContentExtractor e
      * Instantiate a new PDFTextStripper object. Will not do anything special to convert
      * the text to a more encoding-specific output.
      */
-    public PDFMarkedContentExtractor()
+    public PDFMarkedContentExtractor() throws IOException
     {
         this(null);
     }
@@ -69,7 +70,7 @@ public class PDFMarkedContentExtractor e
      *
      * @param encoding The encoding that the output will be written in.
      */
-    public PDFMarkedContentExtractor(String encoding)
+    public PDFMarkedContentExtractor(String encoding) throws IOException
     {
         addOperator(new BeginMarkedContentSequenceWithProperties());
         addOperator(new BeginMarkedContentSequence());

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java Fri Sep 26 02:25:22 2014
@@ -515,16 +515,7 @@ public class PDFStreamEngine
             return Collections.emptyMap();
         }
 
-        return streamResourcesStack.peek().getFonts(getGlyphList());
-    }
-
-    /**
-     * Returns the glyph list for Unicode mapping, the default is the Adobe Glyph List.
-     * @throws IOException if the glyph list could not be loaded
-     */
-    protected GlyphList getGlyphList() throws IOException
-    {
-        return GlyphList.getAdobeGlyphList();
+        return streamResourcesStack.peek().getFonts();
     }
 
     /**

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java Fri Sep 26 02:25:22 2014
@@ -71,7 +71,7 @@ public class PDFTextStreamEngine extends
     /**
      * Constructor.
      */
-    public PDFTextStreamEngine()
+    public PDFTextStreamEngine() throws IOException
     {
         addOperator(new BeginText());
         addOperator(new Concatenate());
@@ -95,6 +95,11 @@ public class PDFTextStreamEngine extends
         addOperator(new SetTextHorizontalScaling());
         addOperator(new ShowTextLine());
         addOperator(new ShowTextLineAndSpace());
+
+        // load additional glyph list for Unicode mapping
+        String path = "org/apache/pdfbox/resources/glyphlist/additional.txt";
+        InputStream input = GlyphList.class.getClassLoader().getResourceAsStream(path);
+        glyphList = new GlyphList(GlyphList.getAdobeGlyphList(), input);
     }
 
     /**
@@ -202,6 +207,9 @@ public class PDFTextStreamEngine extends
         float spaceWidthDisplay = spaceWidthText * fontSizeText * horizontalScalingText *
                 textRenderingMatrix.getXScale()  * ctm.getXScale();
 
+        // use our additional glyph list for Unicode mapping
+        unicode = font.toUnicode(code, glyphList);
+
         // when there is no Unicode mapping available, Acrobat simply coerces the character code
         // into Unicode, so we do the same. Subclasses of PDFStreamEngine don't necessarily want
         // this, which is why we leave it until this point in PDFTextStreamEngine.
@@ -237,17 +245,4 @@ public class PDFTextStreamEngine extends
     {
         // subclasses can override to provide specific functionality
     }
-
-    @Override
-    protected GlyphList getGlyphList() throws IOException
-    {
-        if (glyphList == null)
-        {
-            // load additional glyph list for Unicode mapping
-            String path = "org/apache/pdfbox/resources/glyphlist/additional.txt";
-            InputStream input = GlyphList.class.getClassLoader().getResourceAsStream(path);
-            glyphList = new GlyphList(GlyphList.getAdobeGlyphList(), input);
-        }
-        return glyphList;
-    }
 }