You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ja...@apache.org on 2014/09/26 04:25:22 UTC
svn commit: r1627702 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox: pdmodel/ pdmodel/font/ util/
Author: jahewson
Date: Fri Sep 26 02:25:22 2014
New Revision: 1627702
URL: http://svn.apache.org/r1627702
Log:
PDFBOX-2380: Simplify custom GlyphList use for toUnicode
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDMMType1Font.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java Fri Sep 26 02:25:22 2014
@@ -140,17 +140,6 @@ public class PDResources implements COSO
*/
public Map<String, PDFont> getFonts() throws IOException
{
- return getFonts((GlyphList) null);
- }
-
- /**
- * This will get the map of fonts. This will never return null.
- *
- * @param glyphList A custom glyph list for Unicode mapping.
- * @return The map of fonts.
- */
- public Map<String, PDFont> getFonts(GlyphList glyphList) throws IOException
- {
if (fonts == null)
{
// at least an empty map will be returned
@@ -180,7 +169,7 @@ public class PDResources implements COSO
}
else
{
- PDFont newFont = PDFontFactory.createFont((COSDictionary)font, glyphList);
+ PDFont newFont = PDFontFactory.createFont((COSDictionary)font);
fonts.put(fontName.getName(), newFont);
seenFonts.put((COSDictionary) font, newFont);
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java Fri Sep 26 02:25:22 2014
@@ -32,6 +32,7 @@ import org.apache.pdfbox.cos.COSDictiona
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.encoding.GlyphList;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.pdmodel.common.COSArrayList;
import org.apache.pdfbox.pdmodel.common.COSObjectable;
@@ -314,6 +315,18 @@ public abstract class PDFont implements
* Returns the Unicode character sequence which corresponds to the given character code.
*
* @param code character code
+ * @param customGlyphList a custom glyph list to use instead of the Adobe Glyph List
+ * @return Unicode character(s)
+ */
+ public String toUnicode(int code, GlyphList customGlyphList) throws IOException
+ {
+ return toUnicode(code);
+ }
+
+ /**
+ * Returns the Unicode character sequence which corresponds to the given character code.
+ *
+ * @param code character code
* @return Unicode character(s)
*/
public String toUnicode(int code) throws IOException
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java Fri Sep 26 02:25:22 2014
@@ -46,20 +46,6 @@ public class PDFontFactory
*/
public static PDFont createFont(COSDictionary dictionary) throws IOException
{
- return createFont(dictionary, null);
- }
-
- /**
- * Creates a new PDFont instance with the appropriate subclass.
- *
- * @param dictionary a font dictionary
- * @param glyphList the default glyph list to use for Unicode mapping
- * @return a PDFont instance, based on the SubType entry of the dictionary
- * @throws IOException
- */
- public static PDFont createFont(COSDictionary dictionary,
- GlyphList glyphList) throws IOException
- {
COSName type = dictionary.getCOSName(COSName.TYPE, COSName.FONT);
if (!COSName.FONT.equals(type))
{
@@ -74,10 +60,10 @@ public class PDFontFactory
{
if (((COSDictionary)fd).containsKey(COSName.FONT_FILE3))
{
- return new PDType1CFont(dictionary, glyphList);
+ return new PDType1CFont(dictionary);
}
}
- return new PDType1Font(dictionary, glyphList);
+ return new PDType1Font(dictionary);
}
else if (COSName.MM_TYPE1.equals(subType))
{
@@ -86,18 +72,18 @@ public class PDFontFactory
{
if (((COSDictionary)fd).containsKey(COSName.FONT_FILE3))
{
- return new PDType1CFont(dictionary, glyphList);
+ return new PDType1CFont(dictionary);
}
}
- return new PDMMType1Font(dictionary, glyphList);
+ return new PDMMType1Font(dictionary);
}
else if (COSName.TRUE_TYPE.equals(subType))
{
- return new PDTrueTypeFont(dictionary, glyphList);
+ return new PDTrueTypeFont(dictionary);
}
else if (COSName.TYPE3.equals(subType))
{
- return new PDType3Font(dictionary, glyphList);
+ return new PDType3Font(dictionary);
}
else if (COSName.TYPE0.equals(subType))
{
@@ -116,7 +102,7 @@ public class PDFontFactory
// assuming Type 1 font (see PDFBOX-1988) because it seems that Adobe Reader does this
// however, we may need more sophisticated logic perhaps looking at the FontFile
LOG.warn("Invalid font subtype '" + subType + "'");
- return new PDType1Font(dictionary, glyphList);
+ return new PDType1Font(dictionary);
}
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDMMType1Font.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDMMType1Font.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDMMType1Font.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDMMType1Font.java Fri Sep 26 02:25:22 2014
@@ -32,10 +32,9 @@ public class PDMMType1Font extends PDTyp
* Creates an MMType1Font from a Font dictionary in a PDF.
*
* @param fontDictionary font dictionary
- * @param glyphList a custom glyph list for Unicode mapping
*/
- public PDMMType1Font(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
+ public PDMMType1Font(COSDictionary fontDictionary) throws IOException
{
- super(fontDictionary, glyphList);
+ super(fontDictionary);
}
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java Fri Sep 26 02:25:22 2014
@@ -43,7 +43,6 @@ public abstract class PDSimpleFont exten
protected Encoding encoding;
protected GlyphList glyphList;
- private final GlyphList defaultGlyphList;
private final Set<Integer> noUnicode = new HashSet<Integer>(); // for logging
/**
@@ -52,26 +51,16 @@ public abstract class PDSimpleFont exten
protected PDSimpleFont()
{
super();
- defaultGlyphList = GlyphList.getAdobeGlyphList();
}
/**
* Constructor.
*
* @param fontDictionary Font dictionary.
- * @param glyphList a custom glyph list for Unicode mapping
*/
- protected PDSimpleFont(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
+ protected PDSimpleFont(COSDictionary fontDictionary) throws IOException
{
super(fontDictionary);
- if (glyphList == null)
- {
- defaultGlyphList = GlyphList.getAdobeGlyphList();
- }
- else
- {
- defaultGlyphList = glyphList;
- }
}
/**
@@ -147,7 +136,7 @@ public abstract class PDSimpleFont exten
}
else
{
- glyphList = defaultGlyphList; // by default this is the AGL, but it can be overridden
+ glyphList = GlyphList.getAdobeGlyphList();
}
}
@@ -235,6 +224,24 @@ public abstract class PDSimpleFont exten
@Override
public String toUnicode(int code) throws IOException
{
+ return toUnicode(code, GlyphList.getAdobeGlyphList());
+ }
+
+ @Override
+ public String toUnicode(int code, GlyphList customGlyphList) throws IOException
+ {
+ // allow the glyph list to be overridden for the purpose of extracting Unicode
+ // we only do this when the font's glyph list is the AGL, to avoid breaking Zapf Dingbats
+ GlyphList unicodeGlyphList;
+ if (this.glyphList == GlyphList.getAdobeGlyphList())
+ {
+ unicodeGlyphList = customGlyphList;
+ }
+ else
+ {
+ unicodeGlyphList = this.glyphList;
+ }
+
// first try to use a ToUnicode CMap
String unicode = super.toUnicode(code);
if (unicode != null)
@@ -252,7 +259,7 @@ public abstract class PDSimpleFont exten
if (encoding != null)
{
name = encoding.getName(code);
- unicode = glyphList.toUnicode(name);
+ unicode = unicodeGlyphList.toUnicode(name);
if (unicode != null)
{
return unicode;
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java Fri Sep 26 02:25:22 2014
@@ -103,11 +103,10 @@ public class PDTrueTypeFont extends PDSi
* Creates a new TrueType font from a Font dictionary.
*
* @param fontDictionary The font dictionary according to the PDF specification.
- * @param glyphList A custom glyph list for Unicode mapping
*/
- public PDTrueTypeFont(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
+ public PDTrueTypeFont(COSDictionary fontDictionary) throws IOException
{
- super(fontDictionary, glyphList);
+ super(fontDictionary);
TrueTypeFont ttfFont = null;
if (getFontDescriptor() != null)
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java Fri Sep 26 02:25:22 2014
@@ -66,12 +66,11 @@ public class PDType1CFont extends PDSimp
* Constructor.
*
* @param fontDictionary the corresponding dictionary
- * @param glyphList a custom glyph list for Unicode mapping
* @throws IOException it something went wrong
*/
- public PDType1CFont(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
+ public PDType1CFont(COSDictionary fontDictionary) throws IOException
{
- super(fontDictionary, glyphList);
+ super(fontDictionary);
PDFontDescriptor fd = getFontDescriptor();
byte[] bytes = null;
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java Fri Sep 26 02:25:22 2014
@@ -126,11 +126,10 @@ public class PDType1Font extends PDSimpl
* Creates a Type 1 font from a Font dictionary in a PDF.
*
* @param fontDictionary font dictionary
- * @param glyphList A custom glyph list for Unicode mapping
*/
- public PDType1Font(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
+ public PDType1Font(COSDictionary fontDictionary) throws IOException
{
- super(fontDictionary, glyphList);
+ super(fontDictionary);
PDFontDescriptor fd = getFontDescriptor();
Type1Font t1 = null;
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java Fri Sep 26 02:25:22 2014
@@ -50,11 +50,10 @@ public class PDType3Font extends PDSimpl
* Constructor.
*
* @param fontDictionary The font dictionary according to the PDF specification.
- * @param glyphList a custom glyph list for Unicode mapping
*/
- public PDType3Font(COSDictionary fontDictionary, GlyphList glyphList) throws IOException
+ public PDType3Font(COSDictionary fontDictionary) throws IOException
{
- super(fontDictionary, glyphList);
+ super(fontDictionary);
readEncoding();
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java Fri Sep 26 02:25:22 2014
@@ -16,6 +16,7 @@
*/
package org.apache.pdfbox.util;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -59,7 +60,7 @@ public class PDFMarkedContentExtractor e
* Instantiate a new PDFTextStripper object. Will not do anything special to convert
* the text to a more encoding-specific output.
*/
- public PDFMarkedContentExtractor()
+ public PDFMarkedContentExtractor() throws IOException
{
this(null);
}
@@ -69,7 +70,7 @@ public class PDFMarkedContentExtractor e
*
* @param encoding The encoding that the output will be written in.
*/
- public PDFMarkedContentExtractor(String encoding)
+ public PDFMarkedContentExtractor(String encoding) throws IOException
{
addOperator(new BeginMarkedContentSequenceWithProperties());
addOperator(new BeginMarkedContentSequence());
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java Fri Sep 26 02:25:22 2014
@@ -515,16 +515,7 @@ public class PDFStreamEngine
return Collections.emptyMap();
}
- return streamResourcesStack.peek().getFonts(getGlyphList());
- }
-
- /**
- * Returns the glyph list for Unicode mapping, the default is the Adobe Glyph List.
- * @throws IOException if the glyph list could not be loaded
- */
- protected GlyphList getGlyphList() throws IOException
- {
- return GlyphList.getAdobeGlyphList();
+ return streamResourcesStack.peek().getFonts();
}
/**
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java?rev=1627702&r1=1627701&r2=1627702&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java Fri Sep 26 02:25:22 2014
@@ -71,7 +71,7 @@ public class PDFTextStreamEngine extends
/**
* Constructor.
*/
- public PDFTextStreamEngine()
+ public PDFTextStreamEngine() throws IOException
{
addOperator(new BeginText());
addOperator(new Concatenate());
@@ -95,6 +95,11 @@ public class PDFTextStreamEngine extends
addOperator(new SetTextHorizontalScaling());
addOperator(new ShowTextLine());
addOperator(new ShowTextLineAndSpace());
+
+ // load additional glyph list for Unicode mapping
+ String path = "org/apache/pdfbox/resources/glyphlist/additional.txt";
+ InputStream input = GlyphList.class.getClassLoader().getResourceAsStream(path);
+ glyphList = new GlyphList(GlyphList.getAdobeGlyphList(), input);
}
/**
@@ -202,6 +207,9 @@ public class PDFTextStreamEngine extends
float spaceWidthDisplay = spaceWidthText * fontSizeText * horizontalScalingText *
textRenderingMatrix.getXScale() * ctm.getXScale();
+ // use our additional glyph list for Unicode mapping
+ unicode = font.toUnicode(code, glyphList);
+
// when there is no Unicode mapping available, Acrobat simply coerces the character code
// into Unicode, so we do the same. Subclasses of PDFStreamEngine don't necessarily want
// this, which is why we leave it until this point in PDFTextStreamEngine.
@@ -237,17 +245,4 @@ public class PDFTextStreamEngine extends
{
// subclasses can override to provide specific functionality
}
-
- @Override
- protected GlyphList getGlyphList() throws IOException
- {
- if (glyphList == null)
- {
- // load additional glyph list for Unicode mapping
- String path = "org/apache/pdfbox/resources/glyphlist/additional.txt";
- InputStream input = GlyphList.class.getClassLoader().getResourceAsStream(path);
- glyphList = new GlyphList(GlyphList.getAdobeGlyphList(), input);
- }
- return glyphList;
- }
}