You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ja...@apache.org on 2014/08/23 04:34:37 UTC

svn commit: r1619956 [3/3] - in /pdfbox/branches/no-awt: examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ fontbox/src/main/java/org/apache/fontbox/cff/ fontbox/src/main/java/org/apache/fontbox/cmap/ fontbox/src/main/java/org/apache/fontbox/tt...

Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java Sat Aug 23 02:34:35 2014
@@ -17,12 +17,17 @@
 package org.apache.pdfbox.pdmodel.font;
 
 import java.io.IOException;
+import java.io.InputStream;
 
 import org.apache.pdfbox.cos.COSArray;
 import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSFloat;
+import org.apache.pdfbox.cos.COSInteger;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.encoding.Encoding;
 import org.apache.pdfbox.pdmodel.PDResources;
+import org.apache.pdfbox.pdmodel.common.PDMatrix;
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
 
 /**
@@ -30,20 +35,67 @@ import org.apache.pdfbox.pdmodel.common.
  *
  * @author Ben Litchfield
  */
-public class PDType3Font extends PDFont
+public class PDType3Font extends PDSimpleFont
 {
 	private PDResources type3Resources = null;
     private COSDictionary charProcs = null;
+    private PDMatrix fontMatrix;
 
     /**
      * Constructor.
      *
      * @param fontDictionary The font dictionary according to the PDF specification.
      */
-    public PDType3Font(COSDictionary fontDictionary)
+    public PDType3Font(COSDictionary fontDictionary) throws IOException
     {
         super(fontDictionary);
-        determineEncoding();
+        readEncoding();
+    }
+
+    @Override
+    protected Encoding readEncodingFromFont() throws IOException
+    {
+        return null;
+    }
+
+    @Override
+    protected float getWidthFromFont(int code)
+    {
+        // todo: implement me (need to peek into stream)
+        return 0;
+    }
+
+    @Override
+    protected boolean isEmbedded()
+    {
+        return true;
+    }
+
+    @Override
+    public int readCode(InputStream in) throws IOException
+    {
+        return in.read();
+    }
+
+    @Override
+    public PDMatrix getFontMatrix()
+    {
+        if (fontMatrix == null)
+        {
+            COSArray array = (COSArray) dict.getDictionaryObject(COSName.FONT_MATRIX);
+            if (array == null)
+            {
+                array = new COSArray();
+                array.add(new COSFloat(0.001f));
+                array.add(COSInteger.ZERO);
+                array.add(COSInteger.ZERO);
+                array.add(new COSFloat(0.001f));
+                array.add(COSInteger.ZERO);
+                array.add(COSInteger.ZERO);
+            }
+            fontMatrix = new PDMatrix(array);
+        }
+        return fontMatrix;
     }
 
     /**
@@ -98,21 +150,21 @@ public class PDType3Font extends PDFont
     /**
      * Returns the stream of the glyph representing by the given character
      * 
-     * @param character the represented character
+     * @param code char code
      * @return the stream to be used to render the glyph
      * @throws IOException If something went wrong when getting the stream.
      */
-    public COSStream getCharStream(Character character) throws IOException
+    public COSStream getCharStream(int code) throws IOException
     {
     	COSStream stream = null;
-        String cMapsTo = getFontEncoding().getName(character);
+        String cMapsTo = getEncoding().getName(code);
         if (cMapsTo != null)
         {
         	stream = (COSStream)getCharProcs().getDictionaryObject(COSName.getPDFName(cMapsTo));
         }
         return stream;
     }
-    
+
     @Override
     public void clear()
     {

Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java Sat Aug 23 02:34:35 2014
@@ -46,7 +46,7 @@ import org.apache.pdfbox.pdmodel.font.PD
 import org.apache.pdfbox.pdmodel.font.PDCIDFontType2;
 import org.apache.pdfbox.pdmodel.graphics.image.PDImage;
 import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode;
-import org.apache.pdfbox.rendering.font.CIDGlyph2D;
+import org.apache.pdfbox.rendering.font.CIDType0Glyph2D;
 import org.apache.pdfbox.rendering.font.Glyph2D;
 import org.apache.pdfbox.rendering.font.TTFGlyph2D;
 import org.apache.pdfbox.rendering.font.Type1Glyph2D;
@@ -287,7 +287,7 @@ public class PageDrawer extends PDFGraph
 
     @Override
     protected void processGlyph(Matrix textMatrix, Point2D.Float end, float maxHeight,
-                                float widthText, String unicode, int[] charCodes, PDFont font,
+                                float widthText, int code, String unicode, PDFont font,
                                 float fontSize) throws IOException
     {
         try
@@ -296,12 +296,12 @@ public class PageDrawer extends PDFGraph
             PDMatrix fontMatrix = font.getFontMatrix();
 
             // use different methods to draw the string
-            if (font.isType3Font())
+            if (font instanceof PDType3Font)
             {
                 // Type3 fonts don't use the same units within the font matrix as the other fonts
                 at.scale(fontMatrix.getValue(0, 0), fontMatrix.getValue(1, 1));
                 // Type3 fonts are using streams for each character
-                drawType3String((PDType3Font) font, charCodes, at);
+                drawType3String((PDType3Font) font, code, at);
             }
             else
             {
@@ -314,7 +314,7 @@ public class PageDrawer extends PDFGraph
                             fontMatrix.getValue(2, 0), fontMatrix.getValue(2, 1));
                     at.concatenate(fontMatrixAT);
                     // Let PDFBox render the font if supported
-                    drawGlyphs2D(glyph2D, charCodes, at);
+                    drawGlyph2D(glyph2D, code, at);
                 }
                 else
                 {
@@ -335,56 +335,40 @@ public class PageDrawer extends PDFGraph
      * Render the font using the Glyph2D interface.
      * 
      * @param glyph2D the Glyph2D implementation provided a GeneralPath for each glyph
-     * @param codePoints the string to be rendered
+     * @param code character code
      * @param at the transformation
      * @throws IOException if something went wrong
      */
-    private void drawGlyphs2D(Glyph2D glyph2D, int[] codePoints, AffineTransform at) throws IOException
+    private void drawGlyph2D(Glyph2D glyph2D, int code, AffineTransform at) throws IOException
     {
         graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
 
         PDGraphicsState state = getGraphicsState();
         RenderingMode renderingMode = state.getTextState().getRenderingMode();
 
-        boolean needsFill = true;
-        boolean needsStroke = true;
-
-        for (int codePoint : codePoints)
+        GeneralPath path = glyph2D.getPathForCharacterCode(code);
+        if (path != null)
         {
-            GeneralPath path = glyph2D.getPathForCharacterCode(codePoint);
-            if (path != null)
-            {
-                Shape glyph = at.createTransformedShape(path);
+            Shape glyph = at.createTransformedShape(path);
 
-                if (renderingMode.isFill())
-                {
-                    if (needsFill)
-                    {
-                        graphics.setComposite(state.getNonStrokingJavaComposite());
-                        graphics.setPaint(getNonStrokingPaint());
-                        needsFill = false;
-                        needsStroke = true;
-                    }
-                    graphics.fill(glyph);
-                }
+            if (renderingMode.isFill())
+            {
+                graphics.setComposite(state.getNonStrokingJavaComposite());
+                graphics.setPaint(getNonStrokingPaint());
+                graphics.fill(glyph);
+            }
 
-                if (renderingMode.isStroke())
-                {
-                    if (needsStroke)
-                    {
-                        graphics.setComposite(state.getStrokingJavaComposite());
-                        graphics.setPaint(getStrokingPaint());
-                        graphics.setStroke(getStroke());
-                        needsFill = true;
-                        needsStroke = false;
-                    }
-                    graphics.draw(glyph);
-                }
+            if (renderingMode.isStroke())
+            {
+                graphics.setComposite(state.getStrokingJavaComposite());
+                graphics.setPaint(getStrokingPaint());
+                graphics.setStroke(getStroke());
+                graphics.draw(glyph);
+            }
 
-                if (renderingMode.isClip())
-                {
-                    state.intersectClippingPath(new Area(glyph));
-                }
+            if (renderingMode.isClip())
+            {
+                state.intersectClippingPath(new Area(glyph));
             }
         }
     }
@@ -393,39 +377,35 @@ public class PageDrawer extends PDFGraph
      * Render the text using a type 3 font.
      * 
      * @param font the type3 font
-     * @param charCodes internal PDF character codes of glyphs
+     * @param code internal PDF character code of the glyph
      * @param at the transformation
      * 
      * @throws IOException if something went wrong
      */
-    private void drawType3String(PDType3Font font, int[] charCodes, AffineTransform at) throws IOException
+    private void drawType3String(PDType3Font font, int code, AffineTransform at) throws IOException
     {
-        int textLength = charCodes.length;
-        for (int i = 0; i < textLength; i++)
+        COSStream stream = font.getCharStream(code);
+        if (stream != null)
         {
-            COSStream stream = font.getCharStream((char) charCodes[i]);
-            if (stream != null)
-            {
-                // save the current graphics state and matrices
-                saveGraphicsState();
-                Matrix textMatrix = getTextMatrix();
-                Matrix textLineMatrix = getTextLineMatrix();
-                
-                Matrix ctm = new Matrix();
-                ctm.setFromAffineTransform(at);
-                getGraphicsState().setCurrentTransformationMatrix(ctm);
-                processSubStream(font.getType3Resources(), stream);
+            // save the current graphics state and matrices
+            saveGraphicsState();
+            Matrix textMatrix = getTextMatrix();
+            Matrix textLineMatrix = getTextLineMatrix();
+
+            Matrix ctm = new Matrix();
+            ctm.setFromAffineTransform(at);
+            getGraphicsState().setCurrentTransformationMatrix(ctm);
+            processSubStream(font.getType3Resources(), stream);
 
-                // restore the saved graphics state and matrices
-                restoreGraphicsState();
-                setTextLineMatrix(textLineMatrix);
-                setTextMatrix(textMatrix);
-                 
-            }
-            else
-            {
-                LOG.error("drawType3String: stream for character " + (char) charCodes[i] + " not found");
-            }
+            // restore the saved graphics state and matrices
+            restoreGraphicsState();
+            setTextLineMatrix(textLineMatrix);
+            setTextMatrix(textMatrix);
+
+        }
+        else
+        {
+            LOG.error("Stream for Type 3 character " + code + " not found");
         }
     }
 
@@ -478,7 +458,7 @@ public class PageDrawer extends PDFGraph
                 CFFCIDFont cffCIDFont = cidType0Font.getCFFCIDFont(); // todo: could be null (need incorporate fallback)
                 if (cffCIDFont != null)
                 {
-                    glyph2D = new CIDGlyph2D(cidType0Font);
+                    glyph2D = new CIDType0Glyph2D(cidType0Font);
                 }
             }
         }

Copied: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/CIDType0Glyph2D.java (from r1618754, pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/CIDGlyph2D.java)
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/CIDType0Glyph2D.java?p2=pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/CIDType0Glyph2D.java&p1=pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/CIDGlyph2D.java&r1=1618754&r2=1619956&rev=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/CIDGlyph2D.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/CIDType0Glyph2D.java Sat Aug 23 02:34:35 2014
@@ -31,7 +31,7 @@ import org.apache.pdfbox.pdmodel.font.PD
  *
  * @author John Hewson
  */
-public class CIDGlyph2D implements Glyph2D
+public class CIDType0Glyph2D implements Glyph2D
 {
     private static final Log LOG = LogFactory.getLog(Type1Glyph2D.class);
 
@@ -44,7 +44,7 @@ public class CIDGlyph2D implements Glyph
      *
      * @param font Type 0 CIDFont
      */
-    public CIDGlyph2D(PDCIDFontType0 font)
+    public CIDType0Glyph2D(PDCIDFontType0 font) // todo: what about PDCIDFontType2?
     {
         this.font = font;
         fontName = font.getBaseFont();
@@ -53,7 +53,7 @@ public class CIDGlyph2D implements Glyph
     @Override
     public GeneralPath getPathForCharacterCode(int code)
     {
-        int cid = font.codeToCID(code);
+        int cid = font.getParent().codeToCID(code);
         if (cache.containsKey(cid))
         {
             return cache.get(cid);

Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/TTFGlyph2D.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/TTFGlyph2D.java?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/TTFGlyph2D.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/TTFGlyph2D.java Sat Aug 23 02:34:35 2014
@@ -26,7 +26,6 @@ import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.fontbox.cmap.CMap;
 import org.apache.fontbox.ttf.GlyphData;
 import org.apache.fontbox.ttf.HeaderTable;
 import org.apache.fontbox.ttf.TrueTypeFont;
@@ -44,16 +43,11 @@ public class TTFGlyph2D implements Glyph
 
     private PDFont pdFont;
     private TrueTypeFont ttf;
-    private PDCIDFontType2 descendantFont;
     private String name;
     private float scale = 1.0f;
     private boolean hasScaling = false;
     private Map<Integer, GeneralPath> glyphs = new HashMap<Integer, GeneralPath>();
-    private CMap fontCMap = null;
     private boolean isCIDFont = false;
-    private boolean hasIdentityCIDMapping = false;
-    private boolean hasCID2GIDMapping = false;
-    private boolean hasTwoByteMappings = false;
 
     /**
      * Constructor.
@@ -62,7 +56,7 @@ public class TTFGlyph2D implements Glyph
      */
     public TTFGlyph2D(PDTrueTypeFont ttfFont) throws IOException
     {
-        this(ttfFont.getTrueTypeFont(), ttfFont, null);
+        this(ttfFont.getTrueTypeFont(), ttfFont, false);
     }
 
     /**
@@ -72,11 +66,10 @@ public class TTFGlyph2D implements Glyph
      */
     public TTFGlyph2D(PDType0Font type0Font) throws IOException
     {
-        this(((PDCIDFontType2)type0Font.getDescendantFont()).getTrueTypeFont(), type0Font,
-                (PDCIDFontType2)type0Font.getDescendantFont());
+        this(((PDCIDFontType2)type0Font.getDescendantFont()).getTrueTypeFont(), type0Font, true);
     }
 
-    public TTFGlyph2D(TrueTypeFont ttf, PDFont pdFont, PDCIDFontType2 descFont)
+    public TTFGlyph2D(TrueTypeFont ttf, PDFont pdFont, boolean isCIDFont)
             throws IOException
     {
         this.pdFont = pdFont;
@@ -90,7 +83,7 @@ public class TTFGlyph2D implements Glyph
             scale = 1000f / header.getUnitsPerEm();
             hasScaling = true;
         }
-        extractFontSpecifics(pdFont, descFont);
+        extractFontSpecifics(pdFont, isCIDFont);
     }
 
     /**
@@ -98,72 +91,17 @@ public class TTFGlyph2D implements Glyph
      * 
      * @param pdFont the given PDFont
      */
-    private void extractFontSpecifics(PDFont pdFont, PDCIDFontType2 descFont)
+    private void extractFontSpecifics(PDFont pdFont, boolean isCIDFont)
     {
         name = pdFont.getBaseFont();
-        if (descFont != null)
-        {
-            isCIDFont = true;
-            descendantFont = descFont;
-            hasIdentityCIDMapping = descendantFont.hasIdentityCIDToGIDMap();
-            hasCID2GIDMapping = descendantFont.hasCIDToGIDMap();
-            fontCMap = pdFont.getCMap();
-            if (fontCMap != null)
-            {
-                hasTwoByteMappings = fontCMap.hasTwoByteMappings();
-            }
-        }
-    }
-
-    /**
-     * Get the GID for the given CIDFont.
-     * 
-     * @param code the given CID
-     * @return the mapped GID
-     */
-    private int getGID(int code)
-    {
-        if (hasIdentityCIDMapping)
-        {
-            // identity mapping
-            return code;
-        }
-        if (hasCID2GIDMapping)
-        {
-            // use the provided CID2GID mapping
-            return descendantFont.mapCIDToGID(code);
-        }
-        if (fontCMap != null)
-        {
-            String string = fontCMap.lookup(code, hasTwoByteMappings ? 2 : 1);
-            if (string != null)
-            {
-                return string.codePointAt(0);
-            }
-        }
-        return code;
+        this.isCIDFont = isCIDFont;
     }
 
     @Override
     public GeneralPath getPathForCharacterCode(int code) throws IOException
     {
-        int glyphId = getGIDForCharacterCode(code);
-
-        if (glyphId > 0)
-        {
-            return getPathForGlyphId(glyphId);
-        }
-        glyphId = code;
-        // there isn't any mapping, but probably an optional CMap
-        if (fontCMap != null)
-        {
-            String string = fontCMap.lookup(code, hasTwoByteMappings ? 2 : 1);
-            if (string != null)
-            {
-                glyphId = string.codePointAt(0);
-            }
-        }
-        return getPathForGlyphId(glyphId);
+        int gid = getGIDForCharacterCode(code);
+        return getPathForGlyphId(gid);
     }
 
     // Try to map the given code to the corresponding glyph-ID
@@ -171,11 +109,11 @@ public class TTFGlyph2D implements Glyph
     {
         if (isCIDFont)
         {
-            return getGID(code);
+            return ((PDType0Font)pdFont).codeToGID(code);
         }
         else
         {
-            return ((PDTrueTypeFont)pdFont).getGIDForCharacterCode(code);
+            return ((PDTrueTypeFont)pdFont).codeToGID(code);
         }
     }
 
@@ -239,8 +177,6 @@ public class TTFGlyph2D implements Glyph
     public void dispose()
     {
         ttf = null;
-        descendantFont = null;
-        fontCMap = null;
         if (glyphs != null)
         {
             glyphs.clear();

Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java Sat Aug 23 02:34:35 2014
@@ -18,7 +18,9 @@ package org.apache.pdfbox.util;
 
 import java.awt.geom.GeneralPath;
 import java.awt.geom.Point2D;
+import java.io.ByteArrayInputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Enumeration;
@@ -42,7 +44,6 @@ import org.apache.pdfbox.pdmodel.common.
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
 import org.apache.pdfbox.pdmodel.font.PDFont;
 import org.apache.pdfbox.pdmodel.font.PDFontFactory;
-import org.apache.pdfbox.pdmodel.font.PDType0Font;
 import org.apache.pdfbox.pdmodel.font.PDType3Font;
 import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
 import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;
@@ -358,7 +359,7 @@ public class PDFStreamEngine
      */
     protected void processText(byte[] string) throws IOException
     {
-        // Note on variable names. There are three different units being used in this code.
+        // Note on variable names: there are three different units being used in this code.
         // Character sizes are given in glyph units, text locations are initially given in text
         // units, and we want to save the data in display units. The variable names should end with
         // Text or Disp to represent if the values are in text or disp units (no glyph units are
@@ -380,9 +381,10 @@ public class PDFStreamEngine
         PDFont font = graphicsState.getTextState().getFont();
         if (font == null)
         {
-            LOG.warn("font is undefined, creating default font");
+            LOG.warn("No current font, will use default");
             font = PDFontFactory.createDefaultFont();
         }
+
         // all fonts have the width/height of a character in thousandths of a unit of text space
         float fontMatrixXScaling = 1 / 1000f;
         float fontMatrixYScaling = 1 / 1000f;
@@ -405,58 +407,16 @@ public class PDFStreamEngine
         Matrix td = new Matrix();
         Matrix tempMatrix = new Matrix();
 
-        // todo: push this decision into the Font, i.e. hasTwoByteCharacterCodes()
-        // for now we use a workaround which accesses CMap fonts directly
-        int codeLength;
-        if (font instanceof PDType0Font)
-        {
-            // "When the current font is a Type 0 font whose Encoding entry is Identity-H or
-            // Identity-V, the string to be shown shall contain pairs of bytes representing CIDs,
-            // high-order byte first."
-
-            // "When the current font is a CIDFont, the string to be shown shall contain pairs of
-            // bytes representing CIDs, high-order byte first."
-
-            // fixme: sanity check
-            if (!font.getCMap().getName().equals("Identity-H") &&
-                !font.getCMap().getName().equals("Identity-V"))
-            {
-                throw new UnsupportedOperationException("CMap Not implemented: " + font.getCMap().getName());
-            }
-
-            // todo: ((PDType0Font)font).getCMap().hasTwoByteMappings() ???
-            codeLength = 2; // todo: HACK, see "9.7.6.2 CMap Mapping" (also p275 for Identity-H or Identity-V,)
-        }
-        else
-        {
-            codeLength = 1;
-        }
-
-        for (int i = 0; i < string.length; i += codeLength)
+        InputStream in = new ByteArrayInputStream(string);
+        while (in.available() > 0)
         {
-            // fixme: sanity check
-            if (i + codeLength > string.length)
-            {
-                throw new UnsupportedOperationException("Not enough data: " + string.length + " < " + (i + codeLength));
-            }
-
-            // Decode the value to a Unicode character
-            String unicode = font.encode(string, i, codeLength);
-            int[] charCodes;
-            if (codeLength == 2)
-            {
-                charCodes = new int[] { font.getCodeFromArray(string, i, codeLength) };
-                // todo: shouldn't the above array have two codes?
-            }
-            else
-            {
-                charCodes = new int[] { font.getCodeFromArray(string, i, codeLength) };
-            }
+            int code = font.readCode(in);
+            String unicode = font.toUnicode(code);
 
             // TODO: handle horizontal displacement
             // get the width and height of this character in text units
-            float charHorizontalDisplacementText = font.getFontWidth(string, i, codeLength);
-            float charVerticalDisplacementText = font.getFontHeight(string, i, codeLength);
+            float charHorizontalDisplacementText = font.getWidth(code);
+            float charVerticalDisplacementText = font.getHeight(code); // todo: NOPE we want y-advance not BBox
 
             // multiply the width/height with the scaling factor
             charHorizontalDisplacementText = charHorizontalDisplacementText * fontMatrixXScaling;
@@ -481,7 +441,7 @@ public class PDFStreamEngine
             // code 32 non-space resulted in errors consistent with this interpretation.
             //
             float spacingText = 0;
-            if (string[i] == 0x20 && codeLength == 1)
+            if (code == 32)
             {
                 spacingText += wordSpacingText;
             }
@@ -517,7 +477,7 @@ public class PDFStreamEngine
             td.multiply(textMatrix, textMatrix);
 
             // determine the width of this character
-            // XXX: Note that if we handled vertical text, we should be using Y here
+            // TODO: Note that if we handled vertical text, we should be using Y here
             float startXPosition = textMatrixStart.getXPosition();
             float widthText = endXPosition - startXPosition;
 
@@ -526,7 +486,7 @@ public class PDFStreamEngine
 
             // process the decoded glyph
             processGlyph(textMatrixStart, new Point2D.Float(endXPosition, endYPosition),
-                    totalVerticalDisplacementDisp, widthText, unicode, charCodes,
+                    totalVerticalDisplacementDisp, widthText, code, unicode,
                     font, fontSizeText);
         }
     }
@@ -540,13 +500,13 @@ public class PDFStreamEngine
      * @param maxHeight the height of the glyph in device space
      * @param widthText the width of the glyph in text space
      * @param unicode the Unicode text for this glyph, or null. May be meaningless.
-     * @param charCodes array of internal PDF character codes for the glyph todo: should be 1 code?
+     * @param code internal PDF character code for the glyph
      * @param font the current font
      * @param fontSize font size in text space
      * @throws IOException if the glyph cannot be processed
      */
     protected void processGlyph(Matrix textMatrix, Point2D.Float end, float maxHeight,
-                                float widthText, String unicode, int[] charCodes, PDFont font,
+                                float widthText, int code, String unicode, PDFont font,
                                 float fontSize) throws IOException
     {
         // overridden in subclasses

Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java Sat Aug 23 02:34:35 2014
@@ -22,6 +22,7 @@ import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.pdmodel.PDResources;
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
 import org.apache.pdfbox.pdmodel.font.PDFont;
+import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
 import org.apache.pdfbox.pdmodel.font.PDType3Font;
 import org.apache.pdfbox.text.TextPosition;
 
@@ -81,9 +82,8 @@ public class PDFTextStreamEngine extends
      */
     @Override
     protected final void processGlyph(Matrix textMatrix, Point2D.Float end, float maxHeight,
-                                      float widthText, String unicode,
-                                      int[] charCodes, PDFont font, float fontSize)
-                                      throws IOException
+                                      float widthText, int code, String unicode, PDFont font,
+                                      float fontSize) throws IOException
     {
         // Note on variable names. There are three different units being used in this code.
         // Character sizes are given in glyph units, text locations are initially given in text
@@ -129,17 +129,27 @@ public class PDFTextStreamEngine extends
         float spaceWidthDisp = spaceWidthText * fontSizeText * horizontalScalingText *
                 textMatrix.getXScale()  * ctm.getXScale();
 
-        // PDFBOX-373: Replace a null entry with "?" so it is not printed as "(null)"
+        // when there is no Unicode mapping available, Acrobat simply coerces the character code
+        // into Unicode, so we do the same. Subclasses of PDFStreamEngine don't necessarily want
+        // this, which is why we leave it until this point in PDFTextStreamEngine.
         if (unicode == null)
         {
-            //unicode = "?";
-            // fixme: new: don't process non-unicode Text, as it's not meaningful.
-            return;
+            if (font instanceof PDSimpleFont)
+            {
+                char c = (char) code;
+                unicode = new String(new char[] { c });
+            }
+            else
+            {
+                // Acrobat doesn't seem to coerce composite fonts' character codes; instead, it
+                // skips them. See the "allah2.pdf" TestTextStripper file.
+                return;
+            }
         }
 
         processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(),
                 pageSize.getHeight(), textMatrix, end.x, end.y, maxHeight, widthText,
-                spaceWidthDisp, unicode, charCodes, font, fontSize,
+                spaceWidthDisp, unicode, new int[] { code } , font, fontSize,
                 (int)(fontSize * textMatrix.getXScale())));
     }
 

Modified: pdfbox/branches/no-awt/pdfbox/src/test/java/org/apache/pdfbox/ParallelParameterized.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/test/java/org/apache/pdfbox/ParallelParameterized.java?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/test/java/org/apache/pdfbox/ParallelParameterized.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/test/java/org/apache/pdfbox/ParallelParameterized.java Sat Aug 23 02:34:35 2014
@@ -31,7 +31,7 @@ import java.util.concurrent.TimeUnit;
  */
 public class ParallelParameterized extends Parameterized
 {
-    static final long TIMEOUT_SECS = 30;
+    static final long TIMEOUT_SECS = 60;
 
     private static class FixedThreadPoolScheduler implements RunnerScheduler
     {

Modified: pdfbox/branches/no-awt/pdfbox/src/test/resources/input/allah2.pdf-sorted.txt
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/test/resources/input/allah2.pdf-sorted.txt?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
Binary files - no diff available.

Modified: pdfbox/branches/no-awt/pdfbox/src/test/resources/input/allah2.pdf.txt
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/test/resources/input/allah2.pdf.txt?rev=1619956&r1=1619955&r2=1619956&view=diff
==============================================================================
Binary files - no diff available.