You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ja...@apache.org on 2014/08/27 03:33:36 UTC

svn commit: r1620775 - in /pdfbox/branches/no-awt: fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java

Author: jahewson
Date: Wed Aug 27 01:33:35 2014
New Revision: 1620775

URL: http://svn.apache.org/r1620775
Log:
PDFBOX-2262: Apply word space to single-byte codes only

Modified:
    pdfbox/branches/no-awt/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java
    pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java

Modified: pdfbox/branches/no-awt/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java?rev=1620775&r1=1620774&r2=1620775&view=diff
==============================================================================
--- pdfbox/branches/no-awt/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java (original)
+++ pdfbox/branches/no-awt/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java Wed Aug 27 01:33:35 2014
@@ -23,7 +23,6 @@ import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
-import java.util.Iterator;
 
 /**
  * This class represents a CMap file.
@@ -288,28 +287,6 @@ public class CMap
         this.codespaceRanges.addAll(cmap.codespaceRanges);
         this.charToUnicode.putAll(cmap.charToUnicode);
     }
- 
-    /**
-     *  Check whether the given byte array is in codespace ranges or not.
-     *  
-     *  @param code The byte array to look for in the codespace range.
-     *  @param code The byte array to look for in the codespace range.isInCodeSpaceRanges
-     *
-     *  @return true if the given byte array is in the codespace range.
-     */
-    public boolean isInCodeSpaceRanges( byte[] code )
-    {
-        Iterator<CodespaceRange> it = codespaceRanges.iterator();
-        while ( it.hasNext() ) 
-        {
-            CodespaceRange range = it.next();
-            if ( range != null && range.matches(code) )
-            {
-                return true;
-            }
-        }
-        return false;
-    }
     
     /**
      * Returns the WMode of a CMap.

Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java?rev=1620775&r1=1620774&r2=1620775&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java Wed Aug 27 01:33:35 2014
@@ -406,7 +406,9 @@ public class PDFStreamEngine
         InputStream in = new ByteArrayInputStream(string);
         while (in.available() > 0)
         {
+            int before = in.available();
+            int code = font.readCode(in);
+            int codeLength = before - in.available(); // bytes consumed by readCode (available() shrinks as the stream is read)
             String unicode = font.toUnicode(code);
 
             // TODO: handle horizontal displacement
@@ -418,29 +420,18 @@ public class PDFStreamEngine
             charHorizontalDisplacementText = charHorizontalDisplacementText * fontMatrixXScaling;
             charVerticalDisplacementText = charVerticalDisplacementText * fontMatrixYScaling;
 
-            // PDF Spec - 5.5.2 Word Spacing
-            //
-            // Word spacing works the same was as character spacing, but applies
-            // only to the space character, code 32.
-            //
-            // Note: Word spacing is applied to every occurrence of the single-byte
-            // character code 32 in a string. This can occur when using a simple
-            // font or a composite font that defines code 32 as a single-byte code.
-            // It does not apply to occurrences of the byte value 32 in multiple-byte
-            // codes.
-            //
-            // RDD - My interpretation of this is that only character code 32's that
-            // encode to spaces should have word spacing applied. Cases have been
-            // observed where a font has a space character with a character code
-            // other than 32, and where word spacing (Tw) was used. In these cases,
-            // applying word spacing to either the non-32 space or to the character
-            // code 32 non-space resulted in errors consistent with this interpretation.
-            //
+            // Word spacing shall be applied to every occurrence of the single-byte character code
+            // 32 in a string when using a simple font or a composite font that defines code 32 as
+            // a single-byte code.
             float spacingText = 0;
-            if (code == 32)
+            if (codeLength == 1)
             {
-                spacingText += wordSpacingText;
+                if (code == 32)
+                {
+                    spacingText += wordSpacingText;
+                }
             }
+
             textMatrix.multiply(ctm, textXctm);
             // Convert textMatrix to display units
             // We need to instantiate a new Matrix instance here as it is passed to the TextPosition