You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ja...@apache.org on 2014/08/27 03:33:36 UTC
svn commit: r1620775 - in /pdfbox/branches/no-awt:
fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java
pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
Author: jahewson
Date: Wed Aug 27 01:33:35 2014
New Revision: 1620775
URL: http://svn.apache.org/r1620775
Log:
PDFBOX-2262: Apply word space to single-byte codes only
Modified:
pdfbox/branches/no-awt/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java
pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
Modified: pdfbox/branches/no-awt/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java?rev=1620775&r1=1620774&r2=1620775&view=diff
==============================================================================
--- pdfbox/branches/no-awt/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java (original)
+++ pdfbox/branches/no-awt/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java Wed Aug 27 01:33:35 2014
@@ -23,7 +23,6 @@ import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-import java.util.Iterator;
/**
* This class represents a CMap file.
@@ -288,28 +287,6 @@ public class CMap
this.codespaceRanges.addAll(cmap.codespaceRanges);
this.charToUnicode.putAll(cmap.charToUnicode);
}
-
- /**
- * Check whether the given byte array is in codespace ranges or not.
- *
- * @param code The byte array to look for in the codespace range.
- * @param code The byte array to look for in the codespace range.isInCodeSpaceRanges
- *
- * @return true if the given byte array is in the codespace range.
- */
- public boolean isInCodeSpaceRanges( byte[] code )
- {
- Iterator<CodespaceRange> it = codespaceRanges.iterator();
- while ( it.hasNext() )
- {
- CodespaceRange range = it.next();
- if ( range != null && range.matches(code) )
- {
- return true;
- }
- }
- return false;
- }
/**
* Returns the WMode of a CMap.
Modified: pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java?rev=1620775&r1=1620774&r2=1620775&view=diff
==============================================================================
--- pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java (original)
+++ pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java Wed Aug 27 01:33:35 2014
@@ -406,7 +406,9 @@ public class PDFStreamEngine
InputStream in = new ByteArrayInputStream(string);
while (in.available() > 0)
{
+ int before = in.available();
int code = font.readCode(in);
+ int codeLength = before - in.available();
String unicode = font.toUnicode(code);
// TODO: handle horizontal displacement
@@ -418,29 +420,18 @@ public class PDFStreamEngine
charHorizontalDisplacementText = charHorizontalDisplacementText * fontMatrixXScaling;
charVerticalDisplacementText = charVerticalDisplacementText * fontMatrixYScaling;
- // PDF Spec - 5.5.2 Word Spacing
- //
- // Word spacing works the same way as character spacing, but applies
- // only to the space character, code 32.
- //
- // Note: Word spacing is applied to every occurrence of the single-byte
- // character code 32 in a string. This can occur when using a simple
- // font or a composite font that defines code 32 as a single-byte code.
- // It does not apply to occurrences of the byte value 32 in multiple-byte
- // codes.
- //
- // RDD - My interpretation of this is that only character code 32's that
- // encode to spaces should have word spacing applied. Cases have been
- // observed where a font has a space character with a character code
- // other than 32, and where word spacing (Tw) was used. In these cases,
- // applying word spacing to either the non-32 space or to the character
- // code 32 non-space resulted in errors consistent with this interpretation.
- //
+ // Word spacing shall be applied to every occurrence of the single-byte character code
+ // 32 in a string when using a simple font or a composite font that defines code 32 as
+ // a single-byte code.
float spacingText = 0;
- if (code == 32)
+ if (codeLength == 1)
{
- spacingText += wordSpacingText;
+ if (code == 32)
+ {
+ spacingText += wordSpacingText;
+ }
}
+
textMatrix.multiply(ctm, textXctm);
// Convert textMatrix to display units
// We need to instantiate a new Matrix instance here as it is passed to the TextPosition