You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2020/02/05 16:52:58 UTC

svn commit: r1873656 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java

Author: lehmi
Date: Wed Feb  5 16:52:58 2020
New Revision: 1873656

URL: http://svn.apache.org/viewvc?rev=1873656&view=rev
Log:
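PDFBOX-4760: don't check whether the last character ends with the word separator if the separator is empty (endsWith("") is always true, so the check prevented the word separator from ever being added)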

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java?rev=1873656&r1=1873655&r2=1873656&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java Wed Feb  5 16:52:58 2020
@@ -635,8 +635,10 @@ public class PDFTextStripper extends Leg
                     if (Float.compare(expectedStartOfNextWordX, EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE) != 0
                             && expectedStartOfNextWordX < positionX
                             // only bother adding a word separator if the last character was not a word separator
-                            && lastPosition.getTextPosition().getUnicode() != null
-                            && !lastPosition.getTextPosition().getUnicode().endsWith(wordSeparator))
+                            && (wordSeparator.isEmpty() || //
+                                    (lastPosition.getTextPosition().getUnicode() != null
+                                            && !lastPosition.getTextPosition().getUnicode()
+                                                    .endsWith(wordSeparator))))
                     {
                         line.add(LineItem.getWordSeparator());
                     }