You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2020/02/05 16:52:58 UTC
svn commit: r1873656 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java
Author: lehmi
Date: Wed Feb 5 16:52:58 2020
New Revision: 1873656
URL: http://svn.apache.org/viewvc?rev=1873656&view=rev
Log:
PDFBOX-4760: don't check for word separator at the end of a word if separator is empty
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java?rev=1873656&r1=1873655&r2=1873656&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java Wed Feb 5 16:52:58 2020
@@ -635,8 +635,10 @@ public class PDFTextStripper extends Leg
if (Float.compare(expectedStartOfNextWordX, EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE) != 0
&& expectedStartOfNextWordX < positionX
// only bother adding a word separator if the last character was not a word separator
- && lastPosition.getTextPosition().getUnicode() != null
- && !lastPosition.getTextPosition().getUnicode().endsWith(wordSeparator))
+ && (wordSeparator.isEmpty() || //
+ (lastPosition.getTextPosition().getUnicode() != null
+ && !lastPosition.getTextPosition().getUnicode()
+ .endsWith(wordSeparator))))
{
line.add(LineItem.getWordSeparator());
}