You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2020/02/05 16:37:12 UTC
svn commit: r1873653 -
/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java
Author: lehmi
Date: Wed Feb 5 16:37:12 2020
New Revision: 1873653
URL: http://svn.apache.org/viewvc?rev=1873653&view=rev
Log:
PDFBOX-4760: don't replace the word separator, as proposed by John Gesimondo
Modified:
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java
Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java?rev=1873653&r1=1873652&r2=1873653&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java Wed Feb 5 16:37:12 2020
@@ -671,10 +671,10 @@ public class PDFTextStripper extends Leg
}
// test if our TextPosition starts after a new word would be expected to start
if (expectedStartOfNextWordX != EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE
- && expectedStartOfNextWordX < positionX &&
- // only bother adding a space if the last character was not a space
- lastPosition.getTextPosition().getUnicode() != null
- && !lastPosition.getTextPosition().getUnicode().endsWith(" "))
+ && expectedStartOfNextWordX < positionX
+ // only bother adding a word separator if the last character was not a word separator
+ && lastPosition.getTextPosition().getUnicode() != null
+ && !lastPosition.getTextPosition().getUnicode().endsWith(wordSeparator))
{
line.add(LineItem.getWordSeparator());
}
@@ -722,8 +722,8 @@ public class PDFTextStripper extends Leg
private boolean overlap(float y1, float height1, float y2, float height2)
{
- return within(y1, y2, .1f) || y2 <= y1 && y2 >= y1 - height1
- || y1 <= y2 && y1 >= y2 - height2;
+ return within(y1, y2, .1f) || (y2 <= y1 && y1 - height1 - y2 < -(height1 * 0.1f))
+ || (y1 <= y2 && y2 - height2 - y1 < -(height2 * 0.1f));
}
/**