You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2020/02/05 16:37:12 UTC

svn commit: r1873653 - /pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java

Author: lehmi
Date: Wed Feb  5 16:37:12 2020
New Revision: 1873653

URL: http://svn.apache.org/viewvc?rev=1873653&view=rev
Log:
PDFBOX-4760: don't replace word separator, as proposed by John Gesimondo

Modified:
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java?rev=1873653&r1=1873652&r2=1873653&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java Wed Feb  5 16:37:12 2020
@@ -671,10 +671,10 @@ public class PDFTextStripper extends Leg
                     }
                     // test if our TextPosition starts after a new word would be expected to start
                     if (expectedStartOfNextWordX != EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE
-                            && expectedStartOfNextWordX < positionX &&
-                            // only bother adding a space if the last character was not a space
-                            lastPosition.getTextPosition().getUnicode() != null
-                            && !lastPosition.getTextPosition().getUnicode().endsWith(" "))
+                            && expectedStartOfNextWordX < positionX
+                            // only bother adding a word separator if the last character was not a word separator
+                            && lastPosition.getTextPosition().getUnicode() != null
+                            && !lastPosition.getTextPosition().getUnicode().endsWith(wordSeparator))
                     {
                         line.add(LineItem.getWordSeparator());
                     }
@@ -722,8 +722,8 @@ public class PDFTextStripper extends Leg
 
     private boolean overlap(float y1, float height1, float y2, float height2)
     {
-        return within(y1, y2, .1f) || y2 <= y1 && y2 >= y1 - height1
-                || y1 <= y2 && y1 >= y2 - height2;
+        return within(y1, y2, .1f) || (y2 <= y1 && y1 - height1 - y2 < -(height1 * 0.1f))
+                || (y1 <= y2 && y2 - height2 - y1 < -(height2 * 0.1f));
     }
 
     /**