You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2019/07/25 17:43:19 UTC
svn commit: r1863757 -
/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java
Author: tilman
Date: Thu Jul 25 17:43:19 2019
New Revision: 1863757
URL: http://svn.apache.org/viewvc?rev=1863757&view=rev
Log:
PDFBOX-4597: improve javadoc
Modified:
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java
Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java?rev=1863757&r1=1863756&r2=1863757&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java Thu Jul 25 17:43:19 2019
@@ -195,8 +195,16 @@ public final class TextPosition
}
/**
- * Return the direction/orientation of the string in this object based on its text matrix.
- * @return The direction of the text (0, 90, 180, or 270)
+ * Return the direction/orientation of the string in this object based on its text matrix. Only
+ * angles of 0, 90, 180, or 270 are supported. To get other angles, use this code:
+ * <pre>
+ * TextPosition text = ...
+ * Matrix m = text.getTextMatrix().clone();
+ * m.concatenate(text.getFont().getFontMatrix());
+ * int angle = (int) Math.round(Math.toDegrees(Math.atan2(m.getShearY(), m.getScaleY())));
+ * </pre>
+ *
+ * @return The direction of the text (0, 90, 180, or 270).
*/
public float getDir()
{
@@ -269,7 +277,12 @@ public final class TextPosition
/**
* This will get the page rotation adjusted x position of the character.
- * This is adjusted based on page rotation so that the upper left is 0,0.
+ * This is adjusted based on page rotation so that the upper left is 0,0, unlike PDF
+ * coordinates, which start at the bottom left. See also
+ * <a href="https://stackoverflow.com/questions/57067372/">this answer by Michael Klink</a> for
+ * further details and
+ * <a href="https://issues.apache.org/jira/browse/PDFBOX-4597">PDFBOX-4597</a> for a sample
+ * file.
*
* @return The x coordinate of the character.
*/
@@ -282,6 +295,13 @@ public final class TextPosition
* This will get the text direction adjusted x position of the character.
* This is adjusted based on text direction so that the first character
* in that direction is in the upper left at 0,0.
+ * This method ignores the page rotation but takes the text rotation (see
+ * {@link #getDir() getDir()}) and adjusts the coordinates to AWT. This is useful when doing
+ * text extraction, to compare the glyph positions when imagining these to be horizontal. See also
+ * <a href="https://stackoverflow.com/questions/57067372/">this answer by Michael Klink</a> for
+ * further details and
+ * <a href="https://issues.apache.org/jira/browse/PDFBOX-4597">PDFBOX-4597</a> for a sample
+ * file.
*
* @return The x coordinate of the text.
*/
@@ -319,8 +339,13 @@ public final class TextPosition
}
/**
- * This will get the y position of the text, adjusted so that 0,0 is upper left and it is
- * adjusted based on the page rotation.
+ * This will get the page rotation adjusted y position of the character.
+ * This is adjusted based on page rotation so that the upper left is 0,0, unlike PDF
+ * coordinates, which start at the bottom left. See also
+ * <a href="https://stackoverflow.com/questions/57067372/">this answer by Michael Klink</a> for
+ * further details and
+ * <a href="https://issues.apache.org/jira/browse/PDFBOX-4597">PDFBOX-4597</a> for a sample
+ * file.
*
* @return The adjusted y coordinate of the character.
*/
@@ -332,6 +357,13 @@ public final class TextPosition
/**
* This will get the y position of the text, adjusted so that 0,0 is upper left and it is
* adjusted based on the text direction.
+ * This method ignores the page rotation but takes the
+ * text rotation and adjusts the coordinates to AWT. This is useful when doing text extraction,
+ * to compare the glyph positions when imagining these to be horizontal. See also
+ * <a href="https://stackoverflow.com/questions/57067372/">this answer by Michael Klink</a> for
+ * further details and
+ * <a href="https://issues.apache.org/jira/browse/PDFBOX-4597">PDFBOX-4597</a> for a sample
+ * file.
*
* @return The adjusted y coordinate of the character.
*/