You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ja...@apache.org on 2014/03/10 07:07:39 UTC
svn commit: r1575836 - in /pdfbox/trunk:
examples/src/main/java/org/apache/pdfbox/examples/util/
pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/
pdfbox/src/main/java/org/apache/pdfbox/rendering/ pdfbox/src/main/java/or...
Author: jahewson
Date: Mon Mar 10 06:07:38 2014
New Revision: 1575836
URL: http://svn.apache.org/r1575836
Log:
PDFBOX-1962: move text handling classes from 'util' to 'text' package
Added:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/ICU4JImpl.java
- copied, changed from r1575620, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/ICU4JImpl.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PositionWrapper.java
- copied, changed from r1575620, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PositionWrapper.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextNormalize.java
- copied, changed from r1575620, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextNormalize.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java
- copied, changed from r1575620, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPosition.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPositionComparator.java
- copied, changed from r1575620, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPositionComparator.java
Removed:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/ICU4JImpl.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PositionWrapper.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextNormalize.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPosition.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPositionComparator.java
Modified:
pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDMarkedContent.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripperByArea.java
pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFText2HTML.java
Modified: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java?rev=1575836&r1=1575835&r2=1575836&view=diff
==============================================================================
--- pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java (original)
+++ pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java Mon Mar 10 06:07:38 2014
@@ -23,7 +23,7 @@ import org.apache.pdfbox.pdmodel.PDDocum
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.util.PDFTextStripper;
-import org.apache.pdfbox.util.TextPosition;
+import org.apache.pdfbox.text.TextPosition;
import java.io.IOException;
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDMarkedContent.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDMarkedContent.java?rev=1575836&r1=1575835&r2=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDMarkedContent.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDMarkedContent.java Mon Mar 10 06:07:38 2014
@@ -23,7 +23,7 @@ import org.apache.pdfbox.cos.COSDictiona
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.documentinterchange.taggedpdf.PDArtifactMarkedContent;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
-import org.apache.pdfbox.util.TextPosition;
+import org.apache.pdfbox.text.TextPosition;
/**
* A marked content.
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java?rev=1575836&r1=1575835&r2=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java Mon Mar 10 06:07:38 2014
@@ -77,7 +77,7 @@ import org.apache.pdfbox.pdmodel.graphic
import org.apache.pdfbox.util.Matrix;
import org.apache.pdfbox.util.PDFStreamEngine;
import org.apache.pdfbox.util.ResourceLoader;
-import org.apache.pdfbox.util.TextPosition;
+import org.apache.pdfbox.text.TextPosition;
/**
* This will paint a page in a PDF document to a graphics context.
Copied: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/ICU4JImpl.java (from r1575620, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/ICU4JImpl.java)
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/ICU4JImpl.java?p2=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/ICU4JImpl.java&p1=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/ICU4JImpl.java&r1=1575620&r2=1575836&rev=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/ICU4JImpl.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/ICU4JImpl.java Mon Mar 10 06:07:38 2014
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.pdfbox.util;
+package org.apache.pdfbox.text;
import com.ibm.icu.text.Bidi;
import com.ibm.icu.text.Normalizer;
@@ -49,7 +49,7 @@ public class ICU4JImpl
/**
* Takes a line of text in presentation order and converts it to logical order.
- * @see TextNormalize#makeLineLogicalOrder(String, boolean)
+ * @see org.apache.pdfbox.text.TextNormalize#makeLineLogicalOrder(String, boolean)
*
* @param str String to convert
* @param isRtlDominant RTL (right-to-left) will be the dominant text direction
@@ -69,7 +69,7 @@ public class ICU4JImpl
/**
* Normalize presentation forms of characters to the separate parts.
- * @see TextNormalize#normalizePres(String)
+ * @see org.apache.pdfbox.text.TextNormalize#normalizePres(String)
*
* @param str String to normalize
* @return Normalized form
Copied: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PositionWrapper.java (from r1575620, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PositionWrapper.java)
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PositionWrapper.java?p2=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PositionWrapper.java&p1=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PositionWrapper.java&r1=1575620&r2=1575836&rev=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PositionWrapper.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PositionWrapper.java Mon Mar 10 06:07:38 2014
@@ -14,8 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.pdfbox.util;
-
+package org.apache.pdfbox.text;
/**
* wrapper of TextPosition that adds flags to track
@@ -28,7 +27,6 @@ package org.apache.pdfbox.util;
* it makes sense to put these flags in this separate class.
* </p>
* @author m.martinez@ll.mit.edu
- *
*/
public class PositionWrapper
{
Copied: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextNormalize.java (from r1575620, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextNormalize.java)
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextNormalize.java?p2=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextNormalize.java&p1=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextNormalize.java&r1=1575620&r2=1575836&rev=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextNormalize.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextNormalize.java Mon Mar 10 06:07:38 2014
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.pdfbox.util;
+package org.apache.pdfbox.text;
import java.util.HashMap;
Copied: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java (from r1575620, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPosition.java)
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java?p2=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java&p1=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPosition.java&r1=1575620&r2=1575836&rev=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPosition.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java Mon Mar 10 06:07:38 2014
@@ -14,10 +14,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.pdfbox.util;
+package org.apache.pdfbox.text;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.font.PDFont;
+import org.apache.pdfbox.util.Matrix;
/**
* This represents a string and a position on the screen of those characters.
Copied: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPositionComparator.java (from r1575620, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPositionComparator.java)
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPositionComparator.java?p2=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPositionComparator.java&p1=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPositionComparator.java&r1=1575620&r2=1575836&rev=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPositionComparator.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPositionComparator.java Mon Mar 10 06:07:38 2014
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.pdfbox.util;
+package org.apache.pdfbox.text;
import java.util.Comparator;
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java?rev=1575836&r1=1575835&r2=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java Mon Mar 10 06:07:38 2014
@@ -28,6 +28,8 @@ import org.apache.pdfbox.cos.COSDictiona
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
+import org.apache.pdfbox.text.TextNormalize;
+import org.apache.pdfbox.text.TextPosition;
/**
* This is an stream engine to extract the marked content of a pdf.
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java?rev=1575836&r1=1575835&r2=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java Mon Mar 10 06:07:38 2014
@@ -45,6 +45,7 @@ import org.apache.pdfbox.pdmodel.graphic
import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState;
import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
+import org.apache.pdfbox.text.TextPosition;
import org.apache.pdfbox.util.operator.OperatorProcessor;
import org.apache.pdfbox.util.operator.PDFOperator;
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java?rev=1575836&r1=1575835&r2=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java Mon Mar 10 06:07:38 2014
@@ -46,6 +46,10 @@ import org.apache.pdfbox.pdmodel.common.
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
import org.apache.pdfbox.pdmodel.interactive.pagenavigation.PDThreadBead;
+import org.apache.pdfbox.text.PositionWrapper;
+import org.apache.pdfbox.text.TextNormalize;
+import org.apache.pdfbox.text.TextPosition;
+import org.apache.pdfbox.text.TextPositionComparator;
/**
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripperByArea.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripperByArea.java?rev=1575836&r1=1575835&r2=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripperByArea.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripperByArea.java Mon Mar 10 06:07:38 2014
@@ -30,6 +30,7 @@ import java.util.Vector;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDStream;
+import org.apache.pdfbox.text.TextPosition;
/**
* This will extract text from a specified region in the PDF.
@@ -41,7 +42,7 @@ public class PDFTextStripperByArea exten
{
private List<String> regions = new ArrayList<String>();
private Map<String,Rectangle2D> regionArea = new HashMap<String,Rectangle2D>();
- private Map<String,Vector<ArrayList<TextPosition>>> regionCharacterList =
+ private Map<String,Vector<ArrayList<TextPosition>>> regionCharacterList =
new HashMap<String,Vector<ArrayList<TextPosition>>>();
private Map<String,StringWriter> regionText = new HashMap<String,StringWriter>();
Modified: pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFText2HTML.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFText2HTML.java?rev=1575836&r1=1575835&r2=1575836&view=diff
==============================================================================
--- pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFText2HTML.java (original)
+++ pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFText2HTML.java Mon Mar 10 06:07:38 2014
@@ -26,7 +26,7 @@ import java.util.Set;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.font.PDFontDescriptor;
import org.apache.pdfbox.util.PDFTextStripper;
-import org.apache.pdfbox.util.TextPosition;
+import org.apache.pdfbox.text.TextPosition;
/**
* Wrap stripped text in simple HTML, trying to form HTML paragraphs. Paragraphs