You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ja...@apache.org on 2014/03/10 07:07:39 UTC

svn commit: r1575836 - in /pdfbox/trunk: examples/src/main/java/org/apache/pdfbox/examples/util/ pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/ pdfbox/src/main/java/org/apache/pdfbox/rendering/ pdfbox/src/main/java/or...

Author: jahewson
Date: Mon Mar 10 06:07:38 2014
New Revision: 1575836

URL: http://svn.apache.org/r1575836
Log:
PDFBOX-1962: move text handling classes from 'util' to 'text' package

Added:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/ICU4JImpl.java
      - copied, changed from r1575620, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/ICU4JImpl.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PositionWrapper.java
      - copied, changed from r1575620, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PositionWrapper.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextNormalize.java
      - copied, changed from r1575620, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextNormalize.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java
      - copied, changed from r1575620, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPosition.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPositionComparator.java
      - copied, changed from r1575620, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPositionComparator.java
Removed:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/ICU4JImpl.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PositionWrapper.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextNormalize.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPosition.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPositionComparator.java
Modified:
    pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDMarkedContent.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripperByArea.java
    pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFText2HTML.java

Modified: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java?rev=1575836&r1=1575835&r2=1575836&view=diff
==============================================================================
--- pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java (original)
+++ pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java Mon Mar 10 06:07:38 2014
@@ -23,7 +23,7 @@ import org.apache.pdfbox.pdmodel.PDDocum
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.pdmodel.common.PDStream;
 import org.apache.pdfbox.util.PDFTextStripper;
-import org.apache.pdfbox.util.TextPosition;
+import org.apache.pdfbox.text.TextPosition;
 
 import java.io.IOException;
 

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDMarkedContent.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDMarkedContent.java?rev=1575836&r1=1575835&r2=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDMarkedContent.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDMarkedContent.java Mon Mar 10 06:07:38 2014
@@ -23,7 +23,7 @@ import org.apache.pdfbox.cos.COSDictiona
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.pdmodel.documentinterchange.taggedpdf.PDArtifactMarkedContent;
 import org.apache.pdfbox.pdmodel.graphics.PDXObject;
-import org.apache.pdfbox.util.TextPosition;
+import org.apache.pdfbox.text.TextPosition;
 
 /**
  * A marked content.

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java?rev=1575836&r1=1575835&r2=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java Mon Mar 10 06:07:38 2014
@@ -77,7 +77,7 @@ import org.apache.pdfbox.pdmodel.graphic
 import org.apache.pdfbox.util.Matrix;
 import org.apache.pdfbox.util.PDFStreamEngine;
 import org.apache.pdfbox.util.ResourceLoader;
-import org.apache.pdfbox.util.TextPosition;
+import org.apache.pdfbox.text.TextPosition;
 
 /**
  * This will paint a page in a PDF document to a graphics context.

Copied: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/ICU4JImpl.java (from r1575620, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/ICU4JImpl.java)
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/ICU4JImpl.java?p2=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/ICU4JImpl.java&p1=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/ICU4JImpl.java&r1=1575620&r2=1575836&rev=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/ICU4JImpl.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/ICU4JImpl.java Mon Mar 10 06:07:38 2014
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.pdfbox.util;
+package org.apache.pdfbox.text;
 
 import com.ibm.icu.text.Bidi;
 import com.ibm.icu.text.Normalizer;
@@ -49,7 +49,7 @@ public class ICU4JImpl 
 
     /**
      * Takes a line of text in presentation order and converts it to logical order.
-     * @see TextNormalize#makeLineLogicalOrder(String, boolean)     
+     * @see org.apache.pdfbox.text.TextNormalize#makeLineLogicalOrder(String, boolean)
      *  
      * @param str String to convert
      * @param isRtlDominant RTL (right-to-left) will be the dominant text direction
@@ -69,7 +69,7 @@ public class ICU4JImpl 
 
     /**
      * Normalize presentation forms of characters to the separate parts. 
-     * @see TextNormalize#normalizePres(String)
+     * @see org.apache.pdfbox.text.TextNormalize#normalizePres(String)
      * 
      * @param str String to normalize
      * @return Normalized form

Copied: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PositionWrapper.java (from r1575620, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PositionWrapper.java)
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PositionWrapper.java?p2=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PositionWrapper.java&p1=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PositionWrapper.java&r1=1575620&r2=1575836&rev=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PositionWrapper.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PositionWrapper.java Mon Mar 10 06:07:38 2014
@@ -14,8 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.pdfbox.util;
-
+package org.apache.pdfbox.text;
 
 /**
  * wrapper of TextPosition that adds flags to track
@@ -28,7 +27,6 @@ package org.apache.pdfbox.util;
  * it makes sense to put these flags in this separate class.
  * </p>
  * @author m.martinez@ll.mit.edu
- *
  */
 public class PositionWrapper
 {

Copied: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextNormalize.java (from r1575620, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextNormalize.java)
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextNormalize.java?p2=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextNormalize.java&p1=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextNormalize.java&r1=1575620&r2=1575836&rev=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextNormalize.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextNormalize.java Mon Mar 10 06:07:38 2014
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.pdfbox.util;
+package org.apache.pdfbox.text;
 
 import java.util.HashMap;
 

Copied: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java (from r1575620, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPosition.java)
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java?p2=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java&p1=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPosition.java&r1=1575620&r2=1575836&rev=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPosition.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java Mon Mar 10 06:07:38 2014
@@ -14,10 +14,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.pdfbox.util;
+package org.apache.pdfbox.text;
 
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.pdmodel.font.PDFont;
+import org.apache.pdfbox.util.Matrix;
 
 /**
  * This represents a string and a position on the screen of those characters.

Copied: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPositionComparator.java (from r1575620, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPositionComparator.java)
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPositionComparator.java?p2=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPositionComparator.java&p1=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPositionComparator.java&r1=1575620&r2=1575836&rev=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPositionComparator.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPositionComparator.java Mon Mar 10 06:07:38 2014
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.pdfbox.util;
+package org.apache.pdfbox.text;
 
 import java.util.Comparator;
 

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java?rev=1575836&r1=1575835&r2=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java Mon Mar 10 06:07:38 2014
@@ -28,6 +28,8 @@ import org.apache.pdfbox.cos.COSDictiona
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
 import org.apache.pdfbox.pdmodel.graphics.PDXObject;
+import org.apache.pdfbox.text.TextNormalize;
+import org.apache.pdfbox.text.TextPosition;
 
 /**
  * This is an stream engine to extract the marked content of a pdf.

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java?rev=1575836&r1=1575835&r2=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java Mon Mar 10 06:07:38 2014
@@ -45,6 +45,7 @@ import org.apache.pdfbox.pdmodel.graphic
 import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState;
 import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace;
 import org.apache.pdfbox.pdmodel.graphics.PDXObject;
+import org.apache.pdfbox.text.TextPosition;
 import org.apache.pdfbox.util.operator.OperatorProcessor;
 import org.apache.pdfbox.util.operator.PDFOperator;
 

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java?rev=1575836&r1=1575835&r2=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java Mon Mar 10 06:07:38 2014
@@ -46,6 +46,10 @@ import org.apache.pdfbox.pdmodel.common.
 import org.apache.pdfbox.pdmodel.common.PDStream;
 import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
 import org.apache.pdfbox.pdmodel.interactive.pagenavigation.PDThreadBead;
+import org.apache.pdfbox.text.PositionWrapper;
+import org.apache.pdfbox.text.TextNormalize;
+import org.apache.pdfbox.text.TextPosition;
+import org.apache.pdfbox.text.TextPositionComparator;
 
 
 /**

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripperByArea.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripperByArea.java?rev=1575836&r1=1575835&r2=1575836&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripperByArea.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripperByArea.java Mon Mar 10 06:07:38 2014
@@ -30,6 +30,7 @@ import java.util.Vector;
 import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.pdmodel.common.PDStream;
+import org.apache.pdfbox.text.TextPosition;
 
 /**
  * This will extract text from a specified region in the PDF.
@@ -41,7 +42,7 @@ public class PDFTextStripperByArea exten
 {
     private List<String> regions = new ArrayList<String>();
     private Map<String,Rectangle2D> regionArea = new HashMap<String,Rectangle2D>();
-    private Map<String,Vector<ArrayList<TextPosition>>> regionCharacterList = 
+    private Map<String,Vector<ArrayList<TextPosition>>> regionCharacterList =
         new HashMap<String,Vector<ArrayList<TextPosition>>>();
     private Map<String,StringWriter> regionText = new HashMap<String,StringWriter>();
 

Modified: pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFText2HTML.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFText2HTML.java?rev=1575836&r1=1575835&r2=1575836&view=diff
==============================================================================
--- pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFText2HTML.java (original)
+++ pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFText2HTML.java Mon Mar 10 06:07:38 2014
@@ -26,7 +26,7 @@ import java.util.Set;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.font.PDFontDescriptor;
 import org.apache.pdfbox.util.PDFTextStripper;
-import org.apache.pdfbox.util.TextPosition;
+import org.apache.pdfbox.text.TextPosition;
 
 /**
  * Wrap stripped text in simple HTML, trying to form HTML paragraphs. Paragraphs