You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2010/12/12 15:03:22 UTC

svn commit: r1044823 - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/util/ main/java/org/apache/pdfbox/util/operator/ test/java/org/apache/pdfbox/ test/java/org/apache/pdfbox/util/

Author: lehmi
Date: Sun Dec 12 14:03:21 2010
New Revision: 1044823

URL: http://svn.apache.org/viewvc?rev=1044823&view=rev
Log:
PDFBOX-893: added some improvements concerning text extraction and matrices as suggested by Neil McErlean

Added:
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestMatrix.java
Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Matrix.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPosition.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/ShowTextGlyph.java
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/TestAll.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Matrix.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Matrix.java?rev=1044823&r1=1044822&r2=1044823&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Matrix.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Matrix.java Sun Dec 12 14:03:21 2010
@@ -26,19 +26,31 @@ import java.awt.geom.AffineTransform;
  */
 public class Matrix implements Cloneable
 {
-    private float[] single =
+    static final float[] default_single =
     {
         1,0,0,
         0,1,0,
         0,0,1
     };
 
+    private float[] single;
+
     /**
      * Constructor.
      */
     public Matrix()
     {
-        //default constructor
+    	single = new float[default_single.length];
+    	reset();
+    }
+    
+    /**
+     * This method resets the numbers in this Matrix to the original values, which are
+     * the values that a newly constructed Matrix would have.
+     */
+    public void reset()
+    {
+    	System.arraycopy(default_single, 0, single, 0, default_single.length);
     }
 
     /**
@@ -144,22 +156,69 @@ public class Matrix implements Cloneable
      */
     public Matrix multiply( Matrix b )
     {
-        Matrix result = new Matrix();
+        return this.multiply(b, new Matrix());
+    }
 
-        if (b != null && b.single != null) 
+    /**
+     * This method multiplies this Matrix with the specified other Matrix, storing the product in the specified
+     * result Matrix. By reusing Matrix instances like this, multiplication chains can be executed without having
+     * to create many temporary Matrix objects.
+     * <p/>
+     * It is allowed to have (other == this) or (result == this) or indeed (other == result) but if this is done,
+     * the backing float[] matrix values may be copied in order to ensure a correct product.
+     * 
+     * @param other the second operand Matrix in the multiplication
+     * @param result the Matrix instance into which the result should be stored. If result is null, a new Matrix
+     *               instance is created.
+     * @return the product of the two matrices.
+     */
+    public Matrix multiply( Matrix other, Matrix result )
+    {
+    	if (result == null)
+    	{
+    		result = new Matrix();
+    	}
+    	
+        if (other != null && other.single != null) 
         {
-            float[] bMatrix = b.single;
-            float[] resultMatrix = result.single;
-            resultMatrix[0] = single[0] * bMatrix[0] + single[1] * bMatrix[3] + single[2] * bMatrix[6];
-            resultMatrix[1] = single[0] * bMatrix[1] + single[1] * bMatrix[4] + single[2] * bMatrix[7];
-            resultMatrix[2] = single[0] * bMatrix[2] + single[1] * bMatrix[5] + single[2] * bMatrix[8];
-            resultMatrix[3] = single[3] * bMatrix[0] + single[4] * bMatrix[3] + single[5] * bMatrix[6];
-            resultMatrix[4] = single[3] * bMatrix[1] + single[4] * bMatrix[4] + single[5] * bMatrix[7];
-            resultMatrix[5] = single[3] * bMatrix[2] + single[4] * bMatrix[5] + single[5] * bMatrix[8];
-            resultMatrix[6] = single[6] * bMatrix[0] + single[7] * bMatrix[3] + single[8] * bMatrix[6];
-            resultMatrix[7] = single[6] * bMatrix[1] + single[7] * bMatrix[4] + single[8] * bMatrix[7];
-            resultMatrix[8] = single[6] * bMatrix[2] + single[7] * bMatrix[5] + single[8] * bMatrix[8];
+        	// the operands
+        	float[] thisOperand = this.single;
+        	float[] otherOperand = other.single;
+        	
+        	// We're multiplying 2 sets of floats together to produce a third, but we allow
+        	// any of these float[] instances to be the same objects.
+        	// There is the possibility then to overwrite one of the operands with result values
+        	// and therefore corrupt the result.
+        	
+        	// If either operand is the same Matrix instance as the result, its values
+        	// need to be copied first so they are not overwritten during the calculation.
+        	
+        	if (this == result)
+        	{
+        		final float[] thisOrigVals = new float[this.single.length];
+        		System.arraycopy(this.single, 0, thisOrigVals, 0, this.single.length);
+        		
+        		thisOperand = thisOrigVals;
+        	}
+        	if (other == result)
+        	{
+        		final float[] otherOrigVals = new float[other.single.length];
+        		System.arraycopy(other.single, 0, otherOrigVals, 0, other.single.length);
+        		
+        		otherOperand = otherOrigVals;
+        	}
+        	
+            result.single[0] = thisOperand[0] * otherOperand[0] + thisOperand[1] * otherOperand[3] + thisOperand[2] * otherOperand[6];
+            result.single[1] = thisOperand[0] * otherOperand[1] + thisOperand[1] * otherOperand[4] + thisOperand[2] * otherOperand[7];
+            result.single[2] = thisOperand[0] * otherOperand[2] + thisOperand[1] * otherOperand[5] + thisOperand[2] * otherOperand[8];
+            result.single[3] = thisOperand[3] * otherOperand[0] + thisOperand[4] * otherOperand[3] + thisOperand[5] * otherOperand[6];
+            result.single[4] = thisOperand[3] * otherOperand[1] + thisOperand[4] * otherOperand[4] + thisOperand[5] * otherOperand[7];
+            result.single[5] = thisOperand[3] * otherOperand[2] + thisOperand[4] * otherOperand[5] + thisOperand[5] * otherOperand[8];
+            result.single[6] = thisOperand[6] * otherOperand[0] + thisOperand[7] * otherOperand[3] + thisOperand[8] * otherOperand[6];
+            result.single[7] = thisOperand[6] * otherOperand[1] + thisOperand[7] * otherOperand[4] + thisOperand[8] * otherOperand[7];
+            result.single[8] = thisOperand[6] * otherOperand[2] + thisOperand[7] * otherOperand[5] + thisOperand[8] * otherOperand[8];
         }
+        
         return result;
     }
 

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java?rev=1044823&r1=1044822&r2=1044823&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java Sun Dec 12 14:03:21 2010
@@ -381,6 +381,11 @@ public class PDFStreamEngine
         float pageWidth = page.findMediaBox().getWidth();
 
         Matrix ctm = getGraphicsState().getCurrentTransformationMatrix();
+        Matrix textXctm = new Matrix();
+        Matrix textMatrixEnd = new Matrix();
+        Matrix td = new Matrix();
+        Matrix tempMatrix = new Matrix();
+
         int codeLength = 1;
         for( int i=0; i<string.length; i+=codeLength)
         {
@@ -434,14 +439,17 @@ public class PDFStreamEngine
             {
                 spacingText += wordSpacingText;
             }
+            textXctm = textMatrix.multiply(ctm, textXctm);
             // Convert textMatrix to display units
-            Matrix textMatrixStart = textStateParameters.multiply(textMatrix).multiply(ctm);
+            // We need to instantiate a new Matrix instance here as it is passed to the TextPosition constructor below.
+            Matrix textMatrixStart = textStateParameters.multiply(textXctm);
             
             // TODO : tx should be set for horizontal text and ty for vertical text
             // which seems to be specified in the font (not the direction in the matrix).
             float tx = ((characterHorizontalDisplacementText)*fontSizeText)*horizontalScalingText;
             float ty = 0;
-            Matrix td = new Matrix();
+            // reset the matrix instead of creating a new one
+            td.reset();
             td.setValue( 2, 0, tx );
             td.setValue( 2, 1, ty );
 
@@ -450,16 +458,20 @@ public class PDFStreamEngine
             // textMatrixEnd contains the coordinates of the end of the last glyph without 
             // taking characterSpacingText and spacintText into account, otherwise it'll be
             // impossible to detect new words within text extraction
-            Matrix textMatrixEnd = textStateParameters.multiply(td).multiply(textMatrix).multiply(ctm);
+            tempMatrix = textStateParameters.multiply(td, tempMatrix);
+            textMatrixEnd = tempMatrix.multiply(textXctm, textMatrixEnd);
+            final float endXPosition = textMatrixEnd.getXPosition();
+            final float endYPosition = textMatrixEnd.getYPosition();
 
             // add some spacing to the text matrix (see comment above)
             tx = ((characterHorizontalDisplacementText)*fontSizeText+characterSpacingText+spacingText)*horizontalScalingText;
             td.setValue( 2, 0, tx );
-            textMatrix = td.multiply( textMatrix );
+            textMatrix = td.multiply(textMatrix, textMatrix );
             
             // determine the width of this character
             // XXX: Note that if we handled vertical text, we should be using Y here
-            float widthText = textMatrixEnd.getXPosition() - textMatrixStart.getXPosition();
+            float startXPosition = textMatrixStart.getXPosition();
+            float widthText = endXPosition - startXPosition;
 
             //there are several cases where one character code will
             //output multiple characters.  For example "fi" or a
@@ -485,7 +497,8 @@ public class PDFStreamEngine
                             pageWidth,
                             pageHeight,
                             textMatrixStart,
-                            textMatrixEnd,
+                            endXPosition,
+                            endYPosition,
                             totalVerticalDisplacementDisp,
                             widthText,
                             spaceWidthDisp,

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPosition.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPosition.java?rev=1044823&r1=1044822&r2=1044823&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPosition.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextPosition.java Sun Dec 12 14:03:21 2010
@@ -127,6 +127,8 @@ public class TextPosition
      * @param currentFont The current for for this text position.
      * @param fontSizeValue The new font size.
      * @param fontSizeInPt The font size in pt units.
+     * 
+     * @deprecated Use {@link #TextPosition(int, float, float, Matrix, float, float, float, float, float, String, PDFont, float, int)} instead.
      */
     public TextPosition(
             int pageRotation,
@@ -143,10 +145,48 @@ public class TextPosition
             int fontSizeInPt
     )
     {
+    	this(pageRotation, pageWidth, pageHeight, textPositionSt,
+    			textPositionEnd.getXPosition(), textPositionEnd.getYPosition(),
+    			maxFontH, individualWidth, spaceWidth, string, currentFont, fontSizeValue, fontSizeInPt);
+    }
+
+    /**
+     * Constructor.
+     *
+     * @param pageRotation rotation of the page that the text is located in
+     * @param pageWidth width of the page that the text is located in
+     * @param pageHeight height of the page that the text is located in
+     * @param textPositionSt TextMatrix for start of text (in display units)
+     * @param endX X position of the end of the text (the value of getXPosition() on the end text matrix)
+     * @param endY Y position of the end of the text (the value of getYPosition() on the end text matrix)
+     * @param maxFontH Maximum height of text (in display units)
+     * @param individualWidth The width of the given character/string. (in ? units)
+     * @param spaceWidth The width of the space character. (in display units)
+     * @param string The character to be displayed.
+     * @param currentFont The current font for this text position.
+     * @param fontSizeValue The new font size.
+     * @param fontSizeInPt The font size in pt units.
+     */
+    public TextPosition(
+            int pageRotation,
+            float pageWidth,
+            float pageHeight,
+            Matrix textPositionSt,
+            float endX,
+            float endY,
+            float maxFontH,
+            float individualWidth,
+            float spaceWidth,
+            String string,
+            PDFont currentFont,
+            float fontSizeValue,
+            int fontSizeInPt
+    )
+    {
         this.textPos = textPositionSt;
 
-        this.endX = textPositionEnd.getXPosition();
-        this.endY = textPositionEnd.getYPosition();
+        this.endX = endX;
+        this.endY = endY;
 
         this.rot = pageRotation;
         // make sure it is 0 to 270 and no negative numbers
@@ -166,6 +206,7 @@ public class TextPosition
         this.fontSize = fontSizeValue;
         this.fontSizePt = fontSizeInPt;
     }
+    
     /**
      * Return the string of characters stored in this object.
      *

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/ShowTextGlyph.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/ShowTextGlyph.java?rev=1044823&r1=1044822&r2=1044823&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/ShowTextGlyph.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/ShowTextGlyph.java Sun Dec 12 14:03:21 2010
@@ -55,7 +55,7 @@ public class ShowTextGlyph extends Opera
                 adjustment=-(adjustment/1000)*horizontalScaling*fontsize;
                 // TODO vertical writing mode
                 adjMatrix.setValue( 2, 0, adjustment );
-                context.setTextMatrix( adjMatrix.multiply(context.getTextMatrix()) );
+                context.setTextMatrix( adjMatrix.multiply(context.getTextMatrix(), adjMatrix) );
             }
             else if( next instanceof COSString )
             {

Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/TestAll.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/TestAll.java?rev=1044823&r1=1044822&r2=1044823&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/TestAll.java (original)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/TestAll.java Sun Dec 12 14:03:21 2010
@@ -27,6 +27,7 @@ import org.apache.pdfbox.pdmodel.TestPDD
 import org.apache.pdfbox.pdmodel.TestPDDocumentInformation;
 import org.apache.pdfbox.pdmodel.interactive.form.TestFields;
 import org.apache.pdfbox.util.TestDateUtil;
+import org.apache.pdfbox.util.TestMatrix;
 
 /**
  * This is a holder for all test cases in the pdfbox system.
@@ -67,6 +68,7 @@ public class TestAll extends TestCase
     {
         TestSuite suite = new TestSuite();
         suite.addTest( TestDateUtil.suite() );
+        suite.addTest( TestMatrix.suite() );
         suite.addTestSuite( TestFilters.class );
         suite.addTest( TestFDF.suite() );
         suite.addTest( TestFields.suite() );

Added: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestMatrix.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestMatrix.java?rev=1044823&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestMatrix.java (added)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestMatrix.java Sun Dec 12 14:03:21 2010
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.util;
+
+import java.io.IOException;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+/**
+ * Test the {@link Matrix} class.
+ * @author Neil McErlean
+ * @since 1.4.0
+ */
+public class TestMatrix extends TestCase
+{
+    /**
+     * Test class constructor.
+     *
+     * @param name The name of the test class.
+     *
+     * @throws IOException If there is an error creating the test.
+     */
+    public TestMatrix( String name ) throws IOException
+    {
+        super( name );
+    }
+
+    public void testConstructionAndCopy() throws Exception
+    {
+    	Matrix m1 = new Matrix();
+    	assertMatrixIsPristine(m1);
+    	
+    	Matrix m2 = m1.copy();
+    	assertNotSame(m1, m2);
+    	assertMatrixIsPristine(m2);
+    }
+    
+    public void testMultiplication() throws Exception
+    {
+    	// This matrix will not change - we use it to drive the various multiplications.
+    	final Matrix testMatrix = new Matrix();
+    	
+    	// Create matrix with values
+    	// [ 0, 1, 2
+    	//   1, 2, 3
+    	//   2, 3, 4]
+    	for (int x = 0; x < 3; x++)
+    	{
+    		for (int y = 0; y < 3; y++)
+    		{
+    			testMatrix.setValue(x, y, x + y);
+    		}
+    	}
+    	
+    	Matrix m1 = testMatrix.copy();
+    	Matrix m2 = testMatrix.copy();
+
+    	// Multiply two matrices together producing a new result matrix.
+    	Matrix product = m1.multiply(m2);
+    	
+    	assertNotSame(m1, product);
+    	assertNotSame(m2, product);
+
+    	// Operand 1 should not have changed
+    	assertMatrixValuesEqualTo(new float[] {0,  1,  2,
+                                               1,  2,  3,
+                                               2,  3,  4}, m1);
+    	// Operand 2 should not have changed
+    	assertMatrixValuesEqualTo(new float[] {0,  1,  2,
+                                               1,  2,  3,
+                                               2,  3,  4}, m2);
+    	assertMatrixValuesEqualTo(new float[] {5,  8,  11,
+                                               8,  14, 20,
+                                               11, 20, 29}, product);
+    	product.reset();
+    	assertMatrixIsPristine(product);
+    	
+
+    	
+    	// Multiply two matrices together with the result being written to a third matrix
+    	// (Any existing values there will be overwritten).
+    	Matrix resultMatrix = new Matrix();
+    	
+    	Matrix retVal = m1.multiply(m2, resultMatrix);
+    	assertSame(retVal, resultMatrix);
+    	// Operand 1 should not have changed
+    	assertMatrixValuesEqualTo(new float[] {0,  1,  2,
+                                               1,  2,  3,
+                                               2,  3,  4}, m1);
+    	// Operand 2 should not have changed
+    	assertMatrixValuesEqualTo(new float[] {0,  1,  2,
+                                               1,  2,  3,
+                                               2,  3,  4}, m2);
+    	assertMatrixValuesEqualTo(new float[] {5,  8,  11,
+			   				                   8,  14, 20,
+			   			                       11, 20, 29}, resultMatrix);
+    	
+
+    	
+    	// Multiply two matrices together with the result being written into the other matrix
+    	retVal = m1.multiply(m2, m2);
+    	assertSame(retVal, m2);
+    	// Operand 1 should not have changed
+    	assertMatrixValuesEqualTo(new float[] {0,  1,  2,
+                                               1,  2,  3,
+                                               2,  3,  4}, m1);
+    	assertMatrixValuesEqualTo(new float[] {5,  8,  11,
+			   				                   8,  14, 20,
+			   			                       11, 20, 29}, retVal);
+
+
+    	
+    	// Multiply two matrices together with the result being written into 'this' matrix
+    	m1 = testMatrix.copy();
+    	m2 = testMatrix.copy();
+    	
+    	retVal = m1.multiply(m2, m1);
+    	assertSame(retVal, m1);
+    	// Operand 2 should not have changed
+    	assertMatrixValuesEqualTo(new float[] {0,  1,  2,
+                                               1,  2,  3,
+                                               2,  3,  4}, m2);
+    	assertMatrixValuesEqualTo(new float[] {5,  8,  11,
+			   				                   8,  14, 20,
+			   			                       11, 20, 29}, retVal);
+
+
+    	
+    	// Multiply the same matrix with itself with the result being written into 'this' matrix
+    	m1 = testMatrix.copy();
+
+    	retVal = m1.multiply(m1, m1);
+    	assertSame(retVal, m1);
+    	assertMatrixValuesEqualTo(new float[] {5,  8,  11,
+			   				                   8,  14, 20,
+			   			                       11, 20, 29}, retVal);
+    }
+    
+    /**
+     * This method asserts that the matrix values for the given {@link Matrix} object are equal
+     * to the pristine, or original, values.
+     * @param m the Matrix to test.
+     */
+    private void assertMatrixIsPristine(Matrix m)
+    {
+        assertMatrixValuesEqualTo(new float[] {1 ,0 ,0,
+        		                               0, 1, 0,
+        		                               0, 0, 1}, m);
+    }
+
+    /**
+     * This method asserts that the matrix values for the given {@link Matrix} object have
+     * the specified values.
+     * @param values the expected values
+     * @param m the matrix to test
+     */
+	private void assertMatrixValuesEqualTo(float[] values, Matrix m) {
+		float delta = 0.00001f;
+    	for (int i = 0; i < values.length; i++)
+    	{
+    		// Need to convert the straight index into a (row, column) co-ordinate.
+    		int row = (int)Math.floor(i / 3);
+			int column = i % 3;
+			StringBuilder failureMsg = new StringBuilder();
+			failureMsg.append("Incorrect value for matrix[")
+			          .append(row).append(",").append(column).append("]");
+			assertEquals(failureMsg.toString(), values[i], m.getValue(row, column), delta);
+    	}
+	}
+    
+    /**
+     * Set the tests in the suite for this test class.
+     *
+     * @return the Suite.
+     */
+    public static Test suite()
+    {
+        return new TestSuite( TestMatrix.class );
+    }
+
+    /**
+     * Command line execution.
+     *
+     * @param args Command line arguments.
+     */
+    public static void main( String[] args )
+    {
+        String[] arg = {TestMatrix.class.getName() };
+        junit.textui.TestRunner.main( arg );
+    }
+}