You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ms...@apache.org on 2015/10/02 08:01:02 UTC

svn commit: r1706348 - /pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/BidiTest.java

Author: msahyoun
Date: Fri Oct  2 06:01:02 2015
New Revision: 1706348

URL: http://svn.apache.org/viewvc?rev=1706348&view=rev
Log:
PDFBOX-2252: revised unit test to deal with platform differences

Modified:
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/BidiTest.java

Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/BidiTest.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/BidiTest.java?rev=1706348&r1=1706347&r2=1706348&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/BidiTest.java (original)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/BidiTest.java Fri Oct  2 06:01:02 2015
@@ -17,15 +17,20 @@
 
 package org.apache.pdfbox.text;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
 
-import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
-import java.io.Reader;
+import java.io.LineNumberReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.junit.After;
 import org.junit.Before;
@@ -37,19 +42,33 @@ import org.junit.Test;
  */
 public class BidiTest
 {
+    /**
+     * Logger instance, named after this test class.
+     */
+    private static final Log log = LogFactory.getLog(BidiTest.class);
+    
     private static final File IN_DIR = new File("src/test/resources/org/apache/pdfbox/text/");
+    private static final File outDir = new File("target/test-output");
     private static final String NAME_OF_PDF = "BidiSample.pdf";
-    private static final String NAME_OF_TXT = "BidiSample.pdf.txt";
-    private static final String NAME_OF_TXT_SORTED = "BidiSample.pdf-sorted.txt";
     
     private static final String ENCODING = "UTF-8";
 
     private PDDocument document;
     private PDFTextStripper stripper;
+    
+    private boolean bFail = false;
 
     @Before
     public void setUp() throws IOException
     {
+        if (!outDir.exists()) 
+        {
+            if (!outDir.mkdirs()) 
+            {
+                throw (new IOException("Error creating " + outDir.getAbsolutePath() + " directory"));
+            }
+        }
+        
         document = PDDocument.load(new File(IN_DIR, NAME_OF_PDF));
         stripper = new PDFTextStripper();
         stripper.setLineSeparator("\n");
@@ -58,58 +77,227 @@ public class BidiTest
     @Test
     public void testSorted() throws IOException
     {
-        stripper.setSortByPosition(true);
-        String extractedText = stripper.getText(document);
-
-        Reader compareTextReader = new InputStreamReader(new FileInputStream(new File(IN_DIR, NAME_OF_TXT_SORTED)), ENCODING);;
-        BufferedReader bufferedCompareTextReader = new BufferedReader(compareTextReader);
-
-        StringBuilder compareTextBuilder = new StringBuilder();
-
-        String line = bufferedCompareTextReader.readLine();
-
-        while (line != null)
-        {
-            compareTextBuilder.append(line);
-            compareTextBuilder.append('\n');
-            line = bufferedCompareTextReader.readLine();
-        }
-
-        bufferedCompareTextReader.close();
-
-        assertEquals(extractedText, compareTextBuilder.toString());
-
+        File testFile = new File(IN_DIR, NAME_OF_PDF);
+        doTestFile(testFile, outDir, false, true);
     }
 
     @Test
     public void testNotSorted() throws IOException
     {
-        stripper.setSortByPosition(false);
-        String extractedText = stripper.getText(document);
-
-        Reader compareTextReader = new InputStreamReader(new FileInputStream(new File(IN_DIR, NAME_OF_TXT)), ENCODING);;
-        BufferedReader bufferedCompareTextReader = new BufferedReader(compareTextReader);
-
-        StringBuilder compareTextBuilder = new StringBuilder();
-        String line = bufferedCompareTextReader.readLine();
+        File testFile = new File(IN_DIR, NAME_OF_PDF);
+        doTestFile(testFile, outDir, false, false);
+    }
 
-        while (line != null)
+    @After
+    public void tearDown() throws IOException
+    {
+        document.close();
+    }
+    
+    /**
+     * Validate text extraction on a single file.
+     *
+     * @param inFile The PDF file to validate
+     * @param outDir The directory to store the output in
+     * @param bLogResult Whether to log the extracted text
+     * @param bSort Whether or not the extracted text is sorted
+     * @throws IOException if reading the PDF or reading/writing the text files fails
+     */
+    public void doTestFile(File inFile, File outDir, boolean bLogResult, boolean bSort)
+    throws IOException
+    {
+        if(bSort)
         {
-            compareTextBuilder.append(line);
-            compareTextBuilder.append('\n');
-            line = bufferedCompareTextReader.readLine();
+            log.info("Preparing to parse " + inFile.getName() + " for sorted test");
+        }
+        else
+        {
+            log.info("Preparing to parse " + inFile.getName() + " for standard test");
         }
 
-        bufferedCompareTextReader.close();
-
-        assertEquals(extractedText, compareTextBuilder.toString());
+        if (!outDir.exists()) 
+        {
+            if (!outDir.mkdirs()) 
+            {
+                throw (new IOException("Error creating " + outDir.getAbsolutePath() + " directory"));
+            }
+        }
 
+        PDDocument document = PDDocument.load(inFile);
+        try
+        {            
+            File outFile;
+            File expectedFile;
+
+            if(bSort)
+            {
+                outFile = new File(outDir,  inFile.getName() + "-sorted.txt");
+                expectedFile = new File(inFile.getParentFile(), inFile.getName() + "-sorted.txt");
+            }
+            else
+            {
+                outFile = new File(outDir, inFile.getName() + ".txt");
+                expectedFile = new File(inFile.getParentFile(), inFile.getName() + ".txt");
+            }
+
+            OutputStream os = new FileOutputStream(outFile);
+            try
+            {
+                Writer writer = new OutputStreamWriter(os, ENCODING);
+                try
+                {
+                    //Allows for sorted tests 
+                    stripper.setSortByPosition(bSort);
+                    stripper.writeText(document, writer);
+                }
+                finally
+                {
+                    // close the written file before reading it again
+                    writer.close();
+                }
+            }
+            finally
+            {
+                os.close();
+            }
+
+            if (bLogResult)
+            {
+                log.info("Text for " + inFile.getName() + ":");
+                log.info(stripper.getText(document));
+            }
+
+            if (!expectedFile.exists())
+            {
+                this.bFail = true;
+                fail("FAILURE: Input verification file: " + expectedFile.getAbsolutePath() +
+                        " did not exist");
+                return;
+            }
+
+            LineNumberReader expectedReader =
+                new LineNumberReader(new InputStreamReader(new FileInputStream(expectedFile), ENCODING));
+            LineNumberReader actualReader =
+                new LineNumberReader(new InputStreamReader(new FileInputStream(outFile), ENCODING));
+
+            while (true)
+            {
+                String expectedLine = expectedReader.readLine();
+                while( expectedLine != null && expectedLine.trim().length() == 0 )
+                {
+                    expectedLine = expectedReader.readLine();
+                }
+                String actualLine = actualReader.readLine();
+                while( actualLine != null && actualLine.trim().length() == 0 )
+                {
+                    actualLine = actualReader.readLine();
+                }
+                if (!stringsEqual(expectedLine, actualLine))
+                {
+                    this.bFail = true;
+                    fail("FAILURE: Line mismatch for file " + inFile.getName() +
+                            " (sort = "+bSort+")" +
+                            " at expected line: " + expectedReader.getLineNumber() +
+                            " at actual line: " + actualReader.getLineNumber() +
+                            "\nexpected line was: \"" + expectedLine + "\"" +
+                            "\nactual line was:   \"" + actualLine + "\"" + "\n");
+
+                    //lets report all lines, even though this might produce some verbose logging
+                    //break;
+                }
+
+                if( expectedLine == null || actualLine==null)
+                {
+                    break;
+                }
+            }
+            expectedReader.close();
+            actualReader.close();
+        }
+        finally
+        {
+            document.close();
+        }
+    }
+    
+    /**
+     * Determine whether two strings are equal, where two null strings are
+     * considered equal.
+     *
+     * @param expected Expected string
+     * @param actual Actual String
+     * @return <code>true</code> if the strings are both null,
+     * or if their contents are the same, otherwise <code>false</code>.
+     */
+    private boolean stringsEqual(String expected, String actual)
+    {
+        boolean equals = true;
+        if( (expected == null) && (actual == null) )
+        {
+            return true;
+        }
+        else if( expected != null && actual != null )
+        {
+            expected = expected.trim();
+            actual = actual.trim();
+            char[] expectedArray = expected.toCharArray();
+            char[] actualArray = actual.toCharArray();
+            int expectedIndex = 0;
+            int actualIndex = 0;
+            while( expectedIndex<expectedArray.length && actualIndex<actualArray.length )
+            {
+                if( expectedArray[expectedIndex] != actualArray[actualIndex] )
+                {
+                    equals = false;
+                    log.warn("Lines differ at index"
+                     + " expected:" + expectedIndex + "-" + (int)expectedArray[expectedIndex]
+                     + " actual:" + actualIndex + "-" + (int)actualArray[actualIndex] );
+                    break;
+                }
+                expectedIndex = skipWhitespace( expectedArray, expectedIndex );
+                actualIndex = skipWhitespace( actualArray, actualIndex );
+                expectedIndex++;
+                actualIndex++;
+            }
+            if( equals )
+            {
+                if( expectedIndex != expectedArray.length )
+                {
+                    equals = false;
+                    log.warn("Expected line is longer at:" + expectedIndex );
+                }
+                if( actualIndex != actualArray.length )
+                {
+                    equals = false;
+                    log.warn("Actual line is longer at:" + actualIndex );
+                }
+            }
+        }
+        else
+        {
+            equals = (expected == null && actual != null && actual.trim().isEmpty())
+                    || (actual == null && expected != null && expected.trim().isEmpty());
+        }
+        return equals;
     }
 
-    @After
-    public void tearDown() throws IOException
+    /**
+     * If the current index is whitespace then skip any subsequent whitespace.
+     */
+    private int skipWhitespace( char[] array, int index )
     {
-        document.close();
+        //if we are at a space character (or any character with code > 256) then skip
+        //all such characters, but when all done roll back 1 because stringsEqual
+        //will roll forward 1
+        if( array[index] == ' ' || array[index] > 256 )
+        {
+            while( index < array.length && (array[index] == ' ' || array[index] > 256))
+            {
+                index++;
+            }
+            index--;
+        }
+        return index;
     }
 
 }