You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2015/10/23 17:48:41 UTC

svn commit: r1710247 - /pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/TestTextStripper.java

Author: tilman
Date: Fri Oct 23 15:48:41 2015
New Revision: 1710247

URL: http://svn.apache.org/viewvc?rev=1710247&view=rev
Log:
PDFBOX-3044: change encoding to utf8, don't fail immediately; output diff output; use diff library; update test files to utf8

Modified:
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/TestTextStripper.java

Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/TestTextStripper.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/TestTextStripper.java?rev=1710247&r1=1710246&r2=1710247&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/TestTextStripper.java (original)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/TestTextStripper.java Fri Oct 23 15:48:41 2015
@@ -16,6 +16,13 @@
  */
 package org.apache.pdfbox.text;
 
+import difflib.ChangeDelta;
+import difflib.DeleteDelta;
+import difflib.DiffUtils;
+import difflib.InsertDelta;
+import difflib.Patch;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FilenameFilter;
@@ -25,8 +32,11 @@ import java.io.InputStreamReader;
 import java.io.LineNumberReader;
 import java.io.OutputStream;
 import java.io.OutputStreamWriter;
+import java.io.PrintStream;
 import java.io.Writer;
 import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
 
 import junit.framework.Test;
 import junit.framework.TestCase;
@@ -96,7 +106,7 @@ public class TestTextStripper extends Te
 
     private boolean bFail = false;
     private PDFTextStripper stripper = null;
-    private final String encoding = "UTF-16LE";
+    private static final String ENCODING = "UTF-8";
 
     /**
      * Test class constructor.
@@ -238,26 +248,30 @@ public class TestTextStripper extends Te
         try
         {            
             File outFile;
+            File diffFile;
             File expectedFile;
 
             if(bSort)
             {
                 outFile = new File(outDir,  inFile.getName() + "-sorted.txt");
+                diffFile = new File(outDir, inFile.getName() + "-sorted-diff.txt");
                 expectedFile = new File(inFile.getParentFile(), inFile.getName() + "-sorted.txt");
             }
             else
             {
                 outFile = new File(outDir, inFile.getName() + ".txt");
+                diffFile = new File(outDir, inFile.getName() + "-diff.txt");
                 expectedFile = new File(inFile.getParentFile(), inFile.getName() + ".txt");
             }
 
             OutputStream os = new FileOutputStream(outFile);
             try
             {
-                os.write( 0xFF );
-                os.write( 0xFE );
+                os.write (0xEF);
+                os.write (0xBB);
+                os.write (0xBF);
 
-                Writer writer = new OutputStreamWriter(os, encoding);
+                Writer writer = new BufferedWriter(new OutputStreamWriter(os, ENCODING));
                 try
                 {
                     //Allows for sorted tests 
@@ -284,15 +298,17 @@ public class TestTextStripper extends Te
             if (!expectedFile.exists())
             {
                 this.bFail = true;
-                fail("FAILURE: Input verification file: " + expectedFile.getAbsolutePath() +
+                log.error("FAILURE: Input verification file: " + expectedFile.getAbsolutePath() +
                         " did not exist");
                 return;
             }
+            
+            boolean localFail = false;
 
             LineNumberReader expectedReader =
-                new LineNumberReader(new InputStreamReader(new FileInputStream(expectedFile), encoding));
+                new LineNumberReader(new InputStreamReader(new FileInputStream(expectedFile), ENCODING));
             LineNumberReader actualReader =
-                new LineNumberReader(new InputStreamReader(new FileInputStream(outFile), encoding));
+                new LineNumberReader(new InputStreamReader(new FileInputStream(outFile), ENCODING));
 
             while (true)
             {
@@ -309,7 +325,8 @@ public class TestTextStripper extends Te
                 if (!stringsEqual(expectedLine, actualLine))
                 {
                     this.bFail = true;
-                    fail("FAILURE: Line mismatch for file " + inFile.getName() +
+                    localFail = true;
+                    log.error("FAILURE: Line mismatch for file " + inFile.getName() +
                             " (sort = "+bSort+")" +
                             " at expected line: " + expectedReader.getLineNumber() +
                             " at actual line: " + actualReader.getLineNumber() +
@@ -327,6 +344,50 @@ public class TestTextStripper extends Te
             }
             expectedReader.close();
             actualReader.close();
+            if (!localFail)
+            {
+                outFile.delete();
+            }
+            else
+            {
+                // https://code.google.com/p/java-diff-utils/wiki/SampleUsage
+                List<String> original = fileToLines(expectedFile);
+                List<String> revised = fileToLines(outFile);
+
+                // Compute diff. Get the Patch object. Patch is the container for computed deltas.
+                Patch patch = DiffUtils.diff(original, revised);
+
+                PrintStream diffPS = new PrintStream(diffFile);
+                for (Object delta : (List<ChangeDelta>) patch.getDeltas())
+                {
+                    if (delta instanceof ChangeDelta)
+                    {
+                        ChangeDelta cdelta = (ChangeDelta) delta;
+                        diffPS.println("Org: " + cdelta.getOriginal());
+                        diffPS.println("New: " + cdelta.getRevised());
+                        diffPS.println();
+                    }
+                    else if (delta instanceof DeleteDelta)
+                    {
+                        DeleteDelta ddelta = (DeleteDelta) delta;
+                        diffPS.println("Org: " + ddelta.getOriginal());
+                        diffPS.println("New: " + ddelta.getRevised());
+                        diffPS.println();
+                    }
+                    else if (delta instanceof InsertDelta)
+                    {
+                        InsertDelta idelta = (InsertDelta) delta;
+                        diffPS.println("Org: " + idelta.getOriginal());
+                        diffPS.println("New: " + idelta.getRevised());
+                        diffPS.println();
+                    }
+                    else
+                    {
+                        diffPS.println(delta);
+                    }
+                }
+                diffPS.close();
+            }
         }
         finally
         {
@@ -334,6 +395,27 @@ public class TestTextStripper extends Te
         }
     }
     
+    // Helper method to get a file's content: reads it as ENCODING text and returns its lines in order.
+    private static List<String> fileToLines(File file)
+    {
+        List<String> lines = new LinkedList<String>();
+        String line = "";
+        try
+        {
+            BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file), ENCODING));
+            while ((line = in.readLine()) != null) // readLine() returns null at end of file
+            {
+                lines.add(line);
+            }
+            in.close(); // NOTE(review): skipped if readLine() throws, leaving the reader unclosed
+        }
+        catch (IOException e)
+        {
+            e.printStackTrace(); // error is not rethrown; the lines read so far are still returned below
+        }
+        return lines;
+    }
+
     private int findOutlineItemDestPageNum(PDDocument doc, PDOutlineItem oi) throws IOException
     {
         PDPageDestination pageDest = (PDPageDestination) oi.getDestination();