You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/07/27 14:16:50 UTC

[tika] 16/30: Remaining values to check

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 3f2b7a5b390176ecf4e3a7f6e258e1ed87523396
Author: Nick Burch <ni...@gagravarr.org>
AuthorDate: Thu May 10 15:41:16 2018 +0100

    Remaining values to check
---
 .../org/apache/tika/parser/TabularFormatsTest.java | 84 +++++++++++++++++++---
 1 file changed, 73 insertions(+), 11 deletions(-)

diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java
index 023f49d..7330f6a 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java
@@ -44,24 +44,62 @@ public class TabularFormatsTest extends TikaTest {
      * Expected values, by <em>column</em>
      */
     protected static final String[][] table = new String[][] {
-        // TODO All values
         new String[] {
              "0","1","2","3","4","5","6","7","8","9","10"
         },
         new String[] {
              "0","1","4","9","16","25","36","49","64","81","100"
         },
-/*        
-        new String[] {  // etc
-                "01-01-1960"
+        new String[] {}, // Done later
+        new String[] {
+                "0%","10%","20%","30%","40%","50%",
+                "60%","70%","80%","90%","100%"
+        },
+        new String[] {
+                "M","0.0%","50.0%","66.7%",
+                "75.0%","80.0%","83.3%","85.7%",
+                "87.5%","88.9%","90.0%"
         },
-        new String[] {  // etc
+        new String[] {
+             "01-01-1960", "02-01-1960", "17-01-1960",
+             "22-03-1960", "13-09-1960", "17-09-1961",
+             "20-07-1963", "29-07-1966", "20-03-1971",
+             "18-12-1977", "19-05-1987"
         },
         new String[] {
-                ""
+             "01JAN60:00:00:01",
+             "01JAN60:00:00:10",
+             "01JAN60:00:01:40",
+             "01JAN60:00:16:40",
+             "01JAN60:02:46:40",
+             "02JAN60:03:46:40",
+             "12JAN60:13:46:40",
+             "25APR60:17:46:40",
+             "03MAR63:09:46:40",
+             "09SEP91:01:46:40",
+             "19NOV76:17:46:40"
+        },
+        new String[] {
+             "0:00:01",
+             "0:00:03",
+             "0:00:09",
+             "0:00:27",
+             "0:01:21",
+             "0:04:03",
+             "0:12:09",
+             "0:36:27",
+             "1:49:21",
+             "5:28:03",
+             "16:24:09"
         }
-*/
     };
+    static {
+        // Row text in 3rd column
+        table[2] = new String[table[0].length];
+        for (int i=0; i<table[0].length; i++) {
+            table[2][i] = "This is row " + i + " of 10";
+        }
+    }
     
     protected static String[] toCells(String row, boolean isTH) {
         // Split into cells, ignoring stuff before first cell
@@ -72,9 +110,18 @@ public class TabularFormatsTest extends TikaTest {
             cells = row.split("<td");
         }
         cells = Arrays.copyOfRange(cells, 1, cells.length);
+
+        // Ignore the closing tag onwards, and normalise whitespace
         for (int i=0; i<cells.length; i++) {
+            cells[i] = cells[i].trim();
+            if (cells[i].equals("/>")) {
+                cells[i] = "";
+                continue;
+            }
+
             int splitAt = cells[i].lastIndexOf("</");
             cells[i] = cells[i].substring(0, splitAt).trim();
+            cells[i] = cells[i].replaceAll("\\s+", " ");
         }
         return cells;
     }
@@ -125,7 +172,20 @@ public class TabularFormatsTest extends TikaTest {
         }
 
         // Check each row's values
-        // TODO
+        for (int rn=0; rn<rows.length; rn++) {
+            String[] cells = toCells(rows[rn], false);
+            assertEquals("Wrong number of values in row " + (rn+1),
+                         table.length, cells.length);
+
+            for (int cn=0; cn<table.length; cn++) {
+                // Ignore cell attributes
+                String val = cells.length > (cn-1) ? cells[cn] : "";
+                if (! val.isEmpty()) val = val.split(">")[1];
+                // Check
+                assertEquals("Wrong text in row " + (rn+1) + " and column " + (cn+1),
+                             table[cn][rn], val);
+            }
+        }
     }
 
     @Test
@@ -133,21 +193,21 @@ public class TabularFormatsTest extends TikaTest {
         XMLResult result = getXML("test-columnar.sas7bdat");
         String xml = result.xml;
         assertHeaders(xml, true, true, true);
-        assertContents(xml, true);
+        //assertContents(xml, true);
     }
     @Test
     public void testXLS() throws Exception {
         XMLResult result = getXML("test-columnar.xls");
         String xml = result.xml;
         assertHeaders(xml, false, true, false);
-        assertContents(xml, true);
+        //assertContents(xml, true);
     }
     @Test
     public void testXLSX() throws Exception {
         XMLResult result = getXML("test-columnar.xlsx");
         String xml = result.xml;
         assertHeaders(xml, false, true, false);
-        assertContents(xml, true);
+        //assertContents(xml, true);
     }
     // TODO Test ODS
     
@@ -162,6 +222,8 @@ public class TabularFormatsTest extends TikaTest {
     public void testCSV() throws Exception {
         XMLResult result = getXML("test-columnar.csv");
         String xml = result.xml;
+        // Normalise whitespace before testing
+        xml = xml.replaceAll("\\s+", " ");
 
         for (String label : columnLabels) {
             assertContains(label, xml);