You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/07/27 14:16:47 UTC

[tika] 13/30: Add a time column to the test columnar files

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 7f68ebb4b80e4ef6437796d592d19f0f354adb92
Author: Nick Burch <ni...@gagravarr.org>
AuthorDate: Thu May 10 11:35:04 2018 +0100

    Add a time column to the test columnar files
---
 .../org/apache/tika/parser/TabularFormatsTest.java |  22 +++++++-----
 .../apache/tika/parser/sas/SAS7BDATParserTest.java |   8 ++---
 .../resources/test-documents/test-columnar.csv     |  37 +++++++--------------
 .../resources/test-documents/test-columnar.sas.xml |  11 ++++++
 .../test-documents/test-columnar.sas7bdat          | Bin 17408 -> 17408 bytes
 .../resources/test-documents/test-columnar.xls     | Bin 0 -> 6656 bytes
 .../resources/test-documents/test-columnar.xlsx    | Bin 0 -> 4941 bytes
 .../resources/test-documents/test-columnar.xpt     | Bin 4560 -> 4720 bytes
 .../src/test/resources/test-documents/testSAS2.sas |  27 ++++++++++++---
 9 files changed, 64 insertions(+), 41 deletions(-)

diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java
index 61fcca2..4dc7336 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java
@@ -26,25 +26,31 @@ import org.junit.Test;
  * This is mostly focused on the XHTML output
  */
 public class TabularFormatsTest extends TikaTest {
-    protected static final String[] headers = new String[] {
-        "String (Num=)","Number","Date","Datetime","Number"
+    protected static final String[] columnNames = new String[] {
+         "recnum","square","desc","pctdone","pctinc",
+         "date","datetime","time"
     };
+    protected static final String[] columnLabels = new String[] {
+        "Record Number","Square of the Record Number",
+        "Description of the Row","Percent Done",
+        "Percent Increment","date","datetime","time"    
+    };
+
     /**
      * Expected values, by <em>column</em>
      */
     protected static final String[][] table = new String[][] {
         // TODO All values
         new String[] {
-                "Num=0"
+             "0","1","2","3","4","5","6","7","8","9","10"
         },
         new String[] {
-                "0.0"
+             "0","1","4" // etc
         },
-        new String[] {
-                "1899-12-30"
+        new String[] {  // etc
+                "01-01-1960"
         },
-        new String[] {
-                "1900-01-01 11:00:00"
+        new String[] {  // etc
         },
         new String[] {
                 ""
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java
index 3bb3e01..610ffc3 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java
@@ -89,11 +89,11 @@ public class SAS7BDATParserTest extends TikaTest {
         assertEquals("application/x-sas-data", metadata.get(Metadata.CONTENT_TYPE));
         assertEquals("TESTING", metadata.get(TikaCoreProperties.TITLE));
 
-        assertEquals("2018-05-09T16:42:04Z", metadata.get(TikaCoreProperties.CREATED));
-        assertEquals("2018-05-09T16:42:04Z", metadata.get(TikaCoreProperties.MODIFIED));
+        assertEquals("2018-05-09T17:59:33Z", metadata.get(TikaCoreProperties.CREATED));
+        assertEquals("2018-05-09T17:59:33Z", metadata.get(TikaCoreProperties.MODIFIED));
         
         assertEquals("1", metadata.get(PagedText.N_PAGES));
-        assertEquals("7", metadata.get(Database.COLUMN_COUNT));
+        assertEquals("8", metadata.get(Database.COLUMN_COUNT));
         assertEquals("11", metadata.get(Database.ROW_COUNT));
         assertEquals("windows-1252", metadata.get(HttpHeaders.CONTENT_ENCODING));
         assertEquals("W32_7PRO", metadata.get(OfficeOpenXMLExtended.APPLICATION));
@@ -102,7 +102,7 @@ public class SAS7BDATParserTest extends TikaTest {
         assertEquals("Little", metadata.get(MachineMetadata.ENDIAN));
         assertEquals(Arrays.asList("Record Number","Square of the Record Number",
                                    "Description of the Row","Percent Done",
-                                   "Percent Increment","date","datetime"),
+                                   "Percent Increment","date","datetime","time"),
                      Arrays.asList(metadata.getValues(Database.COLUMN_NAME)));
         
         String content = handler.toString();
diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.csv b/tika-parsers/src/test/resources/test-documents/test-columnar.csv
index 8de4097..5ef57bb 100644
--- a/tika-parsers/src/test/resources/test-documents/test-columnar.csv
+++ b/tika-parsers/src/test/resources/test-documents/test-columnar.csv
@@ -1,25 +1,12 @@
-"String (Num=)","Number","Date","Datetime","Number"
-Num=0,0.0,1899-12-30,1900-01-01 11:00:00,
-Num=0.1,0.1,1899-12-30,1899-12-30 02:24:00,0.1
-Num=0.25,0.25,1899-12-30,1899-12-30 06:00:00,0.25
-Num=0.5,0.5,1899-12-30,1899-12-30 12:00:00,0.5
-Num=1,1.0,1900-01-01,1900-01-01 00:00:00,
-Num=1.1,1.1,1900-01-01,1900-01-01 02:24:00,1.1
-Num=1.2,1.2,1900-01-01,1900-01-01 04:48:00,1.2
-Num=1.5,1.5,1900-01-01,1900-01-01 12:00:00,1.5
-Num=2,2.0,1900-01-02,1900-01-02 00:00:00,2.0
-Num=2.5,2.5,1900-01-02,1900-01-02 12:00:00,2.5
-Num=3,3.0,1900-01-03,1900-01-03 00:00:00,3.0
-Num=4,4.0,1900-01-04,1900-01-04 00:00:00,4.0
-Num=5,5.0,1900-01-05,1900-01-05 00:00:00,5.0
-Num=10,10.0,1900-01-10,1900-01-10 00:00:00,10.0
-Num=15,15.0,1900-01-15,1900-01-15 00:00:00,15.0
-Num=25,25.0,1900-01-25,1900-01-25 00:00:00,25.0
-Num=50,50.0,1900-02-19,1900-02-19 00:00:00,50.0
-Num=60,60.0,1900-02-28,1900-02-28 00:00:00,60.0
-Num=65,65.0,1900-03-05,1900-03-05 00:00:00,65.0
-Num=100,100.0,1900-04-09,1900-04-09 00:00:00,100.0
-Num=120,120.0,1900-04-29,1900-04-29 00:00:00,120.0
-Num=1500,1500.0,1904-02-08,1904-02-08 00:00:00,1500.0
-Num=20222,20222.0,1955-05-13,1955-05-13 00:00:00,20222.0
-Num=404242,404242.0,3006-10-10,3006-10-10 00:00:00,404242.0
+"Record Number","Square of the Record Number","Description of the Row","Percent Done","Percent Increment","date","datetime","time"
+0,0,This is row            0 of           10,0%,M,01-01-1960,01JAN60:00:00:01,0:00:01
+1,1,This is row            1 of           10,10%,0.0%,02-01-1960,01JAN60:00:00:10,0:00:03
+2,4,This is row            2 of           10,20%,50.0%,17-01-1960,01JAN60:00:01:40,0:00:09
+3,9,This is row            3 of           10,30%,66.7%,22-03-1960,01JAN60:00:16:40,0:00:27
+4,16,This is row            4 of           10,40%,75.0%,13-09-1960,01JAN60:02:46:40,0:01:21
+5,25,This is row            5 of           10,50%,80.0%,17-09-1961,02JAN60:03:46:40,0:04:03
+6,36,This is row            6 of           10,60%,83.3%,20-07-1963,12JAN60:13:46:40,0:12:09
+7,49,This is row            7 of           10,70%,85.7%,29-07-1966,25APR60:17:46:40,0:36:27
+8,64,This is row            8 of           10,80%,87.5%,20-03-1971,03MAR63:09:46:40,1:49:21
+9,81,This is row            9 of           10,90%,88.9%,18-12-1977,09SEP91:01:46:40,5:28:03
+10,100,This is row           10 of           10,100%,90.0%,19-05-1987,19NOV76:17:46:40,16:24:09
diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.sas.xml b/tika-parsers/src/test/resources/test-documents/test-columnar.sas.xml
index ae12fc5..45df965 100644
--- a/tika-parsers/src/test/resources/test-documents/test-columnar.sas.xml
+++ b/tika-parsers/src/test/resources/test-documents/test-columnar.sas.xml
@@ -8,6 +8,7 @@
       <pctincr missing="M" />
       <date>0</date>
       <datetime>1960-01-01T00:00:01</datetime>
+      <time>00:00:01</time>
    </TESTXML>
    <TESTXML>
       <recnum>1</recnum>
@@ -17,6 +18,7 @@
       <pctincr>0</pctincr>
       <date>1</date>
       <datetime>1960-01-01T00:00:10</datetime>
+      <time>00:00:03</time>
    </TESTXML>
    <TESTXML>
       <recnum>2</recnum>
@@ -26,6 +28,7 @@
       <pctincr>0.5</pctincr>
       <date>16</date>
       <datetime>1960-01-01T00:01:40</datetime>
+      <time>00:00:09</time>
    </TESTXML>
    <TESTXML>
       <recnum>3</recnum>
@@ -35,6 +38,7 @@
       <pctincr>0.6666666667</pctincr>
       <date>81</date>
       <datetime>1960-01-01T00:16:40</datetime>
+      <time>00:00:27</time>
    </TESTXML>
    <TESTXML>
       <recnum>4</recnum>
@@ -44,6 +48,7 @@
       <pctincr>0.75</pctincr>
       <date>256</date>
       <datetime>1960-01-01T02:46:40</datetime>
+      <time>00:01:21</time>
    </TESTXML>
    <TESTXML>
       <recnum>5</recnum>
@@ -53,6 +58,7 @@
       <pctincr>0.8</pctincr>
       <date>625</date>
       <datetime>1960-01-02T03:46:40</datetime>
+      <time>00:04:03</time>
    </TESTXML>
    <TESTXML>
       <recnum>6</recnum>
@@ -62,6 +68,7 @@
       <pctincr>0.8333333333</pctincr>
       <date>1296</date>
       <datetime>1960-01-12T13:46:40</datetime>
+      <time>00:12:09</time>
    </TESTXML>
    <TESTXML>
       <recnum>7</recnum>
@@ -71,6 +78,7 @@
       <pctincr>0.8571428571</pctincr>
       <date>2401</date>
       <datetime>1960-04-25T17:46:40</datetime>
+      <time>00:36:27</time>
    </TESTXML>
    <TESTXML>
       <recnum>8</recnum>
@@ -80,6 +88,7 @@
       <pctincr>0.875</pctincr>
       <date>4096</date>
       <datetime>1963-03-03T09:46:40</datetime>
+      <time>01:49:21</time>
    </TESTXML>
    <TESTXML>
       <recnum>9</recnum>
@@ -89,6 +98,7 @@
       <pctincr>0.8888888889</pctincr>
       <date>6561</date>
       <datetime>1991-09-09T01:46:40</datetime>
+      <time>05:28:03</time>
    </TESTXML>
    <TESTXML>
       <recnum>10</recnum>
@@ -98,5 +108,6 @@
       <pctincr>0.9</pctincr>
       <date>10000</date>
       <datetime>2276-11-19T17:46:40</datetime>
+      <time>16:24:09</time>
    </TESTXML>
 </TABLE>
diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.sas7bdat b/tika-parsers/src/test/resources/test-documents/test-columnar.sas7bdat
index 553c45c..33ee412 100644
Binary files a/tika-parsers/src/test/resources/test-documents/test-columnar.sas7bdat and b/tika-parsers/src/test/resources/test-documents/test-columnar.sas7bdat differ
diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.xls b/tika-parsers/src/test/resources/test-documents/test-columnar.xls
new file mode 100644
index 0000000..1d7b2cf
Binary files /dev/null and b/tika-parsers/src/test/resources/test-documents/test-columnar.xls differ
diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.xlsx b/tika-parsers/src/test/resources/test-documents/test-columnar.xlsx
new file mode 100644
index 0000000..58ffd47
Binary files /dev/null and b/tika-parsers/src/test/resources/test-documents/test-columnar.xlsx differ
diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.xpt b/tika-parsers/src/test/resources/test-documents/test-columnar.xpt
index d908228..bbb59b5 100644
Binary files a/tika-parsers/src/test/resources/test-documents/test-columnar.xpt and b/tika-parsers/src/test/resources/test-documents/test-columnar.xpt differ
diff --git a/tika-parsers/src/test/resources/test-documents/testSAS2.sas b/tika-parsers/src/test/resources/test-documents/testSAS2.sas
index bc8c1fe..96a9121 100644
--- a/tika-parsers/src/test/resources/test-documents/testSAS2.sas
+++ b/tika-parsers/src/test/resources/test-documents/testSAS2.sas
@@ -2,6 +2,7 @@ data testing;
 begin=0;
 end=10;
 msg="This is row %x of %y";
+
 do i = begin to end by 1;
 drop msg begin end i;
 recnum=i;
@@ -11,10 +12,13 @@ format pctdone percent8.0;
 format pctincr percent7.1;
 pctdone=divide(i,end);
 pctincr=divide(i-1,i);
+/* Days / Seconds since Epoch / Seconds since midnight */
 format date ddmmyyd10.;
 format datetime datetime.;
+format time time.;
 date=i**4;
 datetime=10**i;
+time=3**i;
 output;
 end;
 label recnum="Record Number"
@@ -24,10 +28,11 @@ label recnum="Record Number"
 	  pctincr="Percent Increment";
 run;
 
-libname out          '/home/tika/testing/sas';
-libname outxpt XPORT '/home/tika/testing/sas/testing.xpt';
-libname outv6 v6     '/home/tika/testing/sas';
-libname outxml xmlv2 '/home/tika/testing/sas';
+%let outpath = /home/tika/testing/sas;
+libname out          "&outpath";
+libname outxpt XPORT "&outpath./testing.xpt";
+libname outv6 v6     "&outpath";
+libname outxml xmlv2 "&outpath";
 
 data out.testing;
 set testing;
@@ -46,3 +51,17 @@ run;
 proc print data=testing;
 run;
 
+proc export data=testing label
+  outfile="&outpath./testing.csv"
+  dbms=CSV REPLACE;
+putnames=yes;
+run;
+
+proc export data=testing label 
+  outfile="&outpath./testing.xls"
+  dbms=XLS;
+run;
+proc export data=testing label
+  outfile="&outpath./testing.xlsx"
+  dbms=XLSX;
+run;