You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/07/27 14:16:47 UTC
[tika] 13/30: Add a time column to the test columnar files
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 7f68ebb4b80e4ef6437796d592d19f0f354adb92
Author: Nick Burch <ni...@gagravarr.org>
AuthorDate: Thu May 10 11:35:04 2018 +0100
Add a time column to the test columnar files
---
.../org/apache/tika/parser/TabularFormatsTest.java | 22 +++++++-----
.../apache/tika/parser/sas/SAS7BDATParserTest.java | 8 ++---
.../resources/test-documents/test-columnar.csv | 37 +++++++--------------
.../resources/test-documents/test-columnar.sas.xml | 11 ++++++
.../test-documents/test-columnar.sas7bdat | Bin 17408 -> 17408 bytes
.../resources/test-documents/test-columnar.xls | Bin 0 -> 6656 bytes
.../resources/test-documents/test-columnar.xlsx | Bin 0 -> 4941 bytes
.../resources/test-documents/test-columnar.xpt | Bin 4560 -> 4720 bytes
.../src/test/resources/test-documents/testSAS2.sas | 27 ++++++++++++---
9 files changed, 64 insertions(+), 41 deletions(-)
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java
index 61fcca2..4dc7336 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java
@@ -26,25 +26,31 @@ import org.junit.Test;
* This is mostly focused on the XHTML output
*/
public class TabularFormatsTest extends TikaTest {
- protected static final String[] headers = new String[] {
- "String (Num=)","Number","Date","Datetime","Number"
+ protected static final String[] columnNames = new String[] {
+ "recnum","square","desc","pctdone","pctinc",
+ "date","datetime","time"
};
+ protected static final String[] columnLabels = new String[] {
+ "Record Number","Square of the Record Number",
+ "Description of the Row","Percent Done",
+ "Percent Increment","date","datetime","time"
+ };
+
/**
* Expected values, by <em>column</em>
*/
protected static final String[][] table = new String[][] {
// TODO All values
new String[] {
- "Num=0"
+ "0","1","2","3","4","5","6","7","8","9","10"
},
new String[] {
- "0.0"
+ "0","1","4" // etc
},
- new String[] {
- "1899-12-30"
+ new String[] { // etc
+ "01-01-1960"
},
- new String[] {
- "1900-01-01 11:00:00"
+ new String[] { // etc
},
new String[] {
""
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java
index 3bb3e01..610ffc3 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java
@@ -89,11 +89,11 @@ public class SAS7BDATParserTest extends TikaTest {
assertEquals("application/x-sas-data", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("TESTING", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("2018-05-09T16:42:04Z", metadata.get(TikaCoreProperties.CREATED));
- assertEquals("2018-05-09T16:42:04Z", metadata.get(TikaCoreProperties.MODIFIED));
+ assertEquals("2018-05-09T17:59:33Z", metadata.get(TikaCoreProperties.CREATED));
+ assertEquals("2018-05-09T17:59:33Z", metadata.get(TikaCoreProperties.MODIFIED));
assertEquals("1", metadata.get(PagedText.N_PAGES));
- assertEquals("7", metadata.get(Database.COLUMN_COUNT));
+ assertEquals("8", metadata.get(Database.COLUMN_COUNT));
assertEquals("11", metadata.get(Database.ROW_COUNT));
assertEquals("windows-1252", metadata.get(HttpHeaders.CONTENT_ENCODING));
assertEquals("W32_7PRO", metadata.get(OfficeOpenXMLExtended.APPLICATION));
@@ -102,7 +102,7 @@ public class SAS7BDATParserTest extends TikaTest {
assertEquals("Little", metadata.get(MachineMetadata.ENDIAN));
assertEquals(Arrays.asList("Record Number","Square of the Record Number",
"Description of the Row","Percent Done",
- "Percent Increment","date","datetime"),
+ "Percent Increment","date","datetime","time"),
Arrays.asList(metadata.getValues(Database.COLUMN_NAME)));
String content = handler.toString();
diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.csv b/tika-parsers/src/test/resources/test-documents/test-columnar.csv
index 8de4097..5ef57bb 100644
--- a/tika-parsers/src/test/resources/test-documents/test-columnar.csv
+++ b/tika-parsers/src/test/resources/test-documents/test-columnar.csv
@@ -1,25 +1,12 @@
-"String (Num=)","Number","Date","Datetime","Number"
-Num=0,0.0,1899-12-30,1900-01-01 11:00:00,
-Num=0.1,0.1,1899-12-30,1899-12-30 02:24:00,0.1
-Num=0.25,0.25,1899-12-30,1899-12-30 06:00:00,0.25
-Num=0.5,0.5,1899-12-30,1899-12-30 12:00:00,0.5
-Num=1,1.0,1900-01-01,1900-01-01 00:00:00,
-Num=1.1,1.1,1900-01-01,1900-01-01 02:24:00,1.1
-Num=1.2,1.2,1900-01-01,1900-01-01 04:48:00,1.2
-Num=1.5,1.5,1900-01-01,1900-01-01 12:00:00,1.5
-Num=2,2.0,1900-01-02,1900-01-02 00:00:00,2.0
-Num=2.5,2.5,1900-01-02,1900-01-02 12:00:00,2.5
-Num=3,3.0,1900-01-03,1900-01-03 00:00:00,3.0
-Num=4,4.0,1900-01-04,1900-01-04 00:00:00,4.0
-Num=5,5.0,1900-01-05,1900-01-05 00:00:00,5.0
-Num=10,10.0,1900-01-10,1900-01-10 00:00:00,10.0
-Num=15,15.0,1900-01-15,1900-01-15 00:00:00,15.0
-Num=25,25.0,1900-01-25,1900-01-25 00:00:00,25.0
-Num=50,50.0,1900-02-19,1900-02-19 00:00:00,50.0
-Num=60,60.0,1900-02-28,1900-02-28 00:00:00,60.0
-Num=65,65.0,1900-03-05,1900-03-05 00:00:00,65.0
-Num=100,100.0,1900-04-09,1900-04-09 00:00:00,100.0
-Num=120,120.0,1900-04-29,1900-04-29 00:00:00,120.0
-Num=1500,1500.0,1904-02-08,1904-02-08 00:00:00,1500.0
-Num=20222,20222.0,1955-05-13,1955-05-13 00:00:00,20222.0
-Num=404242,404242.0,3006-10-10,3006-10-10 00:00:00,404242.0
+"Record Number","Square of the Record Number","Description of the Row","Percent Done","Percent Increment","date","datetime","time"
+0,0,This is row 0 of 10,0%,M,01-01-1960,01JAN60:00:00:01,0:00:01
+1,1,This is row 1 of 10,10%,0.0%,02-01-1960,01JAN60:00:00:10,0:00:03
+2,4,This is row 2 of 10,20%,50.0%,17-01-1960,01JAN60:00:01:40,0:00:09
+3,9,This is row 3 of 10,30%,66.7%,22-03-1960,01JAN60:00:16:40,0:00:27
+4,16,This is row 4 of 10,40%,75.0%,13-09-1960,01JAN60:02:46:40,0:01:21
+5,25,This is row 5 of 10,50%,80.0%,17-09-1961,02JAN60:03:46:40,0:04:03
+6,36,This is row 6 of 10,60%,83.3%,20-07-1963,12JAN60:13:46:40,0:12:09
+7,49,This is row 7 of 10,70%,85.7%,29-07-1966,25APR60:17:46:40,0:36:27
+8,64,This is row 8 of 10,80%,87.5%,20-03-1971,03MAR63:09:46:40,1:49:21
+9,81,This is row 9 of 10,90%,88.9%,18-12-1977,09SEP91:01:46:40,5:28:03
+10,100,This is row 10 of 10,100%,90.0%,19-05-1987,19NOV76:17:46:40,16:24:09
diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.sas.xml b/tika-parsers/src/test/resources/test-documents/test-columnar.sas.xml
index ae12fc5..45df965 100644
--- a/tika-parsers/src/test/resources/test-documents/test-columnar.sas.xml
+++ b/tika-parsers/src/test/resources/test-documents/test-columnar.sas.xml
@@ -8,6 +8,7 @@
<pctincr missing="M" />
<date>0</date>
<datetime>1960-01-01T00:00:01</datetime>
+ <time>00:00:01</time>
</TESTXML>
<TESTXML>
<recnum>1</recnum>
@@ -17,6 +18,7 @@
<pctincr>0</pctincr>
<date>1</date>
<datetime>1960-01-01T00:00:10</datetime>
+ <time>00:00:03</time>
</TESTXML>
<TESTXML>
<recnum>2</recnum>
@@ -26,6 +28,7 @@
<pctincr>0.5</pctincr>
<date>16</date>
<datetime>1960-01-01T00:01:40</datetime>
+ <time>00:00:09</time>
</TESTXML>
<TESTXML>
<recnum>3</recnum>
@@ -35,6 +38,7 @@
<pctincr>0.6666666667</pctincr>
<date>81</date>
<datetime>1960-01-01T00:16:40</datetime>
+ <time>00:00:27</time>
</TESTXML>
<TESTXML>
<recnum>4</recnum>
@@ -44,6 +48,7 @@
<pctincr>0.75</pctincr>
<date>256</date>
<datetime>1960-01-01T02:46:40</datetime>
+ <time>00:01:21</time>
</TESTXML>
<TESTXML>
<recnum>5</recnum>
@@ -53,6 +58,7 @@
<pctincr>0.8</pctincr>
<date>625</date>
<datetime>1960-01-02T03:46:40</datetime>
+ <time>00:04:03</time>
</TESTXML>
<TESTXML>
<recnum>6</recnum>
@@ -62,6 +68,7 @@
<pctincr>0.8333333333</pctincr>
<date>1296</date>
<datetime>1960-01-12T13:46:40</datetime>
+ <time>00:12:09</time>
</TESTXML>
<TESTXML>
<recnum>7</recnum>
@@ -71,6 +78,7 @@
<pctincr>0.8571428571</pctincr>
<date>2401</date>
<datetime>1960-04-25T17:46:40</datetime>
+ <time>00:36:27</time>
</TESTXML>
<TESTXML>
<recnum>8</recnum>
@@ -80,6 +88,7 @@
<pctincr>0.875</pctincr>
<date>4096</date>
<datetime>1963-03-03T09:46:40</datetime>
+ <time>01:49:21</time>
</TESTXML>
<TESTXML>
<recnum>9</recnum>
@@ -89,6 +98,7 @@
<pctincr>0.8888888889</pctincr>
<date>6561</date>
<datetime>1991-09-09T01:46:40</datetime>
+ <time>05:28:03</time>
</TESTXML>
<TESTXML>
<recnum>10</recnum>
@@ -98,5 +108,6 @@
<pctincr>0.9</pctincr>
<date>10000</date>
<datetime>2276-11-19T17:46:40</datetime>
+ <time>16:24:09</time>
</TESTXML>
</TABLE>
diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.sas7bdat b/tika-parsers/src/test/resources/test-documents/test-columnar.sas7bdat
index 553c45c..33ee412 100644
Binary files a/tika-parsers/src/test/resources/test-documents/test-columnar.sas7bdat and b/tika-parsers/src/test/resources/test-documents/test-columnar.sas7bdat differ
diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.xls b/tika-parsers/src/test/resources/test-documents/test-columnar.xls
new file mode 100644
index 0000000..1d7b2cf
Binary files /dev/null and b/tika-parsers/src/test/resources/test-documents/test-columnar.xls differ
diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.xlsx b/tika-parsers/src/test/resources/test-documents/test-columnar.xlsx
new file mode 100644
index 0000000..58ffd47
Binary files /dev/null and b/tika-parsers/src/test/resources/test-documents/test-columnar.xlsx differ
diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.xpt b/tika-parsers/src/test/resources/test-documents/test-columnar.xpt
index d908228..bbb59b5 100644
Binary files a/tika-parsers/src/test/resources/test-documents/test-columnar.xpt and b/tika-parsers/src/test/resources/test-documents/test-columnar.xpt differ
diff --git a/tika-parsers/src/test/resources/test-documents/testSAS2.sas b/tika-parsers/src/test/resources/test-documents/testSAS2.sas
index bc8c1fe..96a9121 100644
--- a/tika-parsers/src/test/resources/test-documents/testSAS2.sas
+++ b/tika-parsers/src/test/resources/test-documents/testSAS2.sas
@@ -2,6 +2,7 @@ data testing;
begin=0;
end=10;
msg="This is row %x of %y";
+
do i = begin to end by 1;
drop msg begin end i;
recnum=i;
@@ -11,10 +12,13 @@ format pctdone percent8.0;
format pctincr percent7.1;
pctdone=divide(i,end);
pctincr=divide(i-1,i);
+/* Days / Seconds since Epoch / Seconds since midnight */
format date ddmmyyd10.;
format datetime datetime.;
+format time time.;
date=i**4;
datetime=10**i;
+time=3**i;
output;
end;
label recnum="Record Number"
@@ -24,10 +28,11 @@ label recnum="Record Number"
pctincr="Percent Increment";
run;
-libname out '/home/tika/testing/sas';
-libname outxpt XPORT '/home/tika/testing/sas/testing.xpt';
-libname outv6 v6 '/home/tika/testing/sas';
-libname outxml xmlv2 '/home/tika/testing/sas';
+%let outpath = /home/tika/testing/sas;
+libname out "&outpath";
+libname outxpt XPORT "&outpath./testing.xpt";
+libname outv6 v6 "&outpath";
+libname outxml xmlv2 "&outpath";
data out.testing;
set testing;
@@ -46,3 +51,17 @@ run;
proc print data=testing;
run;
+proc export data=testing label
+ outfile="&outpath./testing.csv"
+ dbms=CSV REPLACE;
+putnames=yes;
+run;
+
+proc export data=testing label
+ outfile="&outpath./testing.xls"
+ dbms=XLS;
+run;
+proc export data=testing label
+ outfile="&outpath./testing.xlsx"
+ dbms=XLSX;
+run;