You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/07/27 14:16:44 UTC
[tika] 10/30: Stub a unit test for TIKA-2641
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git
commit aaa78a3d665d8c120e8eadbc26f3d86958042c05
Author: Nick Burch <ni...@gagravarr.org>
AuthorDate: Thu May 3 21:56:07 2018 +0100
Stub a unit test for TIKA-2641
---
.../org/apache/tika/parser/TabularFormatsTest.java | 71 ++++++++++++++++++++++
1 file changed, 71 insertions(+)
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java
new file mode 100644
index 0000000..61fcca2
--- /dev/null
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser;
+
+
+import org.apache.tika.TikaTest;
+import org.junit.Test;
+
+/**
+ * Ensure that our various Table-based formats produce consistent,
+ * broadly similar output. Only the CSV case is wired up so far.
+ * This is mostly focused on the XHTML output; the assertion helpers below are still stubs.
+ */
+public class TabularFormatsTest extends TikaTest {
+ protected static final String[] headers = new String[] { // expected column headers, in column order
+ "String (Num=)","Number","Date","Datetime","Number"
+ };
+ /**
+ * Expected values, by <em>column</em>: one inner array per entry in {@link #headers}.
+ */
+ protected static final String[][] table = new String[][] {
+ // TODO All values - only the first expected value of each column is listed so far
+ new String[] {
+ "Num=0"
+ },
+ new String[] {
+ "0.0"
+ },
+ new String[] {
+ "1899-12-30"
+ },
+ new String[] {
+ "1900-01-01 11:00:00"
+ },
+ new String[] {
+ ""
+ }
+ };
+
+ protected void assertHeaders(String xml, boolean isTH) {
+ // TODO Check for the first row, then TR or TH. No-op stub: xml and isTH are ignored for now. NOTE(review): "TR or TH" presumably means TD or TH cells, chosen by isTH - confirm.
+ }
+ protected void assertContents(String xml, boolean hasHeader) {
+ // TODO Check the rows. No-op stub: xml and hasHeader are ignored for now.
+ }
+
+ @Test
+ public void testCSV() throws Exception {
+ XMLResult result = getXML("test-columnar.csv"); // getXML / XMLResult are not defined in this file (presumably inherited from TikaTest)
+ String xml = result.xml;
+ // Both helpers are currently empty, so this test only verifies that getXML completes without throwing.
+ assertHeaders(xml, false); // isTH = false
+ assertContents(xml, true); // hasHeader = true
+ }
+ // TODO SAS7BDAT
+ // TODO Other formats
+}