You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by bo...@apache.org on 2016/01/06 04:50:57 UTC
svn commit: r1723223 [15/32] - in /tika/branches/2.x:
tika-core/src/test/resources/META-INF/
tika-core/src/test/resources/META-INF/services/ tika-parser-modules/
tika-parser-modules/tika-advanced-module/
tika-parser-modules/tika-advanced-module/src/ ti...
Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmItspHeader.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmItspHeader.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmItspHeader.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmItspHeader.java Wed Jan 6 03:50:50 2016
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.parser.chm.accessor.ChmItsfHeader;
+import org.apache.tika.parser.chm.accessor.ChmItspHeader;
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Checks each public accessor of {@link ChmItspHeader} against the
+ * verification points held in {@link TestParameters}.
+ */
+public class TestChmItspHeader {
+    private ChmItspHeader chmItspHeader = null;
+
+    /**
+     * Parses the ITSF header from the start of the sample CHM bytes, then
+     * slices the ITSP header at the ITSF directory offset and parses it into
+     * the instance under test.
+     */
+    @Before
+    public void setUp() throws Exception {
+        byte[] chm = TestParameters.chmData;
+
+        ChmItsfHeader itsf = new ChmItsfHeader();
+        byte[] itsfBytes = ChmCommons.copyOfRange(chm, 0,
+                ChmConstants.CHM_ITSF_V3_LEN - 1);
+        itsf.parse(itsfBytes, itsf);
+
+        chmItspHeader = new ChmItspHeader();
+        int itspStart = (int) itsf.getDirOffset();
+        byte[] itspBytes = ChmCommons.copyOfRange(chm, itspStart,
+                itspStart + ChmConstants.CHM_ITSP_V1_LEN);
+        chmItspHeader.parse(itspBytes, chmItspHeader);
+    }
+
+    @Test
+    public void testGetBlock_len() {
+        assertEquals(TestParameters.VP_BLOCK_LENGTH, chmItspHeader.getBlock_len());
+    }
+
+    @Test
+    public void testGetBlockidx_intvl() {
+        assertEquals(TestParameters.VP_BLOCK_INDEX_INTERVAL, chmItspHeader.getBlockidx_intvl());
+    }
+
+    @Test
+    public void testGetHeader_len() {
+        assertEquals(TestParameters.VP_ITSP_HEADER_LENGTH, chmItspHeader.getHeader_len());
+    }
+
+    @Test
+    public void testGetIndex_depth() {
+        assertEquals(TestParameters.VP_INDEX_DEPTH, chmItspHeader.getIndex_depth());
+    }
+
+    @Test
+    public void testGetIndex_head() {
+        assertEquals(TestParameters.VP_INDEX_HEAD, chmItspHeader.getIndex_head());
+    }
+
+    @Test
+    public void testGetIndex_root() {
+        assertEquals(TestParameters.VP_INDEX_ROOT, chmItspHeader.getIndex_root());
+    }
+
+    @Test
+    public void testGetLang_id() {
+        assertEquals(TestParameters.VP_LANGUAGE_ID, chmItspHeader.getLang_id());
+    }
+
+    @Test
+    public void testGetNum_blocks() {
+        assertEquals(TestParameters.VP_UNKNOWN_NUM_BLOCKS, chmItspHeader.getNum_blocks());
+    }
+
+    @Test
+    public void testGetUnknown_000c() {
+        assertEquals(TestParameters.VP_ITSP_UNKNOWN_000C, chmItspHeader.getUnknown_000c());
+    }
+
+    @Test
+    public void testGetUnknown_0024() {
+        assertEquals(TestParameters.VP_ITSP_UNKNOWN_0024, chmItspHeader.getUnknown_0024());
+    }
+
+    /* Exercises getUnknown_002c(); the method name lacks the trailing "c". */
+    @Test
+    public void testGetUnknown_002() {
+        assertEquals(TestParameters.VP_ITSP_UNKNOWN_002C, chmItspHeader.getUnknown_002c());
+    }
+
+    /* Only the length of the returned array is verified. */
+    @Test
+    public void testGetUnknown_0044() {
+        int actualLength = chmItspHeader.getUnknown_0044().length;
+        assertEquals(TestParameters.VP_ITSP_BYTEARR_LEN, actualLength);
+    }
+
+    @Test
+    public void testGetVersion() {
+        assertEquals(TestParameters.VP_ITSP_VERSION, chmItspHeader.getVersion());
+    }
+
+    @Test
+    public void testGetSignature() {
+        String signature = new String(chmItspHeader.getSignature(), UTF_8);
+        assertEquals(TestParameters.VP_ISTP_SIGNATURE, signature);
+    }
+
+    /* Only the length of the returned array is verified. */
+    @Test
+    public void testGetSystem_uuid() {
+        int actualLength = chmItspHeader.getSystem_uuid().length;
+        assertEquals(TestParameters.VP_ITSP_BYTEARR_LEN, actualLength);
+    }
+
+    @Test
+    public void testToString() {
+        String rendered = chmItspHeader.toString();
+        assertTrue(rendered.contains(TestParameters.VP_ISTP_SIGNATURE));
+    }
+
+    @After
+    public void tearDown() throws Exception {
+        chmItspHeader = null;
+    }
+
+}
Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxState.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxState.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxState.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxState.java Wed Jan 6 03:50:50 2016
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet;
+import org.apache.tika.parser.chm.accessor.ChmItsfHeader;
+import org.apache.tika.parser.chm.accessor.ChmItspHeader;
+import org.apache.tika.parser.chm.accessor.ChmLzxcControlData;
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.apache.tika.parser.chm.lzx.ChmLzxState;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests construction and string rendering of {@link ChmLzxState}, using the
+ * window size read from the LZXC control data of the sample CHM bytes.
+ */
+public class TestChmLzxState {
+    private ChmLzxState chmLzxState;
+    private int windowSize;
+
+    /**
+     * Parses the ITSF and ITSP headers, builds the directory listing, locates
+     * the LZXC block and parses it as control data to obtain
+     * {@code windowSize}.
+     */
+    @Before
+    public void setUp() throws Exception {
+        byte[] data = TestParameters.chmData;
+
+        /* Creates and parses itsf header */
+        ChmItsfHeader chmItsHeader = new ChmItsfHeader();
+        chmItsHeader.parse(ChmCommons.copyOfRange(data, 0,
+                ChmConstants.CHM_ITSF_V3_LEN - 1), chmItsHeader);
+        /* Creates and parses itsp block */
+        ChmItspHeader chmItspHeader = new ChmItspHeader();
+        chmItspHeader.parse(ChmCommons.copyOfRange(data,
+                (int) chmItsHeader.getDirOffset(),
+                (int) chmItsHeader.getDirOffset()
+                        + ChmConstants.CHM_ITSP_V1_LEN), chmItspHeader);
+
+        /* Creating instance of ChmDirListingContainer */
+        ChmDirectoryListingSet chmDirListCont = new ChmDirectoryListingSet(
+                data, chmItsHeader, chmItspHeader);
+        int indexOfControlData = ChmCommons.indexOf(
+                chmDirListCont.getDirectoryListingEntryList(),
+                ChmConstants.CONTROL_DATA);
+
+        int indexOfResetTable = ChmCommons.indexOfResetTableBlock(data,
+                ChmConstants.LZXC.getBytes(UTF_8));
+        // Fail here with a clear message rather than handing a null chunk
+        // to parse() below when the LZXC block cannot be located.
+        assertTrue("LZXC block not found in test CHM data",
+                indexOfResetTable > 0);
+        byte[] dir_chunk = ChmCommons.copyOfRange(data, indexOfResetTable,
+                indexOfResetTable
+                        + chmDirListCont.getDirectoryListingEntryList()
+                                .get(indexOfControlData).getLength());
+
+        ChmLzxcControlData clcd = new ChmLzxcControlData();
+        clcd.parse(dir_chunk, clcd);
+        windowSize = (int) clcd.getWindowSize();
+    }
+
+    @Test
+    public void testChmLzxStateConstructor() throws TikaException {
+        chmLzxState = new ChmLzxState(windowSize);
+        assertNotNull(chmLzxState);
+    }
+
+    /*
+     * Builds its own state instead of invoking another test method, so the
+     * test does not depend on testChmLzxStateConstructor().
+     */
+    @Test
+    public void testToString() throws TikaException {
+        chmLzxState = new ChmLzxState(windowSize);
+        assertTrue(chmLzxState.toString().length() > 20);
+    }
+
+    // TODO add more tests
+
+}
Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcControlData.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcControlData.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcControlData.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcControlData.java Wed Jan 6 03:50:50 2016
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet;
+import org.apache.tika.parser.chm.accessor.ChmItsfHeader;
+import org.apache.tika.parser.chm.accessor.ChmItspHeader;
+import org.apache.tika.parser.chm.accessor.ChmLzxcControlData;
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests all public methods of ChmLzxcControlData block
+ */
+public class TestChmLzxcControlData {
+    private ChmLzxcControlData chmLzxcControlData = null;
+
+    /**
+     * Parses the ITSF and ITSP headers, builds the directory listing, locates
+     * the LZXC block in the sample CHM bytes and parses it into the control
+     * data instance under test.
+     */
+    @Before
+    public void setUp() throws Exception {
+        byte[] data = TestParameters.chmData;
+        /* Creates and parses itsf header */
+        ChmItsfHeader chmItsHeader = new ChmItsfHeader();
+        chmItsHeader.parse(ChmCommons.copyOfRange(data, 0,
+                ChmConstants.CHM_ITSF_V3_LEN - 1), chmItsHeader);
+        /* Creates and parses itsp block */
+        ChmItspHeader chmItspHeader = new ChmItspHeader();
+        chmItspHeader.parse(ChmCommons.copyOfRange(data,
+                (int) chmItsHeader.getDirOffset(),
+                (int) chmItsHeader.getDirOffset()
+                        + ChmConstants.CHM_ITSP_V1_LEN), chmItspHeader);
+        /* Creating instance of ChmDirListingContainer */
+        ChmDirectoryListingSet chmDirListCont = new ChmDirectoryListingSet(
+                data, chmItsHeader, chmItspHeader);
+        int indexOfControlData = chmDirListCont.getControlDataIndex();
+
+        int indexOfResetTable = ChmCommons.indexOfResetTableBlock(data,
+                ChmConstants.LZXC.getBytes(UTF_8));
+        // Fail here with a clear message rather than handing a null chunk
+        // to parse() below when the LZXC block cannot be located.
+        assertTrue("LZXC block not found in test CHM data",
+                indexOfResetTable > 0);
+        byte[] dir_chunk = ChmCommons.copyOfRange(data, indexOfResetTable,
+                indexOfResetTable
+                        + chmDirListCont.getDirectoryListingEntryList()
+                                .get(indexOfControlData).getLength());
+
+        /* Creates and parses control block */
+        chmLzxcControlData = new ChmLzxcControlData();
+        chmLzxcControlData.parse(dir_chunk, chmLzxcControlData);
+
+    }
+
+    @Test
+    public void testConstructorNotNull() {
+        assertNotNull(chmLzxcControlData);
+    }
+
+    @Test
+    public void testGetResetInterval() {
+        assertEquals(TestParameters.VP_RESET_INTERVAL,
+                chmLzxcControlData.getResetInterval());
+    }
+
+    @Test
+    public void testGetSize() {
+        assertEquals(TestParameters.VP_CONTROL_DATA_SIZE,
+                chmLzxcControlData.getSize());
+    }
+
+    @Test
+    public void testGetUnknown_18() {
+        assertEquals(TestParameters.VP_UNKNOWN_18,
+                chmLzxcControlData.getUnknown_18());
+    }
+
+    @Test
+    public void testGetVersion() {
+        assertEquals(TestParameters.VP_CONTROL_DATA_VERSION,
+                chmLzxcControlData.getVersion());
+    }
+
+    @Test
+    public void testGetWindowSize() {
+        assertEquals(TestParameters.VP_WINDOW_SIZE,
+                chmLzxcControlData.getWindowSize());
+    }
+
+    @Test
+    public void testGetWindowsPerReset() {
+        assertEquals(TestParameters.VP_WINDOWS_PER_RESET,
+                chmLzxcControlData.getWindowsPerReset());
+    }
+
+    @Test
+    public void testGetToString() {
+        assertTrue(chmLzxcControlData.toString().contains(
+                TestParameters.VP_CONTROL_DATA_SIGNATURE));
+    }
+
+    /*
+     * Compares the decoded signature text, not just its length, in the same
+     * way TestChmItspHeader checks the ITSP signature.
+     */
+    @Test
+    public void testGetSignature() {
+        assertEquals(TestParameters.VP_CONTROL_DATA_SIGNATURE, new String(
+                chmLzxcControlData.getSignature(), UTF_8));
+    }
+
+    /*
+     * Length-only check of the signature. The misspelled method name is kept
+     * so anything that selects this test by name keeps working.
+     */
+    @Test
+    public void testGetSignaure() {
+        assertEquals(
+                TestParameters.VP_CONTROL_DATA_SIGNATURE.getBytes(UTF_8).length,
+                chmLzxcControlData.getSignature().length);
+    }
+
+}
Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcResetTable.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcResetTable.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcResetTable.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcResetTable.java Wed Jan 6 03:50:50 2016
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.chm;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet;
+import org.apache.tika.parser.chm.accessor.ChmItsfHeader;
+import org.apache.tika.parser.chm.accessor.ChmItspHeader;
+import org.apache.tika.parser.chm.accessor.ChmLzxcControlData;
+import org.apache.tika.parser.chm.accessor.ChmLzxcResetTable;
+import org.apache.tika.parser.chm.assertion.ChmAssert;
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests the accessors of {@link ChmLzxcResetTable} against the verification
+ * points held in {@link TestParameters}.
+ */
+public class TestChmLzxcResetTable {
+    private ChmLzxcResetTable chmLzxcResetTable = null;
+
+    /**
+     * Parses the ITSF and ITSP headers, builds the directory listing, parses
+     * the LZXC control data, then slices the reset-table entry out of the
+     * sample CHM bytes and parses it into the instance under test.
+     */
+    @Before
+    public void setUp() throws Exception {
+        byte[] data = TestParameters.chmData;
+        /* Creates and parses itsf header */
+        ChmItsfHeader chmItsfHeader = new ChmItsfHeader();
+        chmItsfHeader.parse(ChmCommons.copyOfRange(data, 0,
+                ChmConstants.CHM_ITSF_V3_LEN - 1), chmItsfHeader);
+        /* Creates and parses itsp block */
+        ChmItspHeader chmItspHeader = new ChmItspHeader();
+        chmItspHeader.parse(ChmCommons.copyOfRange(data,
+                (int) chmItsfHeader.getDirOffset(),
+                (int) chmItsfHeader.getDirOffset()
+                        + ChmConstants.CHM_ITSP_V1_LEN), chmItspHeader);
+        /* Creating instance of ChmDirListingContainer */
+        ChmDirectoryListingSet chmDirListCont = new ChmDirectoryListingSet(
+                data, chmItsfHeader, chmItspHeader);
+        int indexOfControlData = chmDirListCont.getControlDataIndex();
+
+        /* Byte offset of the LZXC block inside the raw data */
+        int lzxcBlockOffset = ChmCommons.indexOfResetTableBlock(data,
+                ChmConstants.LZXC.getBytes(UTF_8));
+        // Fail here with a clear message rather than handing a null chunk
+        // to parse() below when the LZXC block cannot be located.
+        assertTrue("LZXC block not found in test CHM data",
+                lzxcBlockOffset > 0);
+        byte[] controlDataChunk = ChmCommons.copyOfRange(data,
+                lzxcBlockOffset,
+                lzxcBlockOffset
+                        + chmDirListCont.getDirectoryListingEntryList()
+                                .get(indexOfControlData).getLength());
+
+        /* Creates and parses control block */
+        ChmLzxcControlData chmLzxcControlData = new ChmLzxcControlData();
+        chmLzxcControlData.parse(controlDataChunk, chmLzxcControlData);
+
+        /* Position of the reset-table entry in the directory listing */
+        int resetTableIndex = chmDirListCont.getResetTableIndex();
+        chmLzxcResetTable = new ChmLzxcResetTable();
+
+        int startIndex = (int) chmDirListCont.getDataOffset()
+                + chmDirListCont.getDirectoryListingEntryList()
+                        .get(resetTableIndex).getOffset();
+
+        ChmAssert.assertCopyingDataIndex(startIndex, data.length);
+
+        byte[] resetTableChunk = ChmCommons.copyOfRange(
+                data,
+                startIndex,
+                startIndex
+                        + chmDirListCont.getDirectoryListingEntryList()
+                                .get(resetTableIndex).getLength());
+
+        chmLzxcResetTable.parse(resetTableChunk, chmLzxcResetTable);
+    }
+
+    @Test
+    public void testGetBlockAddress() {
+        assertEquals(TestParameters.VP_RESET_TABLE_BA,
+                chmLzxcResetTable.getBlockAddress().length);
+    }
+
+    @Test
+    public void testGetBlockCount() {
+        assertEquals(TestParameters.VP_RESET_TABLE_BA,
+                chmLzxcResetTable.getBlockCount());
+    }
+
+    @Test
+    public void testGetBlockLen() {
+        assertEquals(TestParameters.VP_RES_TBL_BLOCK_LENGTH,
+                chmLzxcResetTable.getBlockLen());
+    }
+
+    @Test
+    public void testGetCompressedLen() {
+        assertEquals(TestParameters.VP_RES_TBL_COMPR_LENGTH,
+                chmLzxcResetTable.getCompressedLen());
+    }
+
+    @Test
+    public void testGetTableOffset() {
+        assertEquals(TestParameters.VP_TBL_OFFSET,
+                chmLzxcResetTable.getTableOffset());
+    }
+
+    @Test
+    public void testGetUncompressedLen() {
+        assertEquals(TestParameters.VP_RES_TBL_UNCOMP_LENGTH,
+                chmLzxcResetTable.getUncompressedLen());
+    }
+
+    @Test
+    public void testGetUnknown() {
+        assertEquals(TestParameters.VP_RES_TBL_UNKNOWN,
+                chmLzxcResetTable.getUnknown());
+    }
+
+    @Test
+    public void testGetVersion() {
+        assertEquals(TestParameters.VP_RES_TBL_VERSION,
+                chmLzxcResetTable.getVersion());
+    }
+
+    @Test
+    public void testToString() {
+        assertTrue(chmLzxcResetTable.toString().length() > 0);
+    }
+
+    // TODO: add setters to be tested
+}
Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestDirectoryListingEntry.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestDirectoryListingEntry.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestDirectoryListingEntry.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestDirectoryListingEntry.java Wed Jan 6 03:50:50 2016
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import org.apache.tika.parser.chm.accessor.DirectoryListingEntry;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests public methods of the DirectoryListingEntry class
+ *
+ * @author olegt
+ *
+ */
+public class TestDirectoryListingEntry {
+ private DirectoryListingEntry dle = null;
+
+ @Before
+ public void setUp() throws Exception {
+ dle = new DirectoryListingEntry(TestParameters.nameLength,
+ TestParameters.entryName, TestParameters.entryType,
+ TestParameters.offset, TestParameters.length);
+ }
+
+ @Test
+ public void testDefaultConstructor() {
+ assertNotNull(dle);
+ }
+
+ @Test
+ public void testParamConstructor() {
+ assertEquals(TestParameters.nameLength, dle.getNameLength());
+ assertEquals(TestParameters.entryName, dle.getName());
+ assertEquals(TestParameters.entryType, dle.getEntryType());
+ assertEquals(TestParameters.offset, dle.getOffset());
+ assertEquals(TestParameters.length, dle.getLength());
+ }
+
+ @Test
+ public void testToString() {
+ assertNotNull(dle.toString());
+ }
+
+ @Test
+ public void testGetNameLength() {
+ assertEquals(TestParameters.nameLength, dle.getNameLength());
+ }
+
+ @Test
+ public void testGetName() {
+ assertEquals(TestParameters.entryName, dle.getName());
+ }
+
+ @Test
+ public void testGetEntryType() {
+ assertEquals(TestParameters.entryType, dle.getEntryType());
+ }
+
+ @Test
+ public void testGetOffset() {
+ assertEquals(TestParameters.offset, dle.getOffset());
+ }
+
+ @Test
+ public void testGetLength() {
+ assertEquals(TestParameters.length, dle.getLength());
+ }
+}
Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestParameters.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestParameters.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestParameters.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestParameters.java Wed Jan 6 03:50:50 2016
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.parser.chm.core.ChmCommons.EntryType;
+
+/**
+ * Holds test parameters such as verification points
+ */
+public class TestParameters {
+    /* Prevents initialization */
+    private TestParameters() {
+    }
+
+    /* Tests values */
+    static final int nameLength = 5;
+    static final String entryName = TestParameters.class.getName();
+    static EntryType entryType = EntryType.COMPRESSED;
+    static final int offset = 3;
+    static final int length = 20;
+    static final int NTHREADS = 2;
+
+    static final int BUFFER_SIZE = 16384;
+
+    /* Raw bytes of the sample CHM file, loaded once at class initialization */
+    static final byte[] chmData = readResource("/test-documents/testChm.chm");
+
+    /**
+     * Reads a classpath resource fully into memory.
+     *
+     * @param name resource name, resolved via
+     *             {@code TestParameters.class.getResourceAsStream(name)}
+     * @return the complete content of the resource
+     * @throws IllegalStateException if the resource is not on the classpath
+     * @throws RuntimeException if reading the resource fails
+     */
+    private static byte[] readResource(String name) {
+        try (InputStream stream = TestParameters.class.getResourceAsStream(name)) {
+            // getResourceAsStream returns null for a missing resource; report
+            // the name instead of letting an anonymous NPE escape from the
+            // static initializer.
+            if (stream == null) {
+                throw new IllegalStateException(
+                        "Test resource not found on classpath: " + name);
+            }
+            return IOUtils.toByteArray(stream);
+        } catch (IOException e) {
+            throw new RuntimeException("Failed to read test resource: " + name, e);
+        }
+    }
+
+    /* Verification points */
+    static final String VP_CHM_MIME_TYPE = "Content-Type=application/x-chm";
+    static final String VP_EXTRACTED_TEXT = "The TCard method accepts only numeric arguments";
+    static final String VP_ISTF_SIGNATURE = "ITSF";
+    static final String VP_ISTP_SIGNATURE = "ITSP";
+    static final String VP_PMGL_SIGNATURE = "PMGL";
+    static final String VP_CONTROL_DATA_SIGNATURE = "LZXC";
+
+    static final int VP_DIRECTORY_LENGTH = 4180;
+    static final int VP_DATA_OFFSET_LENGTH = 4300;
+    static final int VP_DIRECTORY_OFFSET = 120;
+    static final int VP_ITSF_HEADER_LENGTH = 96;
+    static final int VP_LANGUAGE_ID = 1033;
+    static final int VP_LAST_MODIFIED = 1042357880;
+    static final int VP_UNKNOWN_000C = 1;
+    static final int VP_UNKNOWN_LEN = 24;
+    static final int VP_UNKNOWN_OFFSET = 96;
+    static final int VP_VERSION = 3;
+    static final int VP_BLOCK_LENGTH = 4096;
+    static final int VP_BLOCK_INDEX_INTERVAL = 2;
+    static final int VP_ITSP_HEADER_LENGTH = 84;
+    static final int VP_INDEX_DEPTH = 1;
+    static final int VP_INDEX_HEAD = 0;
+    static final int VP_INDEX_ROOT = -1;
+    static final int VP_UNKNOWN_NUM_BLOCKS = -1;
+    static final int VP_ITSP_UNKNOWN_000C = 10;
+    static final int VP_ITSP_UNKNOWN_0024 = 0;
+    static final int VP_ITSP_UNKNOWN_002C = 1;
+    static final int VP_ITSP_BYTEARR_LEN = 16;
+    static final int VP_ITSP_VERSION = 1;
+    static final int VP_RESET_INTERVAL = 2;
+    static final int VP_CONTROL_DATA_SIZE = 6;
+    static final int VP_UNKNOWN_18 = 0;
+    static final int VP_CONTROL_DATA_VERSION = 2;
+    static final int VP_WINDOW_SIZE = 65536;
+    static final int VP_WINDOWS_PER_RESET = 1;
+    static final int VP_CHM_ENTITIES_NUMBER = 100; //updated by Hawking
+    static final int VP_PMGI_FREE_SPACE = 3;
+    static final int VP_PMGL_BLOCK_NEXT = -1;
+    static final int VP_PMGL_BLOCK_PREV = -1;
+    static final int VP_PMGL_FREE_SPACE = 1644;
+    static final int VP_PMGL_UNKNOWN_008 = 0;
+    static final int VP_RESET_TABLE_BA = 12;
+    static final int VP_RES_TBL_BLOCK_LENGTH = 32768;
+    static final int VP_RES_TBL_COMPR_LENGTH = 177408;
+    static final int VP_RES_TBL_UNCOMP_LENGTH = 383786;
+    static final int VP_TBL_OFFSET = 40;
+    static final int VP_RES_TBL_UNKNOWN = 8;
+    static final int VP_RES_TBL_VERSION = 2;
+}
Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestPmgiHeader.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestPmgiHeader.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestPmgiHeader.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestPmgiHeader.java Wed Jan 6 03:50:50 2016
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.parser.chm.accessor.ChmPmgiHeader;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestPmgiHeader {
+ ChmPmgiHeader chmPmgiHeader = null;
+
+ @Before
+ public void setUp() throws Exception {
+ byte[] data = TestParameters.chmData;
+ chmPmgiHeader = new ChmPmgiHeader();
+ chmPmgiHeader.parse(data, chmPmgiHeader);
+ }
+
+ @Test
+ public void testToString() {
+ assertTrue((chmPmgiHeader != null) && (chmPmgiHeader.toString().length() > 0));
+ }
+
+ @Test
+ public void testGetFreeSpace() {
+ assertEquals(TestParameters.VP_PMGI_FREE_SPACE, chmPmgiHeader.getFreeSpace());
+ }
+}
Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java Wed Jan 6 03:50:50 2016
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.parser.chm.accessor.ChmPmglHeader;
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestPmglHeader {
+ ChmPmglHeader chmPmglHeader = null;
+
+ @Before
+ public void setUp() throws Exception {
+ byte[] data = TestParameters.chmData;
+ chmPmglHeader = new ChmPmglHeader();
+ chmPmglHeader.parse(ChmCommons.copyOfRange(data,
+ ChmConstants.START_PMGL, ChmConstants.START_PMGL
+ + ChmConstants.CHM_PMGL_LEN + 10), chmPmglHeader);
+ }
+
+ @Test
+ public void testToString() {
+ assertTrue((chmPmglHeader != null)
+ && chmPmglHeader.toString().length() > 0);
+ }
+
+ @Test
+ public void testChmPmglHeaderGet() {
+ assertEquals(TestParameters.VP_PMGL_SIGNATURE, new String(
+ chmPmglHeader.getSignature(), UTF_8));
+ }
+
+ @Test
+ public void testGetBlockNext() {
+ assertEquals(TestParameters.VP_PMGL_BLOCK_NEXT,
+ chmPmglHeader.getBlockNext());
+ }
+
+ @Test
+ public void testGetBlockPrev() {
+ assertEquals(TestParameters.VP_PMGL_BLOCK_PREV,
+ chmPmglHeader.getBlockPrev());
+ }
+
+ @Test
+ public void testGetFreeSpace() {
+ assertEquals(TestParameters.VP_PMGL_FREE_SPACE,
+ chmPmglHeader.getFreeSpace());
+ }
+
+ @Test
+ public void testGetUnknown0008() {
+ assertEquals(TestParameters.VP_PMGL_UNKNOWN_008,
+ chmPmglHeader.getUnknown0008());
+ }
+}
Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java Wed Jan 6 03:50:50 2016
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import java.net.URL;
+
+import org.apache.tika.TikaTest.TrackingHandler;
+import org.apache.tika.extractor.ContainerExtractor;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.mime.MediaType;
+
+/**
+ * Base class for tests which check that the POI powered parsers can
+ * extract the resources embedded in their documents. It supplies the
+ * media type constants the subclasses compare against, plus helpers to
+ * open a test document and run a {@link ContainerExtractor} over it.
+ */
+public abstract class AbstractPOIContainerExtractionTest {
+    // Microsoft Office document types (OLE2 and OOXML)
+    public static final MediaType TYPE_DOC = MediaType.application("msword");
+    public static final MediaType TYPE_PPT = MediaType.application("vnd.ms-powerpoint");
+    public static final MediaType TYPE_XLS = MediaType.application("vnd.ms-excel");
+    public static final MediaType TYPE_DOCX = MediaType.application("vnd.openxmlformats-officedocument.wordprocessingml.document");
+    public static final MediaType TYPE_PPTX = MediaType.application("vnd.openxmlformats-officedocument.presentationml.presentation");
+    public static final MediaType TYPE_XLSX = MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+    public static final MediaType TYPE_MSG = MediaType.application("vnd.ms-outlook");
+
+    // Generic document types
+    public static final MediaType TYPE_TXT = MediaType.text("plain");
+    public static final MediaType TYPE_PDF = MediaType.application("pdf");
+
+    // Image and metafile types
+    public static final MediaType TYPE_JPG = MediaType.image("jpeg");
+    public static final MediaType TYPE_GIF = MediaType.image("gif");
+    public static final MediaType TYPE_PNG = MediaType.image("png");
+    public static final MediaType TYPE_EMF = MediaType.application("x-emf");
+    public static final MediaType TYPE_WMF = MediaType.application("x-msmetafile");
+
+    /**
+     * Opens a document from the {@code /test-documents/} classpath folder.
+     * Fails the calling test if the resource cannot be located.
+     *
+     * @param filename name of the file inside {@code /test-documents/}
+     * @return a stream over the resource; the caller is expected to close it
+     */
+    protected static TikaInputStream getTestFile(String filename) throws Exception {
+        String resourcePath = "/test-documents/" + filename;
+        URL location = AbstractPOIContainerExtractionTest.class.getResource(resourcePath);
+        assertNotNull(filename + " not found", location);
+        return TikaInputStream.get(location);
+    }
+
+    /**
+     * Runs the extractor over the named test file and hands back the
+     * handler that recorded what happened, so the caller can inspect it.
+     *
+     * @param filename  name of the file inside {@code /test-documents/}
+     * @param extractor extractor under test; must report the file as supported
+     * @param recurse   when true the extractor itself is passed along as the
+     *                  recursing extractor, otherwise {@code null} is passed
+     * @return the handler that was given to the extractor
+     */
+    protected TrackingHandler process(String filename, ContainerExtractor extractor, boolean recurse) throws Exception {
+        try (TikaInputStream document = getTestFile(filename)) {
+            assertEquals(true, extractor.isSupported(document));
+
+            TrackingHandler tracker = new TrackingHandler();
+            ContainerExtractor recurseWith = recurse ? extractor : null;
+            extractor.extract(document, recurseWith, tracker);
+            return tracker;
+        }
+    }
+}
Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java Wed Jan 6 03:50:50 2016
@@ -0,0 +1,443 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft;
+
+import static org.apache.tika.TikaTest.assertContains;
+import static org.apache.tika.TikaTest.assertNotContained;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.InputStream;
+import java.util.Locale;
+
+import org.apache.tika.detect.DefaultDetector;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.exception.EncryptedDocumentException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
+import org.apache.tika.metadata.OfficeOpenXMLExtended;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.PasswordProvider;
+import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+
+public class ExcelParserTest {
+ /**
+  * Parses testEXCEL.xls with {@link OfficeParser} and checks the content
+  * type, the core metadata (under both the current and the legacy keys)
+  * and a sample of the extracted text.
+  */
+ @Test
+ @SuppressWarnings("deprecation") // Checks legacy Tika-1.0 style metadata keys
+ public void testExcelParser() throws Exception {
+     try (InputStream xls = ExcelParserTest.class.getResourceAsStream(
+             "/test-documents/testEXCEL.xls")) {
+         ParseContext usContext = new ParseContext();
+         usContext.set(Locale.class, Locale.US);
+         Metadata meta = new Metadata();
+         ContentHandler body = new BodyContentHandler();
+         new OfficeParser().parse(xls, body, meta, usContext);
+
+         assertEquals("application/vnd.ms-excel", meta.get(Metadata.CONTENT_TYPE));
+         assertEquals("Simple Excel document", meta.get(TikaCoreProperties.TITLE));
+         assertEquals("Keith Bennett", meta.get(TikaCoreProperties.CREATOR));
+         assertEquals("Keith Bennett", meta.get(Metadata.AUTHOR));
+
+         // Created: Mon Oct 01 17:13:56 BST 2007
+         assertEquals("2007-10-01T16:13:56Z", meta.get(TikaCoreProperties.CREATED));
+         assertEquals("2007-10-01T16:13:56Z", meta.get(Metadata.CREATION_DATE));
+
+         // Modified: Mon Oct 01 17:31:43 BST 2007
+         assertEquals("2007-10-01T16:31:43Z", meta.get(TikaCoreProperties.MODIFIED));
+         assertEquals("2007-10-01T16:31:43Z", meta.get(Metadata.DATE));
+
+         String text = body.toString();
+         assertContains("Sample Excel Worksheet", text);
+         assertContains("Numbers and their Squares", text);
+         assertContains("\t\tNumber\tSquare", text);
+         // Whole numbers must come out without a trailing ".0"
+         assertContains("9", text);
+         assertNotContained("9.0", text);
+         assertContains("196", text);
+         assertNotContained("196.0", text);
+     }
+ }
+
+ /**
+  * Parses testEXCEL-formats.xls with {@link OfficeParser} under the US
+  * locale and checks that cells carrying number, currency, scientific,
+  * percentage, time, date and fraction formats are rendered as expected.
+  */
+ @Test
+ public void testExcelParserFormatting() throws Exception {
+     try (InputStream input = ExcelParserTest.class.getResourceAsStream(
+             "/test-documents/testEXCEL-formats.xls")) {
+         Metadata metadata = new Metadata();
+         ParseContext context = new ParseContext();
+         context.set(Locale.class, Locale.US);
+         ContentHandler handler = new BodyContentHandler();
+         new OfficeParser().parse(input, handler, metadata, context);
+
+         assertEquals(
+                 "application/vnd.ms-excel",
+                 metadata.get(Metadata.CONTENT_TYPE));
+
+         String content = handler.toString();
+
+         // Number #,##0.00
+         assertContains("1,599.99", content);
+         assertContains("-1,599.99", content);
+
+         // Currency $#,##0.00;[Red]($#,##0.00)
+         assertContains("$1,599.99", content);
+         assertContains("($1,599.99)", content);
+
+         // Scientific 0.00E+00
+         // poi <=3.8beta1 returns 1.98E08, newer versions return 1.98E+08
+         assertTrue(content.contains("1.98E08") || content.contains("1.98E+08"));
+         assertTrue(content.contains("-1.98E08") || content.contains("-1.98E+08"));
+
+         // Percentage.
+         assertContains("2.50%", content);
+         // Excel rounds up to 3%. The former Java 1.5 special case ("2%") was
+         // dropped: this file uses try-with-resources, so it cannot run on
+         // anything older than Java 7.
+         assertContains("3%", content);
+
+         // Time Format: h:mm
+         assertContains("6:15", content);
+         assertContains("18:15", content);
+
+         // Date Format: d-mmm-yy
+         assertContains("17-May-07", content);
+
+         // Date Format: m/d/yy
+         assertContains("10/3/09", content);
+
+         // Date/Time Format: m/d/yy h:mm
+         assertContains("1/19/08 4:35", content);
+
+         // Fraction (2.5): # ?/?
+         assertContains("2 1/2", content);
+
+
+         // Below assertions represent outstanding formatting issues to be addressed
+         // they are included to allow the issues to be progressed with the Apache POI
+         // team - See TIKA-103.
+
+         /*************************************************************************
+          // Custom Number (0 "dollars and" .00 "cents")
+          assertContains("19 dollars and .99 cents", content);
+
+          // Custom Number ("At" h:mm AM/PM "on" dddd mmmm d"," yyyy)
+          assertContains("At 4:20 AM on Thursday May 17, 2007", content);
+          **************************************************************************/
+
+     }
+ }
+
+ /**
+  * Parses a password protected workbook twice: first without supplying a
+  * password, which must raise {@link EncryptedDocumentException}, and then
+  * with a {@link PasswordProvider} returning "tika", which must yield the
+  * metadata and text.
+  */
+ @Test
+ public void testExcelParserPassword() throws Exception {
+     // First attempt: no password available, parsing has to be refused
+     try (InputStream locked = ExcelParserTest.class.getResourceAsStream(
+             "/test-documents/testEXCEL_protected_passtika.xls")) {
+         ParseContext noPassword = new ParseContext();
+         noPassword.set(Locale.class, Locale.US);
+         Metadata meta = new Metadata();
+         ContentHandler body = new BodyContentHandler();
+         new OfficeParser().parse(locked, body, meta, noPassword);
+         fail("Document is encrypted, shouldn't parse");
+     } catch (EncryptedDocumentException expected) {
+         // Good
+     }
+
+     // Second attempt: same file, this time with the password
+     try (InputStream locked = ExcelParserTest.class.getResourceAsStream(
+             "/test-documents/testEXCEL_protected_passtika.xls")) {
+         PasswordProvider tikaPassword = new PasswordProvider() {
+             @Override
+             public String getPassword(Metadata metadata) {
+                 return "tika";
+             }
+         };
+         ParseContext withPassword = new ParseContext();
+         withPassword.set(Locale.class, Locale.US);
+         withPassword.set(PasswordProvider.class, tikaPassword);
+         Metadata meta = new Metadata();
+         ContentHandler body = new BodyContentHandler();
+         new OfficeParser().parse(locked, body, meta, withPassword);
+
+         assertEquals("application/vnd.ms-excel", meta.get(Metadata.CONTENT_TYPE));
+
+         assertEquals(null, meta.get(TikaCoreProperties.TITLE));
+         assertEquals("Antoni", meta.get(TikaCoreProperties.CREATOR));
+         assertEquals("2011-11-25T09:52:48Z", meta.get(TikaCoreProperties.CREATED));
+
+         String text = body.toString();
+         assertContains("This is an Encrypted Excel spreadsheet", text);
+         assertNotContained("9.0", text);
+     }
+ }
+
+ /**
+  * TIKA-214 - labels and other text belonging to charts must show up in
+  * the extracted content, alongside the ordinary sheet text.
+  */
+ @Test
+ public void testExcelParserCharts() throws Exception {
+     try (InputStream xls = ExcelParserTest.class.getResourceAsStream(
+             "/test-documents/testEXCEL-charts.xls")) {
+         ParseContext usContext = new ParseContext();
+         usContext.set(Locale.class, Locale.US);
+         Metadata meta = new Metadata();
+         ContentHandler body = new BodyContentHandler();
+         new OfficeParser().parse(xls, body, meta, usContext);
+
+         assertEquals("application/vnd.ms-excel", meta.get(Metadata.CONTENT_TYPE));
+
+         String text = body.toString();
+
+         // Sheet one: pie chart
+         assertContains("charttabyodawg", text);
+         assertContains("WhamPuff", text);
+
+         // Sheet two: bar chart plus some text
+         assertContains("Sheet1", text);
+         assertContains("Test Excel Spreasheet", text);
+         assertContains("foo", text);
+         assertContains("bar", text);
+         assertContains("fizzlepuff", text);
+         assertContains("whyaxis", text);
+         assertContains("eksaxis", text);
+
+         // Sheet three: text only
+         assertContains("Sheet2", text);
+         assertContains("dingdong", text);
+     }
+ }
+
+ /**
+  * Parses jxl.xls with {@link OfficeParser} and checks the content type
+  * and one expected phrase in the extracted text.
+  */
+ @Test
+ public void testJXL() throws Exception {
+     try (InputStream xls = ExcelParserTest.class.getResourceAsStream(
+             "/test-documents/jxl.xls")) {
+         ParseContext usContext = new ParseContext();
+         usContext.set(Locale.class, Locale.US);
+         Metadata meta = new Metadata();
+         ContentHandler body = new BodyContentHandler(-1);
+         new OfficeParser().parse(xls, body, meta, usContext);
+
+         assertEquals("application/vnd.ms-excel", meta.get(Metadata.CONTENT_TYPE));
+         assertContains("Number Formats", body.toString());
+     }
+ }
+
+ /**
+  * Parses a Works 7.0 spreadsheet (.xlr) with {@link OfficeParser} and
+  * checks that the extracted text mentions "Microsoft Works".
+  */
+ @Test
+ public void testWorksSpreadsheet70() throws Exception {
+     try (InputStream xlr = ExcelParserTest.class.getResourceAsStream(
+             "/test-documents/testWORKSSpreadsheet7.0.xlr")) {
+         ParseContext usContext = new ParseContext();
+         usContext.set(Locale.class, Locale.US);
+         Metadata meta = new Metadata();
+         ContentHandler body = new BodyContentHandler(-1);
+         new OfficeParser().parse(xlr, body, meta, usContext);
+
+         assertContains("Microsoft Works", body.toString());
+     }
+ }
+
+ /**
+ * We don't currently support the .xlsb file format
+ * (an OOXML container with binary blobs), but we
+ * shouldn't break on these files either (TIKA-826)
+ */
+ @Test
+ public void testExcelXLSB() throws Exception {
+ Detector detector = new DefaultDetector();
+ AutoDetectParser parser = new AutoDetectParser();
+
+ Metadata m = new Metadata();
+ m.add(Metadata.RESOURCE_NAME_KEY, "excel.xlsb");
+
+ // Should be detected correctly
+ MediaType type;
+ try (InputStream input = ExcelParserTest.class.getResourceAsStream(
+ "/test-documents/testEXCEL.xlsb")) {
+ type = detector.detect(input, m);
+ assertEquals("application/vnd.ms-excel.sheet.binary.macroenabled.12", type.toString());
+ }
+
+ // OfficeParser won't handle it
+ assertEquals(false, (new OfficeParser()).getSupportedTypes(new ParseContext()).contains(type));
+
+ // OOXMLParser won't handle it
+ assertEquals(false, (new OOXMLParser()).getSupportedTypes(new ParseContext()).contains(type));
+
+ // AutoDetectParser doesn't break on it
+ try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL.xlsb")) {
+ ContentHandler handler = new BodyContentHandler(-1);
+ ParseContext context = new ParseContext();
+ context.set(Locale.class, Locale.US);
+ parser.parse(input, handler, m, context);
+
+ String content = handler.toString();
+ assertEquals("", content);
+ }
+ }
+
+ /**
+ * Excel 5 and 95 are older formats, and only get basic support
+ */
+ @Test
+ public void testExcel95() throws Exception {
+ Detector detector = new DefaultDetector();
+ AutoDetectParser parser = new AutoDetectParser();
+ MediaType type;
+ Metadata m;
+
+ // First try detection of Excel 5
+ m = new Metadata();
+ m.add(Metadata.RESOURCE_NAME_KEY, "excel_5.xls");
+ try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_5.xls")) {
+ type = detector.detect(input, m);
+ assertEquals("application/vnd.ms-excel", type.toString());
+ }
+
+ // Now Excel 95
+ m = new Metadata();
+ m.add(Metadata.RESOURCE_NAME_KEY, "excel_95.xls");
+ try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_95.xls")) {
+ type = detector.detect(input, m);
+ assertEquals("application/vnd.ms-excel", type.toString());
+ }
+
+ // OfficeParser can handle it
+ assertEquals(true, (new OfficeParser()).getSupportedTypes(new ParseContext()).contains(type));
+
+ // OOXMLParser won't handle it
+ assertEquals(false, (new OOXMLParser()).getSupportedTypes(new ParseContext()).contains(type));
+
+
+ // Parse the Excel 5 file
+ m = new Metadata();
+ try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_5.xls")) {
+ ContentHandler handler = new BodyContentHandler(-1);
+ ParseContext context = new ParseContext();
+ context.set(Locale.class, Locale.US);
+ parser.parse(input, handler, m, context);
+
+ String content = handler.toString();
+
+ // Sheet names
+ assertContains("Feuil1", content);
+ assertContains("Feuil3", content);
+
+ // Text
+ assertContains("Sample Excel", content);
+ assertContains("Number", content);
+
+ // Numbers
+ assertContains("15", content);
+ assertContains("225", content);
+
+ // Metadata was also fetched
+ assertEquals("Simple Excel document", m.get(TikaCoreProperties.TITLE));
+ assertEquals("Keith Bennett", m.get(TikaCoreProperties.CREATOR));
+ }
+
+ // Parse the Excel 95 file
+ m = new Metadata();
+ try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_95.xls")) {
+ ContentHandler handler = new BodyContentHandler(-1);
+ ParseContext context = new ParseContext();
+ context.set(Locale.class, Locale.US);
+ parser.parse(input, handler, m, context);
+
+ String content = handler.toString();
+
+ // Sheet name
+ assertContains("Foglio1", content);
+
+ // Very boring file, no actual text or numbers!
+
+ // Metadata was also fetched
+ assertEquals(null, m.get(TikaCoreProperties.TITLE));
+ assertEquals("Marco Quaranta", m.get(Office.LAST_AUTHOR));
+ }
+ }
+
+ /**
+  * Custom OLE2 (HPSF) properties must be surfaced in the metadata under
+  * "custom:" prefixed keys, next to the standard properties.
+  */
+ @Test
+ public void testCustomProperties() throws Exception {
+     Metadata meta = new Metadata();
+
+     try (InputStream xls = ExcelParserTest.class.getResourceAsStream(
+             "/test-documents/testEXCEL_custom_props.xls")) {
+         ParseContext usContext = new ParseContext();
+         usContext.set(Locale.class, Locale.US);
+         ContentHandler body = new BodyContentHandler(-1);
+         new OfficeParser().parse(xls, body, meta, usContext);
+     }
+
+     // Standard properties
+     assertEquals("application/vnd.ms-excel", meta.get(Metadata.CONTENT_TYPE));
+     assertEquals("", meta.get(TikaCoreProperties.CREATOR));
+     assertEquals("", meta.get(TikaCoreProperties.MODIFIER));
+     assertEquals("2011-08-22T13:45:54Z", meta.get(TikaCoreProperties.MODIFIED));
+     assertEquals("2006-09-12T15:06:44Z", meta.get(TikaCoreProperties.CREATED));
+     assertEquals("Microsoft Excel", meta.get(OfficeOpenXMLExtended.APPLICATION));
+
+     // Custom properties
+     assertEquals("true", meta.get("custom:myCustomBoolean"));
+     assertEquals("3", meta.get("custom:myCustomNumber"));
+     assertEquals("MyStringValue", meta.get("custom:MyCustomString"));
+     assertEquals("2010-12-30T22:00:00Z", meta.get("custom:MyCustomDate"));
+     assertEquals("2010-12-29T22:00:00Z", meta.get("custom:myCustomSecondDate"));
+ }
+
+ /**
+  * Parses testEXCEL_headers_footers.xls under the UK locale and checks
+  * that header and footer text is extracted together with the cell text.
+  */
+ @Test
+ public void testHeaderAndFooterExtraction() throws Exception {
+     try (InputStream xls = ExcelParserTest.class.getResourceAsStream(
+             "/test-documents/testEXCEL_headers_footers.xls")) {
+         ParseContext ukContext = new ParseContext();
+         ukContext.set(Locale.class, Locale.UK);
+         Metadata meta = new Metadata();
+         ContentHandler body = new BodyContentHandler();
+         new OfficeParser().parse(xls, body, meta, ukContext);
+
+         assertEquals("application/vnd.ms-excel", meta.get(Metadata.CONTENT_TYPE));
+         assertEquals("Internal spreadsheet", meta.get(TikaCoreProperties.TITLE));
+         assertEquals("Aeham Abushwashi", meta.get(TikaCoreProperties.CREATOR));
+         assertEquals("Aeham Abushwashi", meta.get(Metadata.AUTHOR));
+
+         String text = body.toString();
+
+         // Cell text
+         assertContains("John Smith1", text);
+         assertContains("John Smith50", text);
+         assertContains("1 Corporate HQ", text);
+
+         // Header text
+         assertContains("Header - Corporate Spreadsheet", text);
+         assertContains("Header - For Internal Use Only", text);
+         assertContains("Header - Author: John Smith", text);
+
+         // Footer text
+         assertContains("Footer - Corporate Spreadsheet", text);
+         assertContains("Footer - For Internal Use Only", text);
+         assertContains("Footer - Author: John Smith", text);
+     }
+ }
+}
Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java Wed Jan 6 03:50:50 2016
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.InputStream;
+import java.util.List;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.TikaTest;
+import org.apache.tika.exception.EncryptedDocumentException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.OfficeOpenXMLExtended;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.PasswordProvider;
+import org.apache.tika.parser.RecursiveParserWrapper;
+import org.apache.tika.sax.BasicContentHandlerFactory;
+import org.junit.Test;
+import org.xml.sax.helpers.DefaultHandler;
+
+public class JackcessParserTest extends TikaTest {
+
+ /**
+  * Runs three Access test files (one .accdb, two .mdb) through a
+  * {@link RecursiveParserWrapper} producing XML, and checks for each one
+  * that four metadata objects are returned, that the main content holds
+  * the expected table markup and values, and that the fourth entry holds
+  * the text of the embedded document.
+  */
+ @Test
+ public void testBasic() throws Exception {
+
+     Parser p = new AutoDetectParser();
+
+     RecursiveParserWrapper w = new RecursiveParserWrapper(p,
+             new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.XML, -1));
+
+     for (String fName : new String[]{"testAccess2.accdb", "testAccess2_2000.mdb",
+             "testAccess2_2002-2003.mdb"}) {
+         // try-with-resources, like the other tests in this class; unlike
+         // closeQuietly it does not hide a failure while closing the stream
+         try (InputStream is = this.getResourceAsStream("/test-documents/" + fName)) {
+             Metadata meta = new Metadata();
+             ParseContext c = new ParseContext();
+             w.parse(is, new DefaultHandler(), meta, c);
+         }
+         List<Metadata> list = w.getMetadata();
+         assertEquals(4, list.size());
+         String mainContent = list.get(0).get(RecursiveParserWrapper.TIKA_CONTENT);
+
+         //make sure there's a thead and tbody
+         assertContains("</thead><tbody>", mainContent);
+
+         //assert table header
+         assertContains("<th>ShortTextField</th>", mainContent);
+
+         //test date format
+         assertContains("6/24/15", mainContent);
+
+         //test that markup is stripped
+         assertContains("over the bold italic dog", mainContent);
+
+         //test unicode
+         assertContains("\u666E\u6797\u65AF\u987F\u5927\u5B66", mainContent);
+
+         //test embedded document handling
+         assertContains("Test Document with embedded pdf",
+                 list.get(3).get(RecursiveParserWrapper.TIKA_CONTENT));
+
+         // the wrapper is shared across iterations, so reset it before the next file
+         w.reset();
+     }
+ }
+
+ /**
+  * Exercises password handling for Access files. The encrypted file must
+  * parse with the correct password and must raise
+  * {@link EncryptedDocumentException} for a wrong password, a null
+  * password and a missing {@link PasswordProvider}. An unencrypted file
+  * must still parse when a password is supplied anyway.
+  */
+ @Test
+ public void testPassword() throws Exception {
+     final String encrypted = "/test-documents/testAccess2_encrypted.accdb";
+     Parser p = new AutoDetectParser();
+
+     //correct password
+     ParseContext c = new ParseContext();
+     c.set(PasswordProvider.class, fixedPasswordProvider("tika"));
+     String content;
+     try (InputStream is = this.getResourceAsStream(encrypted)) {
+         content = getText(is, p, c);
+     }
+     assertContains("red and brown", content);
+
+     //now try wrong password
+     c.set(PasswordProvider.class, fixedPasswordProvider("WRONG"));
+     assertTrue("failed to throw encrypted document exception for wrong password",
+             failsWithEncryptedDocument(encrypted, p, c));
+
+     //now try null
+     c.set(PasswordProvider.class, fixedPasswordProvider(null));
+     assertTrue("failed to throw encrypted document exception for null password",
+             failsWithEncryptedDocument(encrypted, p, c));
+
+     //now try missing password provider
+     c = new ParseContext();
+     assertTrue("failed to throw encrypted document exception for missing password provider",
+             failsWithEncryptedDocument(encrypted, p, c));
+
+     //now try password on file that doesn't need a password
+     c = new ParseContext();
+     c.set(PasswordProvider.class, fixedPasswordProvider("tika"));
+     boolean ex = false;
+     // kept separate from the text read above, so the final check cannot
+     // pass on content left over from the encrypted file
+     String plainContent = null;
+     try (InputStream is = this.getResourceAsStream("/test-documents/testAccess2.accdb")) {
+         plainContent = getText(is, p, c);
+     } catch (EncryptedDocumentException e) {
+         ex = true;
+     }
+     assertFalse("shouldn't have thrown encrypted document exception for "+
+             "opening unencrypted file that doesn't need password", ex);
+     assertContains("red and brown", plainContent);
+ }
+
+ /** Builds a provider that answers every request with the given password (may be null). */
+ private static PasswordProvider fixedPasswordProvider(final String password) {
+     return new PasswordProvider() {
+         @Override
+         public String getPassword(Metadata metadata) {
+             return password;
+         }
+     };
+ }
+
+ /** Reports whether extracting text from the resource raises EncryptedDocumentException. */
+ private boolean failsWithEncryptedDocument(String resource, Parser parser, ParseContext context)
+         throws Exception {
+     try (InputStream is = this.getResourceAsStream(resource)) {
+         getText(is, parser, context);
+         return false;
+     } catch (EncryptedDocumentException e) {
+         return true;
+     }
+ }
+
+ /**
+  * TIKA-1681: parsing the Access 97 file must complete without an
+  * exception and yield the expected text.
+  */
+ @Test
+ public void testReadOnly() throws Exception {
+     XMLResult parsed = getXML("testAccess_V1997.mdb");
+     String xml = parsed.xml;
+     assertContains("hijklmnop", xml);
+ }
+
+ /**
+  * Basic checks on the normalized metadata (creator, company, title)
+  * extracted from the Access 97 file.
+  */
+ @Test
+ public void testMetadata() throws Exception {
+     XMLResult parsed = getXML("testAccess_V1997.mdb");
+     Metadata meta = parsed.metadata;
+     assertEquals("tmccune", meta.get(TikaCoreProperties.CREATOR));
+     assertEquals("Health Market Science", meta.get(OfficeOpenXMLExtended.COMPANY));
+     assertEquals("test", meta.get(TikaCoreProperties.TITLE));
+ }
+}
Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/OfficeParserTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/OfficeParserTest.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/OfficeParserTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/OfficeParserTest.java Wed Jan 6 03:50:50 2016
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.InputStream;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.microsoft.ooxml.OOXMLParserTest;
+import org.junit.Test;
+
+
+/**
+ * Smoke test for {@link OfficeParser}: a Word document run through the
+ * parser must produce XML containing the expected text.
+ */
+public class OfficeParserTest extends TikaTest {
+
+    /** Parses test.doc with an {@link OfficeParser} and looks for "test" in the XML output. */
+    @Test
+    public void parseOfficeWord() throws Exception {
+        Metadata metadata = new Metadata();
+        Parser parser = new OfficeParser();
+        InputStream document = getTestDocument("test.doc");
+
+        XMLResult parsed = getXML(document, parser, metadata);
+        String xml = parsed.xml;
+
+        assertTrue(xml.contains("test"));
+    }
+
+    /**
+     * Opens the named file from the {@code /test-documents/} classpath
+     * folder, wrapped as a {@link TikaInputStream}.
+     */
+    private InputStream getTestDocument(String name) {
+        String resourcePath = "/test-documents/" + name;
+        InputStream raw = OOXMLParserTest.class.getResourceAsStream(resourcePath);
+        return TikaInputStream.get(raw);
+    }
+}
Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java Wed Jan 6 03:50:50 2016
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft;
+
+import static org.apache.tika.parser.microsoft.AbstractPOIContainerExtractionTest.getTestFile;
+import static org.junit.Assert.assertEquals;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.detect.DefaultDetector;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+
+/**
+ * Tests for the Old Excel (2-4) parser
+ */
+public class OldExcelParserTest extends TikaTest {
+    // Sample spreadsheet used by every test in this class
+    private static final String file = "testEXCEL_4.xls";
+
+    /**
+     * Check the default detector reports the expected media type for the sample file
+     */
+    @Test
+    public void testDetection() throws Exception {
+        Detector detector = new DefaultDetector();
+        try (TikaInputStream stream = getTestFile(file)) {
+            assertEquals(
+                    MediaType.application("vnd.ms-excel.sheet.4"),
+                    detector.detect(stream, new Metadata()));
+        }
+    }
+
+    /**
+     * Check the metadata the parser fills in: the content type is set,
+     * while title and subject are left empty
+     */
+    // Disabled, until we can get the POI code to tell us the version
+    @Test
+    @Ignore
+    public void testMetadata() throws Exception {
+        Metadata metadata = new Metadata();
+        ContentHandler handler = new BodyContentHandler();
+
+        // Close the stream once parsing is done, as the other tests in this class do
+        try (TikaInputStream stream = getTestFile(file)) {
+            OldExcelParser parser = new OldExcelParser();
+            parser.parse(stream, handler, metadata, new ParseContext());
+        }
+
+        // We can get the content type
+        assertEquals("application/vnd.ms-excel.sheet.4", metadata.get(Metadata.CONTENT_TYPE));
+
+        // But no other metadata
+        assertEquals(null, metadata.get(TikaCoreProperties.TITLE));
+        assertEquals(null, metadata.get(Metadata.SUBJECT));
+    }
+
+    /**
+     * Check we can get the plain text properly
+     */
+    @Test
+    public void testPlainText() throws Exception {
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        try (TikaInputStream stream = getTestFile(file)) {
+            new OldExcelParser().parse(stream, handler, metadata, new ParseContext());
+        }
+
+        String text = handler.toString();
+
+        // Check we find a few words we expect in there
+        assertContains("Size", text);
+        assertContains("Returns", text);
+
+        // Check we find a few numbers we expect in there
+        assertContains("11", text);
+        assertContains("784", text);
+    }
+
+    /**
+     * Check the HTML version comes through correctly
+     */
+    @Test
+    public void testHTML() throws Exception {
+        XMLResult result = getXML(file);
+        String xml = result.xml;
+
+        // Sheet name not found - only 5+ have sheet names
+        assertNotContained("<p>Sheet 1</p>", xml);
+
+        // String cells
+        assertContains("<p>Table 10 -", xml);
+        assertContains("<p>Tax</p>", xml);
+        assertContains("<p>N/A</p>", xml);
+
+        // Number cells
+        assertContains("<p>(1)</p>", xml);
+        assertContains("<p>5.0</p>", xml);
+    }
+}