You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by bo...@apache.org on 2016/01/06 04:50:57 UTC

svn commit: r1723223 [15/32] - in /tika/branches/2.x: tika-core/src/test/resources/META-INF/ tika-core/src/test/resources/META-INF/services/ tika-parser-modules/ tika-parser-modules/tika-advanced-module/ tika-parser-modules/tika-advanced-module/src/ ti...

Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmItspHeader.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmItspHeader.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmItspHeader.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmItspHeader.java Wed Jan  6 03:50:50 2016
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.parser.chm.accessor.ChmItsfHeader;
+import org.apache.tika.parser.chm.accessor.ChmItspHeader;
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests all public methods of the ChmItspHeader
+ * 
+ */
+public class TestChmItspHeader {
+    private ChmItspHeader chmItspHeader = null;
+
+    @Before
+    public void setUp() throws Exception {
+        byte[] data = TestParameters.chmData;
+
+        ChmItsfHeader chmItsfHeader = new ChmItsfHeader();
+        // chmItsfHeader.parse(Arrays.copyOfRange(data, 0,
+        // ChmConstants.CHM_ITSF_V3_LEN - 1), chmItsfHeader);
+        chmItsfHeader.parse(ChmCommons.copyOfRange(data, 0,
+                ChmConstants.CHM_ITSF_V3_LEN - 1), chmItsfHeader);
+
+        chmItspHeader = new ChmItspHeader();
+        // chmItspHeader.parse(Arrays.copyOfRange( data, (int)
+        // chmItsfHeader.getDirOffset(),
+        // (int) chmItsfHeader.getDirOffset()
+        // + ChmConstants.CHM_ITSP_V1_LEN), chmItspHeader);
+        chmItspHeader.parse(ChmCommons.copyOfRange(data,
+                (int) chmItsfHeader.getDirOffset(),
+                (int) chmItsfHeader.getDirOffset()
+                        + ChmConstants.CHM_ITSP_V1_LEN), chmItspHeader);
+    }
+
+    @Test
+    public void testGetBlock_len() {
+        assertEquals(TestParameters.VP_BLOCK_LENGTH,
+                chmItspHeader.getBlock_len());
+    }
+
+    @Test
+    public void testGetBlockidx_intvl() {
+        assertEquals(TestParameters.VP_BLOCK_INDEX_INTERVAL,
+                chmItspHeader.getBlockidx_intvl());
+    }
+
+    @Test
+    public void testGetHeader_len() {
+        assertEquals(TestParameters.VP_ITSP_HEADER_LENGTH,
+                chmItspHeader.getHeader_len());
+    }
+
+    @Test
+    public void testGetIndex_depth() {
+        assertEquals(TestParameters.VP_INDEX_DEPTH,
+                chmItspHeader.getIndex_depth());
+    }
+
+    @Test
+    public void testGetIndex_head() {
+        assertEquals(TestParameters.VP_INDEX_HEAD,
+                chmItspHeader.getIndex_head());
+    }
+
+    @Test
+    public void testGetIndex_root() {
+        assertEquals(TestParameters.VP_INDEX_ROOT,
+                chmItspHeader.getIndex_root());
+    }
+
+    @Test
+    public void testGetLang_id() {
+        assertEquals(TestParameters.VP_LANGUAGE_ID,
+                chmItspHeader.getLang_id());
+    }
+
+    @Test
+    public void testGetNum_blocks() {
+        assertEquals(TestParameters.VP_UNKNOWN_NUM_BLOCKS,
+                chmItspHeader.getNum_blocks());
+    }
+
+    @Test
+    public void testGetUnknown_000c() {
+        assertEquals(TestParameters.VP_ITSP_UNKNOWN_000C,
+                chmItspHeader.getUnknown_000c());
+    }
+
+    @Test
+    public void testGetUnknown_0024() {
+        assertEquals(TestParameters.VP_ITSP_UNKNOWN_0024,
+                chmItspHeader.getUnknown_0024());
+    }
+
+    @Test
+    public void testGetUnknown_002() {
+        assertEquals(TestParameters.VP_ITSP_UNKNOWN_002C,
+                chmItspHeader.getUnknown_002c());
+    }
+
+    @Test
+    public void testGetUnknown_0044() {
+        assertEquals(TestParameters.VP_ITSP_BYTEARR_LEN,
+                chmItspHeader.getUnknown_0044().length);
+    }
+
+    @Test
+    public void testGetVersion() {
+        assertEquals(TestParameters.VP_ITSP_VERSION,
+                chmItspHeader.getVersion());
+    }
+
+    @Test
+    public void testGetSignature() {
+        assertEquals(TestParameters.VP_ISTP_SIGNATURE, new String(
+                chmItspHeader.getSignature(), UTF_8));
+    }
+
+    @Test
+    public void testGetSystem_uuid() {
+        assertEquals(TestParameters.VP_ITSP_BYTEARR_LEN,
+                chmItspHeader.getSystem_uuid().length);
+    }
+
+    @Test
+    public void testToString() {
+        assertTrue(chmItspHeader.toString().contains(
+                TestParameters.VP_ISTP_SIGNATURE));
+    }
+
+    @After
+    public void tearDown() throws Exception {
+        chmItspHeader = null;
+    }
+
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxState.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxState.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxState.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxState.java Wed Jan  6 03:50:50 2016
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet;
+import org.apache.tika.parser.chm.accessor.ChmItsfHeader;
+import org.apache.tika.parser.chm.accessor.ChmItspHeader;
+import org.apache.tika.parser.chm.accessor.ChmLzxcControlData;
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.apache.tika.parser.chm.lzx.ChmLzxState;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestChmLzxState {
+    private ChmLzxState chmLzxState;
+    private int windowSize;
+
+    @Before
+    public void setUp() throws Exception {
+        byte[] data = TestParameters.chmData;
+
+        /* Creates and parses itsf header */
+        ChmItsfHeader chmItsHeader = new ChmItsfHeader();
+        // chmItsHeader.parse(Arrays.copyOfRange(data, 0,
+        // ChmConstants.CHM_ITSF_V3_LEN - 1), chmItsHeader);
+        chmItsHeader.parse(ChmCommons.copyOfRange(data, 0,
+                ChmConstants.CHM_ITSF_V3_LEN - 1), chmItsHeader);
+        /* Creates and parses itsp block */
+        ChmItspHeader chmItspHeader = new ChmItspHeader();
+        // chmItspHeader.parse(Arrays.copyOfRange( data, (int)
+        // chmItsHeader.getDirOffset(),
+        // (int) chmItsHeader.getDirOffset()
+        // + ChmConstants.CHM_ITSP_V1_LEN), chmItspHeader);
+        chmItspHeader.parse(ChmCommons.copyOfRange(data,
+                (int) chmItsHeader.getDirOffset(),
+                (int) chmItsHeader.getDirOffset()
+                + ChmConstants.CHM_ITSP_V1_LEN), chmItspHeader);
+
+        /* Creating instance of ChmDirListingContainer */
+        ChmDirectoryListingSet chmDirListCont = new ChmDirectoryListingSet(
+                data, chmItsHeader, chmItspHeader);
+        int indexOfControlData = ChmCommons.indexOf(
+                chmDirListCont.getDirectoryListingEntryList(),
+                ChmConstants.CONTROL_DATA);
+
+        int indexOfResetTable = ChmCommons.indexOfResetTableBlock(data,
+                ChmConstants.LZXC.getBytes(UTF_8));
+        byte[] dir_chunk = null;
+        if (indexOfResetTable > 0) {
+            // dir_chunk = Arrays.copyOfRange( data, indexOfResetTable,
+            // indexOfResetTable
+            // +
+            // chmDirListCont.getDirectoryListingEntryList().get(indexOfControlData).getLength());
+            dir_chunk = ChmCommons.copyOfRange(data, indexOfResetTable,
+                    indexOfResetTable
+                    + chmDirListCont.getDirectoryListingEntryList()
+                    .get(indexOfControlData).getLength());
+        }
+
+        ChmLzxcControlData clcd = new ChmLzxcControlData();
+        clcd.parse(dir_chunk, clcd);
+        windowSize = (int) clcd.getWindowSize();
+    }
+
+    @Test
+    public void testChmLzxStateConstructor() throws TikaException {
+        chmLzxState = new ChmLzxState(windowSize);
+        assertNotNull(chmLzxState);
+    }
+
+    @Test
+    public void testToString() throws TikaException {
+        if (chmLzxState == null)
+            testChmLzxStateConstructor();
+        assertTrue(chmLzxState.toString().length() > 20);
+    }
+
+    // TODO add more tests
+
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcControlData.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcControlData.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcControlData.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcControlData.java Wed Jan  6 03:50:50 2016
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet;
+import org.apache.tika.parser.chm.accessor.ChmItsfHeader;
+import org.apache.tika.parser.chm.accessor.ChmItspHeader;
+import org.apache.tika.parser.chm.accessor.ChmLzxcControlData;
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests all public methods of ChmLzxcControlData block
+ */
+public class TestChmLzxcControlData {
+    private ChmLzxcControlData chmLzxcControlData = null;
+
+    @Before
+    public void setUp() throws Exception {
+        byte[] data = TestParameters.chmData;
+        /* Creates and parses itsf header */
+        ChmItsfHeader chmItsHeader = new ChmItsfHeader();
+        // chmItsHeader.parse(Arrays.copyOfRange(data, 0,
+        // ChmConstants.CHM_ITSF_V3_LEN - 1), chmItsHeader);
+        chmItsHeader.parse(ChmCommons.copyOfRange(data, 0,
+                ChmConstants.CHM_ITSF_V3_LEN - 1), chmItsHeader);
+        /* Creates and parses itsp block */
+        ChmItspHeader chmItspHeader = new ChmItspHeader();
+        // chmItspHeader.parse(Arrays.copyOfRange( data, (int)
+        // chmItsHeader.getDirOffset(),
+        // (int) chmItsHeader.getDirOffset()
+        // + ChmConstants.CHM_ITSP_V1_LEN), chmItspHeader);
+        chmItspHeader.parse(ChmCommons.copyOfRange(data,
+                (int) chmItsHeader.getDirOffset(),
+                (int) chmItsHeader.getDirOffset()
+                        + ChmConstants.CHM_ITSP_V1_LEN), chmItspHeader);
+        /* Creating instance of ChmDirListingContainer */
+        ChmDirectoryListingSet chmDirListCont = new ChmDirectoryListingSet(
+                data, chmItsHeader, chmItspHeader);
+        int indexOfControlData = chmDirListCont.getControlDataIndex();
+
+        int indexOfResetTable = ChmCommons.indexOfResetTableBlock(data,
+                ChmConstants.LZXC.getBytes(UTF_8));
+        byte[] dir_chunk = null;
+        if (indexOfResetTable > 0) {
+            // dir_chunk = Arrays.copyOfRange( data, indexOfResetTable,
+            // indexOfResetTable
+            // +
+            // chmDirListCont.getDirectoryListingEntryList().get(indexOfControlData).getLength());
+            dir_chunk = ChmCommons.copyOfRange(data, indexOfResetTable,
+                    indexOfResetTable
+                            + chmDirListCont.getDirectoryListingEntryList()
+                                    .get(indexOfControlData).getLength());
+        }
+
+        /* Creates and parses control block */
+        chmLzxcControlData = new ChmLzxcControlData();
+        chmLzxcControlData.parse(dir_chunk, chmLzxcControlData);
+
+    }
+
+    @Test
+    public void testConstructorNotNull() {
+        assertNotNull(chmLzxcControlData);
+    }
+
+    @Test
+    public void testGetResetInterval() {
+        assertEquals(TestParameters.VP_RESET_INTERVAL,
+                chmLzxcControlData.getResetInterval());
+    }
+
+    @Test
+    public void testGetSize() {
+        assertEquals(TestParameters.VP_CONTROL_DATA_SIZE,
+                chmLzxcControlData.getSize());
+    }
+
+    @Test
+    public void testGetUnknown_18() {
+        assertEquals(TestParameters.VP_UNKNOWN_18,
+                chmLzxcControlData.getUnknown_18());
+    }
+
+    @Test
+    public void testGetVersion() {
+        assertEquals(TestParameters.VP_CONTROL_DATA_VERSION,
+                chmLzxcControlData.getVersion());
+    }
+
+    @Test
+    public void testGetWindowSize() {
+        assertEquals(TestParameters.VP_WINDOW_SIZE,
+                chmLzxcControlData.getWindowSize());
+    }
+
+    @Test
+    public void testGetWindowsPerReset() {
+        assertEquals(TestParameters.VP_WINDOWS_PER_RESET,
+                chmLzxcControlData.getWindowsPerReset());
+    }
+
+    @Test
+    public void testGetToString() {
+        assertTrue(chmLzxcControlData.toString().contains(
+                TestParameters.VP_CONTROL_DATA_SIGNATURE));
+    }
+
+    @Test
+    public void testGetSignature() {
+        assertEquals(
+                TestParameters.VP_CONTROL_DATA_SIGNATURE.getBytes(UTF_8).length,
+                chmLzxcControlData.getSignature().length);
+    }
+
+    @Test
+    public void testGetSignaure() {
+        assertEquals(
+                TestParameters.VP_CONTROL_DATA_SIGNATURE.getBytes(UTF_8).length,
+                chmLzxcControlData.getSignature().length);
+    }
+
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcResetTable.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcResetTable.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcResetTable.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcResetTable.java Wed Jan  6 03:50:50 2016
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.chm;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet;
+import org.apache.tika.parser.chm.accessor.ChmItsfHeader;
+import org.apache.tika.parser.chm.accessor.ChmItspHeader;
+import org.apache.tika.parser.chm.accessor.ChmLzxcControlData;
+import org.apache.tika.parser.chm.accessor.ChmLzxcResetTable;
+import org.apache.tika.parser.chm.assertion.ChmAssert;
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestChmLzxcResetTable {
+    private ChmLzxcResetTable chmLzxcResetTable = null;
+
+    @Before
+    public void setUp() throws Exception {
+        byte[] data = TestParameters.chmData;
+        /* Creates and parses itsf header */
+        ChmItsfHeader chmItsfHeader = new ChmItsfHeader();
+        // chmItsfHeader.parse(Arrays.copyOfRange(data, 0,
+        // ChmConstants.CHM_ITSF_V3_LEN - 1), chmItsfHeader);
+        chmItsfHeader.parse(ChmCommons.copyOfRange(data, 0,
+                ChmConstants.CHM_ITSF_V3_LEN - 1), chmItsfHeader);
+        /* Creates and parses itsp block */
+        ChmItspHeader chmItspHeader = new ChmItspHeader();
+        // chmItspHeader.parse(Arrays.copyOfRange( data, (int)
+        // chmItsfHeader.getDirOffset(),
+        // (int) chmItsfHeader.getDirOffset()
+        // + ChmConstants.CHM_ITSP_V1_LEN), chmItspHeader);
+        chmItspHeader.parse(ChmCommons.copyOfRange(data,
+                (int) chmItsfHeader.getDirOffset(),
+                (int) chmItsfHeader.getDirOffset()
+                        + ChmConstants.CHM_ITSP_V1_LEN), chmItspHeader);
+        /* Creating instance of ChmDirListingContainer */
+        ChmDirectoryListingSet chmDirListCont = new ChmDirectoryListingSet(
+                data, chmItsfHeader, chmItspHeader);
+        int indexOfControlData = chmDirListCont.getControlDataIndex();
+
+        int indexOfResetTable = ChmCommons.indexOfResetTableBlock(data,
+                ChmConstants.LZXC.getBytes(UTF_8));
+        byte[] dir_chunk = null;
+        if (indexOfResetTable > 0) {
+            // dir_chunk = Arrays.copyOfRange( data, indexOfResetTable,
+            // indexOfResetTable
+            // +
+            // chmDirListCont.getDirectoryListingEntryList().get(indexOfControlData).getLength());
+            dir_chunk = ChmCommons.copyOfRange(data, indexOfResetTable,
+                    indexOfResetTable
+                            + chmDirListCont.getDirectoryListingEntryList()
+                                    .get(indexOfControlData).getLength());
+        }
+
+        /* Creates and parses control block */
+        ChmLzxcControlData chmLzxcControlData = new ChmLzxcControlData();
+        chmLzxcControlData.parse(dir_chunk, chmLzxcControlData);
+
+        indexOfResetTable = chmDirListCont.getResetTableIndex();
+        chmLzxcResetTable = new ChmLzxcResetTable();
+
+        int startIndex = (int) chmDirListCont.getDataOffset()
+                + chmDirListCont.getDirectoryListingEntryList()
+                        .get(indexOfResetTable).getOffset();
+
+        ChmAssert.assertCopyingDataIndex(startIndex, data.length);
+
+        // dir_chunk = Arrays.copyOfRange(data, startIndex, startIndex
+        // +
+        // chmDirListCont.getDirectoryListingEntryList().get(indexOfResetTable).getLength());
+        dir_chunk = ChmCommons.copyOfRange(
+                data,
+                startIndex,
+                startIndex
+                        + chmDirListCont.getDirectoryListingEntryList()
+                                .get(indexOfResetTable).getLength());
+
+        chmLzxcResetTable.parse(dir_chunk, chmLzxcResetTable);
+    }
+
+    @Test
+    public void testGetBlockAddress() {
+        assertEquals(TestParameters.VP_RESET_TABLE_BA,
+                chmLzxcResetTable.getBlockAddress().length);
+    }
+
+    @Test
+    public void testGetBlockCount() {
+        assertEquals(TestParameters.VP_RESET_TABLE_BA,
+                chmLzxcResetTable.getBlockCount());
+    }
+
+    @Test
+    public void testGetBlockLen() {
+        assertEquals(TestParameters.VP_RES_TBL_BLOCK_LENGTH,
+                chmLzxcResetTable.getBlockLen());
+    }
+
+    @Test
+    public void testGetCompressedLen() {
+        assertEquals(TestParameters.VP_RES_TBL_COMPR_LENGTH,
+                chmLzxcResetTable.getCompressedLen());
+    }
+
+    @Test
+    public void testGetTableOffset() {
+        assertEquals(TestParameters.VP_TBL_OFFSET,
+                chmLzxcResetTable.getTableOffset());
+    }
+
+    @Test
+    public void testGetUncompressedLen() {
+        assertEquals(TestParameters.VP_RES_TBL_UNCOMP_LENGTH,
+                chmLzxcResetTable.getUncompressedLen());
+    }
+
+    @Test
+    public void testGetUnknown() {
+        assertEquals(TestParameters.VP_RES_TBL_UNKNOWN,
+                chmLzxcResetTable.getUnknown());
+    }
+
+    @Test
+    public void testGetVersion() {
+        assertEquals(TestParameters.VP_RES_TBL_VERSION,
+                chmLzxcResetTable.getVersion());
+    }
+
+    @Test
+    public void testToString() {
+        assertTrue(chmLzxcResetTable.toString().length() > 0);
+    }
+
+    // TODO: add setters to be tested
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestDirectoryListingEntry.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestDirectoryListingEntry.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestDirectoryListingEntry.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestDirectoryListingEntry.java Wed Jan  6 03:50:50 2016
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import org.apache.tika.parser.chm.accessor.DirectoryListingEntry;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests public methods of the DirectoryListingEntry class
+ * 
+ * @author olegt
+ * 
+ */
+public class TestDirectoryListingEntry {
+    private DirectoryListingEntry dle = null;
+
+    @Before
+    public void setUp() throws Exception {
+        dle = new DirectoryListingEntry(TestParameters.nameLength,
+                TestParameters.entryName, TestParameters.entryType,
+                TestParameters.offset, TestParameters.length);
+    }
+
+    @Test
+    public void testDefaultConstructor() {
+        assertNotNull(dle);
+    }
+
+    @Test
+    public void testParamConstructor() {
+        assertEquals(TestParameters.nameLength, dle.getNameLength());
+        assertEquals(TestParameters.entryName, dle.getName());
+        assertEquals(TestParameters.entryType, dle.getEntryType());
+        assertEquals(TestParameters.offset, dle.getOffset());
+        assertEquals(TestParameters.length, dle.getLength());
+    }
+
+    @Test
+    public void testToString() {
+        assertNotNull(dle.toString());
+    }
+
+    @Test
+    public void testGetNameLength() {
+        assertEquals(TestParameters.nameLength, dle.getNameLength());
+    }
+
+    @Test
+    public void testGetName() {
+        assertEquals(TestParameters.entryName, dle.getName());
+    }
+
+    @Test
+    public void testGetEntryType() {
+        assertEquals(TestParameters.entryType, dle.getEntryType());
+    }
+
+    @Test
+    public void testGetOffset() {
+        assertEquals(TestParameters.offset, dle.getOffset());
+    }
+
+    @Test
+    public void testGetLength() {
+        assertEquals(TestParameters.length, dle.getLength());
+    }
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestParameters.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestParameters.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestParameters.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestParameters.java Wed Jan  6 03:50:50 2016
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.parser.chm.core.ChmCommons.EntryType;
+
+/**
+ * Holds test parameters such as verification points
+ */
+public class TestParameters {
+    /* Prevents initialization */
+    private TestParameters() {
+    }
+
+    /* Tests values */
+    static final int nameLength = 5;
+    static final String entryName = TestParameters.class.getName();
+    static EntryType entryType = EntryType.COMPRESSED;
+    static final int offset = 3;
+    static final int length = 20;
+    static final int NTHREADS = 2;
+
+    static final int BUFFER_SIZE = 16384;
+
+    static final byte[] chmData = readResource("/test-documents/testChm.chm");
+
+    private static byte[] readResource(String name) {
+        try {
+            try (InputStream stream = TestParameters.class.getResourceAsStream(name)) {
+                return IOUtils.toByteArray(stream);
+            }
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /* Verification points */
+    static final String VP_CHM_MIME_TYPE = "Content-Type=application/x-chm";
+    static final String VP_EXTRACTED_TEXT = "The TCard method accepts only numeric arguments";
+    static final String VP_ISTF_SIGNATURE = "ITSF";
+    static final String VP_ISTP_SIGNATURE = "ITSP";
+    static final String VP_PMGL_SIGNATURE = "PMGL";
+    static final String VP_CONTROL_DATA_SIGNATURE = "LZXC";
+
+    static final int VP_DIRECTORY_LENGTH = 4180;
+    static final int VP_DATA_OFFSET_LENGTH = 4300;
+    static final int VP_DIRECTORY_OFFSET = 120;
+    static final int VP_ITSF_HEADER_LENGTH = 96;
+    static final int VP_LANGUAGE_ID = 1033;
+    static final int VP_LAST_MODIFIED = 1042357880;
+    static final int VP_UNKNOWN_000C = 1;
+    static final int VP_UNKNOWN_LEN = 24;
+    static final int VP_UNKNOWN_OFFSET = 96;
+    static final int VP_VERSION = 3;
+    static final int VP_BLOCK_LENGTH = 4096;
+    static final int VP_BLOCK_INDEX_INTERVAL = 2;
+    static final int VP_ITSP_HEADER_LENGTH = 84;
+    static final int VP_INDEX_DEPTH = 1;
+    static final int VP_INDEX_HEAD = 0;
+    static final int VP_INDEX_ROOT = -1;
+    static final int VP_UNKNOWN_NUM_BLOCKS = -1;
+    static final int VP_ITSP_UNKNOWN_000C = 10;
+    static final int VP_ITSP_UNKNOWN_0024 = 0;
+    static final int VP_ITSP_UNKNOWN_002C = 1;
+    static final int VP_ITSP_BYTEARR_LEN = 16;
+    static final int VP_ITSP_VERSION = 1;
+    static final int VP_RESET_INTERVAL = 2;
+    static final int VP_CONTROL_DATA_SIZE = 6;
+    static final int VP_UNKNOWN_18 = 0;
+    static final int VP_CONTROL_DATA_VERSION = 2;
+    static final int VP_WINDOW_SIZE = 65536;
+    static final int VP_WINDOWS_PER_RESET = 1;
+    static final int VP_CHM_ENTITIES_NUMBER = 100; //updated  by Hawking
+    static final int VP_PMGI_FREE_SPACE = 3;
+    static final int VP_PMGL_BLOCK_NEXT = -1;
+    static final int VP_PMGL_BLOCK_PREV = -1;
+    static final int VP_PMGL_FREE_SPACE = 1644;
+    static final int VP_PMGL_UNKNOWN_008 = 0;
+    static final int VP_RESET_TABLE_BA = 12;
+    static final int VP_RES_TBL_BLOCK_LENGTH = 32768;
+    static final int VP_RES_TBL_COMPR_LENGTH = 177408;
+    static final int VP_RES_TBL_UNCOMP_LENGTH = 383786;
+    static final int VP_TBL_OFFSET = 40;
+    static final int VP_RES_TBL_UNKNOWN = 8;
+    static final int VP_RES_TBL_VERSION = 2;
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestPmgiHeader.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestPmgiHeader.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestPmgiHeader.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestPmgiHeader.java Wed Jan  6 03:50:50 2016
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.parser.chm.accessor.ChmPmgiHeader;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestPmgiHeader {
+    ChmPmgiHeader chmPmgiHeader = null;
+
+    @Before
+    public void setUp() throws Exception {
+        byte[] data = TestParameters.chmData;
+        chmPmgiHeader = new ChmPmgiHeader();
+        chmPmgiHeader.parse(data, chmPmgiHeader);
+    }
+
+    @Test
+    public void testToString() {
+        assertTrue((chmPmgiHeader != null) && (chmPmgiHeader.toString().length() > 0));
+    }
+
+    @Test
+    public void testGetFreeSpace() {
+        assertEquals(TestParameters.VP_PMGI_FREE_SPACE, chmPmgiHeader.getFreeSpace());
+    }
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java Wed Jan  6 03:50:50 2016
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.parser.chm.accessor.ChmPmglHeader;
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestPmglHeader {
+    ChmPmglHeader chmPmglHeader = null;
+
+    @Before
+    public void setUp() throws Exception {
+        byte[] data = TestParameters.chmData;
+        chmPmglHeader = new ChmPmglHeader();
+        chmPmglHeader.parse(ChmCommons.copyOfRange(data,
+                ChmConstants.START_PMGL, ChmConstants.START_PMGL
+                        + ChmConstants.CHM_PMGL_LEN + 10), chmPmglHeader);
+    }
+
+    @Test
+    public void testToString() {
+        assertTrue((chmPmglHeader != null)
+                && chmPmglHeader.toString().length() > 0);
+    }
+
+    @Test
+    public void testChmPmglHeaderGet() {
+        assertEquals(TestParameters.VP_PMGL_SIGNATURE, new String(
+                chmPmglHeader.getSignature(), UTF_8));
+    }
+
+    @Test
+    public void testGetBlockNext() {
+        assertEquals(TestParameters.VP_PMGL_BLOCK_NEXT,
+                chmPmglHeader.getBlockNext());
+    }
+
+    @Test
+    public void testGetBlockPrev() {
+        assertEquals(TestParameters.VP_PMGL_BLOCK_PREV,
+                chmPmglHeader.getBlockPrev());
+    }
+
+    @Test
+    public void testGetFreeSpace() {
+        assertEquals(TestParameters.VP_PMGL_FREE_SPACE,
+                chmPmglHeader.getFreeSpace());
+    }
+
+    @Test
+    public void testGetUnknown0008() {
+        assertEquals(TestParameters.VP_PMGL_UNKNOWN_008,
+                chmPmglHeader.getUnknown0008());
+    }
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java Wed Jan  6 03:50:50 2016
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import java.net.URL;
+
+import org.apache.tika.TikaTest.TrackingHandler;
+import org.apache.tika.extractor.ContainerExtractor;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.mime.MediaType;
+
+/**
+ * Parent class of tests that the various POI powered parsers are
+ * able to extract their embedded contents.
+ */
+public abstract class AbstractPOIContainerExtractionTest {
+    public static final MediaType TYPE_DOC = MediaType.application("msword");
+    public static final MediaType TYPE_PPT = MediaType.application("vnd.ms-powerpoint");
+    public static final MediaType TYPE_XLS = MediaType.application("vnd.ms-excel");
+    public static final MediaType TYPE_DOCX = MediaType.application("vnd.openxmlformats-officedocument.wordprocessingml.document");
+    public static final MediaType TYPE_PPTX = MediaType.application("vnd.openxmlformats-officedocument.presentationml.presentation");
+    public static final MediaType TYPE_XLSX = MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+    public static final MediaType TYPE_MSG = MediaType.application("vnd.ms-outlook");
+
+    public static final MediaType TYPE_TXT = MediaType.text("plain");
+    public static final MediaType TYPE_PDF = MediaType.application("pdf");
+
+    public static final MediaType TYPE_JPG = MediaType.image("jpeg");
+    public static final MediaType TYPE_GIF = MediaType.image("gif");
+    public static final MediaType TYPE_PNG = MediaType.image("png");
+    public static final MediaType TYPE_EMF = MediaType.application("x-emf");
+    public static final MediaType TYPE_WMF = MediaType.application("x-msmetafile");
+
+    protected static TikaInputStream getTestFile(String filename) throws Exception {
+        URL input = AbstractPOIContainerExtractionTest.class.getResource(
+                "/test-documents/" + filename);
+        assertNotNull(filename + " not found", input);
+
+        return TikaInputStream.get(input);
+    }
+
+    protected TrackingHandler process(String filename, ContainerExtractor extractor, boolean recurse) throws Exception {
+        try (TikaInputStream stream = getTestFile(filename)) {
+            assertEquals(true, extractor.isSupported(stream));
+
+            // Process it
+            TrackingHandler handler = new TrackingHandler();
+            if (recurse) {
+                extractor.extract(stream, extractor, handler);
+            } else {
+                extractor.extract(stream, null, handler);
+            }
+
+            // So they can check what happened
+            return handler;
+        }
+    }
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java Wed Jan  6 03:50:50 2016
@@ -0,0 +1,443 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft;
+
+import static org.apache.tika.TikaTest.assertContains;
+import static org.apache.tika.TikaTest.assertNotContained;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.InputStream;
+import java.util.Locale;
+
+import org.apache.tika.detect.DefaultDetector;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.exception.EncryptedDocumentException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
+import org.apache.tika.metadata.OfficeOpenXMLExtended;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.PasswordProvider;
+import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+
+public class ExcelParserTest {
+    @Test
+    @SuppressWarnings("deprecation") // Checks legacy Tika-1.0 style metadata keys
+    public void testExcelParser() throws Exception {
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
+                "/test-documents/testEXCEL.xls")) {
+            Metadata metadata = new Metadata();
+            ContentHandler handler = new BodyContentHandler();
+            ParseContext context = new ParseContext();
+            context.set(Locale.class, Locale.US);
+            new OfficeParser().parse(input, handler, metadata, context);
+
+            assertEquals(
+                    "application/vnd.ms-excel",
+                    metadata.get(Metadata.CONTENT_TYPE));
+            assertEquals("Simple Excel document", metadata.get(TikaCoreProperties.TITLE));
+            assertEquals("Keith Bennett", metadata.get(TikaCoreProperties.CREATOR));
+            assertEquals("Keith Bennett", metadata.get(Metadata.AUTHOR));
+
+            // Mon Oct 01 17:13:56 BST 2007
+            assertEquals("2007-10-01T16:13:56Z", metadata.get(TikaCoreProperties.CREATED));
+            assertEquals("2007-10-01T16:13:56Z", metadata.get(Metadata.CREATION_DATE));
+
+            // Mon Oct 01 17:31:43 BST 2007
+            assertEquals("2007-10-01T16:31:43Z", metadata.get(TikaCoreProperties.MODIFIED));
+            assertEquals("2007-10-01T16:31:43Z", metadata.get(Metadata.DATE));
+
+            String content = handler.toString();
+            assertContains("Sample Excel Worksheet", content);
+            assertContains("Numbers and their Squares", content);
+            assertContains("\t\tNumber\tSquare", content);
+            assertContains("9", content);
+            assertNotContained("9.0", content);
+            assertContains("196", content);
+            assertNotContained("196.0", content);
+        }
+    }
+
+    @Test
+    public void testExcelParserFormatting() throws Exception {
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
+                "/test-documents/testEXCEL-formats.xls")) {
+            Metadata metadata = new Metadata();
+            ParseContext context = new ParseContext();
+            context.set(Locale.class, Locale.US);
+            ContentHandler handler = new BodyContentHandler();
+            new OfficeParser().parse(input, handler, metadata, context);
+
+            assertEquals(
+                    "application/vnd.ms-excel",
+                    metadata.get(Metadata.CONTENT_TYPE));
+
+            String content = handler.toString();
+
+            // Number #,##0.00
+            assertContains("1,599.99", content);
+            assertContains("-1,599.99", content);
+
+            // Currency $#,##0.00;[Red]($#,##0.00)
+            assertContains("$1,599.99", content);
+            assertContains("($1,599.99)", content);
+
+            // Scientific 0.00E+00
+            // poi <=3.8beta1 returns 1.98E08, newer versions return 1.98+E08
+            assertTrue(content.contains("1.98E08") || content.contains("1.98E+08"));
+            assertTrue(content.contains("-1.98E08") || content.contains("-1.98E+08"));
+
+            // Percentage.
+            assertContains("2.50%", content);
+            // Excel rounds up to 3%, but that requires Java 1.6 or later
+            if (System.getProperty("java.version").startsWith("1.5")) {
+                assertContains("2%", content);
+            } else {
+                assertContains("3%", content);
+            }
+
+            // Time Format: h:mm
+            assertContains("6:15", content);
+            assertContains("18:15", content);
+
+            // Date Format: d-mmm-yy
+            assertContains("17-May-07", content);
+
+            // Date Format: m/d/yy
+            assertContains("10/3/09", content);
+
+            // Date/Time Format: m/d/yy h:mm
+            assertContains("1/19/08 4:35", content);
+
+            // Fraction (2.5): # ?/?
+            assertContains("2 1/2", content);
+
+
+            // Below assertions represent outstanding formatting issues to be addressed
+            // they are included to allow the issues to be progressed with the Apache POI
+            // team - See TIKA-103.
+
+            /*************************************************************************
+             // Custom Number (0 "dollars and" .00 "cents")
+             assertContains("19 dollars and .99 cents", content);
+
+             // Custom Number ("At" h:mm AM/PM "on" dddd mmmm d"," yyyy)
+             assertContains("At 4:20 AM on Thursday May 17, 2007", content);
+             **************************************************************************/
+
+        }
+    }
+
+    @Test
+    public void testExcelParserPassword() throws Exception {
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
+                "/test-documents/testEXCEL_protected_passtika.xls")) {
+            Metadata metadata = new Metadata();
+            ContentHandler handler = new BodyContentHandler();
+            ParseContext context = new ParseContext();
+            context.set(Locale.class, Locale.US);
+            new OfficeParser().parse(input, handler, metadata, context);
+            fail("Document is encrypted, shouldn't parse");
+        } catch (EncryptedDocumentException e) {
+            // Good
+        }
+
+        // Try again, this time with the password
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
+                "/test-documents/testEXCEL_protected_passtika.xls")) {
+            Metadata metadata = new Metadata();
+            ContentHandler handler = new BodyContentHandler();
+            ParseContext context = new ParseContext();
+            context.set(Locale.class, Locale.US);
+            context.set(PasswordProvider.class, new PasswordProvider() {
+                @Override
+                public String getPassword(Metadata metadata) {
+                    return "tika";
+                }
+            });
+            new OfficeParser().parse(input, handler, metadata, context);
+
+            assertEquals(
+                    "application/vnd.ms-excel",
+                    metadata.get(Metadata.CONTENT_TYPE));
+
+            assertEquals(null, metadata.get(TikaCoreProperties.TITLE));
+            assertEquals("Antoni", metadata.get(TikaCoreProperties.CREATOR));
+            assertEquals("2011-11-25T09:52:48Z", metadata.get(TikaCoreProperties.CREATED));
+
+            String content = handler.toString();
+            assertContains("This is an Encrypted Excel spreadsheet", content);
+            assertNotContained("9.0", content);
+        }
+    }
+
+    /**
+     * TIKA-214 - Ensure we extract labels etc from Charts
+     */
+    @Test
+    public void testExcelParserCharts() throws Exception {
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
+                "/test-documents/testEXCEL-charts.xls")) {
+            Metadata metadata = new Metadata();
+            ParseContext context = new ParseContext();
+            context.set(Locale.class, Locale.US);
+            ContentHandler handler = new BodyContentHandler();
+            new OfficeParser().parse(input, handler, metadata, context);
+
+            assertEquals(
+                    "application/vnd.ms-excel",
+                    metadata.get(Metadata.CONTENT_TYPE));
+
+            String content = handler.toString();
+
+            // The first sheet has a pie chart
+            assertContains("charttabyodawg", content);
+            assertContains("WhamPuff", content);
+
+            // The second sheet has a bar chart and some text
+            assertContains("Sheet1", content);
+            assertContains("Test Excel Spreasheet", content);
+            assertContains("foo", content);
+            assertContains("bar", content);
+            assertContains("fizzlepuff", content);
+            assertContains("whyaxis", content);
+            assertContains("eksaxis", content);
+
+            // The third sheet has some text
+            assertContains("Sheet2", content);
+            assertContains("dingdong", content);
+        }
+    }
+
+    @Test
+    public void testJXL() throws Exception {
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
+                "/test-documents/jxl.xls")) {
+            Metadata metadata = new Metadata();
+            ContentHandler handler = new BodyContentHandler(-1);
+            ParseContext context = new ParseContext();
+            context.set(Locale.class, Locale.US);
+            new OfficeParser().parse(input, handler, metadata, context);
+
+            assertEquals(
+                    "application/vnd.ms-excel",
+                    metadata.get(Metadata.CONTENT_TYPE));
+            String content = handler.toString();
+            assertContains("Number Formats", content);
+        }
+    }
+
+    @Test
+    public void testWorksSpreadsheet70() throws Exception {
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
+                "/test-documents/testWORKSSpreadsheet7.0.xlr")) {
+            Metadata metadata = new Metadata();
+            ContentHandler handler = new BodyContentHandler(-1);
+            ParseContext context = new ParseContext();
+            context.set(Locale.class, Locale.US);
+            new OfficeParser().parse(input, handler, metadata, context);
+
+            String content = handler.toString();
+            assertContains("Microsoft Works", content);
+        }
+    }
+
+    /**
+     * We don't currently support the .xlsb file format 
+     *  (an OOXML container with binary blobs), but we 
+     *  shouldn't break on these files either (TIKA-826)  
+     */
+    @Test
+    public void testExcelXLSB() throws Exception {
+        Detector detector = new DefaultDetector();
+        AutoDetectParser parser = new AutoDetectParser();
+
+        Metadata m = new Metadata();
+        m.add(Metadata.RESOURCE_NAME_KEY, "excel.xlsb");
+
+        // Should be detected correctly
+        MediaType type;
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
+                "/test-documents/testEXCEL.xlsb")) {
+            type = detector.detect(input, m);
+            assertEquals("application/vnd.ms-excel.sheet.binary.macroenabled.12", type.toString());
+        }
+
+        // OfficeParser won't handle it
+        assertEquals(false, (new OfficeParser()).getSupportedTypes(new ParseContext()).contains(type));
+
+        // OOXMLParser won't handle it
+        assertEquals(false, (new OOXMLParser()).getSupportedTypes(new ParseContext()).contains(type));
+
+        // AutoDetectParser doesn't break on it
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL.xlsb")) {
+            ContentHandler handler = new BodyContentHandler(-1);
+            ParseContext context = new ParseContext();
+            context.set(Locale.class, Locale.US);
+            parser.parse(input, handler, m, context);
+
+            String content = handler.toString();
+            assertEquals("", content);
+        }
+    }
+
+    /**
+     * Excel 5 and 95 are older formats, and only get basic support
+     */
+    @Test
+    public void testExcel95() throws Exception {
+        Detector detector = new DefaultDetector();
+        AutoDetectParser parser = new AutoDetectParser();
+        MediaType type;
+        Metadata m;
+
+        // First try detection of Excel 5
+        m = new Metadata();
+        m.add(Metadata.RESOURCE_NAME_KEY, "excel_5.xls");
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_5.xls")) {
+            type = detector.detect(input, m);
+            assertEquals("application/vnd.ms-excel", type.toString());
+        }
+
+        // Now Excel 95
+        m = new Metadata();
+        m.add(Metadata.RESOURCE_NAME_KEY, "excel_95.xls");
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_95.xls")) {
+            type = detector.detect(input, m);
+            assertEquals("application/vnd.ms-excel", type.toString());
+        }
+
+        // OfficeParser can handle it
+        assertEquals(true, (new OfficeParser()).getSupportedTypes(new ParseContext()).contains(type));
+
+        // OOXMLParser won't handle it
+        assertEquals(false, (new OOXMLParser()).getSupportedTypes(new ParseContext()).contains(type));
+
+
+        // Parse the Excel 5 file
+        m = new Metadata();
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_5.xls")) {
+            ContentHandler handler = new BodyContentHandler(-1);
+            ParseContext context = new ParseContext();
+            context.set(Locale.class, Locale.US);
+            parser.parse(input, handler, m, context);
+
+            String content = handler.toString();
+
+            // Sheet names
+            assertContains("Feuil1", content);
+            assertContains("Feuil3", content);
+
+            // Text
+            assertContains("Sample Excel", content);
+            assertContains("Number", content);
+
+            // Numbers
+            assertContains("15", content);
+            assertContains("225", content);
+
+            // Metadata was also fetched
+            assertEquals("Simple Excel document", m.get(TikaCoreProperties.TITLE));
+            assertEquals("Keith Bennett", m.get(TikaCoreProperties.CREATOR));
+        }
+
+        // Parse the Excel 95 file
+        m = new Metadata();
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_95.xls")) {
+            ContentHandler handler = new BodyContentHandler(-1);
+            ParseContext context = new ParseContext();
+            context.set(Locale.class, Locale.US);
+            parser.parse(input, handler, m, context);
+
+            String content = handler.toString();
+
+            // Sheet name
+            assertContains("Foglio1", content);
+
+            // Very boring file, no actual text or numbers!
+
+            // Metadata was also fetched
+            assertEquals(null, m.get(TikaCoreProperties.TITLE));
+            assertEquals("Marco Quaranta", m.get(Office.LAST_AUTHOR));
+        }
+    }
+
+    /**
+     * Ensures that custom OLE2 (HPSF) properties are extracted
+     */
+    @Test
+    public void testCustomProperties() throws Exception {
+        Metadata metadata = new Metadata();
+
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
+                "/test-documents/testEXCEL_custom_props.xls")) {
+            ContentHandler handler = new BodyContentHandler(-1);
+            ParseContext context = new ParseContext();
+            context.set(Locale.class, Locale.US);
+            new OfficeParser().parse(input, handler, metadata, context);
+        }
+
+        assertEquals("application/vnd.ms-excel", metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("", metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("", metadata.get(TikaCoreProperties.MODIFIER));
+        assertEquals("2011-08-22T13:45:54Z", metadata.get(TikaCoreProperties.MODIFIED));
+        assertEquals("2006-09-12T15:06:44Z", metadata.get(TikaCoreProperties.CREATED));
+        assertEquals("Microsoft Excel", metadata.get(OfficeOpenXMLExtended.APPLICATION));
+        assertEquals("true", metadata.get("custom:myCustomBoolean"));
+        assertEquals("3", metadata.get("custom:myCustomNumber"));
+        assertEquals("MyStringValue", metadata.get("custom:MyCustomString"));
+        assertEquals("2010-12-30T22:00:00Z", metadata.get("custom:MyCustomDate"));
+        assertEquals("2010-12-29T22:00:00Z", metadata.get("custom:myCustomSecondDate"));
+    }
+
+	@Test
+    public void testHeaderAndFooterExtraction() throws Exception {
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
+                "/test-documents/testEXCEL_headers_footers.xls")) {
+            Metadata metadata = new Metadata();
+            ContentHandler handler = new BodyContentHandler();
+            ParseContext context = new ParseContext();
+            context.set(Locale.class, Locale.UK);
+            new OfficeParser().parse(input, handler, metadata, context);
+
+            assertEquals(
+                    "application/vnd.ms-excel",
+                    metadata.get(Metadata.CONTENT_TYPE));
+            assertEquals("Internal spreadsheet", metadata.get(TikaCoreProperties.TITLE));
+            assertEquals("Aeham Abushwashi", metadata.get(TikaCoreProperties.CREATOR));
+            assertEquals("Aeham Abushwashi", metadata.get(Metadata.AUTHOR));
+
+            String content = handler.toString();
+            assertContains("John Smith1", content);
+            assertContains("John Smith50", content);
+            assertContains("1 Corporate HQ", content);
+            assertContains("Header - Corporate Spreadsheet", content);
+            assertContains("Header - For Internal Use Only", content);
+            assertContains("Header - Author: John Smith", content);
+            assertContains("Footer - Corporate Spreadsheet", content);
+            assertContains("Footer - For Internal Use Only", content);
+            assertContains("Footer - Author: John Smith", content);
+        }
+    }
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java Wed Jan  6 03:50:50 2016
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.InputStream;
+import java.util.List;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.TikaTest;
+import org.apache.tika.exception.EncryptedDocumentException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.OfficeOpenXMLExtended;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.PasswordProvider;
+import org.apache.tika.parser.RecursiveParserWrapper;
+import org.apache.tika.sax.BasicContentHandlerFactory;
+import org.junit.Test;
+import org.xml.sax.helpers.DefaultHandler;
+
+public class JackcessParserTest extends TikaTest {
+
+    @Test
+    public void testBasic() throws Exception {
+
+        Parser p = new AutoDetectParser();
+
+        RecursiveParserWrapper w = new RecursiveParserWrapper(p,
+                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.XML, -1));
+
+        for (String fName : new String[]{"testAccess2.accdb", "testAccess2_2000.mdb",
+                "testAccess2_2002-2003.mdb"}) {
+            InputStream is = null;
+            try {
+                is = this.getResourceAsStream("/test-documents/" + fName);
+
+                Metadata meta = new Metadata();
+                ParseContext c = new ParseContext();
+                w.parse(is, new DefaultHandler(), meta, c);
+            } finally {
+                IOUtils.closeQuietly(is);
+            }
+            List<Metadata> list = w.getMetadata();
+            assertEquals(4, list.size());
+            String mainContent = list.get(0).get(RecursiveParserWrapper.TIKA_CONTENT);
+
+            //make sure there's a thead and tbody
+            assertContains("</thead><tbody>", mainContent);
+
+            //assert table header
+            assertContains("<th>ShortTextField</th>", mainContent);
+
+            //test date format
+            assertContains("6/24/15", mainContent);
+
+            //test that markup is stripped
+            assertContains("over the bold italic dog", mainContent);
+
+            //test unicode
+            assertContains("\u666E\u6797\u65AF\u987F\u5927\u5B66", mainContent);
+
+            //test embedded document handling
+            assertContains("Test Document with embedded pdf",
+                    list.get(3).get(RecursiveParserWrapper.TIKA_CONTENT));
+
+            w.reset();
+        }
+    }
+
+    @Test
+    public void testPassword() throws Exception {
+        ParseContext c = new ParseContext();
+        c.set(PasswordProvider.class, new PasswordProvider() {
+            @Override
+            public String getPassword(Metadata metadata) {
+                return "tika";
+            }
+        });
+        Parser p = new AutoDetectParser();
+        String content = null;
+        try (InputStream is =
+                     this.getResourceAsStream(
+                             "/test-documents/testAccess2_encrypted.accdb")){
+            content = getText(is, p, c);
+        }
+        assertContains("red and brown", content);
+
+        //now try wrong password
+        c.set(PasswordProvider.class, new PasswordProvider() {
+            @Override
+            public String getPassword(Metadata metadata) {
+                return "WRONG";
+            }
+        });
+
+        boolean ex = false;
+        try (InputStream is =
+                     this.getResourceAsStream(
+                             "/test-documents/testAccess2_encrypted.accdb")){
+            getText(is, p, c);
+        } catch (EncryptedDocumentException e) {
+            ex = true;
+        }
+        assertTrue("failed to throw encrypted document exception for wrong password", ex);
+
+        //now try null
+        c.set(PasswordProvider.class, new PasswordProvider() {
+            @Override
+            public String getPassword(Metadata metadata) {
+                return null;
+            }
+        });
+
+        ex = false;
+        try (InputStream is =
+                     this.getResourceAsStream(
+                             "/test-documents/testAccess2_encrypted.accdb")){
+            getText(is, p, c);
+        } catch (EncryptedDocumentException e) {
+            ex = true;
+        }
+        assertTrue("failed to throw encrypted document exception for null password", ex);
+
+
+        //now try missing password provider
+        c = new ParseContext();
+        ex = false;
+        try (InputStream is =
+                     this.getResourceAsStream(
+                             "/test-documents/testAccess2_encrypted.accdb")){
+            getText(is, p, c);
+        } catch (EncryptedDocumentException e) {
+            ex = true;
+        }
+        assertTrue("failed to throw encrypted document exception for missing password provider", ex);
+
+        //now try password on file that doesn't need a password
+        c = new ParseContext();
+        c.set(PasswordProvider.class, new PasswordProvider() {
+            @Override
+            public String getPassword(Metadata metadata) {
+                return "tika";
+            }
+        });
+        ex = false;
+        try (InputStream is =
+                     this.getResourceAsStream(
+                             "/test-documents/testAccess2.accdb")){
+            content = getText(is, p, c);
+        } catch (EncryptedDocumentException e) {
+            ex = true;
+        }
+        assertFalse("shouldn't have thrown encrypted document exception for "+
+                        "opening unencrypted file that doesn't need passowrd", ex);
+        assertContains("red and brown", content);
+    }
+
+    @Test
+    public void testReadOnly() throws Exception {
+        //TIKA-1681: just make sure an exception is not thrown
+        XMLResult r = getXML("testAccess_V1997.mdb");
+        assertContains("hijklmnop", r.xml);
+    }
+
+    @Test
+    public void testMetadata() throws Exception {
+        //basic tests for normalized metadata
+        XMLResult r = getXML("testAccess_V1997.mdb");
+        assertEquals("tmccune", r.metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("Health Market Science", r.metadata.get(OfficeOpenXMLExtended.COMPANY));
+        assertEquals("test", r.metadata.get(TikaCoreProperties.TITLE));
+    }
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/OfficeParserTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/OfficeParserTest.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/OfficeParserTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/OfficeParserTest.java Wed Jan  6 03:50:50 2016
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.InputStream;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.microsoft.ooxml.OOXMLParserTest;
+import org.junit.Test;
+
+
+public class OfficeParserTest extends TikaTest {
+
+    @Test
+    public void parseOfficeWord() throws Exception {
+        Metadata metadata = new Metadata();
+        Parser parser = new OfficeParser();
+
+        String xml = getXML(getTestDocument("test.doc"), parser, metadata).xml;
+
+        assertTrue(xml.contains("test"));
+    }
+
+    private InputStream getTestDocument(String name) {
+        return TikaInputStream.get(OOXMLParserTest.class.getResourceAsStream("/test-documents/" + name));
+    }
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java?rev=1723223&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java Wed Jan  6 03:50:50 2016
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft;
+
+import static org.apache.tika.parser.microsoft.AbstractPOIContainerExtractionTest.getTestFile;
+import static org.junit.Assert.assertEquals;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.detect.DefaultDetector;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+
+/**
+ * Tests for the Old Excel (2-4) parser
+ */
+public class OldExcelParserTest extends TikaTest {
+    private static final String file = "testEXCEL_4.xls";
+
+    @Test
+    public void testDetection() throws Exception {
+        Detector detector = new DefaultDetector();
+        try (TikaInputStream stream = getTestFile(file)) {
+            assertEquals(
+                    MediaType.application("vnd.ms-excel.sheet.4"),
+                    detector.detect(stream, new Metadata()));
+        }
+    }
+
+    // Disabled, until we can get the POI code to tell us the version
+    @Test
+    @Ignore
+    public void testMetadata() throws Exception {
+        TikaInputStream stream = getTestFile(file);
+
+        Metadata metadata = new Metadata();
+        ContentHandler handler = new BodyContentHandler();
+
+        OldExcelParser parser = new OldExcelParser();
+        parser.parse(stream, handler, metadata, new ParseContext());
+
+        // We can get the content type
+        assertEquals("application/vnd.ms-excel.sheet.4", metadata.get(Metadata.CONTENT_TYPE));
+
+        // But no other metadata
+        assertEquals(null, metadata.get(TikaCoreProperties.TITLE));
+        assertEquals(null, metadata.get(Metadata.SUBJECT));
+    }
+
+    /**
+     * Check we can get the plain text properly
+     */
+    @Test
+    public void testPlainText() throws Exception {
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        try (TikaInputStream stream = getTestFile(file)) {
+            new OldExcelParser().parse(stream, handler, metadata, new ParseContext());
+        }
+
+        String text = handler.toString();
+
+        // Check we find a few words we expect in there
+        assertContains("Size", text);
+        assertContains("Returns", text);
+
+        // Check we find a few numbers we expect in there
+        assertContains("11", text);
+        assertContains("784", text);
+    }
+
+    /**
+     * Check the HTML version comes through correctly
+     */
+    @Test
+    public void testHTML() throws Exception {
+        XMLResult result = getXML(file);
+        String xml = result.xml;
+
+        // Sheet name not found - only 5+ have sheet names
+        assertNotContained("<p>Sheet 1</p>", xml);
+
+        // String cells
+        assertContains("<p>Table 10 -", xml);
+        assertContains("<p>Tax</p>", xml);
+        assertContains("<p>N/A</p>", xml);
+
+        // Number cells
+        assertContains("<p>(1)</p>", xml);
+        assertContains("<p>5.0</p>", xml);
+    }
+}