You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by bo...@apache.org on 2016/01/16 19:23:06 UTC

svn commit: r1725014 [14/28] - in /tika/branches/2.x: tika-parser-bundles/tika-multimedia-bundle/ tika-parser-modules/ tika-parser-modules/tika-advanced-module/ tika-parser-modules/tika-advanced-parser-module/ tika-parser-modules/tika-advanced-parser-m...

Added: tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestChmItspHeader.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestChmItspHeader.java?rev=1725014&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestChmItspHeader.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestChmItspHeader.java Sat Jan 16 18:23:01 2016
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.parser.chm.accessor.ChmItsfHeader;
+import org.apache.tika.parser.chm.accessor.ChmItspHeader;
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests all public methods of the ChmItspHeader
+ * 
+ */
+public class TestChmItspHeader {
+    private ChmItspHeader chmItspHeader = null;
+
+    @Before
+    public void setUp() throws Exception {
+        byte[] data = TestParameters.chmData;
+
+        ChmItsfHeader chmItsfHeader = new ChmItsfHeader();
+        // chmItsfHeader.parse(Arrays.copyOfRange(data, 0,
+        // ChmConstants.CHM_ITSF_V3_LEN - 1), chmItsfHeader);
+        chmItsfHeader.parse(ChmCommons.copyOfRange(data, 0,
+                ChmConstants.CHM_ITSF_V3_LEN - 1), chmItsfHeader);
+
+        chmItspHeader = new ChmItspHeader();
+        // chmItspHeader.parse(Arrays.copyOfRange( data, (int)
+        // chmItsfHeader.getDirOffset(),
+        // (int) chmItsfHeader.getDirOffset()
+        // + ChmConstants.CHM_ITSP_V1_LEN), chmItspHeader);
+        chmItspHeader.parse(ChmCommons.copyOfRange(data,
+                (int) chmItsfHeader.getDirOffset(),
+                (int) chmItsfHeader.getDirOffset()
+                        + ChmConstants.CHM_ITSP_V1_LEN), chmItspHeader);
+    }
+
+    @Test
+    public void testGetBlock_len() {
+        assertEquals(TestParameters.VP_BLOCK_LENGTH,
+                chmItspHeader.getBlock_len());
+    }
+
+    @Test
+    public void testGetBlockidx_intvl() {
+        assertEquals(TestParameters.VP_BLOCK_INDEX_INTERVAL,
+                chmItspHeader.getBlockidx_intvl());
+    }
+
+    @Test
+    public void testGetHeader_len() {
+        assertEquals(TestParameters.VP_ITSP_HEADER_LENGTH,
+                chmItspHeader.getHeader_len());
+    }
+
+    @Test
+    public void testGetIndex_depth() {
+        assertEquals(TestParameters.VP_INDEX_DEPTH,
+                chmItspHeader.getIndex_depth());
+    }
+
+    @Test
+    public void testGetIndex_head() {
+        assertEquals(TestParameters.VP_INDEX_HEAD,
+                chmItspHeader.getIndex_head());
+    }
+
+    @Test
+    public void testGetIndex_root() {
+        assertEquals(TestParameters.VP_INDEX_ROOT,
+                chmItspHeader.getIndex_root());
+    }
+
+    @Test
+    public void testGetLang_id() {
+        assertEquals(TestParameters.VP_LANGUAGE_ID,
+                chmItspHeader.getLang_id());
+    }
+
+    @Test
+    public void testGetNum_blocks() {
+        assertEquals(TestParameters.VP_UNKNOWN_NUM_BLOCKS,
+                chmItspHeader.getNum_blocks());
+    }
+
+    @Test
+    public void testGetUnknown_000c() {
+        assertEquals(TestParameters.VP_ITSP_UNKNOWN_000C,
+                chmItspHeader.getUnknown_000c());
+    }
+
+    @Test
+    public void testGetUnknown_0024() {
+        assertEquals(TestParameters.VP_ITSP_UNKNOWN_0024,
+                chmItspHeader.getUnknown_0024());
+    }
+
+    @Test
+    public void testGetUnknown_002() {
+        assertEquals(TestParameters.VP_ITSP_UNKNOWN_002C,
+                chmItspHeader.getUnknown_002c());
+    }
+
+    @Test
+    public void testGetUnknown_0044() {
+        assertEquals(TestParameters.VP_ITSP_BYTEARR_LEN,
+                chmItspHeader.getUnknown_0044().length);
+    }
+
+    @Test
+    public void testGetVersion() {
+        assertEquals(TestParameters.VP_ITSP_VERSION,
+                chmItspHeader.getVersion());
+    }
+
+    @Test
+    public void testGetSignature() {
+        assertEquals(TestParameters.VP_ISTP_SIGNATURE, new String(
+                chmItspHeader.getSignature(), UTF_8));
+    }
+
+    @Test
+    public void testGetSystem_uuid() {
+        assertEquals(TestParameters.VP_ITSP_BYTEARR_LEN,
+                chmItspHeader.getSystem_uuid().length);
+    }
+
+    @Test
+    public void testToString() {
+        assertTrue(chmItspHeader.toString().contains(
+                TestParameters.VP_ISTP_SIGNATURE));
+    }
+
+    @After
+    public void tearDown() throws Exception {
+        chmItspHeader = null;
+    }
+
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxState.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxState.java?rev=1725014&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxState.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxState.java Sat Jan 16 18:23:01 2016
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet;
+import org.apache.tika.parser.chm.accessor.ChmItsfHeader;
+import org.apache.tika.parser.chm.accessor.ChmItspHeader;
+import org.apache.tika.parser.chm.accessor.ChmLzxcControlData;
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.apache.tika.parser.chm.lzx.ChmLzxState;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests construction and string rendering of {@link ChmLzxState}, using the
+ * window size read from the control data of the sample CHM file held in
+ * {@link TestParameters#chmData}.
+ */
+public class TestChmLzxState {
+    private ChmLzxState chmLzxState;
+    private int windowSize;
+
+    /**
+     * Parses the ITSF header, the ITSP header and the directory listing of the
+     * sample file, then parses the control data to obtain {@link #windowSize}.
+     *
+     * @throws Exception propagated from the parsing calls
+     */
+    @Before
+    public void setUp() throws Exception {
+        byte[] data = TestParameters.chmData;
+
+        /* Creates and parses itsf header */
+        ChmItsfHeader chmItsHeader = new ChmItsfHeader();
+        chmItsHeader.parse(ChmCommons.copyOfRange(data, 0,
+                ChmConstants.CHM_ITSF_V3_LEN - 1), chmItsHeader);
+
+        /* Creates and parses itsp block, read at the directory offset */
+        int dirOffset = (int) chmItsHeader.getDirOffset();
+        ChmItspHeader chmItspHeader = new ChmItspHeader();
+        chmItspHeader.parse(ChmCommons.copyOfRange(data, dirOffset,
+                dirOffset + ChmConstants.CHM_ITSP_V1_LEN), chmItspHeader);
+
+        /* Creating instance of ChmDirListingContainer */
+        ChmDirectoryListingSet chmDirListCont = new ChmDirectoryListingSet(
+                data, chmItsHeader, chmItspHeader);
+        int indexOfControlData = ChmCommons.indexOf(
+                chmDirListCont.getDirectoryListingEntryList(),
+                ChmConstants.CONTROL_DATA);
+
+        /*
+         * Copies the control data chunk: it starts at the offset found for the
+         * LZXC bytes and is as long as the control data directory entry says.
+         */
+        int indexOfResetTable = ChmCommons.indexOfResetTableBlock(data,
+                ChmConstants.LZXC.getBytes(UTF_8));
+        byte[] dir_chunk = null;
+        if (indexOfResetTable > 0) {
+            dir_chunk = ChmCommons.copyOfRange(data, indexOfResetTable,
+                    indexOfResetTable
+                    + chmDirListCont.getDirectoryListingEntryList()
+                    .get(indexOfControlData).getLength());
+        }
+
+        ChmLzxcControlData clcd = new ChmLzxcControlData();
+        clcd.parse(dir_chunk, clcd);
+        windowSize = (int) clcd.getWindowSize();
+    }
+
+    /** A state can be constructed from the parsed window size. */
+    @Test
+    public void testChmLzxStateConstructor() throws TikaException {
+        chmLzxState = new ChmLzxState(windowSize);
+        assertNotNull(chmLzxState);
+    }
+
+    /**
+     * The string form of a freshly built state is longer than 20 characters.
+     * <p>
+     * The state is created here rather than borrowed from another test: JUnit
+     * runs every test method on a new instance of this class, so the field is
+     * never filled in by a different test.
+     */
+    @Test
+    public void testToString() throws TikaException {
+        chmLzxState = new ChmLzxState(windowSize);
+        assertTrue(chmLzxState.toString().length() > 20);
+    }
+
+    // TODO add more tests
+
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcControlData.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcControlData.java?rev=1725014&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcControlData.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcControlData.java Sat Jan 16 18:23:01 2016
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet;
+import org.apache.tika.parser.chm.accessor.ChmItsfHeader;
+import org.apache.tika.parser.chm.accessor.ChmItspHeader;
+import org.apache.tika.parser.chm.accessor.ChmLzxcControlData;
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests all public methods of ChmLzxcControlData block.
+ * <p>
+ * The instance under test is parsed from the control data chunk of the sample
+ * CHM file held in {@link TestParameters#chmData}.
+ */
+public class TestChmLzxcControlData {
+    private ChmLzxcControlData chmLzxcControlData = null;
+
+    /**
+     * Parses the ITSF header, the ITSP header and the directory listing of the
+     * sample file, then parses the control data chunk into
+     * {@link #chmLzxcControlData}.
+     *
+     * @throws Exception propagated from the parsing calls
+     */
+    @Before
+    public void setUp() throws Exception {
+        byte[] data = TestParameters.chmData;
+
+        /* Creates and parses itsf header */
+        ChmItsfHeader chmItsHeader = new ChmItsfHeader();
+        chmItsHeader.parse(ChmCommons.copyOfRange(data, 0,
+                ChmConstants.CHM_ITSF_V3_LEN - 1), chmItsHeader);
+
+        /* Creates and parses itsp block, read at the directory offset */
+        int dirOffset = (int) chmItsHeader.getDirOffset();
+        ChmItspHeader chmItspHeader = new ChmItspHeader();
+        chmItspHeader.parse(ChmCommons.copyOfRange(data, dirOffset,
+                dirOffset + ChmConstants.CHM_ITSP_V1_LEN), chmItspHeader);
+
+        /* Creating instance of ChmDirListingContainer */
+        ChmDirectoryListingSet chmDirListCont = new ChmDirectoryListingSet(
+                data, chmItsHeader, chmItspHeader);
+        int indexOfControlData = chmDirListCont.getControlDataIndex();
+
+        /*
+         * Copies the control data chunk: it starts at the offset found for the
+         * LZXC bytes and is as long as the control data directory entry says.
+         */
+        int indexOfResetTable = ChmCommons.indexOfResetTableBlock(data,
+                ChmConstants.LZXC.getBytes(UTF_8));
+        byte[] dir_chunk = null;
+        if (indexOfResetTable > 0) {
+            dir_chunk = ChmCommons.copyOfRange(data, indexOfResetTable,
+                    indexOfResetTable
+                            + chmDirListCont.getDirectoryListingEntryList()
+                                    .get(indexOfControlData).getLength());
+        }
+
+        /* Creates and parses control block */
+        chmLzxcControlData = new ChmLzxcControlData();
+        chmLzxcControlData.parse(dir_chunk, chmLzxcControlData);
+    }
+
+    /** The control data object created in {@link #setUp()} exists. */
+    @Test
+    public void testConstructorNotNull() {
+        assertNotNull(chmLzxcControlData);
+    }
+
+    /** Checks the parsed reset interval. */
+    @Test
+    public void testGetResetInterval() {
+        assertEquals(TestParameters.VP_RESET_INTERVAL,
+                chmLzxcControlData.getResetInterval());
+    }
+
+    /** Checks the parsed size. */
+    @Test
+    public void testGetSize() {
+        assertEquals(TestParameters.VP_CONTROL_DATA_SIZE,
+                chmLzxcControlData.getSize());
+    }
+
+    /** Checks the value returned by getUnknown_18(). */
+    @Test
+    public void testGetUnknown_18() {
+        assertEquals(TestParameters.VP_UNKNOWN_18,
+                chmLzxcControlData.getUnknown_18());
+    }
+
+    /** Checks the parsed version. */
+    @Test
+    public void testGetVersion() {
+        assertEquals(TestParameters.VP_CONTROL_DATA_VERSION,
+                chmLzxcControlData.getVersion());
+    }
+
+    /** Checks the parsed window size. */
+    @Test
+    public void testGetWindowSize() {
+        assertEquals(TestParameters.VP_WINDOW_SIZE,
+                chmLzxcControlData.getWindowSize());
+    }
+
+    /** Checks the parsed windows-per-reset value. */
+    @Test
+    public void testGetWindowsPerReset() {
+        assertEquals(TestParameters.VP_WINDOWS_PER_RESET,
+                chmLzxcControlData.getWindowsPerReset());
+    }
+
+    /** Checks that the string form of the block mentions the signature. */
+    @Test
+    public void testGetToString() {
+        assertTrue(chmLzxcControlData.toString().contains(
+                TestParameters.VP_CONTROL_DATA_SIGNATURE));
+    }
+
+    /**
+     * Checks only the length of the signature array against the UTF-8 byte
+     * length of the expected signature text; the bytes are not compared.
+     */
+    @Test
+    public void testGetSignature() {
+        assertEquals(
+                TestParameters.VP_CONTROL_DATA_SIGNATURE.getBytes(UTF_8).length,
+                chmLzxcControlData.getSignature().length);
+    }
+
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcResetTable.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcResetTable.java?rev=1725014&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcResetTable.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestChmLzxcResetTable.java Sat Jan 16 18:23:01 2016
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.chm;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet;
+import org.apache.tika.parser.chm.accessor.ChmItsfHeader;
+import org.apache.tika.parser.chm.accessor.ChmItspHeader;
+import org.apache.tika.parser.chm.accessor.ChmLzxcControlData;
+import org.apache.tika.parser.chm.accessor.ChmLzxcResetTable;
+import org.apache.tika.parser.chm.assertion.ChmAssert;
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Exercises the getters of {@link ChmLzxcResetTable} on the reset table parsed
+ * from the sample CHM file held in {@link TestParameters#chmData}.
+ */
+public class TestChmLzxcResetTable {
+    private ChmLzxcResetTable chmLzxcResetTable = null;
+
+    /**
+     * Works through the sample file in order: ITSF header, ITSP header,
+     * directory listing, control data and finally the reset table, which is
+     * kept in {@link #chmLzxcResetTable} for the tests.
+     *
+     * @throws Exception propagated from the parsing and assertion calls
+     */
+    @Before
+    public void setUp() throws Exception {
+        final byte[] bytes = TestParameters.chmData;
+
+        // ITSF header, parsed from a copy of the leading bytes of the file.
+        ChmItsfHeader itsf = new ChmItsfHeader();
+        byte[] itsfBytes = ChmCommons.copyOfRange(bytes, 0,
+                ChmConstants.CHM_ITSF_V3_LEN - 1);
+        itsf.parse(itsfBytes, itsf);
+
+        // ITSP block, copied from the directory offset the ITSF header reports.
+        ChmItspHeader itsp = new ChmItspHeader();
+        byte[] itspBytes = ChmCommons.copyOfRange(bytes,
+                (int) itsf.getDirOffset(),
+                (int) itsf.getDirOffset() + ChmConstants.CHM_ITSP_V1_LEN);
+        itsp.parse(itspBytes, itsp);
+
+        // Directory listing built from the raw bytes and both headers.
+        ChmDirectoryListingSet listing = new ChmDirectoryListingSet(bytes, itsf, itsp);
+        int controlDataIndex = listing.getControlDataIndex();
+
+        // Control data chunk: starts at the offset found for the LZXC bytes and
+        // runs for the length of the control data entry. It stays null when no
+        // positive offset is found.
+        int lzxcOffset = ChmCommons.indexOfResetTableBlock(bytes,
+                ChmConstants.LZXC.getBytes(UTF_8));
+        byte[] controlChunk = null;
+        if (lzxcOffset > 0) {
+            controlChunk = ChmCommons.copyOfRange(bytes, lzxcOffset, lzxcOffset
+                    + listing.getDirectoryListingEntryList()
+                            .get(controlDataIndex).getLength());
+        }
+        ChmLzxcControlData controlData = new ChmLzxcControlData();
+        controlData.parse(controlChunk, controlData);
+
+        // Reset table chunk: its directory entry gives an offset that is added
+        // to the data offset of the listing, plus the length to copy.
+        int resetTableIndex = listing.getResetTableIndex();
+        chmLzxcResetTable = new ChmLzxcResetTable();
+        int startIndex = (int) listing.getDataOffset()
+                + listing.getDirectoryListingEntryList()
+                        .get(resetTableIndex).getOffset();
+        ChmAssert.assertCopyingDataIndex(startIndex, bytes.length);
+
+        byte[] resetChunk = ChmCommons.copyOfRange(bytes, startIndex, startIndex
+                + listing.getDirectoryListingEntryList()
+                        .get(resetTableIndex).getLength());
+        chmLzxcResetTable.parse(resetChunk, chmLzxcResetTable);
+    }
+
+    /** The block address array has the expected number of entries. */
+    @Test
+    public void testGetBlockAddress() {
+        int addressCount = chmLzxcResetTable.getBlockAddress().length;
+        assertEquals(TestParameters.VP_RESET_TABLE_BA, addressCount);
+    }
+
+    /** The block count equals the value also expected for the address array length. */
+    @Test
+    public void testGetBlockCount() {
+        assertEquals(TestParameters.VP_RESET_TABLE_BA, chmLzxcResetTable.getBlockCount());
+    }
+
+    /** Checks the parsed block length. */
+    @Test
+    public void testGetBlockLen() {
+        assertEquals(TestParameters.VP_RES_TBL_BLOCK_LENGTH, chmLzxcResetTable.getBlockLen());
+    }
+
+    /** Checks the parsed compressed length. */
+    @Test
+    public void testGetCompressedLen() {
+        assertEquals(
+                TestParameters.VP_RES_TBL_COMPR_LENGTH,
+                chmLzxcResetTable.getCompressedLen());
+    }
+
+    /** Checks the parsed table offset. */
+    @Test
+    public void testGetTableOffset() {
+        assertEquals(TestParameters.VP_TBL_OFFSET, chmLzxcResetTable.getTableOffset());
+    }
+
+    /** Checks the parsed uncompressed length. */
+    @Test
+    public void testGetUncompressedLen() {
+        assertEquals(
+                TestParameters.VP_RES_TBL_UNCOMP_LENGTH,
+                chmLzxcResetTable.getUncompressedLen());
+    }
+
+    /** Checks the value returned by getUnknown(). */
+    @Test
+    public void testGetUnknown() {
+        assertEquals(TestParameters.VP_RES_TBL_UNKNOWN, chmLzxcResetTable.getUnknown());
+    }
+
+    /** Checks the parsed version. */
+    @Test
+    public void testGetVersion() {
+        assertEquals(TestParameters.VP_RES_TBL_VERSION, chmLzxcResetTable.getVersion());
+    }
+
+    /** The string form of the reset table is not empty. */
+    @Test
+    public void testToString() {
+        int renderedLength = chmLzxcResetTable.toString().length();
+        assertTrue(renderedLength > 0);
+    }
+
+    // TODO: add setters to be tested
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestDirectoryListingEntry.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestDirectoryListingEntry.java?rev=1725014&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestDirectoryListingEntry.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestDirectoryListingEntry.java Sat Jan 16 18:23:01 2016
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import org.apache.tika.parser.chm.accessor.DirectoryListingEntry;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests public methods of the DirectoryListingEntry class
+ * 
+ * @author olegt
+ * 
+ */
+public class TestDirectoryListingEntry {
+    private DirectoryListingEntry dle = null;
+
+    @Before
+    public void setUp() throws Exception {
+        dle = new DirectoryListingEntry(TestParameters.nameLength,
+                TestParameters.entryName, TestParameters.entryType,
+                TestParameters.offset, TestParameters.length);
+    }
+
+    @Test
+    public void testDefaultConstructor() {
+        assertNotNull(dle);
+    }
+
+    @Test
+    public void testParamConstructor() {
+        assertEquals(TestParameters.nameLength, dle.getNameLength());
+        assertEquals(TestParameters.entryName, dle.getName());
+        assertEquals(TestParameters.entryType, dle.getEntryType());
+        assertEquals(TestParameters.offset, dle.getOffset());
+        assertEquals(TestParameters.length, dle.getLength());
+    }
+
+    @Test
+    public void testToString() {
+        assertNotNull(dle.toString());
+    }
+
+    @Test
+    public void testGetNameLength() {
+        assertEquals(TestParameters.nameLength, dle.getNameLength());
+    }
+
+    @Test
+    public void testGetName() {
+        assertEquals(TestParameters.entryName, dle.getName());
+    }
+
+    @Test
+    public void testGetEntryType() {
+        assertEquals(TestParameters.entryType, dle.getEntryType());
+    }
+
+    @Test
+    public void testGetOffset() {
+        assertEquals(TestParameters.offset, dle.getOffset());
+    }
+
+    @Test
+    public void testGetLength() {
+        assertEquals(TestParameters.length, dle.getLength());
+    }
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestParameters.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestParameters.java?rev=1725014&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestParameters.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestParameters.java Sat Jan 16 18:23:01 2016
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.parser.chm.core.ChmCommons.EntryType;
+
+/**
+ * Holds test parameters such as verification points, together with the bytes
+ * of the sample CHM file that the CHM tests parse.
+ */
+public class TestParameters {
+    /* Prevents instantiation */
+    private TestParameters() {
+    }
+
+    /* Test values */
+    static final int nameLength = 5;
+    static final String entryName = TestParameters.class.getName();
+    // NOTE(review): the only non-final field here; confirm no test reassigns
+    // it before making it final like its neighbours.
+    static EntryType entryType = EntryType.COMPRESSED;
+    static final int offset = 3;
+    static final int length = 20;
+    static final int NTHREADS = 2;
+
+    static final int BUFFER_SIZE = 16384;
+
+    /* Whole sample file, read once when this class is initialised */
+    static final byte[] chmData = readResource("/test-documents/testChm.chm");
+
+    /**
+     * Reads a classpath resource completely into a byte array.
+     *
+     * @param name resource name, resolved with
+     *             {@link Class#getResourceAsStream(String)} on this class
+     * @return the full content of the resource
+     * @throws IllegalStateException if no resource with that name is found
+     * @throws RuntimeException wrapping the {@link IOException} if reading or
+     *                          closing the stream fails
+     */
+    private static byte[] readResource(String name) {
+        try (InputStream stream = TestParameters.class.getResourceAsStream(name)) {
+            // getResourceAsStream signals a missing resource with null; report
+            // it by name instead of failing later inside the copy.
+            if (stream == null) {
+                throw new IllegalStateException(
+                        "Test resource not found on classpath: " + name);
+            }
+            return IOUtils.toByteArray(stream);
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /* Verification points */
+    static final String VP_CHM_MIME_TYPE = "Content-Type=application/x-chm";
+    static final String VP_EXTRACTED_TEXT = "The TCard method accepts only numeric arguments";
+    static final String VP_ISTF_SIGNATURE = "ITSF";
+    static final String VP_ISTP_SIGNATURE = "ITSP";
+    static final String VP_PMGL_SIGNATURE = "PMGL";
+    static final String VP_CONTROL_DATA_SIGNATURE = "LZXC";
+
+    static final int VP_DIRECTORY_LENGTH = 4180;
+    static final int VP_DATA_OFFSET_LENGTH = 4300;
+    static final int VP_DIRECTORY_OFFSET = 120;
+    static final int VP_ITSF_HEADER_LENGTH = 96;
+    static final int VP_LANGUAGE_ID = 1033;
+    static final int VP_LAST_MODIFIED = 1042357880;
+    static final int VP_UNKNOWN_000C = 1;
+    static final int VP_UNKNOWN_LEN = 24;
+    static final int VP_UNKNOWN_OFFSET = 96;
+    static final int VP_VERSION = 3;
+    static final int VP_BLOCK_LENGTH = 4096;
+    static final int VP_BLOCK_INDEX_INTERVAL = 2;
+    static final int VP_ITSP_HEADER_LENGTH = 84;
+    static final int VP_INDEX_DEPTH = 1;
+    static final int VP_INDEX_HEAD = 0;
+    static final int VP_INDEX_ROOT = -1;
+    static final int VP_UNKNOWN_NUM_BLOCKS = -1;
+    static final int VP_ITSP_UNKNOWN_000C = 10;
+    static final int VP_ITSP_UNKNOWN_0024 = 0;
+    static final int VP_ITSP_UNKNOWN_002C = 1;
+    static final int VP_ITSP_BYTEARR_LEN = 16;
+    static final int VP_ITSP_VERSION = 1;
+    static final int VP_RESET_INTERVAL = 2;
+    static final int VP_CONTROL_DATA_SIZE = 6;
+    static final int VP_UNKNOWN_18 = 0;
+    static final int VP_CONTROL_DATA_VERSION = 2;
+    static final int VP_WINDOW_SIZE = 65536;
+    static final int VP_WINDOWS_PER_RESET = 1;
+    static final int VP_CHM_ENTITIES_NUMBER = 100; //updated  by Hawking
+    static final int VP_PMGI_FREE_SPACE = 3;
+    static final int VP_PMGL_BLOCK_NEXT = -1;
+    static final int VP_PMGL_BLOCK_PREV = -1;
+    static final int VP_PMGL_FREE_SPACE = 1644;
+    static final int VP_PMGL_UNKNOWN_008 = 0;
+    static final int VP_RESET_TABLE_BA = 12;
+    static final int VP_RES_TBL_BLOCK_LENGTH = 32768;
+    static final int VP_RES_TBL_COMPR_LENGTH = 177408;
+    static final int VP_RES_TBL_UNCOMP_LENGTH = 383786;
+    static final int VP_TBL_OFFSET = 40;
+    static final int VP_RES_TBL_UNKNOWN = 8;
+    static final int VP_RES_TBL_VERSION = 2;
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestPmgiHeader.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestPmgiHeader.java?rev=1725014&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestPmgiHeader.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestPmgiHeader.java Sat Jan 16 18:23:01 2016
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.parser.chm.accessor.ChmPmgiHeader;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link ChmPmgiHeader}, driven by the bytes in
+ * {@code TestParameters.chmData}.
+ */
+public class TestPmgiHeader {
+    ChmPmgiHeader chmPmgiHeader = null;
+
+    /**
+     * Creates a new header and lets it parse the test bytes before each test.
+     */
+    @Before
+    public void setUp() throws Exception {
+        chmPmgiHeader = new ChmPmgiHeader();
+        chmPmgiHeader.parse(TestParameters.chmData, chmPmgiHeader);
+    }
+
+    /** The parsed header must exist and have a non-empty string form. */
+    @Test
+    public void testToString() {
+        boolean hasText = chmPmgiHeader != null
+                && !chmPmgiHeader.toString().isEmpty();
+        assertTrue(hasText);
+    }
+
+    /** The parsed free-space value must equal the expected test parameter. */
+    @Test
+    public void testGetFreeSpace() {
+        assertEquals(
+                TestParameters.VP_PMGI_FREE_SPACE,
+                chmPmgiHeader.getFreeSpace());
+    }
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java?rev=1725014&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java Sat Jan 16 18:23:01 2016
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.parser.chm.accessor.ChmPmglHeader;
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestPmglHeader {
+    ChmPmglHeader chmPmglHeader = null;
+
+    @Before
+    public void setUp() throws Exception {
+        byte[] data = TestParameters.chmData;
+        chmPmglHeader = new ChmPmglHeader();
+        chmPmglHeader.parse(ChmCommons.copyOfRange(data,
+                ChmConstants.START_PMGL, ChmConstants.START_PMGL
+                        + ChmConstants.CHM_PMGL_LEN + 10), chmPmglHeader);
+    }
+
+    @Test
+    public void testToString() {
+        assertTrue((chmPmglHeader != null)
+                && chmPmglHeader.toString().length() > 0);
+    }
+
+    @Test
+    public void testChmPmglHeaderGet() {
+        assertEquals(TestParameters.VP_PMGL_SIGNATURE, new String(
+                chmPmglHeader.getSignature(), UTF_8));
+    }
+
+    @Test
+    public void testGetBlockNext() {
+        assertEquals(TestParameters.VP_PMGL_BLOCK_NEXT,
+                chmPmglHeader.getBlockNext());
+    }
+
+    @Test
+    public void testGetBlockPrev() {
+        assertEquals(TestParameters.VP_PMGL_BLOCK_PREV,
+                chmPmglHeader.getBlockPrev());
+    }
+
+    @Test
+    public void testGetFreeSpace() {
+        assertEquals(TestParameters.VP_PMGL_FREE_SPACE,
+                chmPmglHeader.getFreeSpace());
+    }
+
+    @Test
+    public void testGetUnknown0008() {
+        assertEquals(TestParameters.VP_PMGL_UNKNOWN_008,
+                chmPmglHeader.getUnknown0008());
+    }
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/mbox/MboxParserTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/mbox/MboxParserTest.java?rev=1725014&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/mbox/MboxParserTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/mbox/MboxParserTest.java Sat Jan 16 18:23:01 2016
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mbox;
+
+import static org.apache.tika.TikaTest.assertContains;
+import static org.junit.Assert.assertEquals;
+
+import java.io.InputStream;
+import java.util.Map;
+
+import org.apache.tika.detect.TypeDetector;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Before;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+
+public class MboxParserTest {
+
+    protected ParseContext recursingContext;
+    private Parser autoDetectParser;
+    private TypeDetector typeDetector;
+    private MboxParser mboxParser;
+
+    private static InputStream getStream(String name) {
+        return MboxParserTest.class.getClass().getResourceAsStream(name);
+    }
+
+    @Before
+    public void setUp() throws Exception {
+        typeDetector = new TypeDetector();
+        autoDetectParser = new AutoDetectParser(typeDetector);
+        recursingContext = new ParseContext();
+        recursingContext.set(Parser.class, autoDetectParser);
+
+        mboxParser = new MboxParser();
+        mboxParser.setTracking(true);
+    }
+
+    @Test
+    public void testSimple() throws Exception {
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        try (InputStream stream = getStream("/test-documents/simple.mbox")) {
+            mboxParser.parse(stream, handler, metadata, recursingContext);
+        }
+
+        String content = handler.toString();
+        assertContains("Test content 1", content);
+        assertContains("Test content 2", content);
+        assertEquals("application/mbox", metadata.get(Metadata.CONTENT_TYPE));
+
+        Map<Integer, Metadata> mailsMetadata = mboxParser.getTrackingMetadata();
+        assertEquals("Nb. Of mails", 2, mailsMetadata.size());
+
+        Metadata mail1 = mailsMetadata.get(0);
+        assertEquals("message/rfc822", mail1.get(Metadata.CONTENT_TYPE));
+        assertEquals("envelope-sender-mailbox-name Mon Jun 01 10:00:00 2009", mail1.get("MboxParser-from"));
+
+        Metadata mail2 = mailsMetadata.get(1);
+        assertEquals("message/rfc822", mail2.get(Metadata.CONTENT_TYPE));
+        assertEquals("envelope-sender-mailbox-name Mon Jun 01 11:00:00 2010", mail2.get("MboxParser-from"));
+    }
+
+    @Test
+    public void testHeaders() throws Exception {
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        try (InputStream stream = getStream("/test-documents/headers.mbox")) {
+            mboxParser.parse(stream, handler, metadata, recursingContext);
+        }
+
+        assertContains("Test content", handler.toString());
+        assertEquals("Nb. Of mails", 1, mboxParser.getTrackingMetadata().size());
+
+        Metadata mailMetadata = mboxParser.getTrackingMetadata().get(0);
+
+        assertEquals("2009-06-10T03:58:45Z", mailMetadata.get(TikaCoreProperties.CREATED));
+        assertEquals("<au...@domain.com>", mailMetadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("subject", mailMetadata.get(Metadata.SUBJECT));
+        assertEquals("<au...@domain.com>", mailMetadata.get(Metadata.AUTHOR));
+        assertEquals("message/rfc822", mailMetadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("author@domain.com", mailMetadata.get("Message-From"));
+        assertEquals("<na...@domain.com>", mailMetadata.get("MboxParser-return-path"));
+    }
+
+    @Test
+    public void testMultilineHeader() throws Exception {
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        try (InputStream stream = getStream("/test-documents/multiline.mbox")) {
+            mboxParser.parse(stream, handler, metadata, recursingContext);
+        }
+
+        assertEquals("Nb. Of mails", 1, mboxParser.getTrackingMetadata().size());
+
+        Metadata mailMetadata = mboxParser.getTrackingMetadata().get(0);
+        assertEquals("from xxx by xxx with xxx; date", mailMetadata.get("MboxParser-received"));
+    }
+
+    @Test
+    public void testQuoted() throws Exception {
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        try (InputStream stream = getStream("/test-documents/quoted.mbox")) {
+            mboxParser.parse(stream, handler, metadata, recursingContext);
+        }
+
+        assertContains("Test content", handler.toString());
+        assertContains("> quoted stuff", handler.toString());
+    }
+
+    @Test
+    public void testComplex() throws Exception {
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        try (InputStream stream = getStream("/test-documents/complex.mbox")) {
+            mboxParser.parse(stream, handler, metadata, recursingContext);
+        }
+
+        assertEquals("Nb. Of mails", 3, mboxParser.getTrackingMetadata().size());
+
+        Metadata firstMail = mboxParser.getTrackingMetadata().get(0);
+        assertEquals("Re: question about when shuffle/sort start working", firstMail.get(Metadata.SUBJECT));
+        assertEquals("Re: question about when shuffle/sort start working", firstMail.get(TikaCoreProperties.TITLE));
+        assertEquals("Jothi Padmanabhan <jo...@yahoo-inc.com>", firstMail.get(Metadata.AUTHOR));
+        assertEquals("Jothi Padmanabhan <jo...@yahoo-inc.com>", firstMail.get(TikaCoreProperties.CREATOR));
+        assertEquals("core-user@hadoop.apache.org", firstMail.get(Metadata.MESSAGE_RECIPIENT_ADDRESS));
+
+        assertContains("When a Mapper completes", handler.toString());
+    }
+
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/mbox/OutlookPSTParserTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/mbox/OutlookPSTParserTest.java?rev=1725014&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/mbox/OutlookPSTParserTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/mbox/OutlookPSTParserTest.java Sat Jan 16 18:23:01 2016
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mbox;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.ToHTMLContentHandler;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Tests for {@link OutlookPSTParser}: the supported media type, the HTML
+ * produced for a sample PST file, and the metadata handed to the embedded
+ * document extractor for each message.
+ */
+public class OutlookPSTParserTest extends TikaTest {
+
+  private final Parser parser = new OutlookPSTParser();
+
+  /** The parser must list the Outlook PST media type among its supported types. */
+  @Test
+  public void testAccept() throws Exception {
+    assertTrue(parser.getSupportedTypes(null).contains(MediaType.application("vnd.ms-outlook-pst")));
+  }
+
+  /**
+   * Parses {@code testPST.pst} through an {@link AutoDetectParser}, then checks
+   * the generated HTML and the metadata recorded for the embedded messages.
+   */
+  @Test
+  public void testParse() throws Exception {
+    Parser pstParser = new AutoDetectParser();
+    Metadata metadata = new Metadata();
+    ContentHandler handler = new ToHTMLContentHandler();
+
+    ParseContext context = new ParseContext();
+    EmbeddedTrackingExtractor trackingExtractor = new EmbeddedTrackingExtractor(context);
+    context.set(EmbeddedDocumentExtractor.class, trackingExtractor);
+    context.set(Parser.class, new AutoDetectParser());
+
+    // Close the fixture stream once parsing has finished instead of leaking it.
+    try (InputStream stream = getResourceAsStream("/test-documents/testPST.pst")) {
+      pstParser.parse(stream, handler, metadata, context);
+    }
+
+    String output = handler.toString();
+
+    assertFalse(output.isEmpty());
+    // assertContains reports the searched text and the haystack on failure.
+    assertContains("<meta name=\"Content-Length\" content=\"271360\">", output);
+    assertContains("<meta name=\"Content-Type\" content=\"application/vnd.ms-outlook-pst\">", output);
+
+    assertContains("<body><div class=\"email-folder\"><h1>", output);
+    assertContains("<div class=\"embedded\" id=\"&lt;530D9CAC.5080901@gmail.com&gt;\"><h1>Re: Feature Generators</h1>", output);
+    assertContains("<div class=\"embedded\" id=\"&lt;1393363252.28814.YahooMailNeo@web140906.mail.bf1.yahoo.com&gt;\"><h1>Re: init tokenizer fails: \"Bad type in putfield/putstatic\"</h1>", output);
+    assertContains("Gary Murphy commented on TIKA-1250:", output);
+
+    assertContains("<div class=\"email-folder\"><h1>Racine (pour la recherche)</h1>", output);
+
+
+    List<Metadata> metaList = trackingExtractor.trackingMetadata;
+    assertEquals(6, metaList.size());
+
+    Metadata firstMail = metaList.get(0);
+    assertEquals("Jörn Kottmann", firstMail.get(TikaCoreProperties.CREATOR));
+    assertEquals("Re: Feature Generators", firstMail.get(TikaCoreProperties.TITLE));
+    assertEquals("kottmann@gmail.com", firstMail.get("senderEmailAddress"));
+    assertEquals("users@opennlp.apache.org", firstMail.get("displayTo"));
+    assertEquals("", firstMail.get("displayCC"));
+    assertEquals("", firstMail.get("displayBCC"));
+  }
+
+
+  /**
+   * Embedded-document extractor that accepts every embedded document and
+   * records its metadata before delegating to the parsing extractor.
+   * Declared static because it never touches the enclosing test instance.
+   */
+  private static final class EmbeddedTrackingExtractor extends ParsingEmbeddedDocumentExtractor {
+    /** Metadata of every embedded document seen, in the order received. */
+    final List<Metadata> trackingMetadata = new ArrayList<>();
+
+    EmbeddedTrackingExtractor(ParseContext context) {
+      super(context);
+    }
+
+    @Override
+    public boolean shouldParseEmbedded(Metadata metadata) {
+      return true;
+    }
+
+    @Override
+    public void parseEmbedded(InputStream stream, ContentHandler handler, Metadata metadata, boolean outputHtml) throws SAXException, IOException {
+      this.trackingMetadata.add(metadata);
+      super.parseEmbedded(stream, handler, metadata, outputHtml);
+    }
+
+  }
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java?rev=1725014&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java Sat Jan 16 18:23:01 2016
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import java.net.URL;
+
+import org.apache.tika.TikaTest.TrackingHandler;
+import org.apache.tika.extractor.ContainerExtractor;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.mime.MediaType;
+
+/**
+ * Parent class of tests that the various POI powered parsers are
+ * able to extract their embedded contents.
+ */
+public abstract class AbstractPOIContainerExtractionTest {
+    public static final MediaType TYPE_DOC = MediaType.application("msword");
+    public static final MediaType TYPE_PPT = MediaType.application("vnd.ms-powerpoint");
+    public static final MediaType TYPE_XLS = MediaType.application("vnd.ms-excel");
+    public static final MediaType TYPE_DOCX = MediaType.application("vnd.openxmlformats-officedocument.wordprocessingml.document");
+    public static final MediaType TYPE_PPTX = MediaType.application("vnd.openxmlformats-officedocument.presentationml.presentation");
+    public static final MediaType TYPE_XLSX = MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+    public static final MediaType TYPE_MSG = MediaType.application("vnd.ms-outlook");
+
+    public static final MediaType TYPE_TXT = MediaType.text("plain");
+    public static final MediaType TYPE_PDF = MediaType.application("pdf");
+
+    public static final MediaType TYPE_JPG = MediaType.image("jpeg");
+    public static final MediaType TYPE_GIF = MediaType.image("gif");
+    public static final MediaType TYPE_PNG = MediaType.image("png");
+    public static final MediaType TYPE_EMF = MediaType.application("x-emf");
+    public static final MediaType TYPE_WMF = MediaType.application("x-msmetafile");
+
+    protected static TikaInputStream getTestFile(String filename) throws Exception {
+        URL input = AbstractPOIContainerExtractionTest.class.getResource(
+                "/test-documents/" + filename);
+        assertNotNull(filename + " not found", input);
+
+        return TikaInputStream.get(input);
+    }
+
+    protected TrackingHandler process(String filename, ContainerExtractor extractor, boolean recurse) throws Exception {
+        try (TikaInputStream stream = getTestFile(filename)) {
+            assertEquals(true, extractor.isSupported(stream));
+
+            // Process it
+            TrackingHandler handler = new TrackingHandler();
+            if (recurse) {
+                extractor.extract(stream, extractor, handler);
+            } else {
+                extractor.extract(stream, null, handler);
+            }
+
+            // So they can check what happened
+            return handler;
+        }
+    }
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java?rev=1725014&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java Sat Jan 16 18:23:01 2016
@@ -0,0 +1,443 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft;
+
+import static org.apache.tika.TikaTest.assertContains;
+import static org.apache.tika.TikaTest.assertNotContained;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.InputStream;
+import java.util.Locale;
+
+import org.apache.tika.detect.DefaultDetector;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.exception.EncryptedDocumentException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
+import org.apache.tika.metadata.OfficeOpenXMLExtended;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.PasswordProvider;
+import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+
+public class ExcelParserTest {
+    /**
+     * Parses testEXCEL.xls with the {@link OfficeParser} and checks the
+     * content type, title, author and date metadata as well as the text.
+     */
+    @Test
+    @SuppressWarnings("deprecation") // Checks legacy Tika-1.0 style metadata keys
+    public void testExcelParser() throws Exception {
+        final String resource = "/test-documents/testEXCEL.xls";
+        ParseContext parseContext = new ParseContext();
+        parseContext.set(Locale.class, Locale.US);
+        Metadata meta = new Metadata();
+        ContentHandler bodyHandler = new BodyContentHandler();
+
+        try (InputStream in = ExcelParserTest.class.getResourceAsStream(resource)) {
+            new OfficeParser().parse(in, bodyHandler, meta, parseContext);
+
+            assertEquals("application/vnd.ms-excel", meta.get(Metadata.CONTENT_TYPE));
+            assertEquals("Simple Excel document", meta.get(TikaCoreProperties.TITLE));
+            assertEquals("Keith Bennett", meta.get(TikaCoreProperties.CREATOR));
+            assertEquals("Keith Bennett", meta.get(Metadata.AUTHOR));
+
+            // Created: Mon Oct 01 17:13:56 BST 2007
+            assertEquals("2007-10-01T16:13:56Z", meta.get(TikaCoreProperties.CREATED));
+            assertEquals("2007-10-01T16:13:56Z", meta.get(Metadata.CREATION_DATE));
+
+            // Modified: Mon Oct 01 17:31:43 BST 2007
+            assertEquals("2007-10-01T16:31:43Z", meta.get(TikaCoreProperties.MODIFIED));
+            assertEquals("2007-10-01T16:31:43Z", meta.get(Metadata.DATE));
+
+            String text = bodyHandler.toString();
+            assertContains("Sample Excel Worksheet", text);
+            assertContains("Numbers and their Squares", text);
+            assertContains("\t\tNumber\tSquare", text);
+            // Whole numbers must be emitted without a trailing ".0"
+            assertContains("9", text);
+            assertNotContained("9.0", text);
+            assertContains("196", text);
+            assertNotContained("196.0", text);
+        }
+    }
+
+    /**
+     * Parses testEXCEL-formats.xls with the US locale and checks that number,
+     * currency, scientific, percentage, time, date and fraction cell formats
+     * show up in the extracted text as expected.
+     */
+    @Test
+    public void testExcelParserFormatting() throws Exception {
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
+                "/test-documents/testEXCEL-formats.xls")) {
+            Metadata metadata = new Metadata();
+            ParseContext context = new ParseContext();
+            context.set(Locale.class, Locale.US);
+            ContentHandler handler = new BodyContentHandler();
+            new OfficeParser().parse(input, handler, metadata, context);
+
+            assertEquals(
+                    "application/vnd.ms-excel",
+                    metadata.get(Metadata.CONTENT_TYPE));
+
+            String content = handler.toString();
+
+            // Number #,##0.00
+            assertContains("1,599.99", content);
+            assertContains("-1,599.99", content);
+
+            // Currency $#,##0.00;[Red]($#,##0.00)
+            assertContains("$1,599.99", content);
+            assertContains("($1,599.99)", content);
+
+            // Scientific 0.00E+00
+            // poi <=3.8beta1 returns 1.98E08, newer versions return 1.98E+08
+            assertTrue(content.contains("1.98E08") || content.contains("1.98E+08"));
+            assertTrue(content.contains("-1.98E08") || content.contains("-1.98E+08"));
+
+            // Percentage.
+            assertContains("2.50%", content);
+            // Excel rounds up to 3%. The former Java 1.5 fallback ("2%") was
+            // unreachable: this method uses try-with-resources, so it can only
+            // be compiled and run on Java 7 or later.
+            assertContains("3%", content);
+
+            // Time Format: h:mm
+            assertContains("6:15", content);
+            assertContains("18:15", content);
+
+            // Date Format: d-mmm-yy
+            assertContains("17-May-07", content);
+
+            // Date Format: m/d/yy
+            assertContains("10/3/09", content);
+
+            // Date/Time Format: m/d/yy h:mm
+            assertContains("1/19/08 4:35", content);
+
+            // Fraction (2.5): # ?/?
+            assertContains("2 1/2", content);
+
+
+            // Below assertions represent outstanding formatting issues to be addressed
+            // they are included to allow the issues to be progressed with the Apache POI
+            // team - See TIKA-103.
+
+            /*************************************************************************
+             // Custom Number (0 "dollars and" .00 "cents")
+             assertContains("19 dollars and .99 cents", content);
+
+             // Custom Number ("At" h:mm AM/PM "on" dddd mmmm d"," yyyy)
+             assertContains("At 4:20 AM on Thursday May 17, 2007", content);
+             **************************************************************************/
+
+        }
+    }
+
+    /**
+     * Parses a password-protected workbook twice: first without a password,
+     * expecting an {@link EncryptedDocumentException}, then with a
+     * {@link PasswordProvider} that supplies "tika", expecting normal output.
+     */
+    @Test
+    public void testExcelParserPassword() throws Exception {
+        final String resource = "/test-documents/testEXCEL_protected_passtika.xls";
+
+        // First attempt: no password available, so parsing has to be refused
+        try (InputStream in = ExcelParserTest.class.getResourceAsStream(resource)) {
+            Metadata meta = new Metadata();
+            ContentHandler bodyHandler = new BodyContentHandler();
+            ParseContext noPasswordContext = new ParseContext();
+            noPasswordContext.set(Locale.class, Locale.US);
+            new OfficeParser().parse(in, bodyHandler, meta, noPasswordContext);
+            fail("Document is encrypted, shouldn't parse");
+        } catch (EncryptedDocumentException e) {
+            // Good
+        }
+
+        // Second attempt: same document, password supplied via the context
+        PasswordProvider tikaPassword = new PasswordProvider() {
+            @Override
+            public String getPassword(Metadata ignored) {
+                return "tika";
+            }
+        };
+        try (InputStream in = ExcelParserTest.class.getResourceAsStream(resource)) {
+            Metadata meta = new Metadata();
+            ContentHandler bodyHandler = new BodyContentHandler();
+            ParseContext parseContext = new ParseContext();
+            parseContext.set(Locale.class, Locale.US);
+            parseContext.set(PasswordProvider.class, tikaPassword);
+            new OfficeParser().parse(in, bodyHandler, meta, parseContext);
+
+            assertEquals("application/vnd.ms-excel", meta.get(Metadata.CONTENT_TYPE));
+
+            assertEquals(null, meta.get(TikaCoreProperties.TITLE));
+            assertEquals("Antoni", meta.get(TikaCoreProperties.CREATOR));
+            assertEquals("2011-11-25T09:52:48Z", meta.get(TikaCoreProperties.CREATED));
+
+            String text = bodyHandler.toString();
+            assertContains("This is an Encrypted Excel spreadsheet", text);
+            assertNotContained("9.0", text);
+        }
+    }
+
+    /**
+     * TIKA-214 - chart labels and similar chart text must be part of the
+     * extracted content, alongside the ordinary sheet text.
+     */
+    @Test
+    public void testExcelParserCharts() throws Exception {
+        final String resource = "/test-documents/testEXCEL-charts.xls";
+        Metadata meta = new Metadata();
+        ParseContext parseContext = new ParseContext();
+        parseContext.set(Locale.class, Locale.US);
+        ContentHandler bodyHandler = new BodyContentHandler();
+
+        try (InputStream in = ExcelParserTest.class.getResourceAsStream(resource)) {
+            new OfficeParser().parse(in, bodyHandler, meta, parseContext);
+
+            assertEquals("application/vnd.ms-excel", meta.get(Metadata.CONTENT_TYPE));
+
+            String text = bodyHandler.toString();
+
+            // First sheet: pie chart
+            assertContains("charttabyodawg", text);
+            assertContains("WhamPuff", text);
+
+            // Second sheet: bar chart plus some text
+            assertContains("Sheet1", text);
+            assertContains("Test Excel Spreasheet", text);
+            assertContains("foo", text);
+            assertContains("bar", text);
+            assertContains("fizzlepuff", text);
+            assertContains("whyaxis", text);
+            assertContains("eksaxis", text);
+
+            // Third sheet: text only
+            assertContains("Sheet2", text);
+            assertContains("dingdong", text);
+        }
+    }
+
+    /**
+     * Parses jxl.xls with OfficeParser and checks the reported content
+     * type and one known string from the extracted text.
+     */
+    @Test
+    public void testJXL() throws Exception {
+        Metadata meta = new Metadata();
+        // -1: no write limit on the collected body text (per BodyContentHandler docs)
+        ContentHandler bodyHandler = new BodyContentHandler(-1);
+        ParseContext parseContext = new ParseContext();
+        parseContext.set(Locale.class, Locale.US);
+
+        try (InputStream stream = ExcelParserTest.class.getResourceAsStream(
+                "/test-documents/jxl.xls")) {
+            new OfficeParser().parse(stream, bodyHandler, meta, parseContext);
+        }
+
+        assertEquals("application/vnd.ms-excel", meta.get(Metadata.CONTENT_TYPE));
+        assertContains("Number Formats", bodyHandler.toString());
+    }
+
+    /**
+     * Parses a Works 7.0 spreadsheet (.xlr) with OfficeParser and checks
+     * that a known string appears in the extracted text.
+     */
+    @Test
+    public void testWorksSpreadsheet70() throws Exception {
+        Metadata meta = new Metadata();
+        ContentHandler bodyHandler = new BodyContentHandler(-1);
+        ParseContext parseContext = new ParseContext();
+        parseContext.set(Locale.class, Locale.US);
+
+        try (InputStream stream = ExcelParserTest.class.getResourceAsStream(
+                "/test-documents/testWORKSSpreadsheet7.0.xlr")) {
+            new OfficeParser().parse(stream, bodyHandler, meta, parseContext);
+        }
+
+        assertContains("Microsoft Works", bodyHandler.toString());
+    }
+
+    /**
+     * We don't currently support the .xlsb file format 
+     *  (an OOXML container with binary blobs), but we 
+     *  shouldn't break on these files either (TIKA-826)  
+     */
+    @Test
+    public void testExcelXLSB() throws Exception {
+        Detector detector = new DefaultDetector();
+        AutoDetectParser parser = new AutoDetectParser();
+
+        Metadata m = new Metadata();
+        m.add(Metadata.RESOURCE_NAME_KEY, "excel.xlsb");
+
+        // Should be detected correctly
+        MediaType type;
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
+                "/test-documents/testEXCEL.xlsb")) {
+            type = detector.detect(input, m);
+            assertEquals("application/vnd.ms-excel.sheet.binary.macroenabled.12", type.toString());
+        }
+
+        // OfficeParser won't handle it
+        assertEquals(false, (new OfficeParser()).getSupportedTypes(new ParseContext()).contains(type));
+
+        // OOXMLParser won't handle it
+        assertEquals(false, (new OOXMLParser()).getSupportedTypes(new ParseContext()).contains(type));
+
+        // AutoDetectParser doesn't break on it
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL.xlsb")) {
+            ContentHandler handler = new BodyContentHandler(-1);
+            ParseContext context = new ParseContext();
+            context.set(Locale.class, Locale.US);
+            parser.parse(input, handler, m, context);
+
+            String content = handler.toString();
+            assertEquals("", content);
+        }
+    }
+
+    /**
+     * Excel 5 and 95 are older formats, and only get basic support
+     */
+    @Test
+    public void testExcel95() throws Exception {
+        Detector detector = new DefaultDetector();
+        AutoDetectParser parser = new AutoDetectParser();
+        MediaType type;
+        Metadata m;
+
+        // First try detection of Excel 5
+        m = new Metadata();
+        m.add(Metadata.RESOURCE_NAME_KEY, "excel_5.xls");
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_5.xls")) {
+            type = detector.detect(input, m);
+            assertEquals("application/vnd.ms-excel", type.toString());
+        }
+
+        // Now Excel 95
+        m = new Metadata();
+        m.add(Metadata.RESOURCE_NAME_KEY, "excel_95.xls");
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_95.xls")) {
+            type = detector.detect(input, m);
+            assertEquals("application/vnd.ms-excel", type.toString());
+        }
+
+        // OfficeParser can handle it
+        assertEquals(true, (new OfficeParser()).getSupportedTypes(new ParseContext()).contains(type));
+
+        // OOXMLParser won't handle it
+        assertEquals(false, (new OOXMLParser()).getSupportedTypes(new ParseContext()).contains(type));
+
+
+        // Parse the Excel 5 file
+        m = new Metadata();
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_5.xls")) {
+            ContentHandler handler = new BodyContentHandler(-1);
+            ParseContext context = new ParseContext();
+            context.set(Locale.class, Locale.US);
+            parser.parse(input, handler, m, context);
+
+            String content = handler.toString();
+
+            // Sheet names
+            assertContains("Feuil1", content);
+            assertContains("Feuil3", content);
+
+            // Text
+            assertContains("Sample Excel", content);
+            assertContains("Number", content);
+
+            // Numbers
+            assertContains("15", content);
+            assertContains("225", content);
+
+            // Metadata was also fetched
+            assertEquals("Simple Excel document", m.get(TikaCoreProperties.TITLE));
+            assertEquals("Keith Bennett", m.get(TikaCoreProperties.CREATOR));
+        }
+
+        // Parse the Excel 95 file
+        m = new Metadata();
+        try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_95.xls")) {
+            ContentHandler handler = new BodyContentHandler(-1);
+            ParseContext context = new ParseContext();
+            context.set(Locale.class, Locale.US);
+            parser.parse(input, handler, m, context);
+
+            String content = handler.toString();
+
+            // Sheet name
+            assertContains("Foglio1", content);
+
+            // Very boring file, no actual text or numbers!
+
+            // Metadata was also fetched
+            assertEquals(null, m.get(TikaCoreProperties.TITLE));
+            assertEquals("Marco Quaranta", m.get(Office.LAST_AUTHOR));
+        }
+    }
+
+    /**
+     * Ensures that custom OLE2 (HPSF) properties are extracted and exposed
+     * under "custom:"-prefixed metadata keys, next to the standard ones.
+     */
+    @Test
+    public void testCustomProperties() throws Exception {
+        Metadata metadata = new Metadata();
+        ContentHandler bodyHandler = new BodyContentHandler(-1);
+        ParseContext parseContext = new ParseContext();
+        parseContext.set(Locale.class, Locale.US);
+
+        try (InputStream stream = ExcelParserTest.class.getResourceAsStream(
+                "/test-documents/testEXCEL_custom_props.xls")) {
+            new OfficeParser().parse(stream, bodyHandler, metadata, parseContext);
+        }
+
+        // Standard properties
+        assertEquals("application/vnd.ms-excel", metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("", metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("", metadata.get(TikaCoreProperties.MODIFIER));
+        assertEquals("2011-08-22T13:45:54Z", metadata.get(TikaCoreProperties.MODIFIED));
+        assertEquals("2006-09-12T15:06:44Z", metadata.get(TikaCoreProperties.CREATED));
+        assertEquals("Microsoft Excel", metadata.get(OfficeOpenXMLExtended.APPLICATION));
+
+        // Custom properties, as { metadata key, expected value } pairs
+        final String[][] expectedCustom = {
+                {"custom:myCustomBoolean", "true"},
+                {"custom:myCustomNumber", "3"},
+                {"custom:MyCustomString", "MyStringValue"},
+                {"custom:MyCustomDate", "2010-12-30T22:00:00Z"},
+                {"custom:myCustomSecondDate", "2010-12-29T22:00:00Z"}
+        };
+        for (String[] pair : expectedCustom) {
+            assertEquals(pair[1], metadata.get(pair[0]));
+        }
+    }
+
+	@Test
+    public void testHeaderAndFooterExtraction() throws Exception {
+        // Checks that header and footer text is extracted along with cell
+        // content and document metadata.
+        Metadata meta = new Metadata();
+        ContentHandler bodyHandler = new BodyContentHandler();
+        ParseContext parseContext = new ParseContext();
+        parseContext.set(Locale.class, Locale.UK);
+
+        try (InputStream stream = ExcelParserTest.class.getResourceAsStream(
+                "/test-documents/testEXCEL_headers_footers.xls")) {
+            new OfficeParser().parse(stream, bodyHandler, meta, parseContext);
+        }
+
+        assertEquals("application/vnd.ms-excel", meta.get(Metadata.CONTENT_TYPE));
+        assertEquals("Internal spreadsheet", meta.get(TikaCoreProperties.TITLE));
+        assertEquals("Aeham Abushwashi", meta.get(TikaCoreProperties.CREATOR));
+        assertEquals("Aeham Abushwashi", meta.get(Metadata.AUTHOR));
+
+        // Snippets expected in the extracted text, in the order they are checked
+        final String[] expectedSnippets = {
+                "John Smith1",
+                "John Smith50",
+                "1 Corporate HQ",
+                "Header - Corporate Spreadsheet",
+                "Header - For Internal Use Only",
+                "Header - Author: John Smith",
+                "Footer - Corporate Spreadsheet",
+                "Footer - For Internal Use Only",
+                "Footer - Author: John Smith"
+        };
+        String text = bodyHandler.toString();
+        for (String snippet : expectedSnippets) {
+            assertContains(snippet, text);
+        }
+    }
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java?rev=1725014&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java Sat Jan 16 18:23:01 2016
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.InputStream;
+import java.util.List;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.TikaTest;
+import org.apache.tika.exception.EncryptedDocumentException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.OfficeOpenXMLExtended;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.PasswordProvider;
+import org.apache.tika.parser.RecursiveParserWrapper;
+import org.apache.tika.sax.BasicContentHandlerFactory;
+import org.junit.Test;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * Tests for parsing Microsoft Access database files (.mdb / .accdb):
+ * table content, embedded documents, password handling and metadata.
+ */
+public class JackcessParserTest extends TikaTest {
+
+    /** Encrypted sample database; the tests below open it with the password "tika". */
+    private static final String ENCRYPTED_DB = "/test-documents/testAccess2_encrypted.accdb";
+
+    /** Unencrypted sample database. */
+    private static final String PLAIN_DB = "/test-documents/testAccess2.accdb";
+
+    /**
+     * Parses the same sample database saved in three Access formats and
+     * checks table markup, date formatting, markup stripping, unicode text
+     * and the content of an embedded document.
+     */
+    @Test
+    public void testBasic() throws Exception {
+
+        Parser p = new AutoDetectParser();
+
+        RecursiveParserWrapper w = new RecursiveParserWrapper(p,
+                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.XML, -1));
+
+        for (String fName : new String[]{"testAccess2.accdb", "testAccess2_2000.mdb",
+                "testAccess2_2002-2003.mdb"}) {
+            // try-with-resources, as in the other tests of this class
+            try (InputStream is = getResourceAsStream("/test-documents/" + fName)) {
+                Metadata meta = new Metadata();
+                ParseContext c = new ParseContext();
+                w.parse(is, new DefaultHandler(), meta, c);
+            }
+            List<Metadata> list = w.getMetadata();
+            assertEquals(4, list.size());
+            String mainContent = list.get(0).get(RecursiveParserWrapper.TIKA_CONTENT);
+
+            //make sure there's a thead and tbody
+            assertContains("</thead><tbody>", mainContent);
+
+            //assert table header
+            assertContains("<th>ShortTextField</th>", mainContent);
+
+            //test date format
+            assertContains("6/24/15", mainContent);
+
+            //test that markup is stripped
+            assertContains("over the bold italic dog", mainContent);
+
+            //test unicode
+            assertContains("\u666E\u6797\u65AF\u987F\u5927\u5B66", mainContent);
+
+            //test embedded document handling
+            assertContains("Test Document with embedded pdf",
+                    list.get(3).get(RecursiveParserWrapper.TIKA_CONTENT));
+
+            //the wrapper is reused for the next file
+            w.reset();
+        }
+    }
+
+    /**
+     * Checks password handling: the right password gives access to the
+     * content; a wrong password, a null password or no password provider
+     * at all must raise {@link EncryptedDocumentException}; and supplying a
+     * password for an unencrypted file must not cause a failure.
+     */
+    @Test
+    public void testPassword() throws Exception {
+        Parser p = new AutoDetectParser();
+
+        //correct password
+        String content;
+        try (InputStream is = getResourceAsStream(ENCRYPTED_DB)) {
+            content = getText(is, p, contextWithPassword("tika"));
+        }
+        assertContains("red and brown", content);
+
+        //now try wrong password
+        assertTrue("failed to throw encrypted document exception for wrong password",
+                throwsEncryptedDocumentException(p, contextWithPassword("WRONG")));
+
+        //now try null
+        assertTrue("failed to throw encrypted document exception for null password",
+                throwsEncryptedDocumentException(p, contextWithPassword(null)));
+
+        //now try missing password provider
+        assertTrue("failed to throw encrypted document exception for missing password provider",
+                throwsEncryptedDocumentException(p, new ParseContext()));
+
+        //now try password on file that doesn't need a password
+        try (InputStream is = getResourceAsStream(PLAIN_DB)) {
+            content = getText(is, p, contextWithPassword("tika"));
+        } catch (EncryptedDocumentException e) {
+            //keep the original exception as the cause so the failure is diagnosable
+            throw new AssertionError("shouldn't have thrown encrypted document exception for "
+                    + "opening unencrypted file that doesn't need password", e);
+        }
+        assertContains("red and brown", content);
+    }
+
+    @Test
+    public void testReadOnly() throws Exception {
+        //TIKA-1681: just make sure an exception is not thrown
+        XMLResult r = getXML("testAccess_V1997.mdb");
+        assertContains("hijklmnop", r.xml);
+    }
+
+    @Test
+    public void testMetadata() throws Exception {
+        //basic tests for normalized metadata
+        XMLResult r = getXML("testAccess_V1997.mdb");
+        assertEquals("tmccune", r.metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("Health Market Science", r.metadata.get(OfficeOpenXMLExtended.COMPANY));
+        assertEquals("test", r.metadata.get(TikaCoreProperties.TITLE));
+    }
+
+    /**
+     * Builds a fresh parse context whose password provider always answers
+     * with the given password.
+     *
+     * @param password the password to hand out; may be null
+     * @return a new context carrying only that password provider
+     */
+    private static ParseContext contextWithPassword(final String password) {
+        ParseContext context = new ParseContext();
+        context.set(PasswordProvider.class, new PasswordProvider() {
+            @Override
+            public String getPassword(Metadata metadata) {
+                return password;
+            }
+        });
+        return context;
+    }
+
+    /**
+     * Extracts text from the encrypted sample database and reports whether
+     * that raised an {@link EncryptedDocumentException}. Any other
+     * exception propagates to the caller.
+     *
+     * @param parser  parser to run
+     * @param context parse context, with or without a password provider
+     * @return true if the exception was thrown, false if parsing succeeded
+     */
+    private boolean throwsEncryptedDocumentException(Parser parser, ParseContext context)
+            throws Exception {
+        try (InputStream is = getResourceAsStream(ENCRYPTED_DB)) {
+            getText(is, parser, context);
+        } catch (EncryptedDocumentException e) {
+            return true;
+        }
+        return false;
+    }
+}

Added: tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/microsoft/OfficeParserTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/microsoft/OfficeParserTest.java?rev=1725014&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/microsoft/OfficeParserTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-office-parser-module/src/test/java/org/apache/tika/parser/microsoft/OfficeParserTest.java Sat Jan 16 18:23:01 2016
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.InputStream;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.microsoft.ooxml.OOXMLParserTest;
+import org.junit.Test;
+
+
+public class OfficeParserTest extends TikaTest {
+
+    @Test
+    public void parseOfficeWord() throws Exception {
+        Metadata metadata = new Metadata();
+        Parser parser = new OfficeParser();
+
+        String xml = getXML(getTestDocument("test.doc"), parser, metadata).xml;
+
+        assertTrue(xml.contains("test"));
+    }
+
+    private InputStream getTestDocument(String name) {
+        return TikaInputStream.get(OOXMLParserTest.class.getResourceAsStream("/test-documents/" + name));
+    }
+}