You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/29 11:11:24 UTC
[18/39] tika git commit: Convert new lines from windows to unix
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestDirectoryListingEntry.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestDirectoryListingEntry.java b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestDirectoryListingEntry.java
index 65894e3..e337c15 100644
--- a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestDirectoryListingEntry.java
+++ b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestDirectoryListingEntry.java
@@ -1,85 +1,85 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.chm;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-
-import org.apache.tika.parser.chm.accessor.DirectoryListingEntry;
-import org.junit.Before;
-import org.junit.Test;
-
-/**
- * Tests public methods of the DirectoryListingEntry class
- *
- * @author olegt
- *
- */
-public class TestDirectoryListingEntry {
- private DirectoryListingEntry dle = null;
-
- @Before
- public void setUp() throws Exception {
- dle = new DirectoryListingEntry(TestParameters.nameLength,
- TestParameters.entryName, TestParameters.entryType,
- TestParameters.offset, TestParameters.length);
- }
-
- @Test
- public void testDefaultConstructor() {
- assertNotNull(dle);
- }
-
- @Test
- public void testParamConstructor() {
- assertEquals(TestParameters.nameLength, dle.getNameLength());
- assertEquals(TestParameters.entryName, dle.getName());
- assertEquals(TestParameters.entryType, dle.getEntryType());
- assertEquals(TestParameters.offset, dle.getOffset());
- assertEquals(TestParameters.length, dle.getLength());
- }
-
- @Test
- public void testToString() {
- assertNotNull(dle.toString());
- }
-
- @Test
- public void testGetNameLength() {
- assertEquals(TestParameters.nameLength, dle.getNameLength());
- }
-
- @Test
- public void testGetName() {
- assertEquals(TestParameters.entryName, dle.getName());
- }
-
- @Test
- public void testGetEntryType() {
- assertEquals(TestParameters.entryType, dle.getEntryType());
- }
-
- @Test
- public void testGetOffset() {
- assertEquals(TestParameters.offset, dle.getOffset());
- }
-
- @Test
- public void testGetLength() {
- assertEquals(TestParameters.length, dle.getLength());
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import org.apache.tika.parser.chm.accessor.DirectoryListingEntry;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests public methods of the DirectoryListingEntry class
+ *
+ * @author olegt
+ *
+ */
+public class TestDirectoryListingEntry {
+ private DirectoryListingEntry dle = null;
+
+ @Before
+ public void setUp() throws Exception {
+ dle = new DirectoryListingEntry(TestParameters.nameLength,
+ TestParameters.entryName, TestParameters.entryType,
+ TestParameters.offset, TestParameters.length);
+ }
+
+ @Test
+ public void testDefaultConstructor() {
+ assertNotNull(dle);
+ }
+
+ @Test
+ public void testParamConstructor() {
+ assertEquals(TestParameters.nameLength, dle.getNameLength());
+ assertEquals(TestParameters.entryName, dle.getName());
+ assertEquals(TestParameters.entryType, dle.getEntryType());
+ assertEquals(TestParameters.offset, dle.getOffset());
+ assertEquals(TestParameters.length, dle.getLength());
+ }
+
+ @Test
+ public void testToString() {
+ assertNotNull(dle.toString());
+ }
+
+ @Test
+ public void testGetNameLength() {
+ assertEquals(TestParameters.nameLength, dle.getNameLength());
+ }
+
+ @Test
+ public void testGetName() {
+ assertEquals(TestParameters.entryName, dle.getName());
+ }
+
+ @Test
+ public void testGetEntryType() {
+ assertEquals(TestParameters.entryType, dle.getEntryType());
+ }
+
+ @Test
+ public void testGetOffset() {
+ assertEquals(TestParameters.offset, dle.getOffset());
+ }
+
+ @Test
+ public void testGetLength() {
+ assertEquals(TestParameters.length, dle.getLength());
+ }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestParameters.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestParameters.java b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestParameters.java
index 2512e85..5937d18 100644
--- a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestParameters.java
+++ b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestParameters.java
@@ -1,104 +1,104 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.chm;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.tika.parser.chm.core.ChmCommons.EntryType;
-
-/**
- * Holds test parameters such as verification points
- */
-public class TestParameters {
- /* Prevents initialization */
- private TestParameters() {
- }
-
- /* Tests values */
- static final int nameLength = 5;
- static final String entryName = TestParameters.class.getName();
- static EntryType entryType = EntryType.COMPRESSED;
- static final int offset = 3;
- static final int length = 20;
- static final int NTHREADS = 2;
-
- static final int BUFFER_SIZE = 16384;
-
- static final byte[] chmData = readResource("/test-documents/testChm.chm");
-
- private static byte[] readResource(String name) {
- try {
- try (InputStream stream = TestParameters.class.getResourceAsStream(name)) {
- return IOUtils.toByteArray(stream);
- }
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
-
- /* Verification points */
- static final String VP_CHM_MIME_TYPE = "Content-Type=application/x-chm";
- static final String VP_EXTRACTED_TEXT = "The TCard method accepts only numeric arguments";
- static final String VP_ISTF_SIGNATURE = "ITSF";
- static final String VP_ISTP_SIGNATURE = "ITSP";
- static final String VP_PMGL_SIGNATURE = "PMGL";
- static final String VP_CONTROL_DATA_SIGNATURE = "LZXC";
-
- static final int VP_DIRECTORY_LENGTH = 4180;
- static final int VP_DATA_OFFSET_LENGTH = 4300;
- static final int VP_DIRECTORY_OFFSET = 120;
- static final int VP_ITSF_HEADER_LENGTH = 96;
- static final int VP_LANGUAGE_ID = 1033;
- static final int VP_LAST_MODIFIED = 1042357880;
- static final int VP_UNKNOWN_000C = 1;
- static final int VP_UNKNOWN_LEN = 24;
- static final int VP_UNKNOWN_OFFSET = 96;
- static final int VP_VERSION = 3;
- static final int VP_BLOCK_LENGTH = 4096;
- static final int VP_BLOCK_INDEX_INTERVAL = 2;
- static final int VP_ITSP_HEADER_LENGTH = 84;
- static final int VP_INDEX_DEPTH = 1;
- static final int VP_INDEX_HEAD = 0;
- static final int VP_INDEX_ROOT = -1;
- static final int VP_UNKNOWN_NUM_BLOCKS = -1;
- static final int VP_ITSP_UNKNOWN_000C = 10;
- static final int VP_ITSP_UNKNOWN_0024 = 0;
- static final int VP_ITSP_UNKNOWN_002C = 1;
- static final int VP_ITSP_BYTEARR_LEN = 16;
- static final int VP_ITSP_VERSION = 1;
- static final int VP_RESET_INTERVAL = 2;
- static final int VP_CONTROL_DATA_SIZE = 6;
- static final int VP_UNKNOWN_18 = 0;
- static final int VP_CONTROL_DATA_VERSION = 2;
- static final int VP_WINDOW_SIZE = 65536;
- static final int VP_WINDOWS_PER_RESET = 1;
- static final int VP_CHM_ENTITIES_NUMBER = 100; //updated by Hawking
- static final int VP_PMGI_FREE_SPACE = 3;
- static final int VP_PMGL_BLOCK_NEXT = -1;
- static final int VP_PMGL_BLOCK_PREV = -1;
- static final int VP_PMGL_FREE_SPACE = 1644;
- static final int VP_PMGL_UNKNOWN_008 = 0;
- static final int VP_RESET_TABLE_BA = 12;
- static final int VP_RES_TBL_BLOCK_LENGTH = 32768;
- static final int VP_RES_TBL_COMPR_LENGTH = 177408;
- static final int VP_RES_TBL_UNCOMP_LENGTH = 383786;
- static final int VP_TBL_OFFSET = 40;
- static final int VP_RES_TBL_UNKNOWN = 8;
- static final int VP_RES_TBL_VERSION = 2;
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.parser.chm.core.ChmCommons.EntryType;
+
+/**
+ * Holds test parameters such as verification points
+ */
+public class TestParameters {
+ /* Prevents initialization */
+ private TestParameters() {
+ }
+
+ /* Tests values */
+ static final int nameLength = 5;
+ static final String entryName = TestParameters.class.getName();
+ static EntryType entryType = EntryType.COMPRESSED;
+ static final int offset = 3;
+ static final int length = 20;
+ static final int NTHREADS = 2;
+
+ static final int BUFFER_SIZE = 16384;
+
+ static final byte[] chmData = readResource("/test-documents/testChm.chm");
+
+ private static byte[] readResource(String name) {
+ try {
+ try (InputStream stream = TestParameters.class.getResourceAsStream(name)) {
+ return IOUtils.toByteArray(stream);
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /* Verification points */
+ static final String VP_CHM_MIME_TYPE = "Content-Type=application/x-chm";
+ static final String VP_EXTRACTED_TEXT = "The TCard method accepts only numeric arguments";
+ static final String VP_ISTF_SIGNATURE = "ITSF";
+ static final String VP_ISTP_SIGNATURE = "ITSP";
+ static final String VP_PMGL_SIGNATURE = "PMGL";
+ static final String VP_CONTROL_DATA_SIGNATURE = "LZXC";
+
+ static final int VP_DIRECTORY_LENGTH = 4180;
+ static final int VP_DATA_OFFSET_LENGTH = 4300;
+ static final int VP_DIRECTORY_OFFSET = 120;
+ static final int VP_ITSF_HEADER_LENGTH = 96;
+ static final int VP_LANGUAGE_ID = 1033;
+ static final int VP_LAST_MODIFIED = 1042357880;
+ static final int VP_UNKNOWN_000C = 1;
+ static final int VP_UNKNOWN_LEN = 24;
+ static final int VP_UNKNOWN_OFFSET = 96;
+ static final int VP_VERSION = 3;
+ static final int VP_BLOCK_LENGTH = 4096;
+ static final int VP_BLOCK_INDEX_INTERVAL = 2;
+ static final int VP_ITSP_HEADER_LENGTH = 84;
+ static final int VP_INDEX_DEPTH = 1;
+ static final int VP_INDEX_HEAD = 0;
+ static final int VP_INDEX_ROOT = -1;
+ static final int VP_UNKNOWN_NUM_BLOCKS = -1;
+ static final int VP_ITSP_UNKNOWN_000C = 10;
+ static final int VP_ITSP_UNKNOWN_0024 = 0;
+ static final int VP_ITSP_UNKNOWN_002C = 1;
+ static final int VP_ITSP_BYTEARR_LEN = 16;
+ static final int VP_ITSP_VERSION = 1;
+ static final int VP_RESET_INTERVAL = 2;
+ static final int VP_CONTROL_DATA_SIZE = 6;
+ static final int VP_UNKNOWN_18 = 0;
+ static final int VP_CONTROL_DATA_VERSION = 2;
+ static final int VP_WINDOW_SIZE = 65536;
+ static final int VP_WINDOWS_PER_RESET = 1;
+ static final int VP_CHM_ENTITIES_NUMBER = 100; //updated by Hawking
+ static final int VP_PMGI_FREE_SPACE = 3;
+ static final int VP_PMGL_BLOCK_NEXT = -1;
+ static final int VP_PMGL_BLOCK_PREV = -1;
+ static final int VP_PMGL_FREE_SPACE = 1644;
+ static final int VP_PMGL_UNKNOWN_008 = 0;
+ static final int VP_RESET_TABLE_BA = 12;
+ static final int VP_RES_TBL_BLOCK_LENGTH = 32768;
+ static final int VP_RES_TBL_COMPR_LENGTH = 177408;
+ static final int VP_RES_TBL_UNCOMP_LENGTH = 383786;
+ static final int VP_TBL_OFFSET = 40;
+ static final int VP_RES_TBL_UNKNOWN = 8;
+ static final int VP_RES_TBL_VERSION = 2;
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestPmgiHeader.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestPmgiHeader.java b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestPmgiHeader.java
index 493c03e..070583b 100644
--- a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestPmgiHeader.java
+++ b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestPmgiHeader.java
@@ -1,45 +1,45 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.chm;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import org.apache.tika.parser.chm.accessor.ChmPmgiHeader;
-import org.junit.Before;
-import org.junit.Test;
-
-public class TestPmgiHeader {
- ChmPmgiHeader chmPmgiHeader = null;
-
- @Before
- public void setUp() throws Exception {
- byte[] data = TestParameters.chmData;
- chmPmgiHeader = new ChmPmgiHeader();
- chmPmgiHeader.parse(data, chmPmgiHeader);
- }
-
- @Test
- public void testToString() {
- assertTrue((chmPmgiHeader != null) && (chmPmgiHeader.toString().length() > 0));
- }
-
- @Test
- public void testGetFreeSpace() {
- assertEquals(TestParameters.VP_PMGI_FREE_SPACE, chmPmgiHeader.getFreeSpace());
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.parser.chm.accessor.ChmPmgiHeader;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestPmgiHeader {
+ ChmPmgiHeader chmPmgiHeader = null;
+
+ @Before
+ public void setUp() throws Exception {
+ byte[] data = TestParameters.chmData;
+ chmPmgiHeader = new ChmPmgiHeader();
+ chmPmgiHeader.parse(data, chmPmgiHeader);
+ }
+
+ @Test
+ public void testToString() {
+ assertTrue((chmPmgiHeader != null) && (chmPmgiHeader.toString().length() > 0));
+ }
+
+ @Test
+ public void testGetFreeSpace() {
+ assertEquals(TestParameters.VP_PMGI_FREE_SPACE, chmPmgiHeader.getFreeSpace());
+ }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java
index f8652da..55c08f2 100644
--- a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java
+++ b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java
@@ -1,76 +1,76 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.chm;
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import org.apache.tika.parser.chm.accessor.ChmPmglHeader;
-import org.apache.tika.parser.chm.core.ChmCommons;
-import org.apache.tika.parser.chm.core.ChmConstants;
-import org.junit.Before;
-import org.junit.Test;
-
-public class TestPmglHeader {
- ChmPmglHeader chmPmglHeader = null;
-
- @Before
- public void setUp() throws Exception {
- byte[] data = TestParameters.chmData;
- chmPmglHeader = new ChmPmglHeader();
- chmPmglHeader.parse(ChmCommons.copyOfRange(data,
- ChmConstants.START_PMGL, ChmConstants.START_PMGL
- + ChmConstants.CHM_PMGL_LEN + 10), chmPmglHeader);
- }
-
- @Test
- public void testToString() {
- assertTrue((chmPmglHeader != null)
- && chmPmglHeader.toString().length() > 0);
- }
-
- @Test
- public void testChmPmglHeaderGet() {
- assertEquals(TestParameters.VP_PMGL_SIGNATURE, new String(
- chmPmglHeader.getSignature(), UTF_8));
- }
-
- @Test
- public void testGetBlockNext() {
- assertEquals(TestParameters.VP_PMGL_BLOCK_NEXT,
- chmPmglHeader.getBlockNext());
- }
-
- @Test
- public void testGetBlockPrev() {
- assertEquals(TestParameters.VP_PMGL_BLOCK_PREV,
- chmPmglHeader.getBlockPrev());
- }
-
- @Test
- public void testGetFreeSpace() {
- assertEquals(TestParameters.VP_PMGL_FREE_SPACE,
- chmPmglHeader.getFreeSpace());
- }
-
- @Test
- public void testGetUnknown0008() {
- assertEquals(TestParameters.VP_PMGL_UNKNOWN_008,
- chmPmglHeader.getUnknown0008());
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.parser.chm.accessor.ChmPmglHeader;
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestPmglHeader {
+ ChmPmglHeader chmPmglHeader = null;
+
+ @Before
+ public void setUp() throws Exception {
+ byte[] data = TestParameters.chmData;
+ chmPmglHeader = new ChmPmglHeader();
+ chmPmglHeader.parse(ChmCommons.copyOfRange(data,
+ ChmConstants.START_PMGL, ChmConstants.START_PMGL
+ + ChmConstants.CHM_PMGL_LEN + 10), chmPmglHeader);
+ }
+
+ @Test
+ public void testToString() {
+ assertTrue((chmPmglHeader != null)
+ && chmPmglHeader.toString().length() > 0);
+ }
+
+ @Test
+ public void testChmPmglHeaderGet() {
+ assertEquals(TestParameters.VP_PMGL_SIGNATURE, new String(
+ chmPmglHeader.getSignature(), UTF_8));
+ }
+
+ @Test
+ public void testGetBlockNext() {
+ assertEquals(TestParameters.VP_PMGL_BLOCK_NEXT,
+ chmPmglHeader.getBlockNext());
+ }
+
+ @Test
+ public void testGetBlockPrev() {
+ assertEquals(TestParameters.VP_PMGL_BLOCK_PREV,
+ chmPmglHeader.getBlockPrev());
+ }
+
+ @Test
+ public void testGetFreeSpace() {
+ assertEquals(TestParameters.VP_PMGL_FREE_SPACE,
+ chmPmglHeader.getFreeSpace());
+ }
+
+ @Test
+ public void testGetUnknown0008() {
+ assertEquals(TestParameters.VP_PMGL_UNKNOWN_008,
+ chmPmglHeader.getUnknown0008());
+ }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/mbox/MboxParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/mbox/MboxParserTest.java b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/mbox/MboxParserTest.java
index 78761fe..6ef803d 100644
--- a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/mbox/MboxParserTest.java
+++ b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/mbox/MboxParserTest.java
@@ -1,156 +1,156 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.mbox;
-
-import static org.apache.tika.TikaTest.assertContains;
-import static org.junit.Assert.assertEquals;
-
-import java.io.InputStream;
-import java.util.Map;
-
-import org.apache.tika.detect.TypeDetector;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.junit.Before;
-import org.junit.Test;
-import org.xml.sax.ContentHandler;
-
-public class MboxParserTest {
-
- protected ParseContext recursingContext;
- private Parser autoDetectParser;
- private TypeDetector typeDetector;
- private MboxParser mboxParser;
-
- private static InputStream getStream(String name) {
- return MboxParserTest.class.getClass().getResourceAsStream(name);
- }
-
- @Before
- public void setUp() throws Exception {
- typeDetector = new TypeDetector();
- autoDetectParser = new AutoDetectParser(typeDetector);
- recursingContext = new ParseContext();
- recursingContext.set(Parser.class, autoDetectParser);
-
- mboxParser = new MboxParser();
- mboxParser.setTracking(true);
- }
-
- @Test
- public void testSimple() throws Exception {
- ContentHandler handler = new BodyContentHandler();
- Metadata metadata = new Metadata();
-
- try (InputStream stream = getStream("/test-documents/simple.mbox")) {
- mboxParser.parse(stream, handler, metadata, recursingContext);
- }
-
- String content = handler.toString();
- assertContains("Test content 1", content);
- assertContains("Test content 2", content);
- assertEquals("application/mbox", metadata.get(Metadata.CONTENT_TYPE));
-
- Map<Integer, Metadata> mailsMetadata = mboxParser.getTrackingMetadata();
- assertEquals("Nb. Of mails", 2, mailsMetadata.size());
-
- Metadata mail1 = mailsMetadata.get(0);
- assertEquals("message/rfc822", mail1.get(Metadata.CONTENT_TYPE));
- assertEquals("envelope-sender-mailbox-name Mon Jun 01 10:00:00 2009", mail1.get("MboxParser-from"));
-
- Metadata mail2 = mailsMetadata.get(1);
- assertEquals("message/rfc822", mail2.get(Metadata.CONTENT_TYPE));
- assertEquals("envelope-sender-mailbox-name Mon Jun 01 11:00:00 2010", mail2.get("MboxParser-from"));
- }
-
- @Test
- public void testHeaders() throws Exception {
- ContentHandler handler = new BodyContentHandler();
- Metadata metadata = new Metadata();
-
- try (InputStream stream = getStream("/test-documents/headers.mbox")) {
- mboxParser.parse(stream, handler, metadata, recursingContext);
- }
-
- assertContains("Test content", handler.toString());
- assertEquals("Nb. Of mails", 1, mboxParser.getTrackingMetadata().size());
-
- Metadata mailMetadata = mboxParser.getTrackingMetadata().get(0);
-
- assertEquals("2009-06-10T03:58:45Z", mailMetadata.get(TikaCoreProperties.CREATED));
- assertEquals("<au...@domain.com>", mailMetadata.get(TikaCoreProperties.CREATOR));
- assertEquals("subject", mailMetadata.get(Metadata.SUBJECT));
- assertEquals("<au...@domain.com>", mailMetadata.get(Metadata.AUTHOR));
- assertEquals("message/rfc822", mailMetadata.get(Metadata.CONTENT_TYPE));
- assertEquals("author@domain.com", mailMetadata.get("Message-From"));
- assertEquals("<na...@domain.com>", mailMetadata.get("MboxParser-return-path"));
- }
-
- @Test
- public void testMultilineHeader() throws Exception {
- ContentHandler handler = new BodyContentHandler();
- Metadata metadata = new Metadata();
-
- try (InputStream stream = getStream("/test-documents/multiline.mbox")) {
- mboxParser.parse(stream, handler, metadata, recursingContext);
- }
-
- assertEquals("Nb. Of mails", 1, mboxParser.getTrackingMetadata().size());
-
- Metadata mailMetadata = mboxParser.getTrackingMetadata().get(0);
- assertEquals("from xxx by xxx with xxx; date", mailMetadata.get("MboxParser-received"));
- }
-
- @Test
- public void testQuoted() throws Exception {
- ContentHandler handler = new BodyContentHandler();
- Metadata metadata = new Metadata();
-
- try (InputStream stream = getStream("/test-documents/quoted.mbox")) {
- mboxParser.parse(stream, handler, metadata, recursingContext);
- }
-
- assertContains("Test content", handler.toString());
- assertContains("> quoted stuff", handler.toString());
- }
-
- @Test
- public void testComplex() throws Exception {
- ContentHandler handler = new BodyContentHandler();
- Metadata metadata = new Metadata();
-
- try (InputStream stream = getStream("/test-documents/complex.mbox")) {
- mboxParser.parse(stream, handler, metadata, recursingContext);
- }
-
- assertEquals("Nb. Of mails", 3, mboxParser.getTrackingMetadata().size());
-
- Metadata firstMail = mboxParser.getTrackingMetadata().get(0);
- assertEquals("Re: question about when shuffle/sort start working", firstMail.get(Metadata.SUBJECT));
- assertEquals("Re: question about when shuffle/sort start working", firstMail.get(TikaCoreProperties.TITLE));
- assertEquals("Jothi Padmanabhan <jo...@yahoo-inc.com>", firstMail.get(Metadata.AUTHOR));
- assertEquals("Jothi Padmanabhan <jo...@yahoo-inc.com>", firstMail.get(TikaCoreProperties.CREATOR));
- assertEquals("core-user@hadoop.apache.org", firstMail.get(Metadata.MESSAGE_RECIPIENT_ADDRESS));
-
- assertContains("When a Mapper completes", handler.toString());
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mbox;
+
+import static org.apache.tika.TikaTest.assertContains;
+import static org.junit.Assert.assertEquals;
+
+import java.io.InputStream;
+import java.util.Map;
+
+import org.apache.tika.detect.TypeDetector;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Before;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+
+public class MboxParserTest {
+
+ protected ParseContext recursingContext;
+ private Parser autoDetectParser;
+ private TypeDetector typeDetector;
+ private MboxParser mboxParser;
+
+ private static InputStream getStream(String name) {
+ return MboxParserTest.class.getClass().getResourceAsStream(name);
+ }
+
+ @Before
+ public void setUp() throws Exception {
+ typeDetector = new TypeDetector();
+ autoDetectParser = new AutoDetectParser(typeDetector);
+ recursingContext = new ParseContext();
+ recursingContext.set(Parser.class, autoDetectParser);
+
+ mboxParser = new MboxParser();
+ mboxParser.setTracking(true);
+ }
+
+ @Test
+ public void testSimple() throws Exception {
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ try (InputStream stream = getStream("/test-documents/simple.mbox")) {
+ mboxParser.parse(stream, handler, metadata, recursingContext);
+ }
+
+ String content = handler.toString();
+ assertContains("Test content 1", content);
+ assertContains("Test content 2", content);
+ assertEquals("application/mbox", metadata.get(Metadata.CONTENT_TYPE));
+
+ Map<Integer, Metadata> mailsMetadata = mboxParser.getTrackingMetadata();
+ assertEquals("Nb. Of mails", 2, mailsMetadata.size());
+
+ Metadata mail1 = mailsMetadata.get(0);
+ assertEquals("message/rfc822", mail1.get(Metadata.CONTENT_TYPE));
+ assertEquals("envelope-sender-mailbox-name Mon Jun 01 10:00:00 2009", mail1.get("MboxParser-from"));
+
+ Metadata mail2 = mailsMetadata.get(1);
+ assertEquals("message/rfc822", mail2.get(Metadata.CONTENT_TYPE));
+ assertEquals("envelope-sender-mailbox-name Mon Jun 01 11:00:00 2010", mail2.get("MboxParser-from"));
+ }
+
+ @Test
+ public void testHeaders() throws Exception {
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ try (InputStream stream = getStream("/test-documents/headers.mbox")) {
+ mboxParser.parse(stream, handler, metadata, recursingContext);
+ }
+
+ assertContains("Test content", handler.toString());
+ assertEquals("Nb. Of mails", 1, mboxParser.getTrackingMetadata().size());
+
+ Metadata mailMetadata = mboxParser.getTrackingMetadata().get(0);
+
+ assertEquals("2009-06-10T03:58:45Z", mailMetadata.get(TikaCoreProperties.CREATED));
+ assertEquals("<au...@domain.com>", mailMetadata.get(TikaCoreProperties.CREATOR));
+ assertEquals("subject", mailMetadata.get(Metadata.SUBJECT));
+ assertEquals("<au...@domain.com>", mailMetadata.get(Metadata.AUTHOR));
+ assertEquals("message/rfc822", mailMetadata.get(Metadata.CONTENT_TYPE));
+ assertEquals("author@domain.com", mailMetadata.get("Message-From"));
+ assertEquals("<na...@domain.com>", mailMetadata.get("MboxParser-return-path"));
+ }
+
+ @Test
+ public void testMultilineHeader() throws Exception {
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ try (InputStream stream = getStream("/test-documents/multiline.mbox")) {
+ mboxParser.parse(stream, handler, metadata, recursingContext);
+ }
+
+ assertEquals("Nb. Of mails", 1, mboxParser.getTrackingMetadata().size());
+
+ Metadata mailMetadata = mboxParser.getTrackingMetadata().get(0);
+ assertEquals("from xxx by xxx with xxx; date", mailMetadata.get("MboxParser-received"));
+ }
+
+ @Test
+ public void testQuoted() throws Exception {
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ try (InputStream stream = getStream("/test-documents/quoted.mbox")) {
+ mboxParser.parse(stream, handler, metadata, recursingContext);
+ }
+
+ assertContains("Test content", handler.toString());
+ assertContains("> quoted stuff", handler.toString());
+ }
+
+ @Test
+ public void testComplex() throws Exception {
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ try (InputStream stream = getStream("/test-documents/complex.mbox")) {
+ mboxParser.parse(stream, handler, metadata, recursingContext);
+ }
+
+ assertEquals("Nb. Of mails", 3, mboxParser.getTrackingMetadata().size());
+
+ Metadata firstMail = mboxParser.getTrackingMetadata().get(0);
+ assertEquals("Re: question about when shuffle/sort start working", firstMail.get(Metadata.SUBJECT));
+ assertEquals("Re: question about when shuffle/sort start working", firstMail.get(TikaCoreProperties.TITLE));
+ assertEquals("Jothi Padmanabhan <jo...@yahoo-inc.com>", firstMail.get(Metadata.AUTHOR));
+ assertEquals("Jothi Padmanabhan <jo...@yahoo-inc.com>", firstMail.get(TikaCoreProperties.CREATOR));
+ assertEquals("core-user@hadoop.apache.org", firstMail.get(Metadata.MESSAGE_RECIPIENT_ADDRESS));
+
+ assertContains("When a Mapper completes", handler.toString());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/mbox/OutlookPSTParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/mbox/OutlookPSTParserTest.java b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/mbox/OutlookPSTParserTest.java
index 1d2904c..89a1b86 100644
--- a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/mbox/OutlookPSTParserTest.java
+++ b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/mbox/OutlookPSTParserTest.java
@@ -1,110 +1,110 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.mbox;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.tika.TikaTest;
-import org.apache.tika.extractor.EmbeddedDocumentExtractor;
-import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.ToHTMLContentHandler;
-import org.junit.Test;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-public class OutlookPSTParserTest extends TikaTest {
-
- private Parser parser = new OutlookPSTParser();
-
- @Test
- public void testAccept() throws Exception {
- assertTrue((parser.getSupportedTypes(null).contains(MediaType.application("vnd.ms-outlook-pst"))));
- }
-
- @Test
- public void testParse() throws Exception {
- Parser pstParser = new AutoDetectParser();
- Metadata metadata = new Metadata();
- ContentHandler handler = new ToHTMLContentHandler();
-
- ParseContext context = new ParseContext();
- EmbeddedTrackingExtrator trackingExtrator = new EmbeddedTrackingExtrator(context);
- context.set(EmbeddedDocumentExtractor.class, trackingExtrator);
- context.set(Parser.class, new AutoDetectParser());
-
- pstParser.parse(getResourceAsStream("/test-documents/testPST.pst"), handler, metadata, context);
-
- String output = handler.toString();
-
- assertFalse(output.isEmpty());
- assertTrue(output.contains("<meta name=\"Content-Length\" content=\"271360\">"));
- assertTrue(output.contains("<meta name=\"Content-Type\" content=\"application/vnd.ms-outlook-pst\">"));
-
- assertTrue(output.contains("<body><div class=\"email-folder\"><h1>"));
- assertTrue(output.contains("<div class=\"embedded\" id=\"<530D9CAC.5080901@gmail.com>\"><h1>Re: Feature Generators</h1>"));
- assertTrue(output.contains("<div class=\"embedded\" id=\"<1393363252.28814.YahooMailNeo@web140906.mail.bf1.yahoo.com>\"><h1>Re: init tokenizer fails: \"Bad type in putfield/putstatic\"</h1>"));
- assertTrue(output.contains("Gary Murphy commented on TIKA-1250:"));
-
- assertTrue(output.contains("<div class=\"email-folder\"><h1>Racine (pour la recherche)</h1>"));
-
-
- List<Metadata> metaList = trackingExtrator.trackingMetadata;
- assertEquals(6, metaList.size());
-
- Metadata firstMail = metaList.get(0);
- assertEquals("Jörn Kottmann", firstMail.get(TikaCoreProperties.CREATOR));
- assertEquals("Re: Feature Generators", firstMail.get(TikaCoreProperties.TITLE));
- assertEquals("kottmann@gmail.com", firstMail.get("senderEmailAddress"));
- assertEquals("users@opennlp.apache.org", firstMail.get("displayTo"));
- assertEquals("", firstMail.get("displayCC"));
- assertEquals("", firstMail.get("displayBCC"));
- }
-
-
- private class EmbeddedTrackingExtrator extends ParsingEmbeddedDocumentExtractor {
- List<Metadata> trackingMetadata = new ArrayList<Metadata>();
-
- public EmbeddedTrackingExtrator(ParseContext context) {
- super(context);
- }
-
- @Override
- public boolean shouldParseEmbedded(Metadata metadata) {
- return true;
- }
-
- @Override
- public void parseEmbedded(InputStream stream, ContentHandler handler, Metadata metadata, boolean outputHtml) throws SAXException, IOException {
- this.trackingMetadata.add(metadata);
- super.parseEmbedded(stream, handler, metadata, outputHtml);
- }
-
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mbox;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.ToHTMLContentHandler;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+public class OutlookPSTParserTest extends TikaTest {
+
+ private Parser parser = new OutlookPSTParser();
+
+ @Test
+ public void testAccept() throws Exception {
+ assertTrue((parser.getSupportedTypes(null).contains(MediaType.application("vnd.ms-outlook-pst"))));
+ }
+
+ @Test
+ public void testParse() throws Exception {
+ Parser pstParser = new AutoDetectParser();
+ Metadata metadata = new Metadata();
+ ContentHandler handler = new ToHTMLContentHandler();
+
+ ParseContext context = new ParseContext();
+ EmbeddedTrackingExtrator trackingExtrator = new EmbeddedTrackingExtrator(context);
+ context.set(EmbeddedDocumentExtractor.class, trackingExtrator);
+ context.set(Parser.class, new AutoDetectParser());
+
+ pstParser.parse(getResourceAsStream("/test-documents/testPST.pst"), handler, metadata, context);
+
+ String output = handler.toString();
+
+ assertFalse(output.isEmpty());
+ assertTrue(output.contains("<meta name=\"Content-Length\" content=\"271360\">"));
+ assertTrue(output.contains("<meta name=\"Content-Type\" content=\"application/vnd.ms-outlook-pst\">"));
+
+ assertTrue(output.contains("<body><div class=\"email-folder\"><h1>"));
+ assertTrue(output.contains("<div class=\"embedded\" id=\"<530D9CAC.5080901@gmail.com>\"><h1>Re: Feature Generators</h1>"));
+ assertTrue(output.contains("<div class=\"embedded\" id=\"<1393363252.28814.YahooMailNeo@web140906.mail.bf1.yahoo.com>\"><h1>Re: init tokenizer fails: \"Bad type in putfield/putstatic\"</h1>"));
+ assertTrue(output.contains("Gary Murphy commented on TIKA-1250:"));
+
+ assertTrue(output.contains("<div class=\"email-folder\"><h1>Racine (pour la recherche)</h1>"));
+
+
+ List<Metadata> metaList = trackingExtrator.trackingMetadata;
+ assertEquals(6, metaList.size());
+
+ Metadata firstMail = metaList.get(0);
+ assertEquals("Jörn Kottmann", firstMail.get(TikaCoreProperties.CREATOR));
+ assertEquals("Re: Feature Generators", firstMail.get(TikaCoreProperties.TITLE));
+ assertEquals("kottmann@gmail.com", firstMail.get("senderEmailAddress"));
+ assertEquals("users@opennlp.apache.org", firstMail.get("displayTo"));
+ assertEquals("", firstMail.get("displayCC"));
+ assertEquals("", firstMail.get("displayBCC"));
+ }
+
+
+ private class EmbeddedTrackingExtrator extends ParsingEmbeddedDocumentExtractor {
+ List<Metadata> trackingMetadata = new ArrayList<Metadata>();
+
+ public EmbeddedTrackingExtrator(ParseContext context) {
+ super(context);
+ }
+
+ @Override
+ public boolean shouldParseEmbedded(Metadata metadata) {
+ return true;
+ }
+
+ @Override
+ public void parseEmbedded(InputStream stream, ContentHandler handler, Metadata metadata, boolean outputHtml) throws SAXException, IOException {
+ this.trackingMetadata.add(metadata);
+ super.parseEmbedded(stream, handler, metadata, outputHtml);
+ }
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
index 2b3d141..f454446 100644
--- a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
+++ b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
@@ -1,75 +1,75 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.microsoft;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-
-import java.net.URL;
-
-import org.apache.tika.TikaTest;
-import org.apache.tika.extractor.ContainerExtractor;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.mime.MediaType;
-
-/**
- * Parent class of tests that the various POI powered parsers are
- * able to extract their embedded contents.
- */
-public abstract class AbstractPOIContainerExtractionTest extends TikaTest {
- public static final MediaType TYPE_DOC = MediaType.application("msword");
- public static final MediaType TYPE_PPT = MediaType.application("vnd.ms-powerpoint");
- public static final MediaType TYPE_XLS = MediaType.application("vnd.ms-excel");
- public static final MediaType TYPE_DOCX = MediaType.application("vnd.openxmlformats-officedocument.wordprocessingml.document");
- public static final MediaType TYPE_PPTX = MediaType.application("vnd.openxmlformats-officedocument.presentationml.presentation");
- public static final MediaType TYPE_XLSX = MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet");
- public static final MediaType TYPE_MSG = MediaType.application("vnd.ms-outlook");
-
- public static final MediaType TYPE_TXT = MediaType.text("plain");
- public static final MediaType TYPE_PDF = MediaType.application("pdf");
-
- public static final MediaType TYPE_JPG = MediaType.image("jpeg");
- public static final MediaType TYPE_GIF = MediaType.image("gif");
- public static final MediaType TYPE_PNG = MediaType.image("png");
- public static final MediaType TYPE_EMF = MediaType.application("x-emf");
- public static final MediaType TYPE_WMF = MediaType.application("x-msmetafile");
-
- protected static TikaInputStream getTestFile(String filename) throws Exception {
- URL input = AbstractPOIContainerExtractionTest.class.getResource(
- "/test-documents/" + filename);
- assertNotNull(filename + " not found", input);
-
- return TikaInputStream.get(input);
- }
-
- protected TrackingHandler process(String filename, ContainerExtractor extractor, boolean recurse) throws Exception {
- try (TikaInputStream stream = getTestFile(filename)) {
- assertEquals(true, extractor.isSupported(stream));
-
- // Process it
- TrackingHandler handler = new TrackingHandler();
- if (recurse) {
- extractor.extract(stream, extractor, handler);
- } else {
- extractor.extract(stream, null, handler);
- }
-
- // So they can check what happened
- return handler;
- }
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import java.net.URL;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.extractor.ContainerExtractor;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.mime.MediaType;
+
+/**
+ * Parent class of tests that the various POI powered parsers are
+ * able to extract their embedded contents.
+ */
+public abstract class AbstractPOIContainerExtractionTest extends TikaTest {
+ public static final MediaType TYPE_DOC = MediaType.application("msword");
+ public static final MediaType TYPE_PPT = MediaType.application("vnd.ms-powerpoint");
+ public static final MediaType TYPE_XLS = MediaType.application("vnd.ms-excel");
+ public static final MediaType TYPE_DOCX = MediaType.application("vnd.openxmlformats-officedocument.wordprocessingml.document");
+ public static final MediaType TYPE_PPTX = MediaType.application("vnd.openxmlformats-officedocument.presentationml.presentation");
+ public static final MediaType TYPE_XLSX = MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+ public static final MediaType TYPE_MSG = MediaType.application("vnd.ms-outlook");
+
+ public static final MediaType TYPE_TXT = MediaType.text("plain");
+ public static final MediaType TYPE_PDF = MediaType.application("pdf");
+
+ public static final MediaType TYPE_JPG = MediaType.image("jpeg");
+ public static final MediaType TYPE_GIF = MediaType.image("gif");
+ public static final MediaType TYPE_PNG = MediaType.image("png");
+ public static final MediaType TYPE_EMF = MediaType.application("x-emf");
+ public static final MediaType TYPE_WMF = MediaType.application("x-msmetafile");
+
+ protected static TikaInputStream getTestFile(String filename) throws Exception {
+ URL input = AbstractPOIContainerExtractionTest.class.getResource(
+ "/test-documents/" + filename);
+ assertNotNull(filename + " not found", input);
+
+ return TikaInputStream.get(input);
+ }
+
+ protected TrackingHandler process(String filename, ContainerExtractor extractor, boolean recurse) throws Exception {
+ try (TikaInputStream stream = getTestFile(filename)) {
+ assertEquals(true, extractor.isSupported(stream));
+
+ // Process it
+ TrackingHandler handler = new TrackingHandler();
+ if (recurse) {
+ extractor.extract(stream, extractor, handler);
+ } else {
+ extractor.extract(stream, null, handler);
+ }
+
+ // So they can check what happened
+ return handler;
+ }
+ }
+}