You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/08/17 17:49:46 UTC

[tika] branch branch_2x updated: Proof of concept for tika-parser-integration-tests module

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_2x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_2x by this push:
     new a8677bc  Proof of concept for tika-parser-integration-tests module
a8677bc is described below

commit a8677bc4afd699641b2a12085a1abaecc0aca200
Author: tallison <ta...@apache.org>
AuthorDate: Mon Aug 17 13:49:25 2020 -0400

    Proof of concept for tika-parser-integration-tests module
---
 pom.xml                                            |   2 +-
 .../apache/tika/sax/StoppingEarlyException.java    |  24 +--
 tika-parser-integration-tests/pom.xml              |  63 +++++++
 .../apache/tika/parser/tests/pkg/ArParserTest.java |  62 +++----
 .../tika/parser/tests}/pkg/Bzip2ParserTest.java    |  42 +----
 .../pkg/CompositeZipContainerDetectorTest.java     |  19 +-
 .../tika/parser/tests}/pkg/CompressParserTest.java |  52 +-----
 .../parser/tests/pkg/CompressorParserTest.java     |  65 +++++++
 .../tika/parser/tests}/pkg/GzipParserTest.java     |  41 +----
 .../tika/parser/tests/pkg/RarParserTest.java       |  55 ++----
 .../tika/parser/tests}/pkg/Seven7ParserTest.java   |  62 +------
 .../tika/parser/tests/pkg/TarParserTest.java       |  57 ++----
 .../tika/parser/tests}/pkg/ZipParserTest.java      | 122 ++----------
 .../tika/parser/tests}/pkg/ZlibParserTest.java     |  37 +---
 .../detect/microsoft/ooxml/OPCPackageDetector.java |   4 +-
 ...rg.apache.tika.detect.zip.ZipContainerDetector} |   0
 tika-parser-modules/tika-parser-pkg-module/pom.xml |   8 -
 .../org/apache/tika/parser/pkg/ArParserTest.java   |  27 ---
 .../apache/tika/parser/pkg/Bzip2ParserTest.java    |  33 ----
 .../apache/tika/parser/pkg/CompressParserTest.java |  39 ----
 .../tika/parser/pkg/CompressorParserTest.java      |  32 ----
 .../org/apache/tika/parser/pkg/GzipParserTest.java |  52 +-----
 .../org/apache/tika/parser/pkg/RarParserTest.java  |  32 ----
 .../apache/tika/parser/pkg/Seven7ParserTest.java   | 141 --------------
 .../org/apache/tika/parser/pkg/TarParserTest.java  |  32 ----
 .../org/apache/tika/parser/pkg/ZipParserTest.java  |  94 ----------
 .../org/apache/tika/parser/pkg/ZlibParserTest.java |  15 --
 .../resources/test-documents/full_encrypted.7z     | Bin
 .../src/test/resources/test-documents/moby.zip     | Bin
 .../test-documents/test7Z_protected_passTika.7z    | Bin
 .../test-documents/testBROTLI_compressed.br        | Bin
 .../test/resources/test-documents/testEmbedded.zip | Bin
 .../resources/test-documents/testJAR_with_HTML.jar | Bin
 .../test-documents/testJAR_with_PEHDR.jar          | Bin
 .../resources/test-documents/testSnappy-framed.sz  | Bin
 .../src/test/resources/test-documents/testTXT.zlib | Bin
 .../test/resources/test-documents/testTXT.zlib0    | Bin
 .../test/resources/test-documents/testTXT.zlib5    | Bin
 .../test/resources/test-documents/testTXT.zlib9    | Bin
 .../tika-parser-text-module/pom.xml                |  18 ++
 ...ector.java => DefaultZipContainerDetector.java} |  69 +++++--
 ...> DeprecatedStreamingZipContainerDetector.java} |   2 +-
 .../org/apache/tika/detect/zip/IPADetector.java    |  54 +++++-
 .../org/apache/tika/detect/zip/JarDetector.java    |  50 ++++-
 .../org/apache/tika/detect/zip/KMZDetector.java    |  48 ++++-
 .../tika/detect/zip/OpenDocumentDetector.java      |  22 ++-
 .../apache/tika/detect/zip/StarOfficeDetector.java |  97 +++++++++-
 .../tika/detect/zip/StreamingDetectContext.java    |  62 +++++++
 .../tika/detect/zip/ZipContainerDetector.java      | 204 +++------------------
 .../tika/detect/zip/ZipContainerDetectorBase.java  |  69 +------
 .../services/org.apache.tika.detect.Detector       |  15 ++
 ...org.apache.tika.detect.zip.ZipContainerDetector |  19 ++
 .../org.apache.tika.detect.zip.ZipDetector         |   5 -
 .../org/apache/tika/detect/zip/ZipParserTest.java} |  37 ++--
 .../src/test/resources/test-documents/testJAR.jar  | Bin
 .../src/test/resources/test-documents/testKMZ.kmz  | Bin
 .../parser/fork/ForkParserIntegrationTest.java     |   4 +-
 57 files changed, 744 insertions(+), 1243 deletions(-)

diff --git a/pom.xml b/pom.xml
index e8e6cc7..eec4d3e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -39,6 +39,7 @@
     <module>tika-core</module>
     <module>tika-parsers</module>
     <module>tika-parser-modules</module>
+    <module>tika-parser-integration-tests</module>
     <module>tika-bundle</module>
     <module>tika-xmp</module>
     <module>tika-serialization</module>
@@ -53,7 +54,6 @@
     <module>tika-eval</module>
     <module>tika-dl</module>
     <module>tika-nlp</module>
-    <module>tika-parser-zip-commons</module>
   </modules>
 
   <profiles>
diff --git a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/ZipDetector.java b/tika-core/src/main/java/org/apache/tika/sax/StoppingEarlyException.java
similarity index 59%
rename from tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/ZipDetector.java
rename to tika-core/src/main/java/org/apache/tika/sax/StoppingEarlyException.java
index 07a6c9b..c79dd80 100644
--- a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/ZipDetector.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/StoppingEarlyException.java
@@ -14,23 +14,17 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.tika.detect.zip;
 
-import org.apache.commons.compress.archivers.zip.ZipFile;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.mime.MediaType;
+package org.apache.tika.sax;
 
-import java.io.IOException;
+import org.xml.sax.SAXException;
 
-public interface ZipDetector {
+/**
+ * Sentinel exception to stop parsing xml once target is found
+ * while SAX parsing. This should be used when the parse
+ * can be stopped and the exception ignored.
+ */
+public class StoppingEarlyException extends SAXException {
 
-    /**
-     * If detection is successful, the ZipDetector should set the zip
-     * file or OPCPackage in TikaInputStream.setOpenContainer()
-     * @param zipFile
-     * @param tis
-     * @return
-     * @throws IOException
-     */
-    MediaType detect(ZipFile zipFile, TikaInputStream tis) throws IOException;
+    public static final StoppingEarlyException INSTANCE = new StoppingEarlyException();
 }
diff --git a/tika-parser-integration-tests/pom.xml b/tika-parser-integration-tests/pom.xml
new file mode 100644
index 0000000..d951902
--- /dev/null
+++ b/tika-parser-integration-tests/pom.xml
@@ -0,0 +1,63 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>tika-parent</artifactId>
+        <groupId>org.apache.tika</groupId>
+        <version>2.0.0-SNAPSHOT</version>
+        <relativePath>../tika-parent/pom.xml</relativePath>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>tika-parser-integration-tests</artifactId>
+
+    <dependencies>
+        <!-- test dependencies -->
+        <dependency>
+            <groupId>${project.groupId}</groupId>
+            <artifactId>tika-core</artifactId>
+            <version>${project.version}</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>${project.groupId}</groupId>
+            <artifactId>tika-core</artifactId>
+            <version>${project.version}</version>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>${project.groupId}</groupId>
+            <artifactId>tika-parser-pkg-module</artifactId>
+            <version>${project.version}</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>${project.groupId}</groupId>
+            <artifactId>tika-parser-text-module</artifactId>
+            <version>${project.version}</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>${project.groupId}</groupId>
+            <artifactId>tika-parser-pkg-module</artifactId>
+            <version>${project.version}</version>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>${project.groupId}</groupId>
+            <artifactId>tika-parser-text-module</artifactId>
+            <version>${project.version}</version>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+</project>
\ No newline at end of file
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZlibParserTest.java b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/ArParserTest.java
similarity index 52%
copy from tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZlibParserTest.java
copy to tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/ArParserTest.java
index 5d2faaf..d977b64 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZlibParserTest.java
+++ b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/ArParserTest.java
@@ -1,4 +1,4 @@
-/*
+/**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -14,60 +14,48 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.tika.parser.pkg;
 
-import static org.junit.Assert.assertEquals;
-
-import java.io.InputStream;
+package org.apache.tika.parser.tests.pkg;
 
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.pkg.AbstractPkgTest;
 import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
 
-/**
- * Test case for parsing zlib compressed
- */
-public class ZlibParserTest extends AbstractPkgTest {
+import java.io.InputStream;
+
+import static org.junit.Assert.assertEquals;
+
+
+public class ArParserTest extends AbstractPkgTest {
+
     @Test
-    public void testZlibParsing() throws Exception {
+    public void testArParsing() throws Exception {
+
         ContentHandler handler = new BodyContentHandler();
         Metadata metadata = new Metadata();
 
-        try (InputStream stream = ZipParserTest.class.getResourceAsStream(
-                "/test-documents/testTXT.zlib")) {
+        try (InputStream stream = ArParserTest.class.getResourceAsStream(
+                "/test-documents/testARofText.ar")) {
             AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
         }
 
-        assertEquals("application/zlib", metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("application/x-archive",
+                metadata.get(Metadata.CONTENT_TYPE));
         String content = handler.toString();
+        assertContains("testTXT.txt", content);
         assertContains("Test d'indexation de Txt", content);
         assertContains("http://www.apache.org", content);
-    }
 
-    /**
-     * Tests that the ParseContext parser is correctly
-     *  fired for all the embedded entries.
-     */
-    @Test
-    public void testEmbedded() throws Exception {
-       ContentHandler handler = new BodyContentHandler();
-       Metadata metadata = new Metadata();
-
-        try (InputStream stream = ZipParserTest.class.getResourceAsStream(
-                "/test-documents/testTXT.zlib")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, trackingContext);
+        try (InputStream stream = ArParserTest.class.getResourceAsStream(
+                "/test-documents/testARofSND.ar")) {
+            AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
         }
-       
-       // Should have found a single text document inside
-       assertEquals(1, tracker.filenames.size());
-       assertEquals(1, tracker.mediatypes.size());
-       assertEquals(1, tracker.modifiedAts.size());
-       
-       // Won't have names, dates or types, as zlib doesn't have that 
-       assertEquals(null, tracker.filenames.get(0));
-       assertEquals(null, tracker.mediatypes.get(0));
-       assertEquals(null, tracker.createdAts.get(0));
-       assertEquals(null, tracker.modifiedAts.get(0));
+
+        assertEquals("application/x-archive",
+                metadata.get(Metadata.CONTENT_TYPE));
+        content = handler.toString();
+        assertContains("testAU.au", content);
     }
 }
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/Bzip2ParserTest.java b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/Bzip2ParserTest.java
similarity index 71%
copy from tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/Bzip2ParserTest.java
copy to tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/Bzip2ParserTest.java
index b85b2e6..3b03eea 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/Bzip2ParserTest.java
+++ b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/Bzip2ParserTest.java
@@ -14,18 +14,20 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.tika.parser.pkg;
-
-import static java.nio.charset.StandardCharsets.US_ASCII;
-import static org.junit.Assert.assertEquals;
-
-import java.io.InputStream;
+package org.apache.tika.parser.tests.pkg;
 
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.pkg.AbstractPkgTest;
+import org.apache.tika.parser.pkg.ZipParserTest;
 import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
 
+import java.io.InputStream;
+
+import static java.nio.charset.StandardCharsets.US_ASCII;
+import static org.junit.Assert.assertEquals;
+
 /**
  * Test case for parsing bzip2 files.
  */
@@ -63,32 +65,4 @@ public class Bzip2ParserTest extends AbstractPkgTest {
         assertContains("Rida Benjelloun", content);
     }
 
-
-    /**
-     * Tests that the ParseContext parser is correctly
-     *  fired for all the embedded entries.
-     */
-    @Test
-    public void testEmbedded() throws Exception {
-       ContentHandler handler = new BodyContentHandler();
-       Metadata metadata = new Metadata();
-
-        try (InputStream stream = ZipParserTest.class.getResourceAsStream(
-                "/test-documents/test-documents.tbz2")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, trackingContext);
-        }
-       
-       // Should find a single entry, for the (compressed) tar file
-       assertEquals(1, tracker.filenames.size());
-       assertEquals(1, tracker.mediatypes.size());
-       assertEquals(1, tracker.modifiedAts.size());
-       
-       assertEquals(null, tracker.filenames.get(0));
-       assertEquals(null, tracker.mediatypes.get(0));
-       assertEquals(null, tracker.createdAts.get(0));
-       assertEquals(null, tracker.modifiedAts.get(0));
-
-       // Tar file starts with the directory name
-       assertEquals("test-documents/", new String(tracker.lastSeenStart, 0, 15, US_ASCII));
-    }
 }
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZipContainerDetectorTest.java b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/CompositeZipContainerDetectorTest.java
similarity index 92%
rename from tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZipContainerDetectorTest.java
rename to tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/CompositeZipContainerDetectorTest.java
index 2bf49ab..ebdda9e 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZipContainerDetectorTest.java
+++ b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/CompositeZipContainerDetectorTest.java
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.tika.parser.pkg;
+package org.apache.tika.parser.tests.pkg;
 
 
 import static org.junit.Assert.assertEquals;
@@ -34,8 +34,8 @@ import java.util.Set;
 import org.apache.tika.TikaTest;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.detect.Detector;
-import org.apache.tika.detect.zip.StreamingZipContainerDetector;
-import org.apache.tika.detect.zip.ZipContainerDetector;
+import org.apache.tika.detect.zip.DeprecatedStreamingZipContainerDetector;
+import org.apache.tika.detect.zip.DefaultZipContainerDetector;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -43,26 +43,27 @@ import org.apache.tika.mime.MediaTypeRegistry;
 import org.junit.Ignore;
 import org.junit.Test;
 
-public class ZipContainerDetectorTest extends TikaTest {
+public class CompositeZipContainerDetectorTest extends TikaTest {
     private static MediaType ODT_TEXT = MediaType.application("vnd.oasis.opendocument.text");
     private static MediaType TIFF = MediaType.image("tiff");
-    ZipContainerDetector zipContainerDetector = new ZipContainerDetector();
-    StreamingZipContainerDetector streamingZipDetector = new StreamingZipContainerDetector();
-/*
+    DefaultZipContainerDetector compositeZipContainerDetector = new DefaultZipContainerDetector();
+    DeprecatedStreamingZipContainerDetector streamingZipDetector = new DeprecatedStreamingZipContainerDetector();
+
     @Test
     public void testTiffWorkaround() throws Exception {
         //TIKA-2591
         Metadata metadata = new Metadata();
         try (InputStream is = TikaInputStream.get(getResourceAsStream("/test-documents/testTIFF.tif"))) {
-            MediaType mt = zipContainerDetector.detect(is, metadata);
+            MediaType mt = compositeZipContainerDetector.detect(is, metadata);
             assertEquals(TIFF, mt);
         }
         metadata = new Metadata();
         try (InputStream is = TikaInputStream.get(getResourceAsStream("/test-documents/testTIFF_multipage.tif"))) {
-            MediaType mt = zipContainerDetector.detect(is, metadata);
+            MediaType mt = compositeZipContainerDetector.detect(is, metadata);
             assertEquals(TIFF, mt);
         }
     }
+/* TODO these tests!
 
     @Test
     public void testODT() throws Exception {
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/CompressParserTest.java b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/CompressParserTest.java
similarity index 70%
copy from tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/CompressParserTest.java
copy to tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/CompressParserTest.java
index a62bbee..76050de 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/CompressParserTest.java
+++ b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/CompressParserTest.java
@@ -14,20 +14,23 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.tika.parser.pkg;
-
-import static java.nio.charset.StandardCharsets.US_ASCII;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.fail;
-
-import java.io.InputStream;
+package org.apache.tika.parser.tests.pkg;
 
 import org.apache.tika.exception.TikaMemoryLimitException;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.pkg.AbstractPkgTest;
+import org.apache.tika.parser.pkg.TarParserTest;
+import org.apache.tika.parser.pkg.ZipParserTest;
 import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
 
+import java.io.InputStream;
+
+import static java.nio.charset.StandardCharsets.US_ASCII;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
 /**
  * Test case for parsing compress (.Z) files.
  */
@@ -63,31 +66,6 @@ public class CompressParserTest extends AbstractPkgTest {
         assertContains("Rida Benjelloun", content);
     }
 
-    /**
-     * Tests that the ParseContext parser is correctly
-     *  fired for all the embedded entries.
-     */
-    @Test
-    public void testEmbedded() throws Exception {
-       ContentHandler handler = new BodyContentHandler();
-       Metadata metadata = new Metadata();
-
-        try (InputStream stream = ZipParserTest.class.getResourceAsStream("/test-documents/test-documents.tar.Z")) {
-           AUTO_DETECT_PARSER.parse(stream, handler, metadata, trackingContext);
-        }
-       
-       // Should find a single entry, for the (compressed) tar file
-       assertEquals(1, tracker.filenames.size());
-       assertEquals(1, tracker.mediatypes.size());
-       assertEquals(1, tracker.modifiedAts.size());
-       
-       assertEquals(null, tracker.filenames.get(0));
-       assertEquals(null, tracker.mediatypes.get(0));
-       assertEquals(null, tracker.modifiedAts.get(0));
-
-       // Tar file starts with the directory name
-       assertEquals("test-documents/", new String(tracker.lastSeenStart, 0, 15, US_ASCII));
-    }
 
     @Test
     public void testLZMAOOM() throws Exception {
@@ -98,14 +76,4 @@ public class CompressParserTest extends AbstractPkgTest {
         }
     }
 
-    @Test
-    public void testCompressOOM() throws Exception {
-        try {
-            XMLResult r = getXML("testZ_oom.Z");
-            fail("should have thrown TikaMemoryLimitException");
-        } catch (TikaMemoryLimitException e) {
-        }
-    }
-
-
 }
\ No newline at end of file
diff --git a/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/CompressorParserTest.java b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/CompressorParserTest.java
new file mode 100644
index 0000000..8618f2b
--- /dev/null
+++ b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/CompressorParserTest.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.tests.pkg;
+
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
+import org.junit.Test;
+
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+
+public class CompressorParserTest extends TikaTest {
+
+
+    @Test
+    public void testLZ4Framed() throws Exception {
+        XMLResult r = getXML("testLZ4-framed.lz4");
+        assertEquals("application/x-lz4", r.metadata.get(Metadata.CONTENT_TYPE));
+        //xml parser throws an exception for test1.xml
+        //for now, be content that the container file is correctly identified
+        assertContains("test1.xml", r.xml);
+    }
+
+    @Test
+    public void testZstd() throws Exception {
+        XMLResult r = getXML("testZSTD.zstd");
+        assertContains("0123456789", r.xml);
+    }
+
+    @Test
+    public void testSnappyFramed() throws Exception {
+        XMLResult r = getXML("testSnappy-framed.sz");
+        assertEquals("application/x-snappy", r.metadata.get(Metadata.CONTENT_TYPE));
+        assertContains("Lorem ipsum dolor sit amet", r.xml);
+    }
+
+    @Test
+    public void testBrotli() throws Exception {
+        Metadata metadata = new Metadata();
+        metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "testBROTLI_compressed.br");
+        List<Metadata> metadataList = getRecursiveMetadata("testBROTLI_compressed.br", metadata);
+
+        assertContains("XXXXXXXXXXYYYYYYYYYY", metadataList.get(1).get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
+        assertEquals("testBROTLI_compressed", metadataList.get(1).get(TikaCoreProperties.RESOURCE_NAME_KEY));
+    }
+}
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/GzipParserTest.java b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/GzipParserTest.java
similarity index 75%
copy from tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/GzipParserTest.java
copy to tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/GzipParserTest.java
index 91dc8c2..58ca3f9 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/GzipParserTest.java
+++ b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/GzipParserTest.java
@@ -14,18 +14,20 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.tika.parser.pkg;
-
-import static java.nio.charset.StandardCharsets.US_ASCII;
-import static org.junit.Assert.assertEquals;
-
-import java.io.InputStream;
+package org.apache.tika.parser.tests.pkg;
 
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.pkg.AbstractPkgTest;
+import org.apache.tika.parser.pkg.ZipParserTest;
 import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
 
+import java.io.InputStream;
+
+import static java.nio.charset.StandardCharsets.US_ASCII;
+import static org.junit.Assert.assertEquals;
+
 /**
  * Test case for parsing gzip files.
  */
@@ -62,33 +64,6 @@ public class GzipParserTest extends AbstractPkgTest {
         assertContains("test-documents/testXML.xml", content);
         assertContains("Rida Benjelloun", content);
     }
-
-    /**
-     * Tests that the ParseContext parser is correctly
-     *  fired for all the embedded entries.
-     */
-    @Test
-    public void testEmbedded() throws Exception {
-       ContentHandler handler = new BodyContentHandler();
-       Metadata metadata = new Metadata();
-
-        try (InputStream stream = ZipParserTest.class.getResourceAsStream(
-                "/test-documents/test-documents.tgz")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, trackingContext);
-        }
-       
-       // Should find a single entry, for the (compressed) tar file
-       assertEquals(1, tracker.filenames.size());
-       assertEquals(1, tracker.mediatypes.size());
-       assertEquals(1, tracker.modifiedAts.size());
-       
-       assertEquals(null, tracker.filenames.get(0));
-       assertEquals(null, tracker.mediatypes.get(0));
-       assertEquals(null, tracker.modifiedAts.get(0));
-
-       // Tar file starts with the directory name
-       assertEquals("test-documents/", new String(tracker.lastSeenStart, 0, 15, US_ASCII));
-    }
     
     @Test
     public void testSvgzParsing() throws Exception {
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/Bzip2ParserTest.java b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/RarParserTest.java
similarity index 60%
copy from tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/Bzip2ParserTest.java
copy to tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/RarParserTest.java
index b85b2e6..66facd0 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/Bzip2ParserTest.java
+++ b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/RarParserTest.java
@@ -14,34 +14,35 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.tika.parser.pkg;
-
-import static java.nio.charset.StandardCharsets.US_ASCII;
-import static org.junit.Assert.assertEquals;
-
-import java.io.InputStream;
+package org.apache.tika.parser.tests.pkg;
 
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.pkg.AbstractPkgTest;
 import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
 
+import java.io.InputStream;
+
+import static org.junit.Assert.assertEquals;
+
+
 /**
- * Test case for parsing bzip2 files.
+ * Test case for parsing rar files.
  */
-public class Bzip2ParserTest extends AbstractPkgTest {
+public class RarParserTest extends AbstractPkgTest {
 
     @Test
-    public void testBzip2Parsing() throws Exception {
+    public void testRarParsing() throws Exception {
         ContentHandler handler = new BodyContentHandler();
         Metadata metadata = new Metadata();
 
-        try (InputStream stream = Bzip2ParserTest.class.getResourceAsStream(
-                "/test-documents/test-documents.tbz2")) {
+        try (InputStream stream = RarParserTest.class.getResourceAsStream(
+                "/test-documents/test-documents.rar")) {
             AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
         }
 
-        assertEquals("application/x-bzip2", metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("application/x-rar-compressed; version=4", metadata.get(Metadata.CONTENT_TYPE));
         String content = handler.toString();
         assertContains("test-documents/testEXCEL.xls", content);
         assertContains("Sample Excel Worksheet", content);
@@ -63,32 +64,4 @@ public class Bzip2ParserTest extends AbstractPkgTest {
         assertContains("Rida Benjelloun", content);
     }
 
-
-    /**
-     * Tests that the ParseContext parser is correctly
-     *  fired for all the embedded entries.
-     */
-    @Test
-    public void testEmbedded() throws Exception {
-       ContentHandler handler = new BodyContentHandler();
-       Metadata metadata = new Metadata();
-
-        try (InputStream stream = ZipParserTest.class.getResourceAsStream(
-                "/test-documents/test-documents.tbz2")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, trackingContext);
-        }
-       
-       // Should find a single entry, for the (compressed) tar file
-       assertEquals(1, tracker.filenames.size());
-       assertEquals(1, tracker.mediatypes.size());
-       assertEquals(1, tracker.modifiedAts.size());
-       
-       assertEquals(null, tracker.filenames.get(0));
-       assertEquals(null, tracker.mediatypes.get(0));
-       assertEquals(null, tracker.createdAts.get(0));
-       assertEquals(null, tracker.modifiedAts.get(0));
-
-       // Tar file starts with the directory name
-       assertEquals("test-documents/", new String(tracker.lastSeenStart, 0, 15, US_ASCII));
-    }
-}
+}
\ No newline at end of file
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/Seven7ParserTest.java
similarity index 80%
copy from tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java
copy to tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/Seven7ParserTest.java
index 2fc6841..829f7ee 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java
+++ b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/Seven7ParserTest.java
@@ -14,28 +14,26 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.tika.parser.pkg;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import javax.crypto.Cipher;
-
-import java.io.InputStream;
-import java.security.NoSuchAlgorithmException;
+package org.apache.tika.parser.tests.pkg;
 
 import org.apache.tika.exception.EncryptedDocumentException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.PasswordProvider;
+import org.apache.tika.parser.pkg.AbstractPkgTest;
 import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
 
+import javax.crypto.Cipher;
+import java.io.InputStream;
+import java.security.NoSuchAlgorithmException;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
 /**
  * Test case for parsing 7z files.
  */
@@ -74,46 +72,6 @@ public class Seven7ParserTest extends AbstractPkgTest {
         assertContains("Rida Benjelloun", content);
     }
 
-    /**
-     * Tests that the ParseContext parser is correctly
-     *  fired for all the embedded entries.
-     */
-    @Test
-    public void testEmbedded() throws Exception {
-       ContentHandler handler = new BodyContentHandler();
-       Metadata metadata = new Metadata();
-
-        try (InputStream stream = Seven7ParserTest.class.getResourceAsStream(
-                "/test-documents/test-documents.7z")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, trackingContext);
-        }
-       
-       // Should have found all 9 documents, but not the directory
-       assertEquals(9, tracker.filenames.size());
-       assertEquals(9, tracker.mediatypes.size());
-       assertEquals(9, tracker.modifiedAts.size());
-       
-       // Should have names but not content types, as 7z doesn't
-       //  store the content types
-       assertEquals("test-documents/testEXCEL.xls", tracker.filenames.get(0));
-       assertEquals("test-documents/testHTML.html", tracker.filenames.get(1));
-       assertEquals("test-documents/testOpenOffice2.odt", tracker.filenames.get(2));
-       assertEquals("test-documents/testPDF.pdf", tracker.filenames.get(3));
-       assertEquals("test-documents/testPPT.ppt", tracker.filenames.get(4));
-       assertEquals("test-documents/testRTF.rtf", tracker.filenames.get(5));
-       assertEquals("test-documents/testTXT.txt", tracker.filenames.get(6));
-       assertEquals("test-documents/testWORD.doc", tracker.filenames.get(7));
-       assertEquals("test-documents/testXML.xml", tracker.filenames.get(8));
-       
-       for(String type : tracker.mediatypes) {
-          assertNull(type);
-       }
-       for(String mod : tracker.modifiedAts) {
-           assertNotNull(mod);
-           assertTrue("Modified at " + mod, mod.startsWith("20"));
-       }
-    }
-
     @Test
     public void testPasswordProtected() throws Exception {
         ContentHandler handler = new BodyContentHandler();
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/Bzip2ParserTest.java b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/TarParserTest.java
similarity index 60%
copy from tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/Bzip2ParserTest.java
copy to tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/TarParserTest.java
index b85b2e6..c523f11 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/Bzip2ParserTest.java
+++ b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/TarParserTest.java
@@ -14,34 +14,38 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.tika.parser.pkg;
-
-import static java.nio.charset.StandardCharsets.US_ASCII;
-import static org.junit.Assert.assertEquals;
-
-import java.io.InputStream;
+package org.apache.tika.parser.tests.pkg;
 
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.pkg.AbstractPkgTest;
+import org.apache.tika.parser.pkg.ZipParserTest;
 import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
 
+import java.io.InputStream;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
 /**
- * Test case for parsing bzip2 files.
+ * Test case for parsing tar files.
  */
-public class Bzip2ParserTest extends AbstractPkgTest {
+public class TarParserTest extends AbstractPkgTest {
 
     @Test
-    public void testBzip2Parsing() throws Exception {
+    public void testTarParsing() throws Exception {
         ContentHandler handler = new BodyContentHandler();
         Metadata metadata = new Metadata();
 
-        try (InputStream stream = Bzip2ParserTest.class.getResourceAsStream(
-                "/test-documents/test-documents.tbz2")) {
+        try (InputStream stream = TarParserTest.class.getResourceAsStream(
+                "/test-documents/test-documents.tar")) {
             AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
         }
 
-        assertEquals("application/x-bzip2", metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("application/x-gtar", metadata.get(Metadata.CONTENT_TYPE));
         String content = handler.toString();
         assertContains("test-documents/testEXCEL.xls", content);
         assertContains("Sample Excel Worksheet", content);
@@ -62,33 +66,4 @@ public class Bzip2ParserTest extends AbstractPkgTest {
         assertContains("test-documents/testXML.xml", content);
         assertContains("Rida Benjelloun", content);
     }
-
-
-    /**
-     * Tests that the ParseContext parser is correctly
-     *  fired for all the embedded entries.
-     */
-    @Test
-    public void testEmbedded() throws Exception {
-       ContentHandler handler = new BodyContentHandler();
-       Metadata metadata = new Metadata();
-
-        try (InputStream stream = ZipParserTest.class.getResourceAsStream(
-                "/test-documents/test-documents.tbz2")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, trackingContext);
-        }
-       
-       // Should find a single entry, for the (compressed) tar file
-       assertEquals(1, tracker.filenames.size());
-       assertEquals(1, tracker.mediatypes.size());
-       assertEquals(1, tracker.modifiedAts.size());
-       
-       assertEquals(null, tracker.filenames.get(0));
-       assertEquals(null, tracker.mediatypes.get(0));
-       assertEquals(null, tracker.createdAts.get(0));
-       assertEquals(null, tracker.modifiedAts.get(0));
-
-       // Tar file starts with the directory name
-       assertEquals("test-documents/", new String(tracker.lastSeenStart, 0, 15, US_ASCII));
-    }
 }
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/ZipParserTest.java
similarity index 59%
copy from tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java
copy to tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/ZipParserTest.java
index 25fcfb1..a3ca42c 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java
+++ b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/ZipParserTest.java
@@ -14,17 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.tika.parser.pkg;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.io.InputStream;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
+package org.apache.tika.parser.tests.pkg;
 
 import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.compress.archivers.ArchiveStreamFactory;
@@ -36,12 +26,23 @@ import org.apache.tika.metadata.HttpHeaders;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.pkg.AbstractPkgTest;
 import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
 import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.helpers.DefaultHandler;
 
+import java.io.InputStream;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
 /**
  * Test case for parsing zip files.
  */
@@ -79,67 +80,9 @@ public class ZipParserTest extends AbstractPkgTest {
         assertContains("Rida Benjelloun", content);
     }
 
-    /**
-     * Tests that the ParseContext parser is correctly
-     *  fired for all the embedded entries.
-     */
-    @Test
-    public void testEmbedded() throws Exception {
-       ContentHandler handler = new BodyContentHandler();
-       Metadata metadata = new Metadata();
-
-        try (InputStream stream = ZipParserTest.class.getResourceAsStream(
-                "/test-documents/test-documents.zip")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, trackingContext);
-        }
-       
-       // Should have found all 9 documents
-       assertEquals(9, tracker.filenames.size());
-       assertEquals(9, tracker.mediatypes.size());
-       assertEquals(9, tracker.modifiedAts.size());
-       
-       // Should have names and modified dates, but not content types, 
-       //  as zip doesn't store the content types
-       assertEquals("testEXCEL.xls", tracker.filenames.get(0));
-       assertEquals("testHTML.html", tracker.filenames.get(1));
-       assertEquals("testOpenOffice2.odt", tracker.filenames.get(2));
-       assertEquals("testPDF.pdf", tracker.filenames.get(3));
-       assertEquals("testPPT.ppt", tracker.filenames.get(4));
-       assertEquals("testRTF.rtf", tracker.filenames.get(5));
-       assertEquals("testTXT.txt", tracker.filenames.get(6));
-       assertEquals("testWORD.doc", tracker.filenames.get(7));
-       assertEquals("testXML.xml", tracker.filenames.get(8));
-       
-       for(String type : tracker.mediatypes) {
-          assertNull(type);
-       }
-       for(String crt : tracker.createdAts) {
-           assertNull(crt);
-       }
-       for(String mod : tracker.modifiedAts) {
-           assertNotNull(mod);
-           assertTrue("Modified at " + mod, mod.startsWith("20"));
-       }
-    }
-
-    /**
-     * Test case for the ability of the ZIP parser to extract the name of
-     * a ZIP entry even if the content of the entry is unreadable due to an
-     * unsupported compression method.
-     *
-     * @see <a href="https://issues.apache.org/jira/browse/TIKA-346">TIKA-346</a>
-     */
-    @Test
-    public void testUnsupportedZipCompressionMethod() throws Exception {
-        String content = new Tika().parseToString(
-                ZipParserTest.class.getResourceAsStream(
-                        "/test-documents/moby.zip"));
-        assertContains("README", content);
-    }
-
     private class GatherRelIDsDocumentExtractor implements EmbeddedDocumentExtractor {
         public Set<String> allRelIDs = new HashSet<String>();
-        public boolean shouldParseEmbedded(Metadata metadata) {      
+        public boolean shouldParseEmbedded(Metadata metadata) {
             String relID = metadata.get(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID);
             if (relID != null) {
                 allRelIDs.add(relID);
@@ -175,27 +118,6 @@ public class ZipParserTest extends AbstractPkgTest {
         assertTrue(relIDs.allRelIDs.contains("test2.txt"));
     }
 
-    @Test // TIKA-936
-    public void testCustomEncoding() throws Exception {
-        ArchiveStreamFactory factory = new ArchiveStreamFactory();
-        factory.setEntryEncoding("SJIS");
-        trackingContext.set(ArchiveStreamFactory.class, factory);
-
-        try (InputStream stream = TikaInputStream.get(Base64.decodeBase64(
-                "UEsDBBQAAAAIAI+CvUCDo3+zIgAAACgAAAAOAAAAk/qWe4zqg4GDgi50"
-                        + "eHRr2tj0qulsc2pzRHN609Gm7Y1OvFxNYLHJv6ZV97yCiQEAUEsBAh"
-                        + "QLFAAAAAgAj4K9QIOjf7MiAAAAKAAAAA4AAAAAAAAAAAAgAAAAAAAA"
-                        + "AJP6lnuM6oOBg4IudHh0UEsFBgAAAAABAAEAPAAAAE4AAAAAAA=="))) {
-            AUTO_DETECT_PARSER.parse(
-                    stream, new DefaultHandler(),
-                    new Metadata(), trackingContext);
-        }
-
-        assertEquals(1, tracker.filenames.size());
-        assertEquals(
-                "\u65E5\u672C\u8A9E\u30E1\u30E2.txt",
-                tracker.filenames.get(0));
-    }
 
     @Test
     public void testZipEncrypted() throws Exception {
@@ -211,18 +133,6 @@ public class ZipParserTest extends AbstractPkgTest {
     }
 
     @Test
-    public void testKMZDetection() throws Exception {
-        List<Metadata> metadataList = getRecursiveMetadata("testKMZ.kmz");
-        assertEquals("application/vnd.google-earth.kmz", metadataList.get(0).get(HttpHeaders.CONTENT_TYPE));
-    }
-
-    @Test
-    public void testJARDetection() throws Exception {
-        List<Metadata> metadataList = getRecursiveMetadata("testJAR.jar");
-        assertEquals("application/java-archive", metadataList.get(0).get(HttpHeaders.CONTENT_TYPE));
-    }
-
-    @Test
     public void testQuineRecursiveParserWrapper() throws Exception {
         //received permission from author via dm
         //2019-07-25 to include
@@ -231,10 +141,4 @@ public class ZipParserTest extends AbstractPkgTest {
         //the original file name
         getRecursiveMetadata("droste.zip");
     }
-
-    @Test(expected = TikaException.class)
-    public void testQuine() throws Exception {
-        getXML("droste.zip");
-    }
-
 }
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZlibParserTest.java b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/ZlibParserTest.java
similarity index 62%
copy from tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZlibParserTest.java
copy to tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/ZlibParserTest.java
index 5d2faaf..f7cd7a3 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZlibParserTest.java
+++ b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/pkg/ZlibParserTest.java
@@ -14,17 +14,18 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.tika.parser.pkg;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.InputStream;
+package org.apache.tika.parser.tests.pkg;
 
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.pkg.AbstractPkgTest;
 import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
 
+import java.io.InputStream;
+
+import static org.junit.Assert.assertEquals;
+
 /**
  * Test case for parsing zlib compressed
  */
@@ -44,30 +45,4 @@ public class ZlibParserTest extends AbstractPkgTest {
         assertContains("Test d'indexation de Txt", content);
         assertContains("http://www.apache.org", content);
     }
-
-    /**
-     * Tests that the ParseContext parser is correctly
-     *  fired for all the embedded entries.
-     */
-    @Test
-    public void testEmbedded() throws Exception {
-       ContentHandler handler = new BodyContentHandler();
-       Metadata metadata = new Metadata();
-
-        try (InputStream stream = ZipParserTest.class.getResourceAsStream(
-                "/test-documents/testTXT.zlib")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, trackingContext);
-        }
-       
-       // Should have found a single text document inside
-       assertEquals(1, tracker.filenames.size());
-       assertEquals(1, tracker.mediatypes.size());
-       assertEquals(1, tracker.modifiedAts.size());
-       
-       // Won't have names, dates or types, as zlib doesn't have that 
-       assertEquals(null, tracker.filenames.get(0));
-       assertEquals(null, tracker.mediatypes.get(0));
-       assertEquals(null, tracker.createdAts.get(0));
-       assertEquals(null, tracker.modifiedAts.get(0));
-    }
 }
diff --git a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java
index b27155d..7b709bf 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java
+++ b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java
@@ -8,7 +8,7 @@ import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
 import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
 import org.apache.poi.openxml4j.util.ZipEntrySource;
 import org.apache.poi.openxml4j.util.ZipFileZipEntrySource;
-import org.apache.tika.detect.zip.ZipDetector;
+import org.apache.tika.detect.zip.ZipContainerDetector;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.mime.MediaType;
 
@@ -16,7 +16,7 @@ import java.io.IOException;
 import java.util.Locale;
 import java.util.regex.Pattern;
 
-public class OPCPackageDetector implements ZipDetector {
+public class OPCPackageDetector implements ZipContainerDetector {
 
 
     private static final Pattern MACRO_TEMPLATE_PATTERN = Pattern.compile("macroenabledtemplate$", Pattern.CASE_INSENSITIVE);
diff --git a/tika-parser-modules/tika-parser-microsoft-module/src/main/resources/META-INF/services/org.apache.tika.detect.zip.ZipDetector b/tika-parser-modules/tika-parser-microsoft-module/src/main/resources/META-INF/services/org.apache.tika.detect.zip.ZipContainerDetector
similarity index 100%
rename from tika-parser-modules/tika-parser-microsoft-module/src/main/resources/META-INF/services/org.apache.tika.detect.zip.ZipDetector
rename to tika-parser-modules/tika-parser-microsoft-module/src/main/resources/META-INF/services/org.apache.tika.detect.zip.ZipContainerDetector
diff --git a/tika-parser-modules/tika-parser-pkg-module/pom.xml b/tika-parser-modules/tika-parser-pkg-module/pom.xml
index 43b3a57..40ada9b 100644
--- a/tika-parser-modules/tika-parser-pkg-module/pom.xml
+++ b/tika-parser-modules/tika-parser-pkg-module/pom.xml
@@ -45,14 +45,6 @@
             <artifactId>junrar</artifactId>
             <version>4.0.0</version>
             <exclusions>
-                <exclusion>
-                    <groupId>commons-logging</groupId>
-                    <artifactId>commons-logging</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>commons-logging</groupId>
-                    <artifactId>commons-logging-api</artifactId>
-                </exclusion>
                 <!-- TIKA-2504 exclude to avoid vulnerability in plexus-utils -->
                 <exclusion>
                     <groupId>org.apache.commons</groupId>
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ArParserTest.java b/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ArParserTest.java
index c29fb60..579540c 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ArParserTest.java
+++ b/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ArParserTest.java
@@ -29,34 +29,7 @@ import org.junit.Test;
 import org.xml.sax.ContentHandler;
 
 public class ArParserTest extends AbstractPkgTest {
-    @Test
-    public void testArParsing() throws Exception {
-
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = ArParserTest.class.getResourceAsStream(
-                "/test-documents/testARofText.ar")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
-        }
-
-        assertEquals("application/x-archive",
-                metadata.get(Metadata.CONTENT_TYPE));
-        String content = handler.toString();
-        assertContains("testTXT.txt", content);
-        assertContains("Test d'indexation de Txt", content);
-        assertContains("http://www.apache.org", content);
 
-        try (InputStream stream = ArParserTest.class.getResourceAsStream(
-                "/test-documents/testARofSND.ar")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
-        }
-
-        assertEquals("application/x-archive",
-                     metadata.get(Metadata.CONTENT_TYPE));
-        content = handler.toString();
-        assertContains("testAU.au", content);
-    }
 
     /**
      * Tests that the ParseContext parser is correctly fired for all the
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/Bzip2ParserTest.java b/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/Bzip2ParserTest.java
index b85b2e6..14da361 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/Bzip2ParserTest.java
+++ b/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/Bzip2ParserTest.java
@@ -31,39 +31,6 @@ import org.xml.sax.ContentHandler;
  */
 public class Bzip2ParserTest extends AbstractPkgTest {
 
-    @Test
-    public void testBzip2Parsing() throws Exception {
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = Bzip2ParserTest.class.getResourceAsStream(
-                "/test-documents/test-documents.tbz2")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
-        }
-
-        assertEquals("application/x-bzip2", metadata.get(Metadata.CONTENT_TYPE));
-        String content = handler.toString();
-        assertContains("test-documents/testEXCEL.xls", content);
-        assertContains("Sample Excel Worksheet", content);
-        assertContains("test-documents/testHTML.html", content);
-        assertContains("Test Indexation Html", content);
-        assertContains("test-documents/testOpenOffice2.odt", content);
-        assertContains("This is a sample Open Office document", content);
-        assertContains("test-documents/testPDF.pdf", content);
-        assertContains("Apache Tika", content);
-        assertContains("test-documents/testPPT.ppt", content);
-        assertContains("Sample Powerpoint Slide", content);
-        assertContains("test-documents/testRTF.rtf", content);
-        assertContains("indexation Word", content);
-        assertContains("test-documents/testTXT.txt", content);
-        assertContains("Test d'indexation de Txt", content);
-        assertContains("test-documents/testWORD.doc", content);
-        assertContains("This is a sample Microsoft Word Document", content);
-        assertContains("test-documents/testXML.xml", content);
-        assertContains("Rida Benjelloun", content);
-    }
-
-
     /**
      * Tests that the ParseContext parser is correctly
      *  fired for all the embedded entries.
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/CompressParserTest.java b/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/CompressParserTest.java
index a62bbee..04aba66 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/CompressParserTest.java
+++ b/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/CompressParserTest.java
@@ -32,36 +32,6 @@ import org.xml.sax.ContentHandler;
  * Test case for parsing compress (.Z) files.
  */
 public class CompressParserTest extends AbstractPkgTest {
-    @Test
-    public void testCompressParsing() throws Exception {
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = TarParserTest.class.getResourceAsStream("/test-documents/test-documents.tar.Z")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
-        }
-
-        assertEquals("application/x-compress", metadata.get(Metadata.CONTENT_TYPE));
-        String content = handler.toString();
-        assertContains("test-documents/testEXCEL.xls", content);
-        assertContains("Sample Excel Worksheet", content);
-        assertContains("test-documents/testHTML.html", content);
-        assertContains("Test Indexation Html", content);
-        assertContains("test-documents/testOpenOffice2.odt", content);
-        assertContains("This is a sample Open Office document", content);
-        assertContains("test-documents/testPDF.pdf", content);
-        assertContains("Apache Tika", content);
-        assertContains("test-documents/testPPT.ppt", content);
-        assertContains("Sample Powerpoint Slide", content);
-        assertContains("test-documents/testRTF.rtf", content);
-        assertContains("indexation Word", content);
-        assertContains("test-documents/testTXT.txt", content);
-        assertContains("Test d'indexation de Txt", content);
-        assertContains("test-documents/testWORD.doc", content);
-        assertContains("This is a sample Microsoft Word Document", content);
-        assertContains("test-documents/testXML.xml", content);
-        assertContains("Rida Benjelloun", content);
-    }
 
     /**
      * Tests that the ParseContext parser is correctly
@@ -90,15 +60,6 @@ public class CompressParserTest extends AbstractPkgTest {
     }
 
     @Test
-    public void testLZMAOOM() throws Exception {
-        try {
-            XMLResult r = getXML("testLZMA_oom");
-            fail("should have thrown TikaMemoryLimitException");
-        } catch (TikaMemoryLimitException e) {
-        }
-    }
-
-    @Test
     public void testCompressOOM() throws Exception {
         try {
             XMLResult r = getXML("testZ_oom.Z");
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/CompressorParserTest.java b/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/CompressorParserTest.java
index 8826c50..5e221f0 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/CompressorParserTest.java
+++ b/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/CompressorParserTest.java
@@ -59,38 +59,6 @@ public class CompressorParserTest extends TikaTest {
     }
 
     @Test
-    public void testSnappyFramed() throws Exception {
-        XMLResult r = getXML("testSnappy-framed.sz");
-        assertEquals("application/x-snappy", r.metadata.get(Metadata.CONTENT_TYPE));
-        assertContains("Lorem ipsum dolor sit amet", r.xml);
-    }
-
-    @Test
-    public void testLZ4Framed() throws Exception {
-        XMLResult r = getXML("testLZ4-framed.lz4");
-        assertEquals("application/x-lz4", r.metadata.get(Metadata.CONTENT_TYPE));
-        //xml parser throws an exception for test1.xml
-        //for now, be content that the container file is correctly identified
-        assertContains("test1.xml", r.xml);
-    }
-
-    @Test
-    public void testZstd() throws Exception {
-        XMLResult r = getXML("testZSTD.zstd");
-        assertContains("0123456789", r.xml);
-    }
-
-    @Test
-    public void testBrotli() throws Exception {
-        Metadata metadata = new Metadata();
-        metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "testBROTLI_compressed.br");
-        List<Metadata> metadataList = getRecursiveMetadata("testBROTLI_compressed.br", metadata);
-
-        assertContains("XXXXXXXXXXYYYYYYYYYY", metadataList.get(1).get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
-        assertEquals("testBROTLI_compressed", metadataList.get(1).get(TikaCoreProperties.RESOURCE_NAME_KEY));
-    }
-
-    @Test
     public void testCoverage() throws Exception {
         //test that the package parser covers all inputstreams handled
         //by CompressorStreamFactory.  When we update commons-compress, and they add
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/GzipParserTest.java b/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/GzipParserTest.java
index 91dc8c2..c8ace4e 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/GzipParserTest.java
+++ b/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/GzipParserTest.java
@@ -31,38 +31,6 @@ import org.xml.sax.ContentHandler;
  */
 public class GzipParserTest extends AbstractPkgTest {
 
-    @Test
-    public void testGzipParsing() throws Exception {
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = GzipParserTest.class.getResourceAsStream(
-                "/test-documents/test-documents.tgz")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
-        }
-
-        assertEquals("application/gzip", metadata.get(Metadata.CONTENT_TYPE));
-        String content = handler.toString();
-        assertContains("test-documents/testEXCEL.xls", content);
-        assertContains("Sample Excel Worksheet", content);
-        assertContains("test-documents/testHTML.html", content);
-        assertContains("Test Indexation Html", content);
-        assertContains("test-documents/testOpenOffice2.odt", content);
-        assertContains("This is a sample Open Office document", content);
-        assertContains("test-documents/testPDF.pdf", content);
-        assertContains("Apache Tika", content);
-        assertContains("test-documents/testPPT.ppt", content);
-        assertContains("Sample Powerpoint Slide", content);
-        assertContains("test-documents/testRTF.rtf", content);
-        assertContains("indexation Word", content);
-        assertContains("test-documents/testTXT.txt", content);
-        assertContains("Test d'indexation de Txt", content);
-        assertContains("test-documents/testWORD.doc", content);
-        assertContains("This is a sample Microsoft Word Document", content);
-        assertContains("test-documents/testXML.xml", content);
-        assertContains("Rida Benjelloun", content);
-    }
-
     /**
      * Tests that the ParseContext parser is correctly
      *  fired for all the embedded entries.
@@ -76,12 +44,12 @@ public class GzipParserTest extends AbstractPkgTest {
                 "/test-documents/test-documents.tgz")) {
             AUTO_DETECT_PARSER.parse(stream, handler, metadata, trackingContext);
         }
-       
+
        // Should find a single entry, for the (compressed) tar file
        assertEquals(1, tracker.filenames.size());
        assertEquals(1, tracker.mediatypes.size());
        assertEquals(1, tracker.modifiedAts.size());
-       
+
        assertEquals(null, tracker.filenames.get(0));
        assertEquals(null, tracker.mediatypes.get(0));
        assertEquals(null, tracker.modifiedAts.get(0));
@@ -89,20 +57,4 @@ public class GzipParserTest extends AbstractPkgTest {
        // Tar file starts with the directory name
        assertEquals("test-documents/", new String(tracker.lastSeenStart, 0, 15, US_ASCII));
     }
-    
-    @Test
-    public void testSvgzParsing() throws Exception {
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = GzipParserTest.class.getResourceAsStream(
-                "/test-documents/testSVG.svgz")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
-        }
-
-        assertEquals("application/gzip", metadata.get(Metadata.CONTENT_TYPE));
-        String content = handler.toString();
-        assertContains("Test SVG image", content);
-    }
-
 }
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/RarParserTest.java b/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/RarParserTest.java
index d6f5af1..340679c 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/RarParserTest.java
+++ b/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/RarParserTest.java
@@ -38,38 +38,6 @@ import org.xml.sax.ContentHandler;
  */
 public class RarParserTest extends AbstractPkgTest {
 
-    @Test
-    public void testRarParsing() throws Exception {
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = RarParserTest.class.getResourceAsStream(
-                "/test-documents/test-documents.rar")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
-        }
-
-        assertEquals("application/x-rar-compressed; version=4", metadata.get(Metadata.CONTENT_TYPE));
-        String content = handler.toString();
-        assertContains("test-documents/testEXCEL.xls", content);
-        assertContains("Sample Excel Worksheet", content);
-        assertContains("test-documents/testHTML.html", content);
-        assertContains("Test Indexation Html", content);
-        assertContains("test-documents/testOpenOffice2.odt", content);
-        assertContains("This is a sample Open Office document", content);
-        assertContains("test-documents/testPDF.pdf", content);
-        assertContains("Apache Tika", content);
-        assertContains("test-documents/testPPT.ppt", content);
-        assertContains("Sample Powerpoint Slide", content);
-        assertContains("test-documents/testRTF.rtf", content);
-        assertContains("indexation Word", content);
-        assertContains("test-documents/testTXT.txt", content);
-        assertContains("Test d'indexation de Txt", content);
-        assertContains("test-documents/testWORD.doc", content);
-        assertContains("This is a sample Microsoft Word Document", content);
-        assertContains("test-documents/testXML.xml", content);
-        assertContains("Rida Benjelloun", content);
-    }
-
     /**
      * Tests that the ParseContext parser is correctly
      *  fired for all the embedded entries.
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java b/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java
index 2fc6841..e893092 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java
+++ b/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java
@@ -40,39 +40,6 @@ import org.xml.sax.ContentHandler;
  * Test case for parsing 7z files.
  */
 public class Seven7ParserTest extends AbstractPkgTest {
-    private static final MediaType TYPE_7ZIP = MediaType.application("x-7z-compressed");
-    
-    @Test
-    public void test7ZParsing() throws Exception {
-        Metadata metadata = new Metadata();
-        
-        // Ensure 7zip is a parsable format
-        assertTrue("No 7zip parser found", 
-                AUTO_DETECT_PARSER.getSupportedTypes(recursingContext).contains(TYPE_7ZIP));
-        
-        // Parse
-        String content = getText("test-documents.7z", metadata);
-
-        assertEquals(TYPE_7ZIP.toString(), metadata.get(Metadata.CONTENT_TYPE));
-        assertContains("test-documents/testEXCEL.xls", content);
-        assertContains("Sample Excel Worksheet", content);
-        assertContains("test-documents/testHTML.html", content);
-        assertContains("Test Indexation Html", content);
-        assertContains("test-documents/testOpenOffice2.odt", content);
-        assertContains("This is a sample Open Office document", content);
-        assertContains("test-documents/testPDF.pdf", content);
-        assertContains("Apache Tika", content);
-        assertContains("test-documents/testPPT.ppt", content);
-        assertContains("Sample Powerpoint Slide", content);
-        assertContains("test-documents/testRTF.rtf", content);
-        assertContains("indexation Word", content);
-        assertContains("test-documents/testTXT.txt", content);
-        assertContains("Test d'indexation de Txt", content);
-        assertContains("test-documents/testWORD.doc", content);
-        assertContains("This is a sample Microsoft Word Document", content);
-        assertContains("test-documents/testXML.xml", content);
-        assertContains("Rida Benjelloun", content);
-    }
 
     /**
      * Tests that the ParseContext parser is correctly
@@ -113,112 +80,4 @@ public class Seven7ParserTest extends AbstractPkgTest {
            assertTrue("Modified at " + mod, mod.startsWith("20"));
        }
     }
-
-    @Test
-    public void testPasswordProtected() throws Exception {
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-        
-        // No password, will fail with EncryptedDocumentException
-        boolean ex = false;
-        try (InputStream stream = Seven7ParserTest.class.getResourceAsStream(
-                "/test-documents/test7Z_protected_passTika.7z")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
-            fail("Shouldn't be able to read a password protected 7z without the password");
-        } catch (EncryptedDocumentException e) {
-            // Good
-            ex = true;
-        }
-        
-        assertTrue("test no password", ex);
-        
-        // No password, will fail with EncryptedDocumentException
-        ex = false;
-        try (InputStream stream = Seven7ParserTest.class.getResourceAsStream(
-                "/test-documents/full_encrypted.7z")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
-            fail("Shouldn't be able to read a full password protected 7z without the password");
-        } catch (EncryptedDocumentException e) {
-            // Good
-            ex = true;
-        } catch (Exception e){
-            ex = false;
-        }
-        
-        assertTrue("test no password for full encrypted 7z", ex);
-
-        ex = false;
-        
-        // Wrong password currently silently gives no content
-        // Ideally we'd like Commons Compress to give an error, but it doesn't...
-        recursingContext.set(PasswordProvider.class, new PasswordProvider() {
-            @Override
-            public String getPassword(Metadata metadata) {
-                return "wrong";
-            }
-        });
-        handler = new BodyContentHandler();
-        try (InputStream stream = Seven7ParserTest.class.getResourceAsStream(
-                "/test-documents/test7Z_protected_passTika.7z")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
-            fail("Shouldn't be able to read a password protected 7z with wrong password");
-        } catch (TikaException e) {
-            //if JCE is installed, the cause will be: Caused by: org.tukaani.xz.CorruptedInputException: Compressed data is corrupt
-            //if JCE is not installed, the message will include
-            // "(do you have the JCE  Unlimited Strength Jurisdiction Policy Files installed?")
-            ex = true;
-        }
-        assertTrue("TikaException for bad password", ex);
-        // Will be empty
-        assertEquals("", handler.toString());
-
-        ex = false;
-        // Right password works fine if JCE Unlimited Strength has been installed!!!
-        if (isStrongCryptoAvailable()) {
-            recursingContext.set(PasswordProvider.class, new PasswordProvider() {
-                @Override
-                public String getPassword(Metadata metadata) {
-                    return "Tika";
-                }
-            });
-            handler = new BodyContentHandler();
-            try (InputStream stream = Seven7ParserTest.class.getResourceAsStream(
-                    "/test-documents/test7Z_protected_passTika.7z")) {
-                AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
-            }
-
-            assertEquals(TYPE_7ZIP.toString(), metadata.get(Metadata.CONTENT_TYPE));
-            String content = handler.toString();
-
-            // Should get filename
-            assertContains("text.txt", content);
-
-            // Should get contents from the text file in the 7z file
-            assertContains("TEST DATA FOR TIKA.", content);
-            assertContains("This is text inside an encrypted 7zip (7z) file.", content);
-            assertContains("It should be processed by Tika just fine!", content);
-            assertContains("TIKA-1521", content);
-        } else {
-            //if jce is not installed, test for IOException wrapped in TikaException
-            boolean ioe = false;
-            recursingContext.set(PasswordProvider.class, new PasswordProvider() {
-                @Override
-                public String getPassword(Metadata metadata) {
-                    return "Tika";
-                }
-            });
-            handler = new BodyContentHandler();
-            try (InputStream stream = Seven7ParserTest.class.getResourceAsStream(
-                    "/test-documents/test7Z_protected_passTika.7z")) {
-                AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
-            } catch (TikaException e) {
-                ioe = true;
-            }
-            assertTrue("IOException because JCE was not installed", ioe);
-        }
-    }
-
-    private static boolean isStrongCryptoAvailable() throws NoSuchAlgorithmException {
-        return Cipher.getMaxAllowedKeyLength("AES/ECB/PKCS5Padding") >= 256;
-    }
 }
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/TarParserTest.java b/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/TarParserTest.java
index abdd3f4..286538d 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/TarParserTest.java
+++ b/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/TarParserTest.java
@@ -33,38 +33,6 @@ import org.xml.sax.ContentHandler;
  */
 public class TarParserTest extends AbstractPkgTest {
 
-    @Test
-    public void testTarParsing() throws Exception {
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = TarParserTest.class.getResourceAsStream(
-                "/test-documents/test-documents.tar")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
-        }
-
-        assertEquals("application/x-gtar", metadata.get(Metadata.CONTENT_TYPE));
-        String content = handler.toString();
-        assertContains("test-documents/testEXCEL.xls", content);
-        assertContains("Sample Excel Worksheet", content);
-        assertContains("test-documents/testHTML.html", content);
-        assertContains("Test Indexation Html", content);
-        assertContains("test-documents/testOpenOffice2.odt", content);
-        assertContains("This is a sample Open Office document", content);
-        assertContains("test-documents/testPDF.pdf", content);
-        assertContains("Apache Tika", content);
-        assertContains("test-documents/testPPT.ppt", content);
-        assertContains("Sample Powerpoint Slide", content);
-        assertContains("test-documents/testRTF.rtf", content);
-        assertContains("indexation Word", content);
-        assertContains("test-documents/testTXT.txt", content);
-        assertContains("Test d'indexation de Txt", content);
-        assertContains("test-documents/testWORD.doc", content);
-        assertContains("This is a sample Microsoft Word Document", content);
-        assertContains("test-documents/testXML.xml", content);
-        assertContains("Rida Benjelloun", content);
-    }
-
     /**
      * Tests that the ParseContext parser is correctly
      *  fired for all the embedded entries.
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java b/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java
index 25fcfb1..32ac389 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java
+++ b/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java
@@ -47,38 +47,6 @@ import org.xml.sax.helpers.DefaultHandler;
  */
 public class ZipParserTest extends AbstractPkgTest {
 
-    @Test
-    public void testZipParsing() throws Exception {
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = ZipParserTest.class.getResourceAsStream(
-                "/test-documents/test-documents.zip")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
-        }
-
-        assertEquals("application/zip", metadata.get(Metadata.CONTENT_TYPE));
-        String content = handler.toString();
-        assertContains("testEXCEL.xls", content);
-        assertContains("Sample Excel Worksheet", content);
-        assertContains("testHTML.html", content);
-        assertContains("Test Indexation Html", content);
-        assertContains("testOpenOffice2.odt", content);
-        assertContains("This is a sample Open Office document", content);
-        assertContains("testPDF.pdf", content);
-        assertContains("Apache Tika", content);
-        assertContains("testPPT.ppt", content);
-        assertContains("Sample Powerpoint Slide", content);
-        assertContains("testRTF.rtf", content);
-        assertContains("indexation Word", content);
-        assertContains("testTXT.txt", content);
-        assertContains("Test d'indexation de Txt", content);
-        assertContains("testWORD.doc", content);
-        assertContains("This is a sample Microsoft Word Document", content);
-        assertContains("testXML.xml", content);
-        assertContains("Rida Benjelloun", content);
-    }
-
     /**
      * Tests that the ParseContext parser is correctly
      *  fired for all the embedded entries.
@@ -137,43 +105,6 @@ public class ZipParserTest extends AbstractPkgTest {
         assertContains("README", content);
     }
 
-    private class GatherRelIDsDocumentExtractor implements EmbeddedDocumentExtractor {
-        public Set<String> allRelIDs = new HashSet<String>();
-        public boolean shouldParseEmbedded(Metadata metadata) {      
-            String relID = metadata.get(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID);
-            if (relID != null) {
-                allRelIDs.add(relID);
-            }
-            return false;
-        }
-
-        public void parseEmbedded(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, boolean outputHtml) {
-            throw new UnsupportedOperationException("should never be called");
-        }
-    }
-
-    // TIKA-1036
-    @Test
-    public void testPlaceholders() throws Exception {
-        String xml = getXML("testEmbedded.zip").xml;
-        assertContains("<div class=\"embedded\" id=\"test1.txt\" />", xml);
-        assertContains("<div class=\"embedded\" id=\"test2.txt\" />", xml);
-
-        // Also make sure EMBEDDED_RELATIONSHIP_ID was
-        // passed when parsing the embedded docs:
-        ParseContext context = new ParseContext();
-        GatherRelIDsDocumentExtractor relIDs = new GatherRelIDsDocumentExtractor();
-        context.set(EmbeddedDocumentExtractor.class, relIDs);
-        try (InputStream input = getResourceAsStream("/test-documents/testEmbedded.zip")) {
-            AUTO_DETECT_PARSER.parse(input,
-                    new BodyContentHandler(),
-                    new Metadata(),
-                    context);
-        }
-
-        assertTrue(relIDs.allRelIDs.contains("test1.txt"));
-        assertTrue(relIDs.allRelIDs.contains("test2.txt"));
-    }
 
     @Test // TIKA-936
     public void testCustomEncoding() throws Exception {
@@ -198,31 +129,6 @@ public class ZipParserTest extends AbstractPkgTest {
     }
 
     @Test
-    public void testZipEncrypted() throws Exception {
-        List<Metadata> metadataList = getRecursiveMetadata("testZipEncrypted.zip");
-        assertEquals(2, metadataList.size());
-        String[] values = metadataList.get(0).getValues(TikaCoreProperties.TIKA_META_EXCEPTION_EMBEDDED_STREAM);
-        assertNotNull(values);
-        assertEquals(1, values.length);
-        assertContains("EncryptedDocumentException: stream (encrypted.txt) is encrypted", values[0]);
-
-
-        assertContains("hello world", metadataList.get(1).get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
-    }
-
-    @Test
-    public void testKMZDetection() throws Exception {
-        List<Metadata> metadataList = getRecursiveMetadata("testKMZ.kmz");
-        assertEquals("application/vnd.google-earth.kmz", metadataList.get(0).get(HttpHeaders.CONTENT_TYPE));
-    }
-
-    @Test
-    public void testJARDetection() throws Exception {
-        List<Metadata> metadataList = getRecursiveMetadata("testJAR.jar");
-        assertEquals("application/java-archive", metadataList.get(0).get(HttpHeaders.CONTENT_TYPE));
-    }
-
-    @Test
     public void testQuineRecursiveParserWrapper() throws Exception {
         //received permission from author via dm
         //2019-07-25 to include
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZlibParserTest.java b/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZlibParserTest.java
index 5d2faaf..30e3868 100644
--- a/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZlibParserTest.java
+++ b/tika-parser-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZlibParserTest.java
@@ -29,21 +29,6 @@ import org.xml.sax.ContentHandler;
  * Test case for parsing zlib compressed
  */
 public class ZlibParserTest extends AbstractPkgTest {
-    @Test
-    public void testZlibParsing() throws Exception {
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = ZipParserTest.class.getResourceAsStream(
-                "/test-documents/testTXT.zlib")) {
-            AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
-        }
-
-        assertEquals("application/zlib", metadata.get(Metadata.CONTENT_TYPE));
-        String content = handler.toString();
-        assertContains("Test d'indexation de Txt", content);
-        assertContains("http://www.apache.org", content);
-    }
 
     /**
      * Tests that the ParseContext parser is correctly
diff --git a/tika-parsers/src/test/resources/test-documents/full_encrypted.7z b/tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/full_encrypted.7z
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/full_encrypted.7z
rename to tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/full_encrypted.7z
diff --git a/tika-parsers/src/test/resources/test-documents/moby.zip b/tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/moby.zip
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/moby.zip
rename to tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/moby.zip
diff --git a/tika-parsers/src/test/resources/test-documents/test7Z_protected_passTika.7z b/tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/test7Z_protected_passTika.7z
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/test7Z_protected_passTika.7z
rename to tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/test7Z_protected_passTika.7z
diff --git a/tika-parsers/src/test/resources/test-documents/testBROTLI_compressed.br b/tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testBROTLI_compressed.br
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testBROTLI_compressed.br
rename to tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testBROTLI_compressed.br
diff --git a/tika-parsers/src/test/resources/test-documents/testEmbedded.zip b/tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testEmbedded.zip
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testEmbedded.zip
rename to tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testEmbedded.zip
diff --git a/tika-parsers/src/test/resources/test-documents/testJAR_with_HTML.jar b/tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testJAR_with_HTML.jar
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testJAR_with_HTML.jar
rename to tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testJAR_with_HTML.jar
diff --git a/tika-parsers/src/test/resources/test-documents/testJAR_with_PEHDR.jar b/tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testJAR_with_PEHDR.jar
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testJAR_with_PEHDR.jar
rename to tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testJAR_with_PEHDR.jar
diff --git a/tika-parsers/src/test/resources/test-documents/testSnappy-framed.sz b/tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testSnappy-framed.sz
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testSnappy-framed.sz
rename to tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testSnappy-framed.sz
diff --git a/tika-parsers/src/test/resources/test-documents/testTXT.zlib b/tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testTXT.zlib
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testTXT.zlib
rename to tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testTXT.zlib
diff --git a/tika-parsers/src/test/resources/test-documents/testTXT.zlib0 b/tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testTXT.zlib0
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testTXT.zlib0
rename to tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testTXT.zlib0
diff --git a/tika-parsers/src/test/resources/test-documents/testTXT.zlib5 b/tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testTXT.zlib5
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testTXT.zlib5
rename to tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testTXT.zlib5
diff --git a/tika-parsers/src/test/resources/test-documents/testTXT.zlib9 b/tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testTXT.zlib9
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testTXT.zlib9
rename to tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testTXT.zlib9
diff --git a/tika-parser-modules/tika-parser-text-module/pom.xml b/tika-parser-modules/tika-parser-text-module/pom.xml
index c65dbd8..c92d22e 100644
--- a/tika-parser-modules/tika-parser-text-module/pom.xml
+++ b/tika-parser-modules/tika-parser-text-module/pom.xml
@@ -58,6 +58,24 @@
         <plugins>
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-jar-plugin</artifactId>
+                <configuration>
+                    <archive>
+                        <manifestEntries>
+                            <Automatic-Module-Name>org.apache.tika.parser.txt</Automatic-Module-Name>
+                        </manifestEntries>
+                    </archive>
+                </configuration>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>test-jar</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-dependency-plugin</artifactId>
             </plugin>
         </plugins>
diff --git a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/ZipContainerDetector.java b/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/DefaultZipContainerDetector.java
similarity index 74%
copy from tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/ZipContainerDetector.java
copy to tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/DefaultZipContainerDetector.java
index 9b58de2..cadf38d 100644
--- a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/ZipContainerDetector.java
+++ b/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/DefaultZipContainerDetector.java
@@ -18,13 +18,15 @@ package org.apache.tika.detect.zip;
 
 import org.apache.commons.compress.archivers.ArchiveException;
 import org.apache.commons.compress.archivers.ArchiveStreamFactory;
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
 import org.apache.commons.compress.archivers.zip.ZipFile;
 import org.apache.commons.compress.compressors.CompressorException;
 import org.apache.commons.compress.compressors.CompressorStreamFactory;
 import org.apache.tika.config.Field;
 import org.apache.tika.config.ServiceLoader;
-import org.apache.tika.detect.DefaultEncodingDetector;
 import org.apache.tika.detect.Detector;
+import org.apache.tika.io.CloseShieldInputStream;
 import org.apache.tika.io.IOUtils;
 import org.apache.tika.io.LookaheadInputStream;
 import org.apache.tika.io.TikaInputStream;
@@ -36,10 +38,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.util.List;
 
-public class ZipContainerDetector implements Detector {
-
-
-
+public class DefaultZipContainerDetector implements Detector {
 
     //Regrettably, some tiff files can be incorrectly identified
     //as tar files.  We need this ugly workaround to rule out TIFF.
@@ -61,25 +60,25 @@ public class ZipContainerDetector implements Detector {
     @Field
     int markLimit = 16 * 1024 * 1024;
 
-    List<ZipDetector> zipDetectors;
+    List<ZipContainerDetector> zipDetectors;
 
-    public ZipContainerDetector() {
-        this(new ServiceLoader(DefaultEncodingDetector.class.getClassLoader()));
+    public DefaultZipContainerDetector() {
+        this(new ServiceLoader(DefaultZipContainerDetector.class.getClassLoader()));
     }
 
-    public ZipContainerDetector(ServiceLoader loader) {
-        this(loader.loadServiceProviders(ZipDetector.class));
+    public DefaultZipContainerDetector(ServiceLoader loader) {
+        this(loader.loadServiceProviders(ZipContainerDetector.class));
     }
 
-    public ZipContainerDetector(List<ZipDetector> zipDetectors) {
-        //OPCBased needs to be last!!!
+    public DefaultZipContainerDetector(List<ZipContainerDetector> zipDetectors) {
+        //TODO: OPCBased needs to be last!!!
         this.zipDetectors = zipDetectors;
     }
 
     /**
      * If this is less than 0, the file will be spooled to disk,
      * and detection will run on the full file.
-     * If this is greater than 0, the {@link StreamingZipContainerDetector}
+     * If this is greater than 0, the {@link DeprecatedStreamingZipContainerDetector}
      * will be called only up to the markLimit.
      *
      * @param markLimit mark limit for streaming detection
@@ -121,15 +120,13 @@ public class ZipContainerDetector implements Detector {
             }
 
             try (LookaheadInputStream lookahead = new LookaheadInputStream(input, markLimit)) {
-                //TODO: figure out this one
-                //return streamingZipContainerDetector.detect(lookahead, metadata);
+                return detectStreaming(lookahead, metadata);
             }
         } else if (!type.equals(MediaType.OCTET_STREAM)) {
             return type;
         } else {
             return detectCompressorFormat(prefix, length);
         }
-        return PackageConstants.ZIP;
     }
 
     /**
@@ -144,7 +141,7 @@ public class ZipContainerDetector implements Detector {
             ZipFile zip = new ZipFile(tis.getFile()); // TODO: hasFile()?
 
             try{
-            for (ZipDetector zipDetector : zipDetectors) {
+            for (ZipContainerDetector zipDetector : zipDetectors) {
                 MediaType type = zipDetector.detect(zip, tis);
                 if (type != null) {
                     return type;
@@ -208,4 +205,42 @@ public class ZipContainerDetector implements Detector {
             return MediaType.OCTET_STREAM;
         }
     }
+    /** Streams zip entries to the registered detectors; uses {@code finalDetect} if none decides early. */
+    private MediaType detectStreaming(InputStream input, Metadata metadata) throws IOException { // metadata is not read in this method
+        StreamingDetectContext detectContext = new StreamingDetectContext(); // one context shared by every entry of this pass
+        try (
+                ZipArchiveInputStream zis =
+                        new ZipArchiveInputStream(new CloseShieldInputStream(input))) { // presumably shields input from zis.close() -- TODO confirm
+            ZipArchiveEntry zae = zis.getNextZipEntry(); // NOTE(review): an IOException here is not caught and reaches the caller
+            while (zae != null) {
+                MediaType mt = detect(zae, zis, detectContext);
+                if (mt != null) {
+                    return mt; // first detector to name a type ends the scan
+                }
+                zae = zis.getNextZipEntry();
+            }
+        }
+        return finalDetect(detectContext); // no entry produced an early answer
+    }
+
+    /** Offers one zip entry to each detector in list order; returns the first non-null type, else null. */
+    private MediaType detect(ZipArchiveEntry zae, ZipArchiveInputStream zis, StreamingDetectContext detectContext) {
+        for (ZipContainerDetector d : zipDetectors) {
+            MediaType mt = d.streamingDetectUpdate(zae, zis, detectContext); // detector may also record state in detectContext -- TODO confirm
+            if (mt != null) {
+                return mt;
+            }
+        }
+        return null; // no detector has decided on this entry
+    }
+    /** Asks each detector in list order for a final answer from the context; defaults to plain zip. */
+    private MediaType finalDetect(StreamingDetectContext detectContext) {
+        for (ZipContainerDetector d : zipDetectors) {
+            MediaType mt = d.streamingDetectFinal(detectContext);
+            if (mt != null) {
+                return mt; // first detector with a non-null verdict wins
+            }
+        }
+        return MediaType.APPLICATION_ZIP; // fallback when every detector returned null
+    }
 }
diff --git a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/StreamingZipContainerDetector.java b/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/DeprecatedStreamingZipContainerDetector.java
similarity index 99%
rename from tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/StreamingZipContainerDetector.java
rename to tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/DeprecatedStreamingZipContainerDetector.java
index f0e9493..63b4191 100644
--- a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/StreamingZipContainerDetector.java
+++ b/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/DeprecatedStreamingZipContainerDetector.java
@@ -40,7 +40,7 @@ import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;
 
-public class StreamingZipContainerDetector extends ZipContainerDetectorBase implements Detector {
+public class DeprecatedStreamingZipContainerDetector extends ZipContainerDetectorBase implements Detector {
 
     private static final int MAX_MIME_TYPE = 1024;
     private static final int MAX_MANIFEST = 20 * 1024 * 1024;
diff --git a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/IPADetector.java b/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/IPADetector.java
index 3702ffe..78585a9 100644
--- a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/IPADetector.java
+++ b/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/IPADetector.java
@@ -17,18 +17,22 @@
 package org.apache.tika.detect.zip;
 
 import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
 import org.apache.commons.compress.archivers.zip.ZipFile;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.mime.MediaType;
 
 import java.io.IOException;
+import java.io.InputStream;
 import java.util.Enumeration;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Set;
 import java.util.regex.Pattern;
 
-public class IPADetector implements ZipDetector {
+public class IPADetector implements ZipContainerDetector {
+
+    static final MediaType IPA = MediaType.application("x-itunes-ipa");
 
     /**
      * To be considered as an IPA file, it needs to match all of these
@@ -47,20 +51,20 @@ public class IPADetector implements ZipDetector {
     @Override
     public MediaType detect(ZipFile zip, TikaInputStream tis) throws IOException {
         // Note - consider generalising this logic, if another format needs many regexp matching
-        Set<Pattern> tmpPatterns = (Set<Pattern>)ipaEntryPatterns.clone();
+        TmpPatterns tmpPatterns = new TmpPatterns();
 
         Enumeration<ZipArchiveEntry> entries = zip.getEntries();
         while (entries.hasMoreElements()) {
             ZipArchiveEntry entry = entries.nextElement();
             String name = entry.getName();
 
-            Iterator<Pattern> ip = tmpPatterns.iterator();
+            Iterator<Pattern> ip = tmpPatterns.patterns.iterator();
             while (ip.hasNext()) {
                 if (ip.next().matcher(name).matches()) {
                     ip.remove();
                 }
             }
-            if (tmpPatterns.isEmpty()) {
+            if (tmpPatterns.patterns.isEmpty()) {
                 // We've found everything we need to find
                 return MediaType.application("x-itunes-ipa");
             }
@@ -70,4 +74,46 @@ public class IPADetector implements ZipDetector {
         return null;
 
     }
+
+    @Override
+    public MediaType streamingDetectUpdate(ZipArchiveEntry zae,
+                                           InputStream zis,
+                                           StreamingDetectContext detectContext) {
+        String name = zae.getName();
+        TmpPatterns tmp = detectContext.get(TmpPatterns.class);
+        if (tmp == null) {
+            tmp = new TmpPatterns();
+            detectContext.set(TmpPatterns.class, tmp);
+        }
+
+        Iterator<Pattern> ip = tmp.patterns.iterator();
+        while (ip.hasNext()) {
+            if (ip.next().matcher(name).matches()) {
+                ip.remove();
+            }
+        }
+        if (tmp.patterns.isEmpty()) {
+            // We've found everything we need to find
+            return IPA;
+        }
+        return null;
+    }
+
+    @Override
+    public MediaType streamingDetectFinal(StreamingDetectContext detectContext) {
+        TmpPatterns tmp = detectContext.get(TmpPatterns.class);
+        if (tmp == null) {
+            return null;
+        }
+        if (tmp.patterns.isEmpty()) {
+            // We've found everything we need to find
+            return IPA;
+        }
+        detectContext.remove(TmpPatterns.class);
+        return null;
+    }
+
+    private static class TmpPatterns {
+        Set<Pattern> patterns = (Set<Pattern>)ipaEntryPatterns.clone();
+    }
 }
diff --git a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/JarDetector.java b/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/JarDetector.java
index cf6445d..60cc95c 100644
--- a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/JarDetector.java
+++ b/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/JarDetector.java
@@ -16,13 +16,19 @@
  */
 package org.apache.tika.detect.zip;
 
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
 import org.apache.commons.compress.archivers.zip.ZipFile;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.mime.MediaType;
 
 import java.io.IOException;
+import java.io.InputStream;
+
+public class JarDetector implements ZipContainerDetector {
+
+    private static final SeenManifest SEEN_MANIFEST = new SeenManifest(); // shared marker: SeenManifest has no fields
 
-public class JarDetector implements ZipDetector {
     @Override
     public MediaType detect(ZipFile zip, TikaInputStream tis) throws IOException {
         if (zip.getEntry("META-INF/MANIFEST.MF") != null) {
@@ -52,4 +58,46 @@ public class JarDetector implements ZipDetector {
             return null;
         }
     }
+
+    /**
+     * Inspects one entry name: an Android manifest decides immediately, a jar
+     * manifest is remembered, and WAR/EAR markers only count after that manifest.
+     */
+    @Override
+    public MediaType streamingDetectUpdate(ZipArchiveEntry zae,
+                                           InputStream zis,
+                                           StreamingDetectContext detectContext) {
+        String name = zae.getName();
+        if (name.equals("AndroidManifest.xml")) {
+            // Is it an Android APK?  Decided whether or not a jar manifest was seen.
+            return MediaType.application("vnd.android.package-archive");
+        } else if (name.equals("META-INF/MANIFEST.MF")) {
+            // It's a Jar file, or something based on Jar
+            detectContext.set(SeenManifest.class, SEEN_MANIFEST);
+        }
+
+        if (detectContext.get(SeenManifest.class) == null) {
+            return null;
+        }
+        // Check for WAR and EAR
+        if (name.equals("WEB-INF/")) {
+            return MediaType.application("x-tika-java-web-archive");
+        }
+        if (name.equals("META-INF/application.xml")) {
+            return MediaType.application("x-tika-java-enterprise-archive");
+        }
+        return null;
+    }
+
+    @Override
+    public MediaType streamingDetectFinal(StreamingDetectContext detectContext) {
+        if (detectContext.get(SeenManifest.class) != null) {
+            // Looks like a regular Jar Archive
+            return MediaType.application("java-archive");
+
+        }
+        return null;
+    }
+
+    private static class SeenManifest { }
 }
diff --git a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/KMZDetector.java b/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/KMZDetector.java
index f848d6a..e2cf83a 100644
--- a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/KMZDetector.java
+++ b/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/KMZDetector.java
@@ -17,14 +17,18 @@
 package org.apache.tika.detect.zip;
 
 import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
 import org.apache.commons.compress.archivers.zip.ZipFile;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.mime.MediaType;
 
 import java.io.IOException;
+import java.io.InputStream;
 import java.util.Enumeration;
 
-public class KMZDetector implements ZipDetector {
+import static org.apache.tika.detect.zip.PackageConstants.KMZ;
+
+public class KMZDetector implements ZipContainerDetector {
     @Override
     public MediaType detect(ZipFile zip, TikaInputStream tis) throws IOException {
         boolean kmlFound = false;
@@ -49,4 +53,46 @@ public class KMZDetector implements ZipDetector {
             return null;
         }
     }
+
+    @Override
+    public MediaType streamingDetectUpdate(ZipArchiveEntry zae,
+                                           InputStream zis, StreamingDetectContext detectContext) {
+        String name = zae.getName();
+
+        if (name.indexOf('/') != -1
+                || name.indexOf('\\') != -1) {
+            return null;
+        }
+        if (name.endsWith(".kml")) {
+            KMLCounter counter = detectContext.get(KMLCounter.class);
+            if (counter == null) {
+                counter = new KMLCounter();
+                detectContext.set(KMLCounter.class, counter);
+            }
+            counter.increment();
+        }
+        return null;
+    }
+
+    @Override
+    public MediaType streamingDetectFinal(StreamingDetectContext detectContext) {
+        // KMZ only when exactly one root-level .kml entry was counted while streaming.
+        KMLCounter counter = detectContext.get(KMLCounter.class);
+        if (counter != null && counter.getCount() == 1) {
+            return KMZ;
+        }
+        return null;
+    }
+
+    private static class KMLCounter {
+        private int cnt = 0;
+
+        int getCount() {
+            return cnt;
+        }
+
+        void increment() {
+            cnt++;
+        }
+    }
 }
diff --git a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/OpenDocumentDetector.java b/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/OpenDocumentDetector.java
index e0b21d9..c7b61a0 100644
--- a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/OpenDocumentDetector.java
+++ b/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/OpenDocumentDetector.java
@@ -17,6 +17,7 @@
 package org.apache.tika.detect.zip;
 
 import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
 import org.apache.commons.compress.archivers.zip.ZipFile;
 import org.apache.tika.io.IOUtils;
 import org.apache.tika.io.TikaInputStream;
@@ -24,10 +25,11 @@ import org.apache.tika.mime.MediaType;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 
-public class OpenDocumentDetector implements ZipDetector {
+public class OpenDocumentDetector implements ZipContainerDetector {
     @Override
     public MediaType detect(ZipFile zip, TikaInputStream tis) throws IOException {
         try {
@@ -43,4 +45,22 @@ public class OpenDocumentDetector implements ZipDetector {
             return null;
         }
     }
+
+    @Override
+    public MediaType streamingDetectUpdate(ZipArchiveEntry zae, InputStream zis, StreamingDetectContext detectContext) {
+        // Only the "mimetype" entry is consulted; its content is parsed as the type.
+        if (!"mimetype".equals(zae.getName())) {
+            return null;
+        }
+        try {
+            return MediaType.parse(IOUtils.toString(zis, UTF_8));
+        } catch (IOException e) {
+            return null;
+        }
+    }
+
+    @Override
+    public MediaType streamingDetectFinal(StreamingDetectContext detectContext) {
+        return null;
+    }
 }
diff --git a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/StarOfficeDetector.java b/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/StarOfficeDetector.java
index bc484fe..bae797c 100644
--- a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/StarOfficeDetector.java
+++ b/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/StarOfficeDetector.java
@@ -17,24 +17,115 @@
 package org.apache.tika.detect.zip;
 
 import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
 import org.apache.commons.compress.archivers.zip.ZipFile;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.OfflineContentHandler;
+import org.apache.tika.sax.StoppingEarlyException;
+import org.apache.tika.utils.XMLReaderUtils;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+public class StarOfficeDetector implements ZipContainerDetector {
+
+    static final Map<String, MediaType> STAR_OFFICE_X = new HashMap<>();
+
+    static {
+        STAR_OFFICE_X.put("application/vnd.sun.xml.writer",
+                MediaType.application("vnd.sun.xml.writer"));
+        STAR_OFFICE_X.put("application/vnd.sun.xml.calc",
+                MediaType.application("vnd.sun.xml.calc"));
+        STAR_OFFICE_X.put("application/vnd.sun.xml.draw",
+                MediaType.application("vnd.sun.xml.draw"));
+        STAR_OFFICE_X.put("application/vnd.sun.xml.impress",
+                MediaType.application("vnd.sun.xml.impress"));
+        STAR_OFFICE_X.put("application/vnd.sun.star.configuration-data",
+                MediaType.application("vnd.openofficeorg.extension"));
+    }
+
+    static final MediaType BAU =
+            MediaType.application("vnd.openofficeorg.autotext");
+
 
-public class StarOfficeDetector implements ZipDetector {
     @Override
     public MediaType detect(ZipFile zip, TikaInputStream tis) throws IOException {
         ZipArchiveEntry zae = zip.getEntry("META-INF/manifest.xml");
-
         if (zae == null) {
             return null;
         }
+        try (InputStream is = zip.getInputStream(zae)) { // always release the entry stream
+            return detectStarOfficeX(is);
+        }
+    }
+
+    @Override
+    public MediaType streamingDetectUpdate(ZipArchiveEntry zae, InputStream zis,
+                                           StreamingDetectContext detectContext) {
+        String name = zae.getName();
+        if (! "META-INF/manifest.xml".equals(name)) {
+            return null;
+        }
 
-        try (InputStream is = zip.getInputStream(zae)) {
-            return ZipContainerDetectorBase.detectStarOfficeX(is);
+        return detectStarOfficeX(zis);
+
+    }
+
+    @Override
+    public MediaType streamingDetectFinal(StreamingDetectContext detectContext) {
+        return null;
+    }
+
+    //parse the META-INF/manifest.xml file
+    static MediaType detectStarOfficeX(InputStream is) {
+        StarOfficeXHandler handler = new StarOfficeXHandler();
+        try {
+            XMLReaderUtils.parseSAX(is,
+                    new OfflineContentHandler(handler),
+                    new ParseContext());
+        } catch (SecurityException e) {
+            throw e;
+        } catch (Exception e) {
         }
+        return handler.mediaType;
     }
+
+    private static class StarOfficeXHandler extends DefaultHandler {
+
+        private MediaType mediaType = null;
+
+        @Override
+        public void startElement(String uri, String localName,
+                                 String name, Attributes attrs) throws SAXException {
+            if (! "file-entry".equals(localName)) {
+                return;
+            }
+            String mediaTypeString = null;
+            String fullPath = null;
+            for (int i = 0; i < attrs.getLength(); i++) {
+                String attrName = attrs.getLocalName(i);
+                if (attrName.equals("media-type")) {
+                    mediaTypeString = attrs.getValue(i);
+                    if (STAR_OFFICE_X.containsKey(mediaTypeString)) {
+                        mediaType = STAR_OFFICE_X.get(mediaTypeString);
+                        throw StoppingEarlyException.INSTANCE;
+                    }
+                } else if (attrName.equals("full-path")) {
+                    fullPath = attrs.getValue(i);
+                }
+            }
+            if ("".equals(mediaTypeString) && "/".equals(fullPath)) {
+                mediaType = BAU;
+                throw StoppingEarlyException.INSTANCE;
+            }
+        }
+    }
+
 }
diff --git a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/StreamingDetectContext.java b/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/StreamingDetectContext.java
new file mode 100644
index 0000000..dc6e3f1
--- /dev/null
+++ b/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/StreamingDetectContext.java
@@ -0,0 +1,62 @@
+package org.apache.tika.detect.zip;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Per-detection scratch space in which {@link ZipContainerDetector}s keep
+ * state between zip entries; values are stored under their class name.
+ */
+class StreamingDetectContext {
+
+    /** Map of objects in this context, keyed by {@link Class#getName()} */
+    private final Map<String, Object> context = new HashMap<>();
+
+    /**
+     * Adds the given value to the context as an implementation of the given
+     * interface.
+     *
+     * @param key the interface implemented by the given value
+     * @param value the value to be added, or <code>null</code> to remove
+     */
+    public <T> void set(Class<T> key, T value) {
+        if (value != null) {
+            context.put(key.getName(), value);
+        } else {
+            context.remove(key.getName());
+        }
+    }
+
+    /**
+     * Returns the object in this context that implements the given interface.
+     *
+     * @param key the interface implemented by the requested object
+     * @return the object that implements the given interface,
+     *         or <code>null</code> if not found
+     */
+    @SuppressWarnings("unchecked")
+    public <T> T get(Class<T> key) {
+        return (T) context.get(key.getName());
+    }
+
+    /**
+     * Returns the object in this context that implements the given interface,
+     * or the given default value if such an object is not found.
+     *
+     * @param key the interface implemented by the requested object
+     * @param defaultValue value to return if the requested object is not found
+     * @return the object that implements the given interface,
+     *         or the given default value if not found
+     */
+    public <T> T get(Class<T> key, T defaultValue) {
+        T value = get(key);
+        return value != null ? value : defaultValue;
+    }
+
+    /**
+     * Removes the object stored for the given class; entries are keyed by class name.
+     */
+    public void remove(Class<?> key) {
+        context.remove(key.getName());
+    }
+}
diff --git a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/ZipContainerDetector.java b/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/ZipContainerDetector.java
index 9b58de2..2db5e94 100644
--- a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/ZipContainerDetector.java
+++ b/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/ZipContainerDetector.java
@@ -16,196 +16,50 @@
  */
 package org.apache.tika.detect.zip;
 
-import org.apache.commons.compress.archivers.ArchiveException;
-import org.apache.commons.compress.archivers.ArchiveStreamFactory;
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
 import org.apache.commons.compress.archivers.zip.ZipFile;
-import org.apache.commons.compress.compressors.CompressorException;
-import org.apache.commons.compress.compressors.CompressorStreamFactory;
-import org.apache.tika.config.Field;
-import org.apache.tika.config.ServiceLoader;
-import org.apache.tika.detect.DefaultEncodingDetector;
-import org.apache.tika.detect.Detector;
-import org.apache.tika.io.IOUtils;
-import org.apache.tika.io.LookaheadInputStream;
 import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 
-import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.List;
 
-public class ZipContainerDetector implements Detector {
-
-
-
-
-    //Regrettably, some tiff files can be incorrectly identified
-    //as tar files.  We need this ugly workaround to rule out TIFF.
-    //If commons-compress ever chooses to take over TIFF detection
-    //we can remove all of this. See TIKA-2591.
-    private final static MediaType TIFF = MediaType.image("tiff");
-    private final static byte[][] TIFF_SIGNATURES = new byte[3][];
-    static {
-        TIFF_SIGNATURES[0] = new byte[]{'M','M',0x00,0x2a};
-        TIFF_SIGNATURES[1] = new byte[]{'I','I',0x2a, 0x00};
-        TIFF_SIGNATURES[2] = new byte[]{'M','M', 0x00, 0x2b};
-    }
-
-    /** Serial version UID */
-    private static final long serialVersionUID = 2891763938430295453L;
-
-    //this has to be > 100,000 to handle some of the iworks files
-    //in our unit tests
-    @Field
-    int markLimit = 16 * 1024 * 1024;
-
-    List<ZipDetector> zipDetectors;
-
-    public ZipContainerDetector() {
-        this(new ServiceLoader(DefaultEncodingDetector.class.getClassLoader()));
-    }
-
-    public ZipContainerDetector(ServiceLoader loader) {
-        this(loader.loadServiceProviders(ZipDetector.class));
-    }
+/**
+ * Classes that implement this must be able to detect on a ZipFile and in streaming mode.
+ * In streaming mode, each {@link ZipArchiveEntry} is "updated" and then
+ * {@link #streamingDetectFinal(StreamingDetectContext)} is
+ * called for a final decision.
+ *
+ * During streaming detection, state is stored in the StreamingDetectContext
+ */
+public interface ZipContainerDetector {
 
-    public ZipContainerDetector(List<ZipDetector> zipDetectors) {
-        //OPCBased needs to be last!!!
-        this.zipDetectors = zipDetectors;
-    }
+    /**
+     * If detection is successful, the ZipContainerDetector should set the zip
+     * file or OPCPackage in TikaInputStream.setOpenContainer()
+     * @param zipFile
+     * @param tis
+     * @return
+     * @throws IOException
+     */
+    MediaType detect(ZipFile zipFile, TikaInputStream tis) throws IOException;
 
     /**
-     * If this is less than 0, the file will be spooled to disk,
-     * and detection will run on the full file.
-     * If this is greater than 0, the {@link StreamingZipContainerDetector}
-     * will be called only up to the markLimit.
+     * Try to detect on a specific entry.  Detectors are allowed to store
+     * state (e.g. "remember what they've seen") in the {@link StreamingDetectContext}
      *
-     * @param markLimit mark limit for streaming detection
+     * @param zae
+     * @return
      */
-    public void setMarkLimit(int markLimit) {
-        this.markLimit = markLimit;
-    }
-
-    @Override
-    public MediaType detect(InputStream input, Metadata metadata) throws IOException {
-        // Check if we have access to the document
-        if (input == null) {
-            return MediaType.OCTET_STREAM;
-        }
-
-        byte[] prefix = new byte[1024]; // enough for all known archive formats
-        input.mark(1024);
-        int length = -1;
-        try {
-            length = IOUtils.read(input, prefix, 0, 1024);
-        } finally {
-            input.reset();
-        }
-
-        MediaType type = detectArchiveFormat(prefix, length);
-
-        if (type == TIFF) {
-            return TIFF;
-        } else if (isZipArchive(type)) {
-
-            if (TikaInputStream.isTikaInputStream(input)) {
-                TikaInputStream tis = TikaInputStream.cast(input);
-                if (markLimit < 0) {
-                    tis.getFile();
-                }
-                if (tis.hasFile()) {
-                    return detectZipFormatOnFile(tis);
-                }
-            }
-
-            try (LookaheadInputStream lookahead = new LookaheadInputStream(input, markLimit)) {
-                //TODO: figure out this one
-                //return streamingZipContainerDetector.detect(lookahead, metadata);
-            }
-        } else if (!type.equals(MediaType.OCTET_STREAM)) {
-            return type;
-        } else {
-            return detectCompressorFormat(prefix, length);
-        }
-        return PackageConstants.ZIP;
-    }
+    MediaType streamingDetectUpdate(ZipArchiveEntry zae, InputStream zis, StreamingDetectContext detectContext);
 
     /**
-     * This will call TikaInputStream's getFile(). If there are no exceptions,
-     * it will place the ZipFile in TikaInputStream's openContainer and leave it
-     * open.
-     * @param tis
+     * After we've finished streaming the zip archive entries,
+     * a detector may make a final decision.
+     *
      * @return
      */
-    private MediaType detectZipFormatOnFile(TikaInputStream tis) {
-        try {
-            ZipFile zip = new ZipFile(tis.getFile()); // TODO: hasFile()?
-
-            try{
-            for (ZipDetector zipDetector : zipDetectors) {
-                MediaType type = zipDetector.detect(zip, tis);
-                if (type != null) {
-                    return type;
-                }
-            }
-            } finally {
-                tis.setOpenContainer(zip);
-            }
-
-        } catch (IOException e) {
-            // ignore
-        }
-        // Fallback: it's still a zip file, we just don't know what kind of one
-        return MediaType.APPLICATION_ZIP;
-    }
-
-
-    static boolean isZipArchive(MediaType type) {
-        return type.equals(PackageConstants.ZIP)
-                || type.equals(PackageConstants.JAR);
-    }
-
-    private static boolean isTiff(byte[] prefix) {
-        for (byte[] sig : TIFF_SIGNATURES) {
-            if(arrayStartWith(sig, prefix)) {
-                return true;
-            }
-        }
-        return false;
-    }
-
-    private static boolean arrayStartWith(byte[] needle, byte[] haystack) {
-        if (haystack.length < needle.length) {
-            return false;
-        }
-        for (int i = 0; i < needle.length; i++) {
-            if (haystack[i] != needle[i]) {
-                return false;
-            }
-        }
-        return true;
-    }
-
-    private static MediaType detectArchiveFormat(byte[] prefix, int length) {
-        if (isTiff(prefix)) {
-            return TIFF;
-        }
-        try {
-            String name = ArchiveStreamFactory.detect(new ByteArrayInputStream(prefix, 0, length));
-            return PackageConstants.getMediaType(name);
-        } catch (ArchiveException e) {
-            return MediaType.OCTET_STREAM;
-        }
-    }
+    MediaType streamingDetectFinal(StreamingDetectContext detectContext);
 
-    private static MediaType detectCompressorFormat(byte[] prefix, int length) {
-        try {
-            String type = CompressorStreamFactory.detect(new ByteArrayInputStream(prefix, 0, length));
-            return CompressorConstants.getMediaType(type);
-        } catch (CompressorException e) {
-            return MediaType.OCTET_STREAM;
-        }
-    }
 }
diff --git a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/ZipContainerDetectorBase.java b/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/ZipContainerDetectorBase.java
index b9e324e..db38843 100644
--- a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/ZipContainerDetectorBase.java
+++ b/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/ZipContainerDetectorBase.java
@@ -74,8 +74,6 @@ abstract class ZipContainerDetectorBase {
     static final MediaType XPS =
             MediaType.application("vnd.ms-xpsdocument");
 
-    static final MediaType BAU =
-            MediaType.application("vnd.openofficeorg.autotext");
 
     static final Set<String> OOXML_HINTS = fillSet(
             "word/document.xml",
@@ -88,20 +86,9 @@ abstract class ZipContainerDetectorBase {
             "xl/worksheets/sheet1.xml"
     );
 
-    static final Map<String, MediaType> STAR_OFFICE_X = new HashMap<>();
-
-    static {
-        STAR_OFFICE_X.put("application/vnd.sun.xml.writer",
-                MediaType.application("vnd.sun.xml.writer"));
-        STAR_OFFICE_X.put("application/vnd.sun.xml.calc",
-                MediaType.application("vnd.sun.xml.calc"));
-        STAR_OFFICE_X.put("application/vnd.sun.xml.draw",
-                MediaType.application("vnd.sun.xml.draw"));
-        STAR_OFFICE_X.put("application/vnd.sun.xml.impress",
-                MediaType.application("vnd.sun.xml.impress"));
-        STAR_OFFICE_X.put("application/vnd.sun.star.configuration-data",
-                MediaType.application("vnd.openofficeorg.extension"));
-    }
+
+
+
     private static Set<String> fillSet(String ... args) {
         Set<String> tmp = new HashSet<>();
         for (String arg : args) {
@@ -154,7 +141,6 @@ abstract class ZipContainerDetectorBase {
             }
         }
         if (kmlFound) {
-            return MediaType.application("vnd.google-earth.kmz");
         }
         return null;
     }
@@ -193,55 +179,6 @@ abstract class ZipContainerDetectorBase {
         return null;
     }
 
-    //parse the META-INF/content.xml file
-    static MediaType detectStarOfficeX(InputStream is) {
-        StarOfficeXHandler handler = new StarOfficeXHandler();
-        try {
-            XMLReaderUtils.parseSAX(is,
-                    new OfflineContentHandler(handler),
-                    new ParseContext());
-        } catch (SecurityException e) {
-            throw e;
-        } catch (Exception e) {
-        }
-        return handler.mediaType;
-    }
-
-    private static class StarOfficeXHandler extends DefaultHandler {
-
-        private MediaType mediaType = null;
-
-        @Override
-        public void startElement(String uri, String localName,
-                                 String name, Attributes attrs) throws SAXException {
-            if (! "file-entry".equals(localName)) {
-                return;
-            }
-            String mediaTypeString = null;
-            String fullPath = null;
-            for (int i = 0; i < attrs.getLength(); i++) {
-                String attrName = attrs.getLocalName(i);
-                if (attrName.equals("media-type")) {
-                    mediaTypeString = attrs.getValue(i);
-                    if (STAR_OFFICE_X.containsKey(mediaTypeString)) {
-                        mediaType = STAR_OFFICE_X.get(mediaTypeString);
-                        throw new StoppingEarlyException();
-                    }
-                } else if (attrName.equals("full-path")) {
-                    fullPath = attrs.getValue(i);
-                }
-            }
-            if ("".equals(mediaTypeString) && "/".equals(fullPath)) {
-                mediaType = BAU;
-                throw new StoppingEarlyException();
-            }
-        }
-    }
 
-    /**
-     * sentinel exception to stop parsing xml once target is found
-     */
-    static class StoppingEarlyException extends SAXException {
 
-    }
 }
diff --git a/tika-parser-modules/tika-parser-zip-commons/src/main/resources/META-INF/services/org.apache.tika.detect.Detector b/tika-parser-modules/tika-parser-zip-commons/src/main/resources/META-INF/services/org.apache.tika.detect.Detector
new file mode 100644
index 0000000..bada2c9
--- /dev/null
+++ b/tika-parser-modules/tika-parser-zip-commons/src/main/resources/META-INF/services/org.apache.tika.detect.Detector
@@ -0,0 +1,15 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+org.apache.tika.detect.zip.DefaultZipContainerDetector
\ No newline at end of file
diff --git a/tika-parser-modules/tika-parser-zip-commons/src/main/resources/META-INF/services/org.apache.tika.detect.zip.ZipContainerDetector b/tika-parser-modules/tika-parser-zip-commons/src/main/resources/META-INF/services/org.apache.tika.detect.zip.ZipContainerDetector
new file mode 100644
index 0000000..2f26d0c
--- /dev/null
+++ b/tika-parser-modules/tika-parser-zip-commons/src/main/resources/META-INF/services/org.apache.tika.detect.zip.ZipContainerDetector
@@ -0,0 +1,19 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+org.apache.tika.detect.zip.IPADetector
+org.apache.tika.detect.zip.JarDetector
+org.apache.tika.detect.zip.KMZDetector
+org.apache.tika.detect.zip.OpenDocumentDetector
+org.apache.tika.detect.zip.StarOfficeDetector
\ No newline at end of file
diff --git a/tika-parser-modules/tika-parser-zip-commons/src/main/resources/META-INF/services/org.apache.tika.detect.zip.ZipDetector b/tika-parser-modules/tika-parser-zip-commons/src/main/resources/META-INF/services/org.apache.tika.detect.zip.ZipDetector
deleted file mode 100644
index 405749b..0000000
--- a/tika-parser-modules/tika-parser-zip-commons/src/main/resources/META-INF/services/org.apache.tika.detect.zip.ZipDetector
+++ /dev/null
@@ -1,5 +0,0 @@
-org.apache.tika.detect.zip.IPADetector
-org.apache.tika.detect.zip.JarDetector
-org.apache.tika.detect.zip.KMZDetector
-org.apache.tika.detect.zip.OpenDocumentDetector
-org.apache.tika.detect.zip.StarOfficeDetector
\ No newline at end of file
diff --git a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/StarOfficeDetector.java b/tika-parser-modules/tika-parser-zip-commons/src/test/java/org/apache/tika/detect/zip/ZipParserTest.java
similarity index 51%
copy from tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/StarOfficeDetector.java
copy to tika-parser-modules/tika-parser-zip-commons/src/test/java/org/apache/tika/detect/zip/ZipParserTest.java
index bc484fe..c00ff45 100644
--- a/tika-parser-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/StarOfficeDetector.java
+++ b/tika-parser-modules/tika-parser-zip-commons/src/test/java/org/apache/tika/detect/zip/ZipParserTest.java
@@ -16,25 +16,30 @@
  */
 package org.apache.tika.detect.zip;
 
-import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
-import org.apache.commons.compress.archivers.zip.ZipFile;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.mime.MediaType;
+import org.apache.tika.TikaTest;
+import org.apache.tika.metadata.HttpHeaders;
+import org.apache.tika.metadata.Metadata;
+import org.junit.Test;
 
-import java.io.IOException;
-import java.io.InputStream;
+import java.util.List;
 
-public class StarOfficeDetector implements ZipDetector {
-    @Override
-    public MediaType detect(ZipFile zip, TikaInputStream tis) throws IOException {
-        ZipArchiveEntry zae = zip.getEntry("META-INF/manifest.xml");
+import static org.junit.Assert.assertEquals;
 
-        if (zae == null) {
-            return null;
-        }
+/**
+ * Test case for parsing zip files.
+ */
+public class ZipParserTest extends TikaTest {
+
+
+    @Test
+    public void testKMZDetection() throws Exception {
+        List<Metadata> metadataList = getRecursiveMetadata("testKMZ.kmz");
+        assertEquals("application/vnd.google-earth.kmz", metadataList.get(0).get(HttpHeaders.CONTENT_TYPE));
+    }
 
-        try (InputStream is = zip.getInputStream(zae)) {
-            return ZipContainerDetectorBase.detectStarOfficeX(is);
-        }
+    @Test
+    public void testJARDetection() throws Exception {
+        List<Metadata> metadataList = getRecursiveMetadata("testJAR.jar");
+        assertEquals("application/java-archive", metadataList.get(0).get(HttpHeaders.CONTENT_TYPE));
     }
 }
diff --git a/tika-parsers/src/test/resources/test-documents/testJAR.jar b/tika-parser-modules/tika-parser-zip-commons/src/test/resources/test-documents/testJAR.jar
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testJAR.jar
rename to tika-parser-modules/tika-parser-zip-commons/src/test/resources/test-documents/testJAR.jar
diff --git a/tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testKMZ.kmz b/tika-parser-modules/tika-parser-zip-commons/src/test/resources/test-documents/testKMZ.kmz
similarity index 100%
rename from tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testKMZ.kmz
rename to tika-parser-modules/tika-parser-zip-commons/src/test/resources/test-documents/testKMZ.kmz
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
index ce3d638..2dfac4f 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
@@ -31,10 +31,8 @@ import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Set;
 
-import org.apache.commons.io.filefilter.TrueFileFilter;
 import org.apache.tika.MultiThreadedTikaTest;
 import org.apache.tika.Tika;
-import org.apache.tika.TikaTest;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.fork.ForkParser;
@@ -272,7 +270,7 @@ public class ForkParserIntegrationTest extends MultiThreadedTikaTest {
         try (ForkParser parser = new ForkParser(ForkParserIntegrationTest.class.getClassLoader(), tika.getParser())) {
             ContentHandler output = new BodyContentHandler();
             InputStream stream = ForkParserIntegrationTest.class.getResourceAsStream(
-                "/test-documents/moby.zip");
+                    "/test-documents/moby.zip");
             ParseContext context = new ParseContext();
             parser.parse(stream, output, new Metadata(), context);
             assertContains("Moby Dick", output.toString());