You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2017/06/01 19:16:32 UTC

[tika] 05/05: TIKA-2341 -- upgrade commons-compress to 1.14, added capabilities for snappy and lz4-framed

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 77900ab626a2a05700cadf46f090966295c29149
Author: tballison <ta...@mitre.org>
AuthorDate: Thu Jun 1 15:16:09 2017 -0400

    TIKA-2341 -- upgrade commons-compress to 1.14, added capabilities for snappy and lz4-framed
---
 LICENSE.txt                                        |  22 +
 tika-bundle/pom.xml                                |   1 +
 tika-parent/pom.xml                                |   2 +-
 .../apache/tika/parser/pkg/CompressorParser.java   |  56 +-
 .../org/apache/tika/parser/pkg/PackageParser.java  |  16 +-
 .../tika/parser/pkg/TikaArchiveStreamFactory.java  | 565 ---------------------
 .../parser/pkg/TikaCompressorStreamFactory.java    | 551 --------------------
 .../tika/parser/pkg/ZipContainerDetector.java      |   6 +-
 .../tika/parser/pkg/CompressorParserTest.java      |  30 +-
 .../apache/tika/parser/pkg/PackageParserTest.java  |   3 +-
 .../resources/test-documents/testLZ4-framed.lz4    | Bin 0 -> 1443 bytes
 .../resources/test-documents/testSnappy-framed.sz  | Bin 0 -> 58586 bytes
 12 files changed, 105 insertions(+), 1147 deletions(-)

diff --git a/LICENSE.txt b/LICENSE.txt
index e3cd6ff..b9b6fcf 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -386,3 +386,25 @@ H2 Database in tika-eval
     (Mozilla Public License) or under the EPL 1.0 (Eclipse Public License).
     An original copy of the license agreement can be found at:
     http://www.h2database.com/html/license.html
+
+org.brotli.dec dependency of commons-compress (MIT License)
+
+Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
\ No newline at end of file
diff --git a/tika-bundle/pom.xml b/tika-bundle/pom.xml
index 5f70dcb..b67d1b5 100644
--- a/tika-bundle/pom.xml
+++ b/tika-bundle/pom.xml
@@ -239,6 +239,7 @@
               org.bouncycastle.operator;resolution:=optional,
               org.bouncycastle.operator.bc;resolution:=optional,
               org.bouncycastle.tsp;resolution:=optional,
+              org.brotli.dec;resolution:=optional,
               org.cyberneko.html.xercesbridge;resolution:=optional,
               org.etsi.uri.x01903.v14;resolution:=optional,
               org.ibex.nestedvm;resolution:=optional,
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 525b26d..dfb8671 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -306,7 +306,7 @@
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <project.reporting.outputEncoding>${project.build.sourceEncoding}</project.reporting.outputEncoding>
     <!-- NOTE: sync tukaani version with commons-compress in tika-parsers -->
-    <commons.compress.version>1.13</commons.compress.version>
+    <commons.compress.version>1.14</commons.compress.version>
     <commons.io.version>2.5</commons.io.version>
     <slf4j.version>1.7.24</slf4j.version>
   </properties>
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java
index ff589e0..48f8bec 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java
@@ -23,8 +23,10 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.util.Set;
 
+import org.apache.commons.compress.MemoryLimitException;
 import org.apache.commons.compress.compressors.CompressorException;
 import org.apache.commons.compress.compressors.CompressorInputStream;
+import org.apache.commons.compress.compressors.CompressorStreamFactory;
 import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
 import org.apache.commons.compress.compressors.deflate.DeflateCompressorInputStream;
 import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
@@ -57,6 +59,11 @@ public class CompressorParser extends AbstractParser {
     /** Serial version UID */
     private static final long serialVersionUID = 2793565792967222459L;
 
+    private static final MediaType BROTLI = MediaType.application("x-brotli");
+    private static final MediaType LZ4_BLOCK = MediaType.application("x-lz4-block");
+    private static final MediaType SNAPPY_RAW = MediaType.application("x-snappy-raw");
+
+
     private static final MediaType BZIP = MediaType.application("x-bzip");
     private static final MediaType BZIP2 = MediaType.application("x-bzip2");
     private static final MediaType GZIP = MediaType.application("gzip");
@@ -64,15 +71,24 @@ public class CompressorParser extends AbstractParser {
     private static final MediaType COMPRESS = MediaType.application("x-compress");
     private static final MediaType XZ = MediaType.application("x-xz");
     private static final MediaType PACK = MediaType.application("x-java-pack200");
-    private static final MediaType SNAPPY = MediaType.application("x-snappy-framed");
+    private static final MediaType SNAPPY_FRAMED = MediaType.application("x-snappy");
     private static final MediaType ZLIB = MediaType.application("zlib");
     private static final MediaType LZMA = MediaType.application("x-lzma");
+    private static final MediaType LZ4_FRAMED = MediaType.application("x-lz4");
 
     private static final Set<MediaType> SUPPORTED_TYPES =
-            MediaType.set(BZIP, BZIP2, GZIP, GZIP_ALT, COMPRESS, XZ, PACK, ZLIB, LZMA);
+            MediaType.set(BZIP, BZIP2, GZIP, GZIP_ALT, LZ4_FRAMED, COMPRESS,
+                    XZ, PACK, SNAPPY_FRAMED, ZLIB, LZMA);
 
     private int memoryLimitInKb = 100000;//100MB
 
+    /**
+     * Maps a compressor stream to a media type based on the stream's concrete class.
+     * @deprecated use {@link #getMediaType(String)}
+     * @param stream stream
+     * @return MediaType
+     */
+    @Deprecated
     static MediaType getMediaType(CompressorInputStream stream) {
         // TODO Add support for the remaining CompressorInputStream formats:
         //   LZ4
@@ -92,7 +108,7 @@ public class CompressorParser extends AbstractParser {
         } else if (stream instanceof FramedSnappyCompressorInputStream ||
                    stream instanceof SnappyCompressorInputStream) {
             // TODO Add unit tests for this format
-            return SNAPPY;
+            return SNAPPY_FRAMED;
         } else if (stream instanceof LZMACompressorInputStream) {
             return LZMA;
         } else {
@@ -101,26 +117,34 @@ public class CompressorParser extends AbstractParser {
     }
 
     static MediaType getMediaType(String name) {
-        if (TikaCompressorStreamFactory.BZIP2.equals(name)) {
+        if (CompressorStreamFactory.BROTLI.equals(name)) {
+            return BROTLI;
+        } else if (CompressorStreamFactory.LZ4_BLOCK.equals(name)) {
+            return LZ4_BLOCK;
+        } else if (CompressorStreamFactory.LZ4_FRAMED.equals(name)) {
+            return LZ4_FRAMED;
+        } else if (CompressorStreamFactory.BZIP2.equals(name)) {
             return BZIP2;
-        } else if (TikaCompressorStreamFactory.GZIP.equals(name)) {
+        } else if (CompressorStreamFactory.GZIP.equals(name)) {
             return GZIP;
-        } else if (TikaCompressorStreamFactory.XZ.equals(name)) {
+        } else if (CompressorStreamFactory.XZ.equals(name)) {
             return XZ;
-        } else if (TikaCompressorStreamFactory.DEFLATE.equals(name)) {
+        } else if (CompressorStreamFactory.DEFLATE.equals(name)) {
             return ZLIB;
-        } else if (TikaCompressorStreamFactory.Z.equals(name)) {
+        } else if (CompressorStreamFactory.Z.equals(name)) {
             return COMPRESS;
-        } else if (TikaCompressorStreamFactory.PACK200.equals(name)) {
+        } else if (CompressorStreamFactory.PACK200.equals(name)) {
             return PACK;
-        } else if (TikaCompressorStreamFactory.SNAPPY_FRAMED.equals(name) ||
-                TikaCompressorStreamFactory.SNAPPY_RAW.equals(name)) {
-            return SNAPPY;
-        } else if (TikaCompressorStreamFactory.LZMA.equals(name)) {
+        } else if (CompressorStreamFactory.SNAPPY_FRAMED.equals(name)) {
+            return SNAPPY_FRAMED;
+        } else if (CompressorStreamFactory.SNAPPY_RAW.equals(name)) {
+            return SNAPPY_RAW;
+        } else if (CompressorStreamFactory.LZMA.equals(name)) {
             return LZMA;
         } else {
             return MediaType.OCTET_STREAM;
         }
+
     }
 
     public Set<MediaType> getSupportedTypes(ParseContext context) {
@@ -149,11 +173,11 @@ public class CompressorParser extends AbstractParser {
                          return false;
                      }
                  });
-            TikaCompressorStreamFactory factory =
-                    new TikaCompressorStreamFactory(options.decompressConcatenated(metadata), memoryLimitInKb);
+            CompressorStreamFactory factory =
+                    new CompressorStreamFactory(options.decompressConcatenated(metadata), memoryLimitInKb);
             cis = factory.createCompressorInputStream(stream);
         } catch (CompressorException e) {
-            if (e.getMessage() != null && e.getMessage().startsWith("MemoryLimitException:")) {
+            if (e.getCause() != null && e.getCause() instanceof MemoryLimitException) {
                 throw new TikaMemoryLimitException(e.getMessage());
             }
             throw new TikaException("Unable to uncompress document stream", e);
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
index 119c2e6..d8341af 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
@@ -116,21 +116,21 @@ public class PackageParser extends AbstractParser {
     }
 
     static MediaType getMediaType(String name) {
-        if (TikaArchiveStreamFactory.JAR.equals(name)) {
+        if (ArchiveStreamFactory.JAR.equals(name)) {
             return JAR;
-        } else if (TikaArchiveStreamFactory.ZIP.equals(name)) {
+        } else if (ArchiveStreamFactory.ZIP.equals(name)) {
             return ZIP;
-        } else if (TikaArchiveStreamFactory.AR.equals(name)) {
+        } else if (ArchiveStreamFactory.AR.equals(name)) {
             return AR;
-        } else if (TikaArchiveStreamFactory.ARJ.equals(name)) {
+        } else if (ArchiveStreamFactory.ARJ.equals(name)) {
             return ARJ;
-        } else if (TikaArchiveStreamFactory.CPIO.equals(name)) {
+        } else if (ArchiveStreamFactory.CPIO.equals(name)) {
             return CPIO;
-        } else if (TikaArchiveStreamFactory.DUMP.equals(name)) {
+        } else if (ArchiveStreamFactory.DUMP.equals(name)) {
             return DUMP;
-        } else if (TikaArchiveStreamFactory.TAR.equals(name)) {
+        } else if (ArchiveStreamFactory.TAR.equals(name)) {
             return TAR;
-        } else if (TikaArchiveStreamFactory.SEVEN_Z.equals(name)) {
+        } else if (ArchiveStreamFactory.SEVEN_Z.equals(name)) {
             return SEVENZ;
         } else {
             return MediaType.OCTET_STREAM;
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/TikaArchiveStreamFactory.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/TikaArchiveStreamFactory.java
deleted file mode 100644
index c4b534f..0000000
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/TikaArchiveStreamFactory.java
+++ /dev/null
@@ -1,565 +0,0 @@
-package org.apache.tika.parser.pkg;
-    /*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.security.AccessController;
-import java.security.PrivilegedAction;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.Locale;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.TreeMap;
-
-import org.apache.commons.compress.archivers.ArchiveException;
-import org.apache.commons.compress.archivers.ArchiveInputStream;
-import org.apache.commons.compress.archivers.ArchiveOutputStream;
-import org.apache.commons.compress.archivers.ArchiveStreamFactory;
-import org.apache.commons.compress.archivers.ArchiveStreamProvider;
-import org.apache.commons.compress.archivers.StreamingNotSupportedException;
-import org.apache.commons.compress.archivers.ar.ArArchiveInputStream;
-import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream;
-import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream;
-import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
-import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream;
-import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;
-import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;
-import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream;
-import org.apache.commons.compress.archivers.sevenz.SevenZFile;
-import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
-import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
-import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
-import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
-import org.apache.commons.compress.utils.IOUtils;
-import org.apache.commons.compress.utils.Lists;
-import org.apache.commons.compress.utils.ServiceLoaderIterator;
-import org.apache.commons.compress.utils.Sets;
-
-/**
- * This is a temporary copy/paste hack from commons-compress for Tika 1.15
- * that 1) allows detection without initialization of a stream and
- * 2) prevents easily preventable OOM on three file formats.
- *
- * Once commons-compress 1.14 is released, we will delete this class
- * and go back to commons-compress's CompressorStreamFactory.
- */
-@Deprecated
-class TikaArchiveStreamFactory implements ArchiveStreamProvider {
-
-    private static final int TAR_HEADER_SIZE = 512;
-
-    private static final int DUMP_SIGNATURE_SIZE = 32;
-
-    private static final int SIGNATURE_SIZE = 12;
-
-    private static final ArchiveStreamFactory SINGLETON = new ArchiveStreamFactory();
-
-    /**
-     * Constant (value {@value}) used to identify the AR archive format.
-     * @since 1.1
-     */
-    public static final String AR = "ar";
-
-    /**
-     * Constant (value {@value}) used to identify the ARJ archive format.
-     * Not supported as an output stream type.
-     * @since 1.6
-     */
-    public static final String ARJ = "arj";
-
-    /**
-     * Constant (value {@value}) used to identify the CPIO archive format.
-     * @since 1.1
-     */
-    public static final String CPIO = "cpio";
-
-    /**
-     * Constant (value {@value}) used to identify the Unix DUMP archive format.
-     * Not supported as an output stream type.
-     * @since 1.3
-     */
-    public static final String DUMP = "dump";
-
-    /**
-     * Constant (value {@value}) used to identify the JAR archive format.
-     * @since 1.1
-     */
-    public static final String JAR = "jar";
-
-    /**
-     * Constant used to identify the TAR archive format.
-     * @since 1.1
-     */
-    public static final String TAR = "tar";
-
-    /**
-     * Constant (value {@value}) used to identify the ZIP archive format.
-     * @since 1.1
-     */
-    public static final String ZIP = "zip";
-
-    /**
-     * Constant (value {@value}) used to identify the 7z archive format.
-     * @since 1.8
-     */
-    public static final String SEVEN_Z = "7z";
-
-    /**
-     * Entry encoding, null for the platform default.
-     */
-    private final String encoding;
-
-    /**
-     * Entry encoding, null for the default.
-     */
-    private volatile String entryEncoding;
-
-    private SortedMap<String, ArchiveStreamProvider> archiveInputStreamProviders;
-
-    private SortedMap<String, ArchiveStreamProvider> archiveOutputStreamProviders;
-
-    private static ArrayList<ArchiveStreamProvider> findArchiveStreamProviders() {
-        return Lists.newArrayList(serviceLoaderIterator());
-    }
-
-    static void putAll(Set<String> names, ArchiveStreamProvider provider,
-                       TreeMap<String, ArchiveStreamProvider> map) {
-        for (String name : names) {
-            map.put(toKey(name), provider);
-        }
-    }
-
-    private static Iterator<ArchiveStreamProvider> serviceLoaderIterator() {
-        return new ServiceLoaderIterator<>(ArchiveStreamProvider.class);
-    }
-
-    private static String toKey(final String name) {
-        return name.toUpperCase(Locale.ROOT);
-    }
-
-    /**
-     * Constructs a new sorted map from input stream provider names to provider
-     * objects.
-     *
-     * <p>
-     * The map returned by this method will have one entry for each provider for
-     * which support is available in the current Java virtual machine. If two or
-     * more supported provider have the same name then the resulting map will
-     * contain just one of them; which one it will contain is not specified.
-     * </p>
-     *
-     * <p>
-     * The invocation of this method, and the subsequent use of the resulting
-     * map, may cause time-consuming disk or network I/O operations to occur.
-     * This method is provided for applications that need to enumerate all of
-     * the available providers, for example to allow user provider selection.
-     * </p>
-     *
-     * <p>
-     * This method may return different results at different times if new
-     * providers are dynamically made available to the current Java virtual
-     * machine.
-     * </p>
-     *
-     * @return An immutable, map from names to provider objects
-     * @since 1.13
-     */
-    public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveInputStreamProviders() {
-        return AccessController.doPrivileged(new PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>() {
-            @Override
-            public SortedMap<String, ArchiveStreamProvider> run() {
-                TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
-                putAll(SINGLETON.getInputStreamArchiveNames(), SINGLETON, map);
-                for (ArchiveStreamProvider provider : findArchiveStreamProviders()) {
-                    putAll(provider.getInputStreamArchiveNames(), provider, map);
-                }
-                return map;
-            }
-        });
-    }
-
-    /**
-     * Constructs a new sorted map from output stream provider names to provider
-     * objects.
-     *
-     * <p>
-     * The map returned by this method will have one entry for each provider for
-     * which support is available in the current Java virtual machine. If two or
-     * more supported provider have the same name then the resulting map will
-     * contain just one of them; which one it will contain is not specified.
-     * </p>
-     *
-     * <p>
-     * The invocation of this method, and the subsequent use of the resulting
-     * map, may cause time-consuming disk or network I/O operations to occur.
-     * This method is provided for applications that need to enumerate all of
-     * the available providers, for example to allow user provider selection.
-     * </p>
-     *
-     * <p>
-     * This method may return different results at different times if new
-     * providers are dynamically made available to the current Java virtual
-     * machine.
-     * </p>
-     *
-     * @return An immutable, map from names to provider objects
-     * @since 1.13
-     */
-    public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveOutputStreamProviders() {
-        return AccessController.doPrivileged(new PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>() {
-            @Override
-            public SortedMap<String, ArchiveStreamProvider> run() {
-                TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
-                putAll(SINGLETON.getOutputStreamArchiveNames(), SINGLETON, map);
-                for (ArchiveStreamProvider provider : findArchiveStreamProviders()) {
-                    putAll(provider.getOutputStreamArchiveNames(), provider, map);
-                }
-                return map;
-            }
-        });
-    }
-
-
-    /**
-     * Create an instance using the specified encoding.
-     *
-     * @param encoding the encoding to be used.
-     *
-     * @since 1.10
-     */
-    public TikaArchiveStreamFactory(final String encoding) {
-        super();
-        this.encoding = encoding;
-        // Also set the original field so can continue to use it.
-        this.entryEncoding = encoding;
-    }
-
-    /**
-     * Returns the encoding to use for arj, jar, zip, dump, cpio and tar
-     * files, or null for the archiver default.
-     *
-     * @return entry encoding, or null for the archiver default
-     * @since 1.5
-     */
-    public String getEntryEncoding() {
-        return entryEncoding;
-    }
-
-    /**
-     * Sets the encoding to use for arj, jar, zip, dump, cpio and tar files. Use null for the archiver default.
-     *
-     * @param entryEncoding the entry encoding, null uses the archiver default.
-     * @since 1.5
-     * @deprecated 1.10 use {@link #TikaArchiveStreamFactory(String)} to specify the encoding
-     * @throws IllegalStateException if the constructor {@link #TikaArchiveStreamFactory(String)}
-     * was used to specify the factory encoding.
-     */
-    @Deprecated
-    public void setEntryEncoding(final String entryEncoding) {
-        // Note: this does not detect new ArchiveStreamFactory(null) but that does not set the encoding anyway
-        if (encoding != null) {
-            throw new IllegalStateException("Cannot overide encoding set by the constructor");
-        }
-        this.entryEncoding = entryEncoding;
-    }
-
-    /**
-     * Creates an archive input stream from an archiver name and an input stream.
-     *
-     * @param archiverName the archive name,
-     * i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or {@value #SEVEN_Z}
-     * @param in the input stream
-     * @return the archive input stream
-     * @throws ArchiveException if the archiver name is not known
-     * @throws StreamingNotSupportedException if the format cannot be
-     * read from a stream
-     * @throws IllegalArgumentException if the archiver name or stream is null
-     */
-    public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in)
-            throws ArchiveException {
-        return createArchiveInputStream(archiverName, in, entryEncoding);
-    }
-
-    @Override
-    public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in,
-                                                       final String actualEncoding) throws ArchiveException {
-
-        if (archiverName == null) {
-            throw new IllegalArgumentException("Archivername must not be null.");
-        }
-
-        if (in == null) {
-            throw new IllegalArgumentException("InputStream must not be null.");
-        }
-
-        if (AR.equalsIgnoreCase(archiverName)) {
-            return new ArArchiveInputStream(in);
-        }
-        if (ARJ.equalsIgnoreCase(archiverName)) {
-            if (actualEncoding != null) {
-                return new ArjArchiveInputStream(in, actualEncoding);
-            }
-            return new ArjArchiveInputStream(in);
-        }
-        if (ZIP.equalsIgnoreCase(archiverName)) {
-            if (actualEncoding != null) {
-                return new ZipArchiveInputStream(in, actualEncoding);
-            }
-            return new ZipArchiveInputStream(in);
-        }
-        if (TAR.equalsIgnoreCase(archiverName)) {
-            if (actualEncoding != null) {
-                return new TarArchiveInputStream(in, actualEncoding);
-            }
-            return new TarArchiveInputStream(in);
-        }
-        if (JAR.equalsIgnoreCase(archiverName)) {
-            if (actualEncoding != null) {
-                return new JarArchiveInputStream(in, actualEncoding);
-            }
-            return new JarArchiveInputStream(in);
-        }
-        if (CPIO.equalsIgnoreCase(archiverName)) {
-            if (actualEncoding != null) {
-                return new CpioArchiveInputStream(in, actualEncoding);
-            }
-            return new CpioArchiveInputStream(in);
-        }
-        if (DUMP.equalsIgnoreCase(archiverName)) {
-            if (actualEncoding != null) {
-                return new DumpArchiveInputStream(in, actualEncoding);
-            }
-            return new DumpArchiveInputStream(in);
-        }
-        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
-            throw new StreamingNotSupportedException(SEVEN_Z);
-        }
-
-        final ArchiveStreamProvider archiveStreamProvider = getArchiveInputStreamProviders().get(toKey(archiverName));
-        if (archiveStreamProvider != null) {
-            return archiveStreamProvider.createArchiveInputStream(archiverName, in, actualEncoding);
-        }
-
-        throw new ArchiveException("Archiver: " + archiverName + " not found.");
-    }
-
-    /**
-     * Creates an archive output stream from an archiver name and an output stream.
-     *
-     * @param archiverName the archive name,
-     * i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO}
-     * @param out the output stream
-     * @return the archive output stream
-     * @throws ArchiveException if the archiver name is not known
-     * @throws StreamingNotSupportedException if the format cannot be
-     * written to a stream
-     * @throws IllegalArgumentException if the archiver name or stream is null
-     */
-    public ArchiveOutputStream createArchiveOutputStream(final String archiverName, final OutputStream out)
-            throws ArchiveException {
-        return createArchiveOutputStream(archiverName, out, entryEncoding);
-    }
-
-    @Override
-    public ArchiveOutputStream createArchiveOutputStream(
-            final String archiverName, final OutputStream out, final String actualEncoding)
-            throws ArchiveException {
-        if (archiverName == null) {
-            throw new IllegalArgumentException("Archivername must not be null.");
-        }
-        if (out == null) {
-            throw new IllegalArgumentException("OutputStream must not be null.");
-        }
-
-        if (AR.equalsIgnoreCase(archiverName)) {
-            return new ArArchiveOutputStream(out);
-        }
-        if (ZIP.equalsIgnoreCase(archiverName)) {
-            final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out);
-            if (actualEncoding != null) {
-                zip.setEncoding(actualEncoding);
-            }
-            return zip;
-        }
-        if (TAR.equalsIgnoreCase(archiverName)) {
-            if (actualEncoding != null) {
-                return new TarArchiveOutputStream(out, actualEncoding);
-            }
-            return new TarArchiveOutputStream(out);
-        }
-        if (JAR.equalsIgnoreCase(archiverName)) {
-            if (actualEncoding != null) {
-                return new JarArchiveOutputStream(out, actualEncoding);
-            }
-            return new JarArchiveOutputStream(out);
-        }
-        if (CPIO.equalsIgnoreCase(archiverName)) {
-            if (actualEncoding != null) {
-                return new CpioArchiveOutputStream(out, actualEncoding);
-            }
-            return new CpioArchiveOutputStream(out);
-        }
-        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
-            throw new StreamingNotSupportedException(SEVEN_Z);
-        }
-
-        final ArchiveStreamProvider archiveStreamProvider = getArchiveOutputStreamProviders().get(toKey(archiverName));
-        if (archiveStreamProvider != null) {
-            return archiveStreamProvider.createArchiveOutputStream(archiverName, out, actualEncoding);
-        }
-
-        throw new ArchiveException("Archiver: " + archiverName + " not found.");
-    }
-
-    /**
-     * Create an archive input stream from an input stream, autodetecting
-     * the archive type from the first few bytes of the stream. The InputStream
-     * must support marks, like BufferedInputStream.
-     *
-     * @param in the input stream
-     * @return the archive input stream
-     * @throws ArchiveException if the archiver name is not known
-     * @throws StreamingNotSupportedException if the format cannot be
-     * read from a stream
-     * @throws IllegalArgumentException if the stream is null or does not support mark
-     */
-    public ArchiveInputStream createArchiveInputStream(final InputStream in)
-            throws ArchiveException {
-        return createArchiveInputStream(detect(in), in);
-    }
-
-    /**
-     * Try to determine the type of Archiver
-     * @param in input stream
-     * @return type of archiver if found
-     * @throws ArchiveException if an archiver cannot be detected in the stream
-     * @since 1.14
-     */
-    public static String detect(InputStream in) throws ArchiveException {
-        if (in == null) {
-            throw new IllegalArgumentException("Stream must not be null.");
-        }
-
-        if (!in.markSupported()) {
-            throw new IllegalArgumentException("Mark is not supported.");
-        }
-
-        final byte[] signature = new byte[SIGNATURE_SIZE];
-        in.mark(signature.length);
-        int signatureLength = -1;
-        try {
-            signatureLength = IOUtils.readFully(in, signature);
-            in.reset();
-        } catch (IOException e) {
-            throw new ArchiveException("IOException while reading signature.");
-        }
-
-        if (ZipArchiveInputStream.matches(signature, signatureLength)) {
-            return ZIP;
-        } else if (JarArchiveInputStream.matches(signature, signatureLength)) {
-            return JAR;
-        } if (ArArchiveInputStream.matches(signature, signatureLength)) {
-            return AR;
-        } else if (CpioArchiveInputStream.matches(signature, signatureLength)) {
-            return CPIO;
-        } else if (ArjArchiveInputStream.matches(signature, signatureLength)) {
-            return ARJ;
-        } else if (SevenZFile.matches(signature, signatureLength)) {
-            return SEVEN_Z;
-        }
-
-        // Dump needs a bigger buffer to check the signature;
-        final byte[] dumpsig = new byte[DUMP_SIGNATURE_SIZE];
-        in.mark(dumpsig.length);
-        try {
-            signatureLength = IOUtils.readFully(in, dumpsig);
-            in.reset();
-        } catch (IOException e) {
-            throw new ArchiveException("IOException while reading dump signature");
-        }
-        if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) {
-            return DUMP;
-        }
-
-        // Tar needs an even bigger buffer to check the signature; read the first block
-        final byte[] tarHeader = new byte[TAR_HEADER_SIZE];
-        in.mark(tarHeader.length);
-        try {
-            signatureLength = IOUtils.readFully(in, tarHeader);
-            in.reset();
-        } catch (IOException e) {
-            throw new ArchiveException("IOException while reading tar signature");
-        }
-        if (TarArchiveInputStream.matches(tarHeader, signatureLength)) {
-            return TAR;
-        }
-
-        // COMPRESS-117 - improve auto-recognition
-        if (signatureLength >= TAR_HEADER_SIZE) {
-            TarArchiveInputStream tais = null;
-            try {
-                tais = new TarArchiveInputStream(new ByteArrayInputStream(tarHeader));
-                // COMPRESS-191 - verify the header checksum
-                if (tais.getNextTarEntry().isCheckSumOK()) {
-                    return TAR;
-                }
-            } catch (final Exception e) { // NOPMD
-                // can generate IllegalArgumentException as well
-                // as IOException
-                // autodetection, simply not a TAR
-                // ignored
-            } finally {
-                IOUtils.closeQuietly(tais);
-            }
-        }
-        throw new ArchiveException("No Archiver found for the stream signature");
-    }
-
-    public SortedMap<String, ArchiveStreamProvider> getArchiveInputStreamProviders() {
-        if (archiveInputStreamProviders == null) {
-            archiveInputStreamProviders = Collections
-                    .unmodifiableSortedMap(findAvailableArchiveInputStreamProviders());
-        }
-        return archiveInputStreamProviders;
-    }
-
-    public SortedMap<String, ArchiveStreamProvider> getArchiveOutputStreamProviders() {
-        if (archiveOutputStreamProviders == null) {
-            archiveOutputStreamProviders = Collections
-                    .unmodifiableSortedMap(findAvailableArchiveOutputStreamProviders());
-        }
-        return archiveOutputStreamProviders;
-    }
-
-    @Override
-    public Set<String> getInputStreamArchiveNames() {
-        return Sets.newHashSet(AR, ARJ, ZIP, TAR, JAR, CPIO, DUMP, SEVEN_Z);
-    }
-
-    @Override
-    public Set<String> getOutputStreamArchiveNames() {
-        return Sets.newHashSet(AR, ZIP, TAR, JAR, CPIO, SEVEN_Z);
-    }
-
-}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/TikaCompressorStreamFactory.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/TikaCompressorStreamFactory.java
deleted file mode 100644
index a1a8405..0000000
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/TikaCompressorStreamFactory.java
+++ /dev/null
@@ -1,551 +0,0 @@
-package org.apache.tika.parser.pkg;
-    /*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.security.AccessController;
-import java.security.PrivilegedAction;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.Locale;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.TreeMap;
-
-import org.apache.commons.compress.compressors.CompressorException;
-import org.apache.commons.compress.compressors.CompressorInputStream;
-import org.apache.commons.compress.compressors.CompressorOutputStream;
-import org.apache.commons.compress.compressors.CompressorStreamProvider;
-import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
-import org.apache.commons.compress.compressors.deflate.DeflateCompressorInputStream;
-import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
-import org.apache.commons.compress.compressors.lzma.LZMAUtils;
-import org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream;
-import org.apache.commons.compress.compressors.snappy.FramedSnappyCompressorInputStream;
-import org.apache.commons.compress.compressors.snappy.SnappyCompressorInputStream;
-import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
-import org.apache.commons.compress.compressors.xz.XZUtils;
-import org.apache.commons.compress.compressors.z.ZCompressorInputStream;
-import org.apache.commons.compress.utils.IOUtils;
-import org.apache.commons.compress.utils.Lists;
-import org.apache.commons.compress.utils.ServiceLoaderIterator;
-import org.apache.commons.compress.utils.Sets;
-import org.apache.tika.exception.TikaMemoryLimitException;
-import org.tukaani.xz.LZMAInputStream;
-import org.tukaani.xz.MemoryLimitException;
-
-/**
- * This is a temporary copy/paste hack from commons-compress for Tika 1.15
- * that 1) allows detection without initialization of a stream and
- * 2) prevents easily preventable OOM on two file formats.
- *
- * Once commons-compress 1.14 is released, we will delete this class
- * and go back to commons-compress's CompressorStreamFactory.
- */
-@Deprecated
-class TikaCompressorStreamFactory implements CompressorStreamProvider {
-
-
-
-        private static final TikaCompressorStreamFactory SINGLETON = new TikaCompressorStreamFactory(true, -1);
-
-        /**
-         * Constant (value {@value}) used to identify the BZIP2 compression
-         * algorithm.
-         *
-         * @since 1.1
-         */
-        public static final String BZIP2 = "bzip2";
-
-        /**
-         * Constant (value {@value}) used to identify the GZIP compression
-         * algorithm.
-         *
-         * @since 1.1
-         */
-        public static final String GZIP = "gz";
-
-        /**
-         * Constant (value {@value}) used to identify the PACK200 compression
-         * algorithm.
-         *
-         * @since 1.3
-         */
-        public static final String PACK200 = "pack200";
-
-        /**
-         * Constant (value {@value}) used to identify the XZ compression method.
-         *
-         * @since 1.4
-         */
-        public static final String XZ = "xz";
-
-        /**
-         * Constant (value {@value}) used to identify the LZMA compression method.
-         *
-         * @since 1.6
-         */
-        public static final String LZMA = "lzma";
-
-        /**
-         * Constant (value {@value}) used to identify the "framed" Snappy
-         * compression method.
-         *
-         * @since 1.7
-         */
-        public static final String SNAPPY_FRAMED = "snappy-framed";
-
-        /**
-         * Constant (value {@value}) used to identify the "raw" Snappy compression
-         * method. Not supported as an output stream type.
-         *
-         * @since 1.7
-         */
-        public static final String SNAPPY_RAW = "snappy-raw";
-
-        /**
-         * Constant (value {@value}) used to identify the traditional Unix compress
-         * method. Not supported as an output stream type.
-         *
-         * @since 1.7
-         */
-        public static final String Z = "z";
-
-        /**
-         * Constant (value {@value}) used to identify the Deflate compress method.
-         *
-         * @since 1.9
-         */
-        public static final String DEFLATE = "deflate";
-
-
-        private final int memoryLimitInKb;
-
-    private SortedMap<String, CompressorStreamProvider> compressorInputStreamProviders;
-
-
-    public static String getBzip2() {
-            return BZIP2;
-        }
-
-        public static String getDeflate() {
-            return DEFLATE;
-        }
-
-        public static String getGzip() {
-            return GZIP;
-        }
-
-        public static String getLzma() {
-            return LZMA;
-        }
-
-        public static String getPack200() {
-            return PACK200;
-        }
-
-        public static TikaCompressorStreamFactory getSingleton() {
-            return SINGLETON;
-        }
-
-        public static String getSnappyFramed() {
-            return SNAPPY_FRAMED;
-        }
-
-        public static String getSnappyRaw() {
-            return SNAPPY_RAW;
-        }
-
-        public static String getXz() {
-            return XZ;
-        }
-
-        public static String getZ() {
-            return Z;
-        }
-
-        static void putAll(final Set<String> names, final CompressorStreamProvider provider,
-                           final TreeMap<String, CompressorStreamProvider> map) {
-            for (final String name : names) {
-                map.put(toKey(name), provider);
-            }
-        }
-
-        private static String toKey(final String name) {
-            return name.toUpperCase(Locale.ROOT);
-        }
-
-        /**
-         * If true, decompress until the end of the input. If false, stop after the
-         * first stream and leave the input position to point to the next byte after
-         * the stream
-         */
-        private final Boolean decompressUntilEOF;
-
-        /**
-         * If true, decompress until the end of the input. If false, stop after the
-         * first stream and leave the input position to point to the next byte after
-         * the stream
-         */
-        private volatile boolean decompressConcatenated = false;
-
-        /**
-         * Create an instance with the provided decompress Concatenated option.
-         *
-         * @param decompressUntilEOF
-         *            if true, decompress until the end of the input; if false, stop
-         *            after the first stream and leave the input position to point
-         *            to the next byte after the stream. This setting applies to the
-         *            gzip, bzip2 and xz formats only.
-         * @since 1.10
-         */
-        public TikaCompressorStreamFactory(final boolean decompressUntilEOF, final int memoryLimitInKb) {
-            this.decompressUntilEOF = Boolean.valueOf(decompressUntilEOF);
-            // Also copy to existing variable so can continue to use that as the
-            // current value
-            this.decompressConcatenated = decompressUntilEOF;
-            this.memoryLimitInKb = memoryLimitInKb;
-        }
-
-        /**
-         * Try to detect the type of compressor stream.
-         *
-         * @param in input stream
-         * @return type of compressor stream detected
-         * @throws CompressorException if no compressor stream type was detected
-         *                             or if something else went wrong
-         * @throws IllegalArgumentException if stream is null or does not support mark
-         *
-         * @since 1.14
-         */
-        public static String detect(final InputStream in) throws CompressorException {
-            if (in == null) {
-                throw new IllegalArgumentException("Stream must not be null.");
-            }
-
-            if (!in.markSupported()) {
-                throw new IllegalArgumentException("Mark is not supported.");
-            }
-
-            final byte[] signature = new byte[12];
-            in.mark(signature.length);
-            int signatureLength = -1;
-            try {
-                signatureLength = IOUtils.readFully(in, signature);
-                in.reset();
-            } catch (IOException e) {
-                throw new CompressorException("IOException while reading signature.", e);
-            }
-
-            if (BZip2CompressorInputStream.matches(signature, signatureLength)) {
-                return BZIP2;
-            }
-
-            if (GzipCompressorInputStream.matches(signature, signatureLength)) {
-                return GZIP;
-            }
-
-            if (Pack200CompressorInputStream.matches(signature, signatureLength)) {
-                return PACK200;
-            }
-
-            if (FramedSnappyCompressorInputStream.matches(signature, signatureLength)) {
-                return SNAPPY_FRAMED;
-            }
-
-            if (ZCompressorInputStream.matches(signature, signatureLength)) {
-                return Z;
-            }
-
-            if (DeflateCompressorInputStream.matches(signature, signatureLength)) {
-                return DEFLATE;
-            }
-
-            if (XZUtils.matches(signature, signatureLength)) {
-                return XZ;
-            }
-
-            if (LZMAUtils.matches(signature, signatureLength)) {
-                return LZMA;
-            }
-
-/*            if (FramedLZ4CompressorInputStream.matches(signature, signatureLength)) {
-                return LZ4_FRAMED;
-            }*/
-
-            throw new CompressorException("No Compressor found for the stream signature.");
-        }
-
-    public SortedMap<String, CompressorStreamProvider> getCompressorInputStreamProviders() {
-        if (compressorInputStreamProviders == null) {
-            compressorInputStreamProviders = Collections
-                    .unmodifiableSortedMap(findAvailableCompressorInputStreamProviders());
-        }
-        return compressorInputStreamProviders;
-    }
-
-    public static SortedMap<String, CompressorStreamProvider> findAvailableCompressorInputStreamProviders() {
-        return AccessController.doPrivileged(new PrivilegedAction<SortedMap<String, CompressorStreamProvider>>() {
-            @Override
-            public SortedMap<String, CompressorStreamProvider> run() {
-                final TreeMap<String, CompressorStreamProvider> map = new TreeMap<>();
-                putAll(SINGLETON.getInputStreamCompressorNames(), SINGLETON, map);
-                for (final CompressorStreamProvider provider : findCompressorStreamProviders()) {
-                    putAll(provider.getInputStreamCompressorNames(), provider, map);
-                }
-                return map;
-            }
-        });
-    }
-
-    private static ArrayList<CompressorStreamProvider> findCompressorStreamProviders() {
-        return Lists.newArrayList(serviceLoaderIterator());
-    }
-
-    private static Iterator<CompressorStreamProvider> serviceLoaderIterator() {
-        return new ServiceLoaderIterator<>(CompressorStreamProvider.class);
-    }
-
-        /**
-         * Create an compressor input stream from an input stream, autodetecting the
-         * compressor type from the first few bytes of the stream. The InputStream
-         * must support marks, like BufferedInputStream.
-         *
-         * @param in
-         *            the input stream
-         * @return the compressor input stream
-         * @throws CompressorException
-         *             if the compressor name is not known
-         * @throws IllegalArgumentException
-         *             if the stream is null or does not support mark
-         * @since 1.1
-         */
-        public CompressorInputStream createCompressorInputStream(final InputStream in) throws CompressorException,
-                TikaMemoryLimitException {
-            return createCompressorInputStream(detect(in), in);
-        }
-
-        /**
-         * Creates a compressor input stream from a compressor name and an input
-         * stream.
-         *
-         * @param name
-         *            of the compressor, i.e. {@value #GZIP}, {@value #BZIP2},
-         *            {@value #XZ}, {@value #LZMA}, {@value #PACK200},
-         *            {@value #SNAPPY_RAW}, {@value #SNAPPY_FRAMED}, {@value #Z},
-         *            or {@value #DEFLATE}
-         * @param in
-         *            the input stream
-         * @return compressor input stream
-         * @throws CompressorException
-         *             if the compressor name is not known or not available
-         * @throws IllegalArgumentException
-         *             if the name or input stream is null
-         */
-        public CompressorInputStream createCompressorInputStream(final String name, final InputStream in)
-                throws CompressorException, TikaMemoryLimitException {
-            return createCompressorInputStream(name, in, decompressConcatenated);
-        }
-
-        public CompressorInputStream createCompressorInputStream(final String name, final InputStream in,
-                                                                 final boolean actualDecompressConcatenated) throws CompressorException {
-            if (name == null || in == null) {
-                throw new IllegalArgumentException("Compressor name and stream must not be null.");
-            }
-
-            try {
-
-                if (GZIP.equalsIgnoreCase(name)) {
-                    return new GzipCompressorInputStream(in, actualDecompressConcatenated);
-                }
-
-                if (BZIP2.equalsIgnoreCase(name)) {
-                    return new BZip2CompressorInputStream(in, actualDecompressConcatenated);
-                }
-
-                if (XZ.equalsIgnoreCase(name)) {
-                    if (!XZUtils.isXZCompressionAvailable()) {
-                        throw new CompressorException("XZ compression is not available.");
-                    }
-                    return new XZCompressorInputStream(in, actualDecompressConcatenated);
-                }
-
-                if (LZMA.equalsIgnoreCase(name)) {
-                    if (!LZMAUtils.isLZMACompressionAvailable()) {
-                        throw new CompressorException("LZMA compression is not available");
-                    }
-                    try {
-                        return new SaferLZMACompressorInputStream(in);
-                    } catch (MemoryLimitException e) {
-                        throw new CompressorException("MemoryLimitException: " + e.getMessage(), e);
-                    }
-                }
-
-                if (PACK200.equalsIgnoreCase(name)) {
-                    return new Pack200CompressorInputStream(in);
-                }
-
-                if (SNAPPY_RAW.equalsIgnoreCase(name)) {
-                    return new SnappyCompressorInputStream(in);
-                }
-
-                if (SNAPPY_FRAMED.equalsIgnoreCase(name)) {
-                    return new FramedSnappyCompressorInputStream(in);
-                }
-
-                if (Z.equalsIgnoreCase(name)) {
-                    try {
-                        return new SaferZCompressorInputStream(in);
-                    } catch (TikaRuntimeMemoryLimitException e) {
-                        throw new CompressorException("MemoryLimitException: " + e.getMessage(), e);
-                    }
-                }
-
-                if (DEFLATE.equalsIgnoreCase(name)) {
-                    return new DeflateCompressorInputStream(in);
-                }
-/*
-not currently supported
-                if (LZ4_BLOCK.equalsIgnoreCase(name)) {
-                    return new BlockLZ4CompressorInputStream(in);
-                }
-
-                if (LZ4_FRAMED.equalsIgnoreCase(name)) {
-                    return new FramedLZ4CompressorInputStream(in, actualDecompressConcatenated);
-                }
- */
-
-            } catch (final IOException e) {
-                throw new CompressorException("Could not create CompressorInputStream.", e);
-            }
-
-            final CompressorStreamProvider compressorStreamProvider = getCompressorInputStreamProviders().get(toKey(name));
-            if (compressorStreamProvider != null) {
-                return compressorStreamProvider.createCompressorInputStream(name, in, actualDecompressConcatenated);
-            }
-
-            throw new CompressorException("Compressor: " + name + " not found.");
-        }
-
-    @Override
-    public CompressorOutputStream createCompressorOutputStream(String s, OutputStream outputStream) throws CompressorException {
-        throw new UnsupportedOperationException();
-    }
-
-
-    // For Unit tests
-        boolean getDecompressConcatenated() {
-            return decompressConcatenated;
-        }
-
-    public Set<String> getInputStreamCompressorNames() {
-        return Sets.newHashSet(GZIP, BZIP2, XZ, LZMA, PACK200, DEFLATE, SNAPPY_RAW, SNAPPY_FRAMED, Z);
-    }
-
-    @Override
-    public Set<String> getOutputStreamCompressorNames() {
-        throw new UnsupportedOperationException();
-    }
-
-    public Boolean getDecompressUntilEOF() {
-            return decompressUntilEOF;
-        }
-
-    private class SaferZCompressorInputStream extends ZCompressorInputStream {
-
-        public SaferZCompressorInputStream(InputStream inputStream) throws IOException {
-            super(inputStream);
-        }
-
-        @Override
-        protected void initializeTables(int maxCodeSize) {
-            int maxTableSize = 1 << maxCodeSize;
-            if (memoryLimitInKb > -1 && maxTableSize > (memoryLimitInKb*1024)) {
-                throw new TikaRuntimeMemoryLimitException("Calculated maxCodeSize ("+maxCodeSize+" bytes) is greater "+
-                 "than the maximum allowable ("+ (memoryLimitInKb*1024) +" bytes).\n"+
-                        "If the file is not corrupt, consider increasing " +
-                        "the memoryLimitInKb parameter in the CompressorParser");
-            }
-            super.initializeTables(maxCodeSize);
-        }
-    }
-
-    private static class TikaRuntimeMemoryLimitException extends RuntimeException {
-        public TikaRuntimeMemoryLimitException(String msg) {
-            super(msg);
-        }
-    }
-
-    private class SaferLZMACompressorInputStream extends CompressorInputStream {
-        private final InputStream in;
-
-        /**
-         * Creates a new input stream that decompresses LZMA-compressed data
-         * from the specified input stream.
-         *
-         * @param       inputStream where to read the compressed data
-         *
-         * @throws      IOException if the input is not in the .lzma format,
-         *                          the input is corrupt or truncated, the .lzma
-         *                          headers specify sizes that are not supported
-         *                          by this implementation, or the underlying
-         *                          <code>inputStream</code> throws an exception
-         */
-        public SaferLZMACompressorInputStream(final InputStream inputStream) throws IOException {
-            in = new LZMAInputStream(inputStream, memoryLimitInKb);
-        }
-
-        /** {@inheritDoc} */
-        @Override
-        public int read() throws IOException {
-            final int ret = in.read();
-            count(ret == -1 ? 0 : 1);
-            return ret;
-        }
-
-        /** {@inheritDoc} */
-        @Override
-        public int read(final byte[] buf, final int off, final int len) throws IOException {
-            final int ret = in.read(buf, off, len);
-            count(ret);
-            return ret;
-        }
-
-        /** {@inheritDoc} */
-        @Override
-        public long skip(final long n) throws IOException {
-            return in.skip(n);
-        }
-
-        /** {@inheritDoc} */
-        @Override
-        public int available() throws IOException {
-            return in.available();
-        }
-
-        /** {@inheritDoc} */
-        @Override
-        public void close() throws IOException {
-            in.close();
-        }
-    }
-}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
index 495fd2d..3f9211b 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
@@ -29,9 +29,11 @@ import java.util.Set;
 import java.util.regex.Pattern;
 
 import org.apache.commons.compress.archivers.ArchiveException;
+import org.apache.commons.compress.archivers.ArchiveStreamFactory;
 import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
 import org.apache.commons.compress.archivers.zip.ZipFile;
 import org.apache.commons.compress.compressors.CompressorException;
+import org.apache.commons.compress.compressors.CompressorStreamFactory;
 import org.apache.commons.io.IOUtils;
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
@@ -100,7 +102,7 @@ public class ZipContainerDetector implements Detector {
 
     private static MediaType detectCompressorFormat(byte[] prefix, int length) {
         try {
-            String type = TikaCompressorStreamFactory.detect(new ByteArrayInputStream(prefix, 0, length));
+            String type = CompressorStreamFactory.detect(new ByteArrayInputStream(prefix, 0, length));
             return CompressorParser.getMediaType(type);
         } catch (CompressorException e) {
             return MediaType.OCTET_STREAM;
@@ -109,7 +111,7 @@ public class ZipContainerDetector implements Detector {
 
     private static MediaType detectArchiveFormat(byte[] prefix, int length) {
         try {
-            String name = TikaArchiveStreamFactory.detect(new ByteArrayInputStream(prefix, 0, length));
+            String name = ArchiveStreamFactory.detect(new ByteArrayInputStream(prefix, 0, length));
             return PackageParser.getMediaType(name);
         } catch (ArchiveException e) {
             return MediaType.OCTET_STREAM;
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pkg/CompressorParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pkg/CompressorParserTest.java
index 77531fc..444afc7 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/pkg/CompressorParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/pkg/CompressorParserTest.java
@@ -18,22 +18,46 @@
 package org.apache.tika.parser.pkg;
 
 
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.fail;
 
 import java.util.HashSet;
 import java.util.Set;
 
+import org.apache.commons.compress.compressors.CompressorStreamFactory;
+import org.apache.tika.TikaTest;
+import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.ParseContext;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
-public class CompressorParserTest {
+public class CompressorParserTest extends TikaTest {
+    //These compressed stream types can't currently
+    //be detected.
     private static Set<MediaType> NOT_COVERED = new HashSet();
 
     @BeforeClass
     public static void setUp() {
-        NOT_COVERED.add(MediaType.application("x-snappy-framed"));
+        NOT_COVERED.add(MediaType.application("x-brotli"));
+        NOT_COVERED.add(MediaType.application("x-lz4-block"));
+        NOT_COVERED.add(MediaType.application("x-snappy-raw"));
+    }
+
+    @Test
+    public void testSnappyFramed() throws Exception {
+        XMLResult r = getXML("testSnappy-framed.sz");
+        assertEquals("application/x-snappy", r.metadata.get(Metadata.CONTENT_TYPE));
+        assertContains("Lorem ipsum dolor sit amet", r.xml);
+    }
+
+    @Test
+    public void testLZ4Framed() throws Exception {
+        XMLResult r = getXML("testLZ4-framed.lz4");
+        assertEquals("application/x-lz4", r.metadata.get(Metadata.CONTENT_TYPE));
+        //xml parser throws an exception for test1.xml
+        //for now, be content that the container file is correctly identified
+        assertContains("test1.xml", r.xml);
     }
 
     @Test
@@ -41,7 +65,7 @@ public class CompressorParserTest {
         //test that the package parser covers all inputstreams handled
         //by CompressorStreamFactory.  When we update commons-compress, and they add
         //a new stream type, we want to make sure that we're handling it.
-        TikaCompressorStreamFactory archiveStreamFactory = new TikaCompressorStreamFactory(true, 1000);
+        CompressorStreamFactory archiveStreamFactory = new CompressorStreamFactory(true, 1000);
         CompressorParser compressorParser = new CompressorParser();
         ParseContext parseContext = new ParseContext();
         for (String name : archiveStreamFactory.getInputStreamCompressorNames()) {
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pkg/PackageParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pkg/PackageParserTest.java
index 412228c..12b7bb8 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/pkg/PackageParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/pkg/PackageParserTest.java
@@ -22,6 +22,7 @@ import static org.junit.Assert.fail;
 
 import java.nio.charset.StandardCharsets;
 
+import org.apache.commons.compress.archivers.ArchiveStreamFactory;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.ParseContext;
 import org.junit.Test;
@@ -33,7 +34,7 @@ public class PackageParserTest {
         //test that the package parser covers all inputstreams handled
         //by ArchiveStreamFactory.  When we update commons-compress, and they add
         //a new stream type, we want to make sure that we're handling it.
-        TikaArchiveStreamFactory archiveStreamFactory = new TikaArchiveStreamFactory(StandardCharsets.UTF_8.name());
+        ArchiveStreamFactory archiveStreamFactory = new ArchiveStreamFactory(StandardCharsets.UTF_8.name());
         PackageParser packageParser = new PackageParser();
         ParseContext parseContext = new ParseContext();
         for (String name : archiveStreamFactory.getInputStreamArchiveNames()) {
diff --git a/tika-parsers/src/test/resources/test-documents/testLZ4-framed.lz4 b/tika-parsers/src/test/resources/test-documents/testLZ4-framed.lz4
new file mode 100644
index 0000000..d2a813f
Binary files /dev/null and b/tika-parsers/src/test/resources/test-documents/testLZ4-framed.lz4 differ
diff --git a/tika-parsers/src/test/resources/test-documents/testSnappy-framed.sz b/tika-parsers/src/test/resources/test-documents/testSnappy-framed.sz
new file mode 100644
index 0000000..9a6b1fb
Binary files /dev/null and b/tika-parsers/src/test/resources/test-documents/testSnappy-framed.sz differ

-- 
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.