Posted to commits@tika.apache.org by ta...@apache.org on 2017/04/20 01:20:14 UTC

[tika] 01/03: TIKA-2330 -- prevent avoidable OOMs when detecting and parsing corrupt files or files that are misidentified as compressed streams.

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 75eea6e5502f4f5a2edf5ab459b4c369d33f66e5
Author: tballison <ta...@mitre.org>
AuthorDate: Wed Apr 19 21:18:56 2017 -0400

    TIKA-2330 -- prevent avoidable OOMs when detecting and parsing
    corrupt files or files that are misidentified as compressed streams.
---
 .../tika/exception/TikaMemoryLimitException.java   |  30 ++
 tika-parent/pom.xml                                |   2 +-
 tika-parsers/pom.xml                               |   2 +-
 .../apache/tika/parser/pkg/CompressorParser.java   |  48 +-
 .../parser/pkg/TikaCompressorStreamFactory.java    | 551 +++++++++++++++++++++
 .../tika/parser/pkg/ZipContainerDetector.java      |  16 +-
 .../tika/detect/TestContainerAwareDetector.java    |  10 +
 .../apache/tika/parser/pkg/CompressParserTest.java |  22 +
 .../src/test/resources/test-documents/testLZMA_oom | Bin 0 -> 19 bytes
 .../src/test/resources/test-documents/testZ_oom.Z  |   1 +
 10 files changed, 663 insertions(+), 19 deletions(-)
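
The user-facing addition in this change is the @Field-annotated setMemoryLimitInKb
setter on CompressorParser (default 100000 KB, roughly 100 MB). A minimal sketch of
tightening the limit programmatically; the 10000 KB value is only illustrative and
not part of this commit:

    CompressorParser parser = new CompressorParser();
    parser.setMemoryLimitInKb(10000);   // cap decompression allocations at roughly 10 MB

Because the setter carries @Field, the same value should also be settable through a
tika-config parser parameter, though that wiring is outside this diff.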

diff --git a/tika-core/src/main/java/org/apache/tika/exception/TikaMemoryLimitException.java b/tika-core/src/main/java/org/apache/tika/exception/TikaMemoryLimitException.java
new file mode 100644
index 0000000..baf5818
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/exception/TikaMemoryLimitException.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.exception;
+
+/**
+ * Thrown when a parser would have to allocate more memory than a configured
+ * threshold allows.  For example, a corrupt file might ask the
+ * ZCompressorInputStream to allocate an array many gigabytes in length.
+ */
+public class TikaMemoryLimitException extends TikaException {
+
+    public TikaMemoryLimitException(String msg) {
+        super(msg);
+    }
+}
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 9220c3d..5e4b0dc 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -306,7 +306,7 @@
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <project.reporting.outputEncoding>${project.build.sourceEncoding}</project.reporting.outputEncoding>
     <!-- NOTE: sync tukaani version with commons-compress in tika-parsers -->
-    <commons.compress.version>1.12</commons.compress.version>
+    <commons.compress.version>1.13</commons.compress.version>
     <commons.io.version>2.5</commons.io.version>
     <slf4j.version>1.7.24</slf4j.version>
   </properties>
diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
index 58ac745..7ff88c2 100644
--- a/tika-parsers/pom.xml
+++ b/tika-parsers/pom.xml
@@ -39,7 +39,7 @@
     <!-- NOTE: sync codec version with POI -->
     <codec.version>1.10</codec.version>
     <!-- NOTE: sync tukaani version with commons-compress in tika-parent-->
-    <tukaani.version>1.5</tukaani.version>
+    <tukaani.version>1.6</tukaani.version>
     <mime4j.version>0.7.2</mime4j.version>
     <vorbis.version>0.8</vorbis.version>
     <pdfbox.version>2.0.5</pdfbox.version>
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java
index f82db54..ff589e0 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java
@@ -25,18 +25,20 @@ import java.util.Set;
 
 import org.apache.commons.compress.compressors.CompressorException;
 import org.apache.commons.compress.compressors.CompressorInputStream;
-import org.apache.commons.compress.compressors.CompressorStreamFactory;
 import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
 import org.apache.commons.compress.compressors.deflate.DeflateCompressorInputStream;
 import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
 import org.apache.commons.compress.compressors.gzip.GzipUtils;
+import org.apache.commons.compress.compressors.lzma.LZMACompressorInputStream;
 import org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream;
 import org.apache.commons.compress.compressors.snappy.FramedSnappyCompressorInputStream;
 import org.apache.commons.compress.compressors.snappy.SnappyCompressorInputStream;
 import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
 import org.apache.commons.compress.compressors.z.ZCompressorInputStream;
 import org.apache.commons.io.input.CloseShieldInputStream;
+import org.apache.tika.config.Field;
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.TikaMemoryLimitException;
 import org.apache.tika.extractor.EmbeddedDocumentExtractor;
 import org.apache.tika.extractor.EmbeddedDocumentUtil;
 import org.apache.tika.metadata.Metadata;
@@ -64,13 +66,16 @@ public class CompressorParser extends AbstractParser {
     private static final MediaType PACK = MediaType.application("x-java-pack200");
     private static final MediaType SNAPPY = MediaType.application("x-snappy-framed");
     private static final MediaType ZLIB = MediaType.application("zlib");
+    private static final MediaType LZMA = MediaType.application("x-lzma");
 
     private static final Set<MediaType> SUPPORTED_TYPES =
-            MediaType.set(BZIP, BZIP2, GZIP, GZIP_ALT, COMPRESS, XZ, PACK, ZLIB);
+            MediaType.set(BZIP, BZIP2, GZIP, GZIP_ALT, COMPRESS, XZ, PACK, ZLIB, LZMA);
+
+    private int memoryLimitInKb = 100000;//100MB
 
     static MediaType getMediaType(CompressorInputStream stream) {
         // TODO Add support for the remaining CompressorInputStream formats:
-        //   LZMACompressorInputStream
+        //   LZ4
         //   LZWInputStream -> UnshrinkingInputStream
         if (stream instanceof BZip2CompressorInputStream) {
             return BZIP2;
@@ -88,6 +93,31 @@ public class CompressorParser extends AbstractParser {
                    stream instanceof SnappyCompressorInputStream) {
             // TODO Add unit tests for this format
             return SNAPPY;
+        } else if (stream instanceof LZMACompressorInputStream) {
+            return LZMA;
+        } else {
+            return MediaType.OCTET_STREAM;
+        }
+    }
+
+    static MediaType getMediaType(String name) {
+        if (TikaCompressorStreamFactory.BZIP2.equals(name)) {
+            return BZIP2;
+        } else if (TikaCompressorStreamFactory.GZIP.equals(name)) {
+            return GZIP;
+        } else if (TikaCompressorStreamFactory.XZ.equals(name)) {
+            return XZ;
+        } else if (TikaCompressorStreamFactory.DEFLATE.equals(name)) {
+            return ZLIB;
+        } else if (TikaCompressorStreamFactory.Z.equals(name)) {
+            return COMPRESS;
+        } else if (TikaCompressorStreamFactory.PACK200.equals(name)) {
+            return PACK;
+        } else if (TikaCompressorStreamFactory.SNAPPY_FRAMED.equals(name) ||
+                TikaCompressorStreamFactory.SNAPPY_RAW.equals(name)) {
+            return SNAPPY;
+        } else if (TikaCompressorStreamFactory.LZMA.equals(name)) {
+            return LZMA;
         } else {
             return MediaType.OCTET_STREAM;
         }
@@ -119,10 +149,13 @@ public class CompressorParser extends AbstractParser {
                          return false;
                      }
                  });
-            CompressorStreamFactory factory = 
-                    new CompressorStreamFactory(options.decompressConcatenated(metadata));
+            TikaCompressorStreamFactory factory =
+                    new TikaCompressorStreamFactory(options.decompressConcatenated(metadata), memoryLimitInKb);
             cis = factory.createCompressorInputStream(stream);
         } catch (CompressorException e) {
+            if (e.getMessage() != null && e.getMessage().startsWith("MemoryLimitException:")) {
+                throw new TikaMemoryLimitException(e.getMessage());
+            }
             throw new TikaException("Unable to uncompress document stream", e);
         }
 
@@ -171,4 +204,9 @@ public class CompressorParser extends AbstractParser {
         xhtml.endDocument();
     }
 
+    @Field
+    public void setMemoryLimitInKb(int memoryLimitInKb) {
+        this.memoryLimitInKb = memoryLimitInKb;
+    }
+
 }
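
With the change above, exceeding the configured budget now surfaces as a
TikaMemoryLimitException instead of a generic TikaException. A hedged sketch of how
calling code might tell the two apart (the file name is illustrative, the enclosing
method is assumed to declare the checked IOException and SAXException, and
TikaMemoryLimitException extends TikaException, so it must be caught first):

    Metadata metadata = new Metadata();
    try (InputStream stream = TikaInputStream.get(Paths.get("suspect.Z"))) {
        new CompressorParser().parse(stream, new BodyContentHandler(), metadata, new ParseContext());
    } catch (TikaMemoryLimitException e) {
        // likely corrupt or mis-detected; raise memoryLimitInKb only if the file is known to be good
    } catch (TikaException e) {
        // some other decompression or parse failure
    }
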
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/TikaCompressorStreamFactory.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/TikaCompressorStreamFactory.java
new file mode 100644
index 0000000..a1a8405
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/TikaCompressorStreamFactory.java
@@ -0,0 +1,551 @@
+package org.apache.tika.parser.pkg;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.Locale;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.apache.commons.compress.compressors.CompressorException;
+import org.apache.commons.compress.compressors.CompressorInputStream;
+import org.apache.commons.compress.compressors.CompressorOutputStream;
+import org.apache.commons.compress.compressors.CompressorStreamProvider;
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
+import org.apache.commons.compress.compressors.deflate.DeflateCompressorInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+import org.apache.commons.compress.compressors.lzma.LZMAUtils;
+import org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream;
+import org.apache.commons.compress.compressors.snappy.FramedSnappyCompressorInputStream;
+import org.apache.commons.compress.compressors.snappy.SnappyCompressorInputStream;
+import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
+import org.apache.commons.compress.compressors.xz.XZUtils;
+import org.apache.commons.compress.compressors.z.ZCompressorInputStream;
+import org.apache.commons.compress.utils.IOUtils;
+import org.apache.commons.compress.utils.Lists;
+import org.apache.commons.compress.utils.ServiceLoaderIterator;
+import org.apache.commons.compress.utils.Sets;
+import org.apache.tika.exception.TikaMemoryLimitException;
+import org.tukaani.xz.LZMAInputStream;
+import org.tukaani.xz.MemoryLimitException;
+
+/**
+ * This is a temporary copy/paste hack from commons-compress for Tika 1.15
+ * that 1) allows detection without initializing a stream and
+ * 2) prevents easily avoidable OOMs in two file formats.
+ *
+ * Once commons-compress 1.14 is released, we will delete this class
+ * and go back to commons-compress's CompressorStreamFactory.
+ */
+@Deprecated
+class TikaCompressorStreamFactory implements CompressorStreamProvider {
+
+
+
+        private static final TikaCompressorStreamFactory SINGLETON = new TikaCompressorStreamFactory(true, -1);
+
+        /**
+         * Constant (value {@value}) used to identify the BZIP2 compression
+         * algorithm.
+         *
+         * @since 1.1
+         */
+        public static final String BZIP2 = "bzip2";
+
+        /**
+         * Constant (value {@value}) used to identify the GZIP compression
+         * algorithm.
+         *
+         * @since 1.1
+         */
+        public static final String GZIP = "gz";
+
+        /**
+         * Constant (value {@value}) used to identify the PACK200 compression
+         * algorithm.
+         *
+         * @since 1.3
+         */
+        public static final String PACK200 = "pack200";
+
+        /**
+         * Constant (value {@value}) used to identify the XZ compression method.
+         *
+         * @since 1.4
+         */
+        public static final String XZ = "xz";
+
+        /**
+         * Constant (value {@value}) used to identify the LZMA compression method.
+         *
+         * @since 1.6
+         */
+        public static final String LZMA = "lzma";
+
+        /**
+         * Constant (value {@value}) used to identify the "framed" Snappy
+         * compression method.
+         *
+         * @since 1.7
+         */
+        public static final String SNAPPY_FRAMED = "snappy-framed";
+
+        /**
+         * Constant (value {@value}) used to identify the "raw" Snappy compression
+         * method. Not supported as an output stream type.
+         *
+         * @since 1.7
+         */
+        public static final String SNAPPY_RAW = "snappy-raw";
+
+        /**
+         * Constant (value {@value}) used to identify the traditional Unix compress
+         * method. Not supported as an output stream type.
+         *
+         * @since 1.7
+         */
+        public static final String Z = "z";
+
+        /**
+         * Constant (value {@value}) used to identify the Deflate compress method.
+         *
+         * @since 1.9
+         */
+        public static final String DEFLATE = "deflate";
+
+
+        private final int memoryLimitInKb;
+
+    private SortedMap<String, CompressorStreamProvider> compressorInputStreamProviders;
+
+
+    public static String getBzip2() {
+            return BZIP2;
+        }
+
+        public static String getDeflate() {
+            return DEFLATE;
+        }
+
+        public static String getGzip() {
+            return GZIP;
+        }
+
+        public static String getLzma() {
+            return LZMA;
+        }
+
+        public static String getPack200() {
+            return PACK200;
+        }
+
+        public static TikaCompressorStreamFactory getSingleton() {
+            return SINGLETON;
+        }
+
+        public static String getSnappyFramed() {
+            return SNAPPY_FRAMED;
+        }
+
+        public static String getSnappyRaw() {
+            return SNAPPY_RAW;
+        }
+
+        public static String getXz() {
+            return XZ;
+        }
+
+        public static String getZ() {
+            return Z;
+        }
+
+        static void putAll(final Set<String> names, final CompressorStreamProvider provider,
+                           final TreeMap<String, CompressorStreamProvider> map) {
+            for (final String name : names) {
+                map.put(toKey(name), provider);
+            }
+        }
+
+        private static String toKey(final String name) {
+            return name.toUpperCase(Locale.ROOT);
+        }
+
+        /**
+         * If true, decompress until the end of the input. If false, stop after the
+         * first stream and leave the input position to point to the next byte after
+         * the stream
+         */
+        private final Boolean decompressUntilEOF;
+
+        /**
+         * If true, decompress until the end of the input. If false, stop after the
+         * first stream and leave the input position to point to the next byte after
+         * the stream
+         */
+        private volatile boolean decompressConcatenated = false;
+
+        /**
+         * Creates an instance with the provided decompressUntilEOF and memory-limit options.
+         *
+         * @param decompressUntilEOF
+         *            if true, decompress until the end of the input; if false, stop
+         *            after the first stream and leave the input position to point
+         *            to the next byte after the stream. This setting applies to the
+         *            gzip, bzip2 and xz formats only.
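+         * @param memoryLimitInKb
+         *            maximum allocation, in kilobytes, permitted while decompressing;
+         *            -1 imposes no limit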
+         * @since 1.10
+         */
+        public TikaCompressorStreamFactory(final boolean decompressUntilEOF, final int memoryLimitInKb) {
+            this.decompressUntilEOF = Boolean.valueOf(decompressUntilEOF);
+            // Also copy to existing variable so can continue to use that as the
+            // current value
+            this.decompressConcatenated = decompressUntilEOF;
+            this.memoryLimitInKb = memoryLimitInKb;
+        }
+
+        /**
+         * Try to detect the type of compressor stream.
+         *
+         * @param in input stream
+         * @return type of compressor stream detected
+         * @throws CompressorException if no compressor stream type was detected
+         *                             or if something else went wrong
+         * @throws IllegalArgumentException if stream is null or does not support mark
+         *
+         * @since 1.14
+         */
+        public static String detect(final InputStream in) throws CompressorException {
+            if (in == null) {
+                throw new IllegalArgumentException("Stream must not be null.");
+            }
+
+            if (!in.markSupported()) {
+                throw new IllegalArgumentException("Mark is not supported.");
+            }
+
+            final byte[] signature = new byte[12];
+            in.mark(signature.length);
+            int signatureLength = -1;
+            try {
+                signatureLength = IOUtils.readFully(in, signature);
+                in.reset();
+            } catch (IOException e) {
+                throw new CompressorException("IOException while reading signature.", e);
+            }
+
+            if (BZip2CompressorInputStream.matches(signature, signatureLength)) {
+                return BZIP2;
+            }
+
+            if (GzipCompressorInputStream.matches(signature, signatureLength)) {
+                return GZIP;
+            }
+
+            if (Pack200CompressorInputStream.matches(signature, signatureLength)) {
+                return PACK200;
+            }
+
+            if (FramedSnappyCompressorInputStream.matches(signature, signatureLength)) {
+                return SNAPPY_FRAMED;
+            }
+
+            if (ZCompressorInputStream.matches(signature, signatureLength)) {
+                return Z;
+            }
+
+            if (DeflateCompressorInputStream.matches(signature, signatureLength)) {
+                return DEFLATE;
+            }
+
+            if (XZUtils.matches(signature, signatureLength)) {
+                return XZ;
+            }
+
+            if (LZMAUtils.matches(signature, signatureLength)) {
+                return LZMA;
+            }
+
+/*            if (FramedLZ4CompressorInputStream.matches(signature, signatureLength)) {
+                return LZ4_FRAMED;
+            }*/
+
+            throw new CompressorException("No Compressor found for the stream signature.");
+        }
+
+    public SortedMap<String, CompressorStreamProvider> getCompressorInputStreamProviders() {
+        if (compressorInputStreamProviders == null) {
+            compressorInputStreamProviders = Collections
+                    .unmodifiableSortedMap(findAvailableCompressorInputStreamProviders());
+        }
+        return compressorInputStreamProviders;
+    }
+
+    public static SortedMap<String, CompressorStreamProvider> findAvailableCompressorInputStreamProviders() {
+        return AccessController.doPrivileged(new PrivilegedAction<SortedMap<String, CompressorStreamProvider>>() {
+            @Override
+            public SortedMap<String, CompressorStreamProvider> run() {
+                final TreeMap<String, CompressorStreamProvider> map = new TreeMap<>();
+                putAll(SINGLETON.getInputStreamCompressorNames(), SINGLETON, map);
+                for (final CompressorStreamProvider provider : findCompressorStreamProviders()) {
+                    putAll(provider.getInputStreamCompressorNames(), provider, map);
+                }
+                return map;
+            }
+        });
+    }
+
+    private static ArrayList<CompressorStreamProvider> findCompressorStreamProviders() {
+        return Lists.newArrayList(serviceLoaderIterator());
+    }
+
+    private static Iterator<CompressorStreamProvider> serviceLoaderIterator() {
+        return new ServiceLoaderIterator<>(CompressorStreamProvider.class);
+    }
+
+        /**
+         * Creates a compressor input stream from an input stream, autodetecting the
+         * compressor type from the first few bytes of the stream. The InputStream
+         * must support marks, like BufferedInputStream.
+         *
+         * @param in
+         *            the input stream
+         * @return the compressor input stream
+         * @throws CompressorException
+         *             if the compressor name is not known
+         * @throws IllegalArgumentException
+         *             if the stream is null or does not support mark
+         * @since 1.1
+         */
+        public CompressorInputStream createCompressorInputStream(final InputStream in) throws CompressorException,
+                TikaMemoryLimitException {
+            return createCompressorInputStream(detect(in), in);
+        }
+
+        /**
+         * Creates a compressor input stream from a compressor name and an input
+         * stream.
+         *
+         * @param name
+         *            of the compressor, i.e. {@value #GZIP}, {@value #BZIP2},
+         *            {@value #XZ}, {@value #LZMA}, {@value #PACK200},
+         *            {@value #SNAPPY_RAW}, {@value #SNAPPY_FRAMED}, {@value #Z},
+         *            or {@value #DEFLATE}
+         * @param in
+         *            the input stream
+         * @return compressor input stream
+         * @throws CompressorException
+         *             if the compressor name is not known or not available
+         * @throws IllegalArgumentException
+         *             if the name or input stream is null
+         */
+        public CompressorInputStream createCompressorInputStream(final String name, final InputStream in)
+                throws CompressorException, TikaMemoryLimitException {
+            return createCompressorInputStream(name, in, decompressConcatenated);
+        }
+
+        public CompressorInputStream createCompressorInputStream(final String name, final InputStream in,
+                                                                 final boolean actualDecompressConcatenated) throws CompressorException {
+            if (name == null || in == null) {
+                throw new IllegalArgumentException("Compressor name and stream must not be null.");
+            }
+
+            try {
+
+                if (GZIP.equalsIgnoreCase(name)) {
+                    return new GzipCompressorInputStream(in, actualDecompressConcatenated);
+                }
+
+                if (BZIP2.equalsIgnoreCase(name)) {
+                    return new BZip2CompressorInputStream(in, actualDecompressConcatenated);
+                }
+
+                if (XZ.equalsIgnoreCase(name)) {
+                    if (!XZUtils.isXZCompressionAvailable()) {
+                        throw new CompressorException("XZ compression is not available.");
+                    }
+                    return new XZCompressorInputStream(in, actualDecompressConcatenated);
+                }
+
+                if (LZMA.equalsIgnoreCase(name)) {
+                    if (!LZMAUtils.isLZMACompressionAvailable()) {
+                        throw new CompressorException("LZMA compression is not available");
+                    }
+                    try {
+                        return new SaferLZMACompressorInputStream(in);
+                    } catch (MemoryLimitException e) {
+                        throw new CompressorException("MemoryLimitException: " + e.getMessage(), e);
+                    }
+                }
+
+                if (PACK200.equalsIgnoreCase(name)) {
+                    return new Pack200CompressorInputStream(in);
+                }
+
+                if (SNAPPY_RAW.equalsIgnoreCase(name)) {
+                    return new SnappyCompressorInputStream(in);
+                }
+
+                if (SNAPPY_FRAMED.equalsIgnoreCase(name)) {
+                    return new FramedSnappyCompressorInputStream(in);
+                }
+
+                if (Z.equalsIgnoreCase(name)) {
+                    try {
+                        return new SaferZCompressorInputStream(in);
+                    } catch (TikaRuntimeMemoryLimitException e) {
+                        throw new CompressorException("MemoryLimitException: " + e.getMessage(), e);
+                    }
+                }
+
+                if (DEFLATE.equalsIgnoreCase(name)) {
+                    return new DeflateCompressorInputStream(in);
+                }
+/*
+not currently supported
+                if (LZ4_BLOCK.equalsIgnoreCase(name)) {
+                    return new BlockLZ4CompressorInputStream(in);
+                }
+
+                if (LZ4_FRAMED.equalsIgnoreCase(name)) {
+                    return new FramedLZ4CompressorInputStream(in, actualDecompressConcatenated);
+                }
+ */
+
+            } catch (final IOException e) {
+                throw new CompressorException("Could not create CompressorInputStream.", e);
+            }
+
+            final CompressorStreamProvider compressorStreamProvider = getCompressorInputStreamProviders().get(toKey(name));
+            if (compressorStreamProvider != null) {
+                return compressorStreamProvider.createCompressorInputStream(name, in, actualDecompressConcatenated);
+            }
+
+            throw new CompressorException("Compressor: " + name + " not found.");
+        }
+
+    @Override
+    public CompressorOutputStream createCompressorOutputStream(String s, OutputStream outputStream) throws CompressorException {
+        throw new UnsupportedOperationException();
+    }
+
+
+    // For Unit tests
+        boolean getDecompressConcatenated() {
+            return decompressConcatenated;
+        }
+
+    public Set<String> getInputStreamCompressorNames() {
+        return Sets.newHashSet(GZIP, BZIP2, XZ, LZMA, PACK200, DEFLATE, SNAPPY_RAW, SNAPPY_FRAMED, Z);
+    }
+
+    @Override
+    public Set<String> getOutputStreamCompressorNames() {
+        throw new UnsupportedOperationException();
+    }
+
+    public Boolean getDecompressUntilEOF() {
+            return decompressUntilEOF;
+        }
+
+    private class SaferZCompressorInputStream extends ZCompressorInputStream {
+
+        public SaferZCompressorInputStream(InputStream inputStream) throws IOException {
+            super(inputStream);
+        }
+
+        @Override
+        protected void initializeTables(int maxCodeSize) {
+            int maxTableSize = 1 << maxCodeSize;
+            if (memoryLimitInKb > -1 && maxTableSize > (memoryLimitInKb*1024)) {
+                throw new TikaRuntimeMemoryLimitException("Calculated table size (" + maxTableSize + " bytes) is greater " +
+                        "than the maximum allowable (" + (memoryLimitInKb*1024) + " bytes).\n" +
+                        "If the file is not corrupt, consider increasing " +
+                        "the memoryLimitInKb parameter in the CompressorParser");
+            }
+            super.initializeTables(maxCodeSize);
+        }
+    }
+
+    private static class TikaRuntimeMemoryLimitException extends RuntimeException {
+        public TikaRuntimeMemoryLimitException(String msg) {
+            super(msg);
+        }
+    }
+
+    private class SaferLZMACompressorInputStream extends CompressorInputStream {
+        private final InputStream in;
+
+        /**
+         * Creates a new input stream that decompresses LZMA-compressed data
+         * from the specified input stream.
+         *
+         * @param       inputStream where to read the compressed data
+         *
+         * @throws      IOException if the input is not in the .lzma format,
+         *                          the input is corrupt or truncated, the .lzma
+         *                          headers specify sizes that are not supported
+         *                          by this implementation, or the underlying
+         *                          <code>inputStream</code> throws an exception
+         */
+        public SaferLZMACompressorInputStream(final InputStream inputStream) throws IOException {
+            in = new LZMAInputStream(inputStream, memoryLimitInKb);
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public int read() throws IOException {
+            final int ret = in.read();
+            count(ret == -1 ? 0 : 1);
+            return ret;
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public int read(final byte[] buf, final int off, final int len) throws IOException {
+            final int ret = in.read(buf, off, len);
+            count(ret);
+            return ret;
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public long skip(final long n) throws IOException {
+            return in.skip(n);
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public int available() throws IOException {
+            return in.available();
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public void close() throws IOException {
+            in.close();
+        }
+    }
+}
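
The static detect(InputStream) above identifies the compressor from at most 12
signature bytes without constructing a decompressor, which is what keeps detection
itself from allocating huge tables. Both detect() and CompressorParser.getMediaType(String)
are package-private, so the sketch below only illustrates the call pattern used by the
ZipContainerDetector change that follows; the prefix byte array is an assumed input:

    MediaType type;
    try {
        String name = TikaCompressorStreamFactory.detect(new ByteArrayInputStream(prefix));
        type = CompressorParser.getMediaType(name);   // e.g. application/x-lzma for "lzma"
    } catch (CompressorException e) {
        type = MediaType.OCTET_STREAM;                // no recognizable compressor signature
    }
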
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
index d43a17c..2434d1a 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
@@ -16,6 +16,8 @@
  */
 package org.apache.tika.parser.pkg;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
+
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
@@ -33,8 +35,6 @@ import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
 import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
 import org.apache.commons.compress.archivers.zip.ZipFile;
 import org.apache.commons.compress.compressors.CompressorException;
-import org.apache.commons.compress.compressors.CompressorInputStream;
-import org.apache.commons.compress.compressors.CompressorStreamFactory;
 import org.apache.commons.io.IOUtils;
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
@@ -52,8 +52,6 @@ import org.apache.tika.parser.iwork.IWorkPackageParser;
 import org.apache.tika.parser.iwork.IWorkPackageParser.IWORKDocumentType;
 import org.apache.tika.parser.iwork.iwana.IWork13PackageParser;
 
-import static java.nio.charset.StandardCharsets.UTF_8;
-
 /**
  * A detector that works on Zip documents and other archive and compression
  * formats to figure out exactly what the file is.
@@ -105,14 +103,8 @@ public class ZipContainerDetector implements Detector {
 
     private static MediaType detectCompressorFormat(byte[] prefix, int length) {
         try {
-            CompressorStreamFactory factory = new CompressorStreamFactory();
-            CompressorInputStream cis = factory.createCompressorInputStream(
-                    new ByteArrayInputStream(prefix, 0, length));
-            try {
-                return CompressorParser.getMediaType(cis);
-            } finally {
-                IOUtils.closeQuietly(cis);
-            }
+            String type = TikaCompressorStreamFactory.detect(new ByteArrayInputStream(prefix, 0, length));
+            return CompressorParser.getMediaType(type);
         } catch (CompressorException e) {
             return MediaType.OCTET_STREAM;
         }
diff --git a/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java b/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
index 89ff371..9cff7c4 100644
--- a/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
+++ b/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
@@ -361,6 +361,16 @@ public class TestContainerAwareDetector {
                                 "application/java-archive", "application/java-archive");
     }
 
+    @Test
+    public void testLZMAOOM() throws Exception {
+        assertTypeByData("testLZMA_oom", "application/x-lzma");
+    }
+
+    @Test
+    public void testCompressOOM() throws Exception {
+        assertTypeByData("testZ_oom.Z", "application/x-compress");
+    }
+
     private TikaInputStream getTruncatedFile(String name, int n)
             throws IOException {
         try (InputStream input = TestContainerAwareDetector.class.getResourceAsStream(
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pkg/CompressParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pkg/CompressParserTest.java
index b80de07..7a006a9 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/pkg/CompressParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/pkg/CompressParserTest.java
@@ -18,9 +18,11 @@ package org.apache.tika.parser.pkg;
 
 import static java.nio.charset.StandardCharsets.US_ASCII;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
 
 import java.io.InputStream;
 
+import org.apache.tika.exception.TikaMemoryLimitException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.Parser;
@@ -98,4 +100,24 @@ public class CompressParserTest extends AbstractPkgTest {
        // Tar file starts with the directory name
        assertEquals("test-documents/", new String(tracker.lastSeenStart, 0, 15, US_ASCII));
     }
+
+    @Test
+    public void testLZMAOOM() throws Exception {
+        try {
+            XMLResult r = getXML("testLZMA_oom");
+            fail("should have thrown TikaMemoryLimitException");
+        } catch (TikaMemoryLimitException e) {
+        }
+    }
+
+    @Test
+    public void testCompressOOM() throws Exception {
+        try {
+            XMLResult r = getXML("testZ_oom.Z");
+            fail("should have thrown TikaMemoryLimitException");
+        } catch (TikaMemoryLimitException e) {
+        }
+    }
+
+
 }
\ No newline at end of file
diff --git a/tika-parsers/src/test/resources/test-documents/testLZMA_oom b/tika-parsers/src/test/resources/test-documents/testLZMA_oom
new file mode 100644
index 0000000..be257f2
Binary files /dev/null and b/tika-parsers/src/test/resources/test-documents/testLZMA_oom differ
diff --git a/tika-parsers/src/test/resources/test-documents/testZ_oom.Z b/tika-parsers/src/test/resources/test-documents/testZ_oom.Z
new file mode 100644
index 0000000..36d7f52
--- /dev/null
+++ b/tika-parsers/src/test/resources/test-documents/testZ_oom.Z
@@ -0,0 +1 @@
+\ufffdB
\ No newline at end of file

-- 
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.