Posted to commits@tika.apache.org by ta...@apache.org on 2017/04/20 01:20:13 UTC

[tika] branch master updated (0f1034a -> 37d0f05)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git.

      from  0f1034a   update javadoc for Latin1StringsParser
       new  75eea6e   TIKA-2330 -- prevent preventable ooms in both detecting and parsing corrupt files or files that are misidentified as compressed streams.
       new  9e89b44   TIKA-2331 -- Upgrade RTFParser to use new TikaMemoryLimitException
       new  37d0f05   Merge remote-tracking branch 'origin/master'

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "adds" were already present in the repository and have only
been added to this reference.


Summary of changes:
 ...xception.java => TikaMemoryLimitException.java} |  16 +-
 tika-parent/pom.xml                                |   2 +-
 tika-parsers/pom.xml                               |   2 +-
 .../apache/tika/parser/pkg/CompressorParser.java   |  48 +-
 .../parser/pkg/TikaCompressorStreamFactory.java    | 551 +++++++++++++++++++++
 .../tika/parser/pkg/ZipContainerDetector.java      |  16 +-
 .../apache/tika/parser/rtf/RTFEmbObjHandler.java   |  14 +-
 .../java/org/apache/tika/parser/rtf/RTFParser.java |  30 +-
 .../org/apache/tika/parser/rtf/TextExtractor.java  |   6 +-
 .../tika/detect/TestContainerAwareDetector.java    |  10 +
 .../apache/tika/parser/pkg/CompressParserTest.java |  22 +
 .../org/apache/tika/parser/rtf/RTFParserTest.java  |  15 +
 .../tika/parser/{pdf => rtf}/tika-config.xml       |   4 +-
 .../src/test/resources/test-documents/testLZMA_oom | Bin 0 -> 19 bytes
 .../src/test/resources/test-documents/testZ_oom.Z  |   1 +
 15 files changed, 699 insertions(+), 38 deletions(-)
 copy tika-core/src/main/java/org/apache/tika/exception/{TikaException.java => TikaMemoryLimitException.java} (72%)
 create mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/pkg/TikaCompressorStreamFactory.java
 copy tika-parsers/src/test/resources/org/apache/tika/parser/{pdf => rtf}/tika-config.xml (87%)
 create mode 100644 tika-parsers/src/test/resources/test-documents/testLZMA_oom
 create mode 100644 tika-parsers/src/test/resources/test-documents/testZ_oom.Z
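
Both TIKA-2330 and TIKA-2331 expose their new limits through a
memoryLimitInKb parameter. A hypothetical tika-config.xml sketch for
lowering the CompressorParser limit, modeled on the rtf/tika-config.xml
added in this change (the 10000 KB value is illustrative, not a
recommendation):

    <?xml version="1.0" encoding="UTF-8"?>
    <properties>
        <parsers>
            <parser class="org.apache.tika.parser.pkg.CompressorParser">
                <params>
                    <!-- assumed to bind to the new @Field setMemoryLimitInKb;
                         illustrative value, ~10MB -->
                    <param name="memoryLimitInKb" type="int">10000</param>
                </params>
            </parser>
        </parsers>
    </properties>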

-- 
To stop receiving notification emails like this one, please contact
['"commits@tika.apache.org" <co...@tika.apache.org>'].

[tika] 01/03: TIKA-2330 -- prevent preventable ooms in both detecting and parsing corrupt files or files that are misidentified as compressed streams.

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 75eea6e5502f4f5a2edf5ab459b4c369d33f66e5
Author: tballison <ta...@mitre.org>
AuthorDate: Wed Apr 19 21:18:56 2017 -0400

    TIKA-2330 -- prevent preventable ooms in both detecting and parsing
    corrupt files or files that are misidentified as compressed streams.
---
 .../tika/exception/TikaMemoryLimitException.java   |  30 ++
 tika-parent/pom.xml                                |   2 +-
 tika-parsers/pom.xml                               |   2 +-
 .../apache/tika/parser/pkg/CompressorParser.java   |  48 +-
 .../parser/pkg/TikaCompressorStreamFactory.java    | 551 +++++++++++++++++++++
 .../tika/parser/pkg/ZipContainerDetector.java      |  16 +-
 .../tika/detect/TestContainerAwareDetector.java    |  10 +
 .../apache/tika/parser/pkg/CompressParserTest.java |  22 +
 .../src/test/resources/test-documents/testLZMA_oom | Bin 0 -> 19 bytes
 .../src/test/resources/test-documents/testZ_oom.Z  |   1 +
 10 files changed, 663 insertions(+), 19 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/exception/TikaMemoryLimitException.java b/tika-core/src/main/java/org/apache/tika/exception/TikaMemoryLimitException.java
new file mode 100644
index 0000000..baf5818
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/exception/TikaMemoryLimitException.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.exception;
+
+/**
+ * Thrown when a parser is asked to allocate more memory than a configured
+ * threshold allows.  For example, a corrupt file might ask the
+ * ZCompressorInputStream to create an array that is gigabytes in length.
+ */
+public class TikaMemoryLimitException extends TikaException {
+
+    public TikaMemoryLimitException(String msg) {
+        super(msg);
+    }
+}
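
Because TikaMemoryLimitException extends TikaException, existing callers
continue to compile and behave as before; code that wants to treat limit
hits specially can catch the subclass first. A minimal sketch (the file
name is illustrative; testZ_oom.Z is one of the test documents added in
this commit):

    import java.io.InputStream;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    import org.apache.tika.exception.TikaMemoryLimitException;
    import org.apache.tika.metadata.Metadata;
    import org.apache.tika.parser.AutoDetectParser;
    import org.apache.tika.parser.ParseContext;
    import org.apache.tika.sax.BodyContentHandler;

    public class MemoryLimitDemo {
        public static void main(String[] args) throws Exception {
            AutoDetectParser parser = new AutoDetectParser();
            try (InputStream is = Files.newInputStream(Paths.get("testZ_oom.Z"))) {
                parser.parse(is, new BodyContentHandler(), new Metadata(), new ParseContext());
            } catch (TikaMemoryLimitException e) {
                //the stream demanded more memory than the configured threshold
                System.err.println("memory limit exceeded: " + e.getMessage());
            }
        }
    }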
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 9220c3d..5e4b0dc 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -306,7 +306,7 @@
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <project.reporting.outputEncoding>${project.build.sourceEncoding}</project.reporting.outputEncoding>
     <!-- NOTE: sync tukaani version with commons-compress in tika-parsers -->
-    <commons.compress.version>1.12</commons.compress.version>
+    <commons.compress.version>1.13</commons.compress.version>
     <commons.io.version>2.5</commons.io.version>
     <slf4j.version>1.7.24</slf4j.version>
   </properties>
diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
index 58ac745..7ff88c2 100644
--- a/tika-parsers/pom.xml
+++ b/tika-parsers/pom.xml
@@ -39,7 +39,7 @@
     <!-- NOTE: sync codec version with POI -->
     <codec.version>1.10</codec.version>
     <!-- NOTE: sync tukaani version with commons-compress in tika-parent-->
-    <tukaani.version>1.5</tukaani.version>
+    <tukaani.version>1.6</tukaani.version>
     <mime4j.version>0.7.2</mime4j.version>
     <vorbis.version>0.8</vorbis.version>
     <pdfbox.version>2.0.5</pdfbox.version>
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java
index f82db54..ff589e0 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java
@@ -25,18 +25,20 @@ import java.util.Set;
 
 import org.apache.commons.compress.compressors.CompressorException;
 import org.apache.commons.compress.compressors.CompressorInputStream;
-import org.apache.commons.compress.compressors.CompressorStreamFactory;
 import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
 import org.apache.commons.compress.compressors.deflate.DeflateCompressorInputStream;
 import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
 import org.apache.commons.compress.compressors.gzip.GzipUtils;
+import org.apache.commons.compress.compressors.lzma.LZMACompressorInputStream;
 import org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream;
 import org.apache.commons.compress.compressors.snappy.FramedSnappyCompressorInputStream;
 import org.apache.commons.compress.compressors.snappy.SnappyCompressorInputStream;
 import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
 import org.apache.commons.compress.compressors.z.ZCompressorInputStream;
 import org.apache.commons.io.input.CloseShieldInputStream;
+import org.apache.tika.config.Field;
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.TikaMemoryLimitException;
 import org.apache.tika.extractor.EmbeddedDocumentExtractor;
 import org.apache.tika.extractor.EmbeddedDocumentUtil;
 import org.apache.tika.metadata.Metadata;
@@ -64,13 +66,16 @@ public class CompressorParser extends AbstractParser {
     private static final MediaType PACK = MediaType.application("x-java-pack200");
     private static final MediaType SNAPPY = MediaType.application("x-snappy-framed");
     private static final MediaType ZLIB = MediaType.application("zlib");
+    private static final MediaType LZMA = MediaType.application("x-lzma");
 
     private static final Set<MediaType> SUPPORTED_TYPES =
-            MediaType.set(BZIP, BZIP2, GZIP, GZIP_ALT, COMPRESS, XZ, PACK, ZLIB);
+            MediaType.set(BZIP, BZIP2, GZIP, GZIP_ALT, COMPRESS, XZ, PACK, ZLIB, LZMA);
+
+    private int memoryLimitInKb = 100000;//100MB
 
     static MediaType getMediaType(CompressorInputStream stream) {
         // TODO Add support for the remaining CompressorInputStream formats:
-        //   LZMACompressorInputStream
+        //   LZ4
         //   LZWInputStream -> UnshrinkingInputStream
         if (stream instanceof BZip2CompressorInputStream) {
             return BZIP2;
@@ -88,6 +93,31 @@ public class CompressorParser extends AbstractParser {
                    stream instanceof SnappyCompressorInputStream) {
             // TODO Add unit tests for this format
             return SNAPPY;
+        } else if (stream instanceof LZMACompressorInputStream) {
+            return LZMA;
+        } else {
+            return MediaType.OCTET_STREAM;
+        }
+    }
+
+    static MediaType getMediaType(String name) {
+        if (TikaCompressorStreamFactory.BZIP2.equals(name)) {
+            return BZIP2;
+        } else if (TikaCompressorStreamFactory.GZIP.equals(name)) {
+            return GZIP;
+        } else if (TikaCompressorStreamFactory.XZ.equals(name)) {
+            return XZ;
+        } else if (TikaCompressorStreamFactory.DEFLATE.equals(name)) {
+            return ZLIB;
+        } else if (TikaCompressorStreamFactory.Z.equals(name)) {
+            return COMPRESS;
+        } else if (TikaCompressorStreamFactory.PACK200.equals(name)) {
+            return PACK;
+        } else if (TikaCompressorStreamFactory.SNAPPY_FRAMED.equals(name) ||
+                TikaCompressorStreamFactory.SNAPPY_RAW.equals(name)) {
+            return SNAPPY;
+        } else if (TikaCompressorStreamFactory.LZMA.equals(name)) {
+            return LZMA;
         } else {
             return MediaType.OCTET_STREAM;
         }
@@ -119,10 +149,13 @@ public class CompressorParser extends AbstractParser {
                          return false;
                      }
                  });
-            CompressorStreamFactory factory = 
-                    new CompressorStreamFactory(options.decompressConcatenated(metadata));
+            TikaCompressorStreamFactory factory =
+                    new TikaCompressorStreamFactory(options.decompressConcatenated(metadata), memoryLimitInKb);
             cis = factory.createCompressorInputStream(stream);
         } catch (CompressorException e) {
+            if (e.getMessage() != null && e.getMessage().startsWith("MemoryLimitException:")) {
+                throw new TikaMemoryLimitException(e.getMessage());
+            }
             throw new TikaException("Unable to uncompress document stream", e);
         }
 
@@ -171,4 +204,9 @@ public class CompressorParser extends AbstractParser {
         xhtml.endDocument();
     }
 
+    @Field
+    public void setMemoryLimitInKb(int memoryLimitInKb) {
+        this.memoryLimitInKb = memoryLimitInKb;
+    }
+
 }
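
The new @Field setter also permits direct, programmatic configuration when
the parser is used standalone; a short sketch (the value is illustrative):

    CompressorParser parser = new CompressorParser();
    parser.setMemoryLimitInKb(10000);   //~10MB cap; illustrative value
    //then parse as usual:
    //parser.parse(stream, handler, metadata, parseContext);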
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/TikaCompressorStreamFactory.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/TikaCompressorStreamFactory.java
new file mode 100644
index 0000000..a1a8405
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/TikaCompressorStreamFactory.java
@@ -0,0 +1,551 @@
+package org.apache.tika.parser.pkg;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.Locale;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.apache.commons.compress.compressors.CompressorException;
+import org.apache.commons.compress.compressors.CompressorInputStream;
+import org.apache.commons.compress.compressors.CompressorOutputStream;
+import org.apache.commons.compress.compressors.CompressorStreamProvider;
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
+import org.apache.commons.compress.compressors.deflate.DeflateCompressorInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+import org.apache.commons.compress.compressors.lzma.LZMAUtils;
+import org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream;
+import org.apache.commons.compress.compressors.snappy.FramedSnappyCompressorInputStream;
+import org.apache.commons.compress.compressors.snappy.SnappyCompressorInputStream;
+import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
+import org.apache.commons.compress.compressors.xz.XZUtils;
+import org.apache.commons.compress.compressors.z.ZCompressorInputStream;
+import org.apache.commons.compress.utils.IOUtils;
+import org.apache.commons.compress.utils.Lists;
+import org.apache.commons.compress.utils.ServiceLoaderIterator;
+import org.apache.commons.compress.utils.Sets;
+import org.apache.tika.exception.TikaMemoryLimitException;
+import org.tukaani.xz.LZMAInputStream;
+import org.tukaani.xz.MemoryLimitException;
+
+/**
+ * This is a temporary copy/paste hack from commons-compress for Tika 1.15
+ * that 1) allows detection without initializing a stream and
+ * 2) prevents easily preventable OOM on two file formats.
+ *
+ * Once commons-compress 1.14 is released, we will delete this class
+ * and go back to commons-compress's CompressorStreamFactory.
+ */
+@Deprecated
+class TikaCompressorStreamFactory implements CompressorStreamProvider {
+
+
+
+        private static final TikaCompressorStreamFactory SINGLETON = new TikaCompressorStreamFactory(true, -1);
+
+        /**
+         * Constant (value {@value}) used to identify the BZIP2 compression
+         * algorithm.
+         *
+         * @since 1.1
+         */
+        public static final String BZIP2 = "bzip2";
+
+        /**
+         * Constant (value {@value}) used to identify the GZIP compression
+         * algorithm.
+         *
+         * @since 1.1
+         */
+        public static final String GZIP = "gz";
+
+        /**
+         * Constant (value {@value}) used to identify the PACK200 compression
+         * algorithm.
+         *
+         * @since 1.3
+         */
+        public static final String PACK200 = "pack200";
+
+        /**
+         * Constant (value {@value}) used to identify the XZ compression method.
+         *
+         * @since 1.4
+         */
+        public static final String XZ = "xz";
+
+        /**
+         * Constant (value {@value}) used to identify the LZMA compression method.
+         *
+         * @since 1.6
+         */
+        public static final String LZMA = "lzma";
+
+        /**
+         * Constant (value {@value}) used to identify the "framed" Snappy
+         * compression method.
+         *
+         * @since 1.7
+         */
+        public static final String SNAPPY_FRAMED = "snappy-framed";
+
+        /**
+         * Constant (value {@value}) used to identify the "raw" Snappy compression
+         * method. Not supported as an output stream type.
+         *
+         * @since 1.7
+         */
+        public static final String SNAPPY_RAW = "snappy-raw";
+
+        /**
+         * Constant (value {@value}) used to identify the traditional Unix compress
+         * method. Not supported as an output stream type.
+         *
+         * @since 1.7
+         */
+        public static final String Z = "z";
+
+        /**
+         * Constant (value {@value}) used to identify the Deflate compress method.
+         *
+         * @since 1.9
+         */
+        public static final String DEFLATE = "deflate";
+
+
+        private final int memoryLimitInKb;
+
+    private SortedMap<String, CompressorStreamProvider> compressorInputStreamProviders;
+
+
+    public static String getBzip2() {
+            return BZIP2;
+        }
+
+        public static String getDeflate() {
+            return DEFLATE;
+        }
+
+        public static String getGzip() {
+            return GZIP;
+        }
+
+        public static String getLzma() {
+            return LZMA;
+        }
+
+        public static String getPack200() {
+            return PACK200;
+        }
+
+        public static TikaCompressorStreamFactory getSingleton() {
+            return SINGLETON;
+        }
+
+        public static String getSnappyFramed() {
+            return SNAPPY_FRAMED;
+        }
+
+        public static String getSnappyRaw() {
+            return SNAPPY_RAW;
+        }
+
+        public static String getXz() {
+            return XZ;
+        }
+
+        public static String getZ() {
+            return Z;
+        }
+
+        static void putAll(final Set<String> names, final CompressorStreamProvider provider,
+                           final TreeMap<String, CompressorStreamProvider> map) {
+            for (final String name : names) {
+                map.put(toKey(name), provider);
+            }
+        }
+
+        private static String toKey(final String name) {
+            return name.toUpperCase(Locale.ROOT);
+        }
+
+        /**
+         * If true, decompress until the end of the input. If false, stop after the
+         * first stream and leave the input position to point to the next byte after
+         * the stream
+         */
+        private final Boolean decompressUntilEOF;
+
+        /**
+         * If true, decompress until the end of the input. If false, stop after the
+         * first stream and leave the input position to point to the next byte after
+         * the stream
+         */
+        private volatile boolean decompressConcatenated = false;
+
+        /**
+         * Create an instance with the provided decompress Concatenated option.
+         *
+         * @param decompressUntilEOF
+         *            if true, decompress until the end of the input; if false, stop
+         *            after the first stream and leave the input position to point
+         *            to the next byte after the stream. This setting applies to the
+         *            gzip, bzip2 and xz formats only.
+         * @since 1.10
+         */
+        public TikaCompressorStreamFactory(final boolean decompressUntilEOF, final int memoryLimitInKb) {
+            this.decompressUntilEOF = Boolean.valueOf(decompressUntilEOF);
+            // Also copy to existing variable so can continue to use that as the
+            // current value
+            this.decompressConcatenated = decompressUntilEOF;
+            this.memoryLimitInKb = memoryLimitInKb;
+        }
+
+        /**
+         * Try to detect the type of compressor stream.
+         *
+         * @param in input stream
+         * @return type of compressor stream detected
+         * @throws CompressorException if no compressor stream type was detected
+         *                             or if something else went wrong
+         * @throws IllegalArgumentException if stream is null or does not support mark
+         *
+         * @since 1.14
+         */
+        public static String detect(final InputStream in) throws CompressorException {
+            if (in == null) {
+                throw new IllegalArgumentException("Stream must not be null.");
+            }
+
+            if (!in.markSupported()) {
+                throw new IllegalArgumentException("Mark is not supported.");
+            }
+
+            final byte[] signature = new byte[12];
+            in.mark(signature.length);
+            int signatureLength = -1;
+            try {
+                signatureLength = IOUtils.readFully(in, signature);
+                in.reset();
+            } catch (IOException e) {
+                throw new CompressorException("IOException while reading signature.", e);
+            }
+
+            if (BZip2CompressorInputStream.matches(signature, signatureLength)) {
+                return BZIP2;
+            }
+
+            if (GzipCompressorInputStream.matches(signature, signatureLength)) {
+                return GZIP;
+            }
+
+            if (Pack200CompressorInputStream.matches(signature, signatureLength)) {
+                return PACK200;
+            }
+
+            if (FramedSnappyCompressorInputStream.matches(signature, signatureLength)) {
+                return SNAPPY_FRAMED;
+            }
+
+            if (ZCompressorInputStream.matches(signature, signatureLength)) {
+                return Z;
+            }
+
+            if (DeflateCompressorInputStream.matches(signature, signatureLength)) {
+                return DEFLATE;
+            }
+
+            if (XZUtils.matches(signature, signatureLength)) {
+                return XZ;
+            }
+
+            if (LZMAUtils.matches(signature, signatureLength)) {
+                return LZMA;
+            }
+
+/*            if (FramedLZ4CompressorInputStream.matches(signature, signatureLength)) {
+                return LZ4_FRAMED;
+            }*/
+
+            throw new CompressorException("No Compressor found for the stream signature.");
+        }
+
+    public SortedMap<String, CompressorStreamProvider> getCompressorInputStreamProviders() {
+        if (compressorInputStreamProviders == null) {
+            compressorInputStreamProviders = Collections
+                    .unmodifiableSortedMap(findAvailableCompressorInputStreamProviders());
+        }
+        return compressorInputStreamProviders;
+    }
+
+    public static SortedMap<String, CompressorStreamProvider> findAvailableCompressorInputStreamProviders() {
+        return AccessController.doPrivileged(new PrivilegedAction<SortedMap<String, CompressorStreamProvider>>() {
+            @Override
+            public SortedMap<String, CompressorStreamProvider> run() {
+                final TreeMap<String, CompressorStreamProvider> map = new TreeMap<>();
+                putAll(SINGLETON.getInputStreamCompressorNames(), SINGLETON, map);
+                for (final CompressorStreamProvider provider : findCompressorStreamProviders()) {
+                    putAll(provider.getInputStreamCompressorNames(), provider, map);
+                }
+                return map;
+            }
+        });
+    }
+
+    private static ArrayList<CompressorStreamProvider> findCompressorStreamProviders() {
+        return Lists.newArrayList(serviceLoaderIterator());
+    }
+
+    private static Iterator<CompressorStreamProvider> serviceLoaderIterator() {
+        return new ServiceLoaderIterator<>(CompressorStreamProvider.class);
+    }
+
+        /**
+         * Create a compressor input stream from an input stream, autodetecting the
+         * compressor type from the first few bytes of the stream. The InputStream
+         * must support marks, like BufferedInputStream.
+         *
+         * @param in
+         *            the input stream
+         * @return the compressor input stream
+         * @throws CompressorException
+         *             if the compressor name is not known
+         * @throws IllegalArgumentException
+         *             if the stream is null or does not support mark
+         * @since 1.1
+         */
+        public CompressorInputStream createCompressorInputStream(final InputStream in) throws CompressorException,
+                TikaMemoryLimitException {
+            return createCompressorInputStream(detect(in), in);
+        }
+
+        /**
+         * Creates a compressor input stream from a compressor name and an input
+         * stream.
+         *
+         * @param name
+         *            of the compressor, i.e. {@value #GZIP}, {@value #BZIP2},
+         *            {@value #XZ}, {@value #LZMA}, {@value #PACK200},
+         *            {@value #SNAPPY_RAW}, {@value #SNAPPY_FRAMED}, {@value #Z},
+         *            or {@value #DEFLATE}
+         * @param in
+         *            the input stream
+         * @return compressor input stream
+         * @throws CompressorException
+         *             if the compressor name is not known or not available
+         * @throws IllegalArgumentException
+         *             if the name or input stream is null
+         */
+        public CompressorInputStream createCompressorInputStream(final String name, final InputStream in)
+                throws CompressorException, TikaMemoryLimitException {
+            return createCompressorInputStream(name, in, decompressConcatenated);
+        }
+
+        public CompressorInputStream createCompressorInputStream(final String name, final InputStream in,
+                                                                 final boolean actualDecompressConcatenated) throws CompressorException {
+            if (name == null || in == null) {
+                throw new IllegalArgumentException("Compressor name and stream must not be null.");
+            }
+
+            try {
+
+                if (GZIP.equalsIgnoreCase(name)) {
+                    return new GzipCompressorInputStream(in, actualDecompressConcatenated);
+                }
+
+                if (BZIP2.equalsIgnoreCase(name)) {
+                    return new BZip2CompressorInputStream(in, actualDecompressConcatenated);
+                }
+
+                if (XZ.equalsIgnoreCase(name)) {
+                    if (!XZUtils.isXZCompressionAvailable()) {
+                        throw new CompressorException("XZ compression is not available.");
+                    }
+                    return new XZCompressorInputStream(in, actualDecompressConcatenated);
+                }
+
+                if (LZMA.equalsIgnoreCase(name)) {
+                    if (!LZMAUtils.isLZMACompressionAvailable()) {
+                        throw new CompressorException("LZMA compression is not available");
+                    }
+                    try {
+                        return new SaferLZMACompressorInputStream(in);
+                    } catch (MemoryLimitException e) {
+                        throw new CompressorException("MemoryLimitException: " + e.getMessage(), e);
+                    }
+                }
+
+                if (PACK200.equalsIgnoreCase(name)) {
+                    return new Pack200CompressorInputStream(in);
+                }
+
+                if (SNAPPY_RAW.equalsIgnoreCase(name)) {
+                    return new SnappyCompressorInputStream(in);
+                }
+
+                if (SNAPPY_FRAMED.equalsIgnoreCase(name)) {
+                    return new FramedSnappyCompressorInputStream(in);
+                }
+
+                if (Z.equalsIgnoreCase(name)) {
+                    try {
+                        return new SaferZCompressorInputStream(in);
+                    } catch (TikaRuntimeMemoryLimitException e) {
+                        throw new CompressorException("MemoryLimitException: " + e.getMessage(), e);
+                    }
+                }
+
+                if (DEFLATE.equalsIgnoreCase(name)) {
+                    return new DeflateCompressorInputStream(in);
+                }
+/*
+not currently supported
+                if (LZ4_BLOCK.equalsIgnoreCase(name)) {
+                    return new BlockLZ4CompressorInputStream(in);
+                }
+
+                if (LZ4_FRAMED.equalsIgnoreCase(name)) {
+                    return new FramedLZ4CompressorInputStream(in, actualDecompressConcatenated);
+                }
+ */
+
+            } catch (final IOException e) {
+                throw new CompressorException("Could not create CompressorInputStream.", e);
+            }
+
+            final CompressorStreamProvider compressorStreamProvider = getCompressorInputStreamProviders().get(toKey(name));
+            if (compressorStreamProvider != null) {
+                return compressorStreamProvider.createCompressorInputStream(name, in, actualDecompressConcatenated);
+            }
+
+            throw new CompressorException("Compressor: " + name + " not found.");
+        }
+
+    @Override
+    public CompressorOutputStream createCompressorOutputStream(String s, OutputStream outputStream) throws CompressorException {
+        throw new UnsupportedOperationException();
+    }
+
+
+    // For Unit tests
+        boolean getDecompressConcatenated() {
+            return decompressConcatenated;
+        }
+
+    public Set<String> getInputStreamCompressorNames() {
+        return Sets.newHashSet(GZIP, BZIP2, XZ, LZMA, PACK200, DEFLATE, SNAPPY_RAW, SNAPPY_FRAMED, Z);
+    }
+
+    @Override
+    public Set<String> getOutputStreamCompressorNames() {
+        throw new UnsupportedOperationException();
+    }
+
+    public Boolean getDecompressUntilEOF() {
+            return decompressUntilEOF;
+        }
+
+    private class SaferZCompressorInputStream extends ZCompressorInputStream {
+
+        public SaferZCompressorInputStream(InputStream inputStream) throws IOException {
+            super(inputStream);
+        }
+
+        @Override
+        protected void initializeTables(int maxCodeSize) {
+            int maxTableSize = 1 << maxCodeSize;
+            if (memoryLimitInKb > -1 && maxTableSize > (memoryLimitInKb*1024)) {
+                throw new TikaRuntimeMemoryLimitException("Calculated maxTableSize ("+maxTableSize+" bytes) is greater "+
+                 "than the maximum allowable ("+ (memoryLimitInKb*1024) +" bytes).\n"+
+                        "If the file is not corrupt, consider increasing " +
+                        "the memoryLimitInKb parameter in the CompressorParser.");
+            }
+            super.initializeTables(maxCodeSize);
+        }
+    }
+
+    private static class TikaRuntimeMemoryLimitException extends RuntimeException {
+        public TikaRuntimeMemoryLimitException(String msg) {
+            super(msg);
+        }
+    }
+
+    private class SaferLZMACompressorInputStream extends CompressorInputStream {
+        private final InputStream in;
+
+        /**
+         * Creates a new input stream that decompresses LZMA-compressed data
+         * from the specified input stream.
+         *
+         * @param       inputStream where to read the compressed data
+         *
+         * @throws      IOException if the input is not in the .lzma format,
+         *                          the input is corrupt or truncated, the .lzma
+         *                          headers specify sizes that are not supported
+         *                          by this implementation, or the underlying
+         *                          <code>inputStream</code> throws an exception
+         */
+        public SaferLZMACompressorInputStream(final InputStream inputStream) throws IOException {
+            in = new LZMAInputStream(inputStream, memoryLimitInKb);
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public int read() throws IOException {
+            final int ret = in.read();
+            count(ret == -1 ? 0 : 1);
+            return ret;
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public int read(final byte[] buf, final int off, final int len) throws IOException {
+            final int ret = in.read(buf, off, len);
+            count(ret);
+            return ret;
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public long skip(final long n) throws IOException {
+            return in.skip(n);
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public int available() throws IOException {
+            return in.available();
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public void close() throws IOException {
+            in.close();
+        }
+    }
+}
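
The Z guard above matters because the LZW table size is exponential in a
code size read straight from the stream header: a couple of corrupt bytes
can demand a multi-gigabyte allocation before any real decompression
happens. A back-of-the-envelope sketch of the arithmetic in
initializeTables:

    int maxCodeSize = 31;                        //claimed by a corrupt header
    long maxTableSize = 1L << maxCodeSize;       //2^31 = 2,147,483,648
    long limitInBytes = 100000L * 1024;          //the default 100,000 KB limit
    System.out.println(maxTableSize > limitInBytes);  //true -> fail fast, nothing allocated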
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
index d43a17c..2434d1a 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
@@ -16,6 +16,8 @@
  */
 package org.apache.tika.parser.pkg;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
+
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
@@ -33,8 +35,6 @@ import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
 import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
 import org.apache.commons.compress.archivers.zip.ZipFile;
 import org.apache.commons.compress.compressors.CompressorException;
-import org.apache.commons.compress.compressors.CompressorInputStream;
-import org.apache.commons.compress.compressors.CompressorStreamFactory;
 import org.apache.commons.io.IOUtils;
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
@@ -52,8 +52,6 @@ import org.apache.tika.parser.iwork.IWorkPackageParser;
 import org.apache.tika.parser.iwork.IWorkPackageParser.IWORKDocumentType;
 import org.apache.tika.parser.iwork.iwana.IWork13PackageParser;
 
-import static java.nio.charset.StandardCharsets.UTF_8;
-
 /**
  * A detector that works on Zip documents and other archive and compression
  * formats to figure out exactly what the file is.
@@ -105,14 +103,8 @@ public class ZipContainerDetector implements Detector {
 
     private static MediaType detectCompressorFormat(byte[] prefix, int length) {
         try {
-            CompressorStreamFactory factory = new CompressorStreamFactory();
-            CompressorInputStream cis = factory.createCompressorInputStream(
-                    new ByteArrayInputStream(prefix, 0, length));
-            try {
-                return CompressorParser.getMediaType(cis);
-            } finally {
-                IOUtils.closeQuietly(cis);
-            }
+            String type = TikaCompressorStreamFactory.detect(new ByteArrayInputStream(prefix, 0, length));
+            return CompressorParser.getMediaType(type);
         } catch (CompressorException e) {
             return MediaType.OCTET_STREAM;
         }
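
The point of this change is that detection no longer constructs a
CompressorInputStream (which, for Z or LZMA, could allocate large tables
from a hostile header) just to name the format; detect() only reads a
12-byte signature and mark/resets. A self-contained sketch of that style
of check, using gzip's well-known two magic bytes (each format has a
matcher such as GzipCompressorInputStream.matches(signature, length)):

    import java.io.ByteArrayInputStream;
    import java.io.IOException;
    import java.io.InputStream;

    public class SignatureSniff {
        static boolean looksLikeGzip(InputStream in) throws IOException {
            byte[] signature = new byte[12];
            in.mark(signature.length);
            int n = in.read(signature);
            in.reset();                          //leave the stream where we found it
            return n >= 2 && (signature[0] & 0xff) == 0x1f
                          && (signature[1] & 0xff) == 0x8b;
        }

        public static void main(String[] args) throws IOException {
            byte[] prefix = {(byte) 0x1f, (byte) 0x8b, 8, 0};
            System.out.println(looksLikeGzip(new ByteArrayInputStream(prefix))); //true
        }
    }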
diff --git a/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java b/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
index 89ff371..9cff7c4 100644
--- a/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
+++ b/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
@@ -361,6 +361,16 @@ public class TestContainerAwareDetector {
                                 "application/java-archive", "application/java-archive");
     }
 
+    @Test
+    public void testLZMAOOM() throws Exception {
+        assertTypeByData("testLZMA_oom", "application/x-lzma");
+    }
+
+    @Test
+    public void testCompressOOM() throws Exception {
+        assertTypeByData("testZ_oom.Z", "application/x-compress");
+    }
+
     private TikaInputStream getTruncatedFile(String name, int n)
             throws IOException {
         try (InputStream input = TestContainerAwareDetector.class.getResourceAsStream(
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pkg/CompressParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pkg/CompressParserTest.java
index b80de07..7a006a9 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/pkg/CompressParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/pkg/CompressParserTest.java
@@ -18,9 +18,11 @@ package org.apache.tika.parser.pkg;
 
 import static java.nio.charset.StandardCharsets.US_ASCII;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
 
 import java.io.InputStream;
 
+import org.apache.tika.exception.TikaMemoryLimitException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.Parser;
@@ -98,4 +100,24 @@ public class CompressParserTest extends AbstractPkgTest {
        // Tar file starts with the directory name
        assertEquals("test-documents/", new String(tracker.lastSeenStart, 0, 15, US_ASCII));
     }
+
+    @Test
+    public void testLZMAOOM() throws Exception {
+        try {
+            XMLResult r = getXML("testLZMA_oom");
+            fail("should have thrown TikaMemoryLimitException");
+        } catch (TikaMemoryLimitException e) {
+        }
+    }
+
+    @Test
+    public void testCompressOOM() throws Exception {
+        try {
+            XMLResult r = getXML("testZ_oom.Z");
+            fail("should have thrown TikaMemoryLimitException");
+        } catch (TikaMemoryLimitException e) {
+        }
+    }
+
+
 }
\ No newline at end of file
diff --git a/tika-parsers/src/test/resources/test-documents/testLZMA_oom b/tika-parsers/src/test/resources/test-documents/testLZMA_oom
new file mode 100644
index 0000000..be257f2
Binary files /dev/null and b/tika-parsers/src/test/resources/test-documents/testLZMA_oom differ
diff --git a/tika-parsers/src/test/resources/test-documents/testZ_oom.Z b/tika-parsers/src/test/resources/test-documents/testZ_oom.Z
new file mode 100644
index 0000000..36d7f52
--- /dev/null
+++ b/tika-parsers/src/test/resources/test-documents/testZ_oom.Z
@@ -0,0 +1 @@
+\ufffdB
\ No newline at end of file

-- 
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.

[tika] 03/03: Merge remote-tracking branch 'origin/master'

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 37d0f058c9a633a61346d9c04b22e4aa558fa634
Merge: 9e89b44 0f1034a
Author: tballison <ta...@mitre.org>
AuthorDate: Wed Apr 19 21:20:05 2017 -0400

    Merge remote-tracking branch 'origin/master'

 .../apache/tika/parser/strings/Latin1StringsParser.java   | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

-- 
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.

[tika] 02/03: TIKA-2331 -- Upgrade RTFParser to use new TikaMemoryLimitException

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 9e89b442bd2b211c328eb563e42ed902f9e0ae6e
Author: tballison <ta...@mitre.org>
AuthorDate: Wed Apr 19 21:19:46 2017 -0400

    TIKA-2331 -- Upgrade RTFParser to use new TikaMemoryLimitException
---
 .../apache/tika/parser/rtf/RTFEmbObjHandler.java   | 14 +++++++---
 .../java/org/apache/tika/parser/rtf/RTFParser.java | 30 +++++++++++++++++++++-
 .../org/apache/tika/parser/rtf/TextExtractor.java  |  6 ++---
 .../org/apache/tika/parser/rtf/RTFParserTest.java  | 15 +++++++++++
 .../org/apache/tika/parser/rtf/tika-config.xml     | 26 +++++++++++++++++++
 5 files changed, 83 insertions(+), 8 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
index 5e2ab25..42900fc 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
@@ -24,6 +24,7 @@ import java.util.concurrent.atomic.AtomicInteger;
 import org.apache.commons.io.FilenameUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.TikaMemoryLimitException;
 import org.apache.tika.extractor.EmbeddedDocumentUtil;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
@@ -70,11 +71,13 @@ class RTFEmbObjHandler {
     private StringBuilder sb = new StringBuilder();
     private Metadata metadata;
     private EMB_STATE state = EMB_STATE.NADA;
+    private final int memoryLimitInKb;
 
-    protected RTFEmbObjHandler(ContentHandler handler, Metadata metadata, ParseContext context) {
+    protected RTFEmbObjHandler(ContentHandler handler, Metadata metadata, ParseContext context, int memoryLimitInKb) {
         this.handler = handler;
         this.embeddedDocumentUtil = new EmbeddedDocumentUtil(context);
         os = new ByteArrayOutputStream();
+        this.memoryLimitInKb = memoryLimitInKb;
     }
 
     protected void startPict() {
@@ -145,8 +148,13 @@ class RTFEmbObjHandler {
     }
 
     protected void writeBytes(InputStream is, int len) throws IOException, TikaException {
-        if (len < 0 || len > RTFParser.getMaxBytesForEmbeddedObject()) {
-            throw new IOException("length of bytes to read out of bounds: " + len);
+        if (len < 0) {
+            throw new TikaException("Requested a read of < 0 bytes: " + len);
+        }
+        if (len > memoryLimitInKb) {
+            throw new TikaMemoryLimitException("File embedded in RTF requires " + len +
+                    " bytes, but the maximum allowed is " + memoryLimitInKb + ". " +
+                    "If this is a valid RTF file, consider increasing the memory limit via TikaConfig.");
         }
 
         byte[] bytes = new byte[len];
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java
index d2c448b..567a7a8 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java
@@ -22,6 +22,7 @@ import java.util.Collections;
 import java.util.Set;
 
 import org.apache.commons.io.input.TaggedInputStream;
+import org.apache.tika.config.Field;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -53,6 +54,7 @@ public class RTFParser extends AbstractParser {
      *
      * @return maximum number of bytes allowed for an embedded object.
      */
+    @Deprecated
     public static int getMaxBytesForEmbeddedObject() {
         return EMB_OBJ_MAX_BYTES;
     }
@@ -65,15 +67,24 @@ public class RTFParser extends AbstractParser {
      *
      * @param max maximum number of bytes to allow for embedded objects.  If
      *            the embedded object has more than this number of bytes, skip it.
+     * @deprecated use {@link #setMemoryLimitInKb(int)} instead
      */
+    @Deprecated
     public static void setMaxBytesForEmbeddedObject(int max) {
         EMB_OBJ_MAX_BYTES = max;
+        USE_STATIC = true;
     }
 
+    //get rid of this once we get rid of the other static maxbytes...
+    private static volatile boolean USE_STATIC = false;
+
     public Set<MediaType> getSupportedTypes(ParseContext context) {
         return SUPPORTED_TYPES;
     }
 
+    @Field
+    private int memoryLimitInKb = EMB_OBJ_MAX_BYTES;
+
     public void parse(
             InputStream stream, ContentHandler handler,
             Metadata metadata, ParseContext context)
@@ -82,7 +93,7 @@ public class RTFParser extends AbstractParser {
         TaggedInputStream tagged = new TaggedInputStream(stream);
         try {
             XHTMLContentHandler xhtmlHandler = new XHTMLContentHandler(handler, metadata);
-            RTFEmbObjHandler embObjHandler = new RTFEmbObjHandler(xhtmlHandler, metadata, context);
+            RTFEmbObjHandler embObjHandler = new RTFEmbObjHandler(xhtmlHandler, metadata, context, getMemoryLimitInKb());
             final TextExtractor ert = new TextExtractor(xhtmlHandler, metadata, embObjHandler);
             ert.extract(stream);
         } catch (IOException e) {
@@ -90,4 +101,21 @@ public class RTFParser extends AbstractParser {
             throw new TikaException("Error parsing an RTF document", e);
         }
     }
+
+    @Field
+    public void setMemoryLimitInKb(int memoryLimitInKb) {
+        this.memoryLimitInKb = memoryLimitInKb;
+        USE_STATIC = false;
+    }
+
+    private int getMemoryLimitInKb() {
+        //there's a race condition here, but it shouldn't matter.
+        if (USE_STATIC) {
+            if (EMB_OBJ_MAX_BYTES < 0) {
+                return EMB_OBJ_MAX_BYTES;
+            }
+            return EMB_OBJ_MAX_BYTES/1024;
+        }
+        return memoryLimitInKb;
+    }
 }
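
Note the configuration shift above: the deprecated static setter is
process-wide and takes bytes, while the new @Field setter is per-instance
and takes KB; if the static setter was ever called, USE_STATIC makes it
win. A sketch of the two styles side by side:

    RTFParser parser = new RTFParser();
    parser.setMemoryLimitInKb(1024);   //new style: per instance, in KB (1MB here)

    //legacy style: static, process-wide, and in *bytes*; deprecated by this commit
    //RTFParser.setMaxBytesForEmbeddedObject(1024 * 1024);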
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
index 8ba8961..b07a3a0 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
@@ -947,10 +947,8 @@ final class TextExtractor {
                 if (groupState.pictDepth == 1) {
                     try {
                         embObjHandler.writeBytes(in, param);
-                    } catch (IOException e) {
-                        //param was out of bounds or something went wrong during writing.
-                        //skip this obj and move on
-                        //TODO: log.warn
+                    } catch (IOException|TikaException e) {
+                        EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata);
                         embObjHandler.reset();
                     }
                 } else {
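
Rather than silently skipping the oversized object, the handler now records
the exception in the metadata, where callers can inspect it after parsing
(the RTFParserTest change below asserts on exactly this property). A small
sketch, with a hypothetical helper name:

    import org.apache.tika.metadata.Metadata;
    import org.apache.tika.metadata.TikaCoreProperties;

    //after parsing, with the Metadata object that was passed to parse():
    static void reportEmbeddedStreamException(Metadata metadata) {
        String embeddedEx =
                metadata.get(TikaCoreProperties.TIKA_META_EXCEPTION_EMBEDDED_STREAM);
        if (embeddedEx != null) {
            //e.g. a TikaMemoryLimitException recorded by RTFEmbObjHandler
            System.err.println("embedded stream exception: " + embeddedEx);
        }
    }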
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
index b957b8c..aed6cf5 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
@@ -35,6 +35,7 @@ import java.util.Set;
 import org.apache.commons.io.FilenameUtils;
 import org.apache.tika.Tika;
 import org.apache.tika.TikaTest;
+import org.apache.tika.config.TikaConfig;
 import org.apache.tika.extractor.ContainerExtractor;
 import org.apache.tika.extractor.ParserContainerExtractor;
 import org.apache.tika.io.TikaInputStream;
@@ -524,6 +525,20 @@ public class RTFParserTest extends TikaTest {
         assertEquals(2, tracker.filenames.size());
     }
 
+    @Test
+    public void testConfig() throws Exception {
+        //test that memory allocation for the \bin element is limited
+        //via the config file.  This test file's bin embedding contains only 10 bytes,
+        //so the config sets the limit to 0 to force the exception.
+        InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/rtf/tika-config.xml");
+        assertNotNull(is);
+        TikaConfig tikaConfig = new TikaConfig(is);
+        Parser p = new AutoDetectParser(tikaConfig);
+        List<Metadata> metadataList = getRecursiveMetadata("testBinControlWord.rtf", p);
+        assertEquals(1, metadataList.size());
+        assertContains("TikaMemoryLimitException", metadataList.get(0).get(TikaCoreProperties.TIKA_META_EXCEPTION_EMBEDDED_STREAM));
+    }
+
     private Result getResult(String filename) throws Exception {
         File file = getResourceAsFile("/test-documents/" + filename);
 
diff --git a/tika-parsers/src/test/resources/org/apache/tika/parser/rtf/tika-config.xml b/tika-parsers/src/test/resources/org/apache/tika/parser/rtf/tika-config.xml
new file mode 100644
index 0000000..1f53a78
--- /dev/null
+++ b/tika-parsers/src/test/resources/org/apache/tika/parser/rtf/tika-config.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+    <parsers>
+        <parser class="org.apache.tika.parser.rtf.RTFParser">
+            <params>
+                <param name="memoryLimitInKb" type="int">0</param>
+            </params>
+        </parser>
+    </parsers>
+</properties>

-- 
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.