You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2019/04/22 16:11:51 UTC

[tika] branch master updated: TIKA-2849 -- improve documentation in POIFSContainerDetector

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/master by this push:
     new 2084896  TIKA-2849 -- improve documentation in POIFSContainerDetector
2084896 is described below

commit 20848961fae9853aa2afa7957f6421c57fe09272
Author: TALLISON <ta...@apache.org>
AuthorDate: Mon Apr 22 12:11:38 2019 -0400

    TIKA-2849 -- improve documentation in POIFSContainerDetector
---
 .../tika/parser/microsoft/POIFSContainerDetector.java   | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
index 576cf52..ca5e6f3 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
@@ -180,6 +180,19 @@ public class POIFSContainerDetector implements Detector {
     @Field
     private int markLimit = 16 * 1024 * 1024;
 
+    /**
+     * If a TikaInputStream is passed in to {@link #detect(InputStream, Metadata)},
+     * and there is not an underlying file, this detector will spool up to {@link #markLimit}
+     * to disk.  If the stream was read in entirety (i.e. the spooled file is not truncated),
+     * this detector will open the file with POI and perform detection.
+     * If the spooled file is truncated, the detector will return {@link #OLE} (or
+     * {@link MediaType#OCTET_STREAM} if there's no OLE header).
+     *
+     * As of Tika 1.21, this detector respects the legacy behavior of not performing detection
+     * on a non-TikaInputStream.
+     *
+     * @param markLimit maximum amount of the stream to spool to disk; if &lt; 0, the entire stream is spooled
+     */
     public void setMarkLimit(int markLimit) {
         this.markLimit = markLimit;
     }
@@ -391,7 +404,9 @@ public class POIFSContainerDetector implements Detector {
     private Set<String> getTopLevelNames(TikaInputStream stream)
             throws IOException {
         // Force the document stream to a (possibly temporary) file
-        // so we don't modify the current position of the stream
+        // so we don't modify the current position of the stream.
+        // If markLimit is < 0, this will spool the entire stream
+        // to disk when there is no underlying file.
         Path file = stream.getPath(markLimit);
 
         //if the stream was longer than markLimit, don't detect