You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2019/04/22 16:11:51 UTC
[tika] branch master updated: TIKA-2849 -- improve documentation in
POIFSContainerDetector
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new 2084896 TIKA-2849 -- improve documentation in POIFSContainerDetector
2084896 is described below
commit 20848961fae9853aa2afa7957f6421c57fe09272
Author: TALLISON <ta...@apache.org>
AuthorDate: Mon Apr 22 12:11:38 2019 -0400
TIKA-2849 -- improve documentation in POIFSContainerDetector
---
.../tika/parser/microsoft/POIFSContainerDetector.java | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
index 576cf52..ca5e6f3 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
@@ -180,6 +180,19 @@ public class POIFSContainerDetector implements Detector {
@Field
private int markLimit = 16 * 1024 * 1024;
+ /**
+ * If a TikaInputStream is passed in to {@link #detect(InputStream, Metadata)},
+ * and there is not an underlying file, this detector will spool up to {@link #markLimit}
+ * bytes to disk. If the stream was read in its entirety (i.e. the spooled file is not truncated),
+ * this detector will open the file with POI and perform detection.
+ * If the spooled file is truncated, the detector will return {@link #OLE} (or
+ * {@link MediaType#OCTET_STREAM} if there's no OLE header).
+ *
+ * As of Tika 1.21, this detector respects the legacy behavior of not performing detection
+ * on a non-TikaInputStream.
+ *
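+ * @param markLimit maximum number of bytes to spool to disk when there is no underlying file; a value &lt; 0 spools the entire stream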
+ */
public void setMarkLimit(int markLimit) {
this.markLimit = markLimit;
}
@@ -391,7 +404,9 @@ public class POIFSContainerDetector implements Detector {
private Set<String> getTopLevelNames(TikaInputStream stream)
throws IOException {
// Force the document stream to a (possibly temporary) file
- // so we don't modify the current position of the stream
+ // so we don't modify the current position of the stream.
+ //If the markLimit is < 0, this will spool the entire file
+ //to disk if there is not an underlying file.
Path file = stream.getPath(markLimit);
//if the stream was longer than markLimit, don't detect