You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2018/03/21 08:40:31 UTC
[tika] 02/02: Simplify stream resetting logic by using new
ParserUtils methods for it
This is an automated email from the ASF dual-hosted git repository.
nick pushed a commit to branch multiple-parsers
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 54477aa3904944ca964e1f70188ac69b9f994042
Author: Nick Burch <ni...@gagravarr.org>
AuthorDate: Wed Mar 21 08:40:23 2018 +0000
Simplify stream resetting logic by using new ParserUtils methods for it
---
.../tika/parser/multiple/AbstractMultipleParser.java | 18 ++++--------------
.../main/java/org/apache/tika/utils/ParserUtils.java | 4 ++--
2 files changed, 6 insertions(+), 16 deletions(-)
diff --git a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
index 8f896b2..9fcf998 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
@@ -30,7 +30,6 @@ import java.util.Set;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
-import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MediaTypeRegistry;
@@ -215,20 +214,11 @@ public abstract class AbstractMultipleParser extends AbstractParser {
// Start tracking resources, so we can clean up when done
TemporaryResources tmp = new TemporaryResources();
try {
- // Force the stream to be a Tika one
- // Force the stream to be file-backed, so we can re-read safely
- // later if required for parser 2+
- // TODO Should we support RereadableInputStream as well?
- // TODO Can we put this re-read logic in a utils method?
- TikaInputStream taggedStream = TikaInputStream.get(stream, tmp);
- taggedStream.getPath();
+ // Ensure we'll be able to re-read safely, buffering to disk if needed,
+ // so that Parsers 2+ are able to read the same data
+ InputStream taggedStream = ParserUtils.ensureStreamReReadable(stream, tmp);
for (Parser p : parsers) {
- // Indicate we may need to re-read the stream later
- // TODO Support an InputStreamFactory as an alternative to
- // Files, see TIKA-2585
- taggedStream.mark(-1);
-
// Get a new handler for this parser, if we can
// If not, the user will get text from every parser
// mushed together onto the one solitary handler...
@@ -276,7 +266,7 @@ public abstract class AbstractMultipleParser extends AbstractParser {
// Prepare for the next parser, if present
lastMetadata = cloneMetadata(metadata);
- taggedStream.reset();
+ taggedStream = ParserUtils.streamResetForReRead(taggedStream, tmp);
}
} finally {
tmp.dispose();
diff --git a/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java b/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
index 663b689..02958c2 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
@@ -96,7 +96,7 @@ public class ParserUtils {
* Streams that are automatically OK include {@link TikaInputStream}s
* created from Files or InputStreamFactories, and {@link RereadableInputStream}.
*/
- public InputStream ensureStreamReReadable(InputStream stream, TemporaryResources tmp) throws IOException {
+ public static InputStream ensureStreamReReadable(InputStream stream, TemporaryResources tmp) throws IOException {
// If it's re-readable, we're done
if (stream instanceof RereadableInputStream) return stream;
@@ -120,7 +120,7 @@ public class ParserUtils {
* {@link #ensureStreamReReadable(InputStream, TemporaryResources)})
* so that it can be re-read again.
*/
- public InputStream streamResetForReRead(InputStream stream, TemporaryResources tmp) throws IOException {
+ public static InputStream streamResetForReRead(InputStream stream, TemporaryResources tmp) throws IOException {
// If re-readable, rewind to start
if (stream instanceof RereadableInputStream) {
((RereadableInputStream)stream).rewind();
--
To stop receiving notification emails like this one, please contact
nick@apache.org.