You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2018/03/13 18:15:28 UTC
[tika] 05/13: Prepare to track metadata between parsers
This is an automated email from the ASF dual-hosted git repository.
nick pushed a commit to branch multiple-parsers
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 427417c5d17f1e03724f3e6ded64779bf7366677
Author: Nick Burch <ni...@gagravarr.org>
AuthorDate: Tue Mar 13 15:04:43 2018 +0000
Prepare to track metadata between parsers
---
.../org/apache/tika/parser/multiple/AbstractMultipleParser.java | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
index c47e762..46cd064 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
@@ -34,6 +34,7 @@ import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParserDecorator;
+import org.apache.tika.utils.ParserUtils;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -178,7 +179,11 @@ public abstract class AbstractMultipleParser extends AbstractParser {
InputStream stream, ContentHandler handler,
Metadata metadata, ParseContext context)
throws IOException, SAXException, TikaException {
+ // Track the metadata between parsers, so we can apply our policy
+ Metadata originalMetadata = ParserUtils.cloneMetadata(metadata);
+ Metadata lastMetadata = originalMetadata;
+ // Start tracking resources, so we can clean up when done
TemporaryResources tmp = new TemporaryResources();
try {
// Force the stream to be a Tika one
@@ -187,6 +192,7 @@ public abstract class AbstractMultipleParser extends AbstractParser {
// TODO Support an InputStreamFactory as an alternative to
// Files, see TIKA-2585
// TODO Rewind support copy from ParserDecorator.withFallbacks
+ // TODO Should we use RereadableInputStream instead?
TikaInputStream taggedStream = TikaInputStream.get(stream, tmp);
Path path = taggedStream.getPath();
@@ -222,6 +228,7 @@ public abstract class AbstractMultipleParser extends AbstractParser {
}
// TODO Handle metadata clashes based on the Policy
+ lastMetadata = ParserUtils.cloneMetadata(metadata);
}
} finally {
tmp.dispose();
--
To stop receiving notification emails like this one, please contact
nick@apache.org.