You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2018/03/13 18:15:28 UTC

[tika] 05/13: Prepare to track metadata between parsers

This is an automated email from the ASF dual-hosted git repository.

nick pushed a commit to branch multiple-parsers
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 427417c5d17f1e03724f3e6ded64779bf7366677
Author: Nick Burch <ni...@gagravarr.org>
AuthorDate: Tue Mar 13 15:04:43 2018 +0000

    Prepare to track metadata between parsers
---
 .../org/apache/tika/parser/multiple/AbstractMultipleParser.java    | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
index c47e762..46cd064 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
@@ -34,6 +34,7 @@ import org.apache.tika.parser.AbstractParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
+import org.apache.tika.utils.ParserUtils;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
@@ -178,7 +179,11 @@ public abstract class AbstractMultipleParser extends AbstractParser {
             InputStream stream, ContentHandler handler,
             Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
+        // Track the metadata between parsers, so we can apply our policy
+        Metadata originalMetadata = ParserUtils.cloneMetadata(metadata);
+        Metadata lastMetadata = originalMetadata;
         
+        // Start tracking resources, so we can clean up when done
         TemporaryResources tmp = new TemporaryResources();
         try {
             // Force the stream to be a Tika one
@@ -187,6 +192,7 @@ public abstract class AbstractMultipleParser extends AbstractParser {
             // TODO Support an InputStreamFactory as an alternative to
             //  Files, see TIKA-2585
             // TODO Rewind support copy from ParserDecorator.withFallbacks
+            // TODO Should we use RereadableInputStream instead?
             TikaInputStream taggedStream = TikaInputStream.get(stream, tmp);
             Path path = taggedStream.getPath();
             
@@ -222,6 +228,7 @@ public abstract class AbstractMultipleParser extends AbstractParser {
                 }
                 
                 // TODO Handle metadata clashes based on the Policy
+                lastMetadata = ParserUtils.cloneMetadata(metadata);
             }
         } finally {
             tmp.dispose();

-- 
To stop receiving notification emails like this one, please contact
nick@apache.org.