You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2018/03/13 18:15:30 UTC

[tika] 07/13: Pull common "Real Parser" identification logic out to utils

This is an automated email from the ASF dual-hosted git repository.

nick pushed a commit to branch multiple-parsers
in repository https://gitbox.apache.org/repos/asf/tika.git

commit d229ab6f666cde8b007f568b13001a2c780ff477
Author: Nick Burch <ni...@gagravarr.org>
AuthorDate: Tue Mar 13 15:10:16 2018 +0000

    Pull common "Real Parser" identification logic out to utils
---
 .../java/org/apache/tika/parser/CompositeParser.java    |  7 ++-----
 .../tika/parser/multiple/AbstractMultipleParser.java    | 17 ++++-------------
 .../main/java/org/apache/tika/utils/ParserUtils.java    | 14 ++++++++++++++
 3 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
index ea3968e..0098468 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
@@ -23,6 +23,7 @@ import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MediaTypeRegistry;
 import org.apache.tika.sax.TaggedContentHandler;
+import org.apache.tika.utils.ParserUtils;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
@@ -271,11 +272,7 @@ public class CompositeParser extends AbstractParser {
             TikaInputStream taggedStream = TikaInputStream.get(stream, tmp);
             TaggedContentHandler taggedHandler = 
                 handler != null ? new TaggedContentHandler(handler) : null;
-            if (parser instanceof ParserDecorator){
-                metadata.add("X-Parsed-By", ((ParserDecorator) parser).getWrappedParser().getClass().getName());
-            } else {
-                metadata.add("X-Parsed-By", parser.getClass().getName());
-            }
+            metadata.add("X-Parsed-By", ParserUtils.getParserClassname(parser));
             try {
                 parser.parse(taggedStream, taggedHandler, metadata, context);
             } catch (RuntimeException e) {
diff --git a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
index 02d7e51..d66c541 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
@@ -34,7 +34,7 @@ import org.apache.tika.parser.AbstractParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
-import org.apache.tika.utils.ParserUtils;
+import static org.apache.tika.utils.ParserUtils.*;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
@@ -180,7 +180,7 @@ public abstract class AbstractMultipleParser extends AbstractParser {
             Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
         // Track the metadata between parsers, so we can apply our policy
-        Metadata originalMetadata = ParserUtils.cloneMetadata(metadata);
+        Metadata originalMetadata = cloneMetadata(metadata);
         Metadata lastMetadata = originalMetadata;
         
         // Start tracking resources, so we can clean up when done
@@ -206,7 +206,7 @@ public abstract class AbstractMultipleParser extends AbstractParser {
                 TikaInputStream parserStream = TikaInputStream.get(path);
                 
                 // Record this parser
-                metadata.add("X-Parsed-By", getParserName(p));
+                metadata.add("X-Parsed-By", getParserClassname(p));
                 
                 // TODO Handle metadata clashes based on the Policy
                 
@@ -234,20 +234,11 @@ public abstract class AbstractMultipleParser extends AbstractParser {
                 }
                 
                 // TODO Handle metadata clashes based on the Policy
-                lastMetadata = ParserUtils.cloneMetadata(metadata);
+                lastMetadata = cloneMetadata(metadata);
             }
         } finally {
             tmp.dispose();
         }
     }
-    
-    private String getParserName(Parser parser) {
-        // TODO Share this logic with CompositeParser
-        if (parser instanceof ParserDecorator){
-            return ((ParserDecorator) parser).getWrappedParser().getClass().getName();
-        } else {
-            return parser.getClass().getName();
-        }
-    }
 }
 
diff --git a/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java b/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
index 289cbc2..bdbb04c 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
@@ -17,6 +17,8 @@
 package org.apache.tika.utils;
 
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.ParserDecorator;
 
 /**
  * Helper util methods for Parsers themselves.
@@ -40,4 +42,16 @@ public class ParserUtils {
         }
         return clone;
     }
+
+    /**
+     * Identifies the real class name of the {@link Parser}, unwrapping
+     *  any {@link ParserDecorator} decorations on top of it.
+     */
+    public static String getParserClassname(Parser parser) {
+        if (parser instanceof ParserDecorator){
+            return ((ParserDecorator) parser).getWrappedParser().getClass().getName();
+        } else {
+            return parser.getClass().getName();
+        }
+    }
 }

-- 
To stop receiving notification emails like this one, please contact
nick@apache.org.