You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2018/03/13 18:15:30 UTC
[tika] 07/13: Pull common "Real Parser" identification logic out to utils
This is an automated email from the ASF dual-hosted git repository.
nick pushed a commit to branch multiple-parsers
in repository https://gitbox.apache.org/repos/asf/tika.git
commit d229ab6f666cde8b007f568b13001a2c780ff477
Author: Nick Burch <ni...@gagravarr.org>
AuthorDate: Tue Mar 13 15:10:16 2018 +0000
Pull common "Real Parser" identification logic out to utils
---
.../java/org/apache/tika/parser/CompositeParser.java | 7 ++-----
.../tika/parser/multiple/AbstractMultipleParser.java | 17 ++++-------------
.../main/java/org/apache/tika/utils/ParserUtils.java | 14 ++++++++++++++
3 files changed, 20 insertions(+), 18 deletions(-)
diff --git a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
index ea3968e..0098468 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
@@ -23,6 +23,7 @@ import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MediaTypeRegistry;
import org.apache.tika.sax.TaggedContentHandler;
+import org.apache.tika.utils.ParserUtils;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -271,11 +272,7 @@ public class CompositeParser extends AbstractParser {
TikaInputStream taggedStream = TikaInputStream.get(stream, tmp);
TaggedContentHandler taggedHandler =
handler != null ? new TaggedContentHandler(handler) : null;
- if (parser instanceof ParserDecorator){
- metadata.add("X-Parsed-By", ((ParserDecorator) parser).getWrappedParser().getClass().getName());
- } else {
- metadata.add("X-Parsed-By", parser.getClass().getName());
- }
+ metadata.add("X-Parsed-By", ParserUtils.getParserClassname(parser));
try {
parser.parse(taggedStream, taggedHandler, metadata, context);
} catch (RuntimeException e) {
diff --git a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
index 02d7e51..d66c541 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
@@ -34,7 +34,7 @@ import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParserDecorator;
-import org.apache.tika.utils.ParserUtils;
+import static org.apache.tika.utils.ParserUtils.*;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -180,7 +180,7 @@ public abstract class AbstractMultipleParser extends AbstractParser {
Metadata metadata, ParseContext context)
throws IOException, SAXException, TikaException {
// Track the metadata between parsers, so we can apply our policy
- Metadata originalMetadata = ParserUtils.cloneMetadata(metadata);
+ Metadata originalMetadata = cloneMetadata(metadata);
Metadata lastMetadata = originalMetadata;
// Start tracking resources, so we can clean up when done
@@ -206,7 +206,7 @@ public abstract class AbstractMultipleParser extends AbstractParser {
TikaInputStream parserStream = TikaInputStream.get(path);
// Record this parser
- metadata.add("X-Parsed-By", getParserName(p));
+ metadata.add("X-Parsed-By", getParserClassname(p));
// TODO Handle metadata clashes based on the Policy
@@ -234,20 +234,11 @@ public abstract class AbstractMultipleParser extends AbstractParser {
}
// TODO Handle metadata clashes based on the Policy
- lastMetadata = ParserUtils.cloneMetadata(metadata);
+ lastMetadata = cloneMetadata(metadata);
}
} finally {
tmp.dispose();
}
}
-
- private String getParserName(Parser parser) {
- // TODO Share this logic with CompositeParser
- if (parser instanceof ParserDecorator){
- return ((ParserDecorator) parser).getWrappedParser().getClass().getName();
- } else {
- return parser.getClass().getName();
- }
- }
}
diff --git a/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java b/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
index 289cbc2..bdbb04c 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
@@ -17,6 +17,8 @@
package org.apache.tika.utils;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.ParserDecorator;
/**
* Helper util methods for Parsers themselves.
@@ -40,4 +42,16 @@ public class ParserUtils {
}
return clone;
}
+
+ /**
+ * Identifies the real class name of the {@link Parser}, unwrapping
+ * any {@link ParserDecorator} decorations on top of it.
+ */
+ public static String getParserClassname(Parser parser) {
+ if (parser instanceof ParserDecorator){
+ return ((ParserDecorator) parser).getWrappedParser().getClass().getName();
+ } else {
+ return parser.getClass().getName();
+ }
+ }
}
--
To stop receiving notification emails like this one, please contact
nick@apache.org.