You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/04/26 12:24:25 UTC
[tika] 02/04: TIKA-3733 -- pass parent metadata to AbstractPOIFSExtractor in OutlookExtractor
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 15909b43253ed671f76a3aed2205da3b67dbb97d
Author: tallison <ta...@apache.org>
AuthorDate: Tue Apr 26 07:19:47 2022 -0400
TIKA-3733 -- pass parent metadata to AbstractPOIFSExtractor in OutlookExtractor
---
.../apache/tika/parser/microsoft/OfficeParser.java | 4 +--
.../tika/parser/microsoft/OutlookExtractor.java | 42 ++++++++++++++++++++++
2 files changed, 44 insertions(+), 2 deletions(-)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
index a155e470c..280b486bd 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
@@ -235,9 +235,9 @@ public class OfficeParser extends AbstractOfficeParser {
}
break;
case OUTLOOK:
- OutlookExtractor extractor = new OutlookExtractor(root, context);
+ OutlookExtractor extractor = new OutlookExtractor(root, metadata, context);
- extractor.parse(xhtml, metadata);
+ extractor.parse(xhtml);
break;
case ENCRYPTED:
EncryptionInfo info = new EncryptionInfo(root);
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
index a7983d7b4..2f19dec1a 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
@@ -96,10 +96,27 @@ public class OutlookExtractor extends AbstractPOIFSExtractor {
private final ParseContext parseContext;
private final boolean extractAllAlternatives;
HtmlEncodingDetector detector = new HtmlEncodingDetector();
+
+ /**
+ * @deprecated use {@link OutlookExtractor#OutlookExtractor(DirectoryNode, Metadata, ParseContext)}
+ * Will be removed after 2.4.0
+ * @param filesystem
+ * @param context
+ * @throws TikaException
+ */
+ @Deprecated
public OutlookExtractor(POIFSFileSystem filesystem, ParseContext context) throws TikaException {
this(filesystem.getRoot(), context);
}
+ /**
+ * @deprecated use {@link OutlookExtractor#OutlookExtractor(DirectoryNode, Metadata, ParseContext)}
+ * Will be removed after 2.4.0
+ * @param root
+ * @param context
+ * @throws TikaException
+ */
+ @Deprecated
public OutlookExtractor(DirectoryNode root, ParseContext context) throws TikaException {
super(context);
this.parseContext = context;
@@ -112,6 +129,18 @@ public class OutlookExtractor extends AbstractPOIFSExtractor {
}
}
+ public OutlookExtractor(DirectoryNode root, Metadata metadata, ParseContext context) throws TikaException {
+ super(context, metadata);
+ this.parseContext = context;
+ this.extractAllAlternatives =
+ context.get(OfficeParserConfig.class).isExtractAllAlternativesFromMSG();
+ try {
+ this.msg = new MAPIMessage(root);
+ } catch (IOException e) {
+ throw new TikaException("Failed to parse Outlook message", e);
+ }
+ }
+
//need to add empty string to ensure that parallel arrays are parallel
//even if one value is null.
public static void addEvenIfNull(Property property, String value, Metadata metadata) {
@@ -156,6 +185,19 @@ public class OutlookExtractor extends AbstractPOIFSExtractor {
}
}
+ public void parse(XHTMLContentHandler xhtml) throws TikaException, SAXException, IOException {
+ parse(xhtml, parentMetadata);
+ }
+
+ /**
+ * @deprecated use {@link #parse(XHTMLContentHandler)}, will be removed after 2.4.0
+ * @param xhtml
+ * @param metadata
+ * @throws TikaException
+ * @throws SAXException
+ * @throws IOException
+ */
+ @Deprecated
public void parse(XHTMLContentHandler xhtml, Metadata metadata)
throws TikaException, SAXException, IOException {
try {