You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/04/26 12:24:25 UTC

[tika] 02/04: TIKA-3733 -- pass parent metadata to AbstractPOIFSExtractor in OutlookExtractor

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 15909b43253ed671f76a3aed2205da3b67dbb97d
Author: tallison <ta...@apache.org>
AuthorDate: Tue Apr 26 07:19:47 2022 -0400

    TIKA-3733 -- pass parent metadata to AbstractPOIFSExtractor in OutlookExtractor
---
 .../apache/tika/parser/microsoft/OfficeParser.java |  4 +--
 .../tika/parser/microsoft/OutlookExtractor.java    | 42 ++++++++++++++++++++++
 2 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
index a155e470c..280b486bd 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
@@ -235,9 +235,9 @@ public class OfficeParser extends AbstractOfficeParser {
                 }
                 break;
             case OUTLOOK:
-                OutlookExtractor extractor = new OutlookExtractor(root, context);
+                OutlookExtractor extractor = new OutlookExtractor(root, metadata, context);
 
-                extractor.parse(xhtml, metadata);
+                extractor.parse(xhtml);
                 break;
             case ENCRYPTED:
                 EncryptionInfo info = new EncryptionInfo(root);
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
index a7983d7b4..2f19dec1a 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
@@ -96,10 +96,27 @@ public class OutlookExtractor extends AbstractPOIFSExtractor {
     private final ParseContext parseContext;
     private final boolean extractAllAlternatives;
     HtmlEncodingDetector detector = new HtmlEncodingDetector();
+
+    /**
+     * @deprecated use {@link OutlookExtractor#OutlookExtractor(DirectoryNode, Metadata, ParseContext)}
+     *      Will be removed after 2.4.0
+     * @param filesystem
+     * @param context
+     * @throws TikaException
+     */
+    @Deprecated
     public OutlookExtractor(POIFSFileSystem filesystem, ParseContext context) throws TikaException {
         this(filesystem.getRoot(), context);
     }
 
+    /**
+     * @deprecated use {@link OutlookExtractor#OutlookExtractor(DirectoryNode, Metadata, ParseContext)}
+     *              Will be removed after 2.4.0
+     * @param root
+     * @param context
+     * @throws TikaException
+     */
+    @Deprecated
     public OutlookExtractor(DirectoryNode root, ParseContext context) throws TikaException {
         super(context);
         this.parseContext = context;
@@ -112,6 +129,18 @@ public class OutlookExtractor extends AbstractPOIFSExtractor {
         }
     }
 
+    public OutlookExtractor(DirectoryNode root, Metadata metadata, ParseContext context) throws TikaException {
+        super(context, metadata);
+        this.parseContext = context;
+        this.extractAllAlternatives =
+                context.get(OfficeParserConfig.class).isExtractAllAlternativesFromMSG();
+        try {
+            this.msg = new MAPIMessage(root);
+        } catch (IOException e) {
+            throw new TikaException("Failed to parse Outlook message", e);
+        }
+    }
+
     //need to add empty string to ensure that parallel arrays are parallel
     //even if one value is null.
     public static void addEvenIfNull(Property property, String value, Metadata metadata) {
@@ -156,6 +185,19 @@ public class OutlookExtractor extends AbstractPOIFSExtractor {
         }
     }
 
+    public void parse(XHTMLContentHandler xhtml) throws TikaException, SAXException, IOException {
+        parse(xhtml, parentMetadata);
+    }
+
+    /**
+     * @deprecated use {@link #parse(XHTMLContentHandler)}, will be removed after 2.4.0
+     * @param xhtml
+     * @param metadata
+     * @throws TikaException
+     * @throws SAXException
+     * @throws IOException
+     */
+    @Deprecated
     public void parse(XHTMLContentHandler xhtml, Metadata metadata)
             throws TikaException, SAXException, IOException {
         try {