You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/04/26 11:04:56 UTC

[tika] 01/02: TIKA-3733 -- fix newly discovered npe in WordExtractor and OutlookExtractor

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 434c73680600c7faa8f6fb812d2156c3462d3982
Author: tallison <ta...@apache.org>
AuthorDate: Tue Apr 26 06:58:36 2022 -0400

    TIKA-3733 -- fix newly discovered npe in WordExtractor and OutlookExtractor
---
 .../org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java | 4 ----
 .../main/java/org/apache/tika/parser/microsoft/OfficeParser.java | 2 +-
 .../java/org/apache/tika/parser/microsoft/OutlookExtractor.java  | 9 +++++----
 .../java/org/apache/tika/parser/microsoft/WordExtractor.java     | 7 ++-----
 4 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
index 3f9724977..d6bdca9c9 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
@@ -52,10 +52,6 @@ abstract class AbstractPOIFSExtractor {
     protected final OfficeParserConfig officeParserConfig;
     protected final ParseContext context;
 
-    protected AbstractPOIFSExtractor(ParseContext context) {
-        this(context, null);
-    }
-
     protected AbstractPOIFSExtractor(ParseContext context, Metadata parentMetadata) {
         embeddedDocumentUtil = new EmbeddedDocumentUtil(context);
 
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
index 67b09bb75..226c6c599 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
@@ -194,7 +194,7 @@ public class OfficeParser extends AbstractOfficeParser {
                 break;
             case OUTLOOK:
                 OutlookExtractor extractor =
-                        new OutlookExtractor(root, context);
+                        new OutlookExtractor(root, metadata, context);
 
                 extractor.parse(xhtml, metadata);
                 break;
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
index 33b7fbf14..d440f1356 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
@@ -126,12 +126,13 @@ public class OutlookExtractor extends AbstractPOIFSExtractor {
 
     private final boolean extractAllAlternatives;
 
-    public OutlookExtractor(POIFSFileSystem filesystem, ParseContext context) throws TikaException {
-        this(filesystem.getRoot(), context);
+    public OutlookExtractor(POIFSFileSystem filesystem,
+                            Metadata parentMetadata, ParseContext context) throws TikaException {
+        this(filesystem.getRoot(), parentMetadata, context);
     }
 
-    public OutlookExtractor(DirectoryNode root, ParseContext context) throws TikaException {
-        super(context);
+    public OutlookExtractor(DirectoryNode root, Metadata parentMetadata, ParseContext context) throws TikaException {
+        super(context, parentMetadata);
         this.parseContext = context;
         this.extractAllAlternatives = context.get(OfficeParserConfig.class).getExtractAllAlternativesFromMSG();
         try {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
index 6b3d08c8b..2f3f267ef 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
@@ -83,11 +83,8 @@ public class WordExtractor extends AbstractPOIFSExtractor {
 
     private final Deque<FormattingUtils.Tag> formattingState = new ArrayDeque<>();
 
-    private final Metadata metadata;
-
     public WordExtractor(ParseContext context, Metadata metadata) {
-        super(context);
-        this.metadata = metadata;
+        super(context, metadata);
     }
 
     private static int countParagraphs(Range... ranges) {
@@ -239,7 +236,7 @@ public class WordExtractor extends AbstractPOIFSExtractor {
             return;
         }
         for (SavedByEntry sbe : savedByTable.getEntries()) {
-            metadata.add(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, sbe.getSaveLocation());
+            parentMetadata.add(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, sbe.getSaveLocation());
         }
     }