You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/04/26 11:04:56 UTC
[tika] 01/02: TIKA-3733 -- fix newly discovered NPE in WordExtractor and OutlookExtractor
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 434c73680600c7faa8f6fb812d2156c3462d3982
Author: tallison <ta...@apache.org>
AuthorDate: Tue Apr 26 06:58:36 2022 -0400
TIKA-3733 -- fix newly discovered NPE in WordExtractor and OutlookExtractor
---
.../org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java | 4 ----
.../main/java/org/apache/tika/parser/microsoft/OfficeParser.java | 2 +-
.../java/org/apache/tika/parser/microsoft/OutlookExtractor.java | 9 +++++----
.../java/org/apache/tika/parser/microsoft/WordExtractor.java | 7 ++-----
4 files changed, 8 insertions(+), 14 deletions(-)
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
index 3f9724977..d6bdca9c9 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
@@ -52,10 +52,6 @@ abstract class AbstractPOIFSExtractor {
protected final OfficeParserConfig officeParserConfig;
protected final ParseContext context;
- protected AbstractPOIFSExtractor(ParseContext context) {
- this(context, null);
- }
-
protected AbstractPOIFSExtractor(ParseContext context, Metadata parentMetadata) {
embeddedDocumentUtil = new EmbeddedDocumentUtil(context);
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
index 67b09bb75..226c6c599 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
@@ -194,7 +194,7 @@ public class OfficeParser extends AbstractOfficeParser {
break;
case OUTLOOK:
OutlookExtractor extractor =
- new OutlookExtractor(root, context);
+ new OutlookExtractor(root, metadata, context);
extractor.parse(xhtml, metadata);
break;
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
index 33b7fbf14..d440f1356 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
@@ -126,12 +126,13 @@ public class OutlookExtractor extends AbstractPOIFSExtractor {
private final boolean extractAllAlternatives;
- public OutlookExtractor(POIFSFileSystem filesystem, ParseContext context) throws TikaException {
- this(filesystem.getRoot(), context);
+ public OutlookExtractor(POIFSFileSystem filesystem,
+ Metadata parentMetadata, ParseContext context) throws TikaException {
+ this(filesystem.getRoot(), parentMetadata, context);
}
- public OutlookExtractor(DirectoryNode root, ParseContext context) throws TikaException {
- super(context);
+ public OutlookExtractor(DirectoryNode root, Metadata parentMetadata, ParseContext context) throws TikaException {
+ super(context, parentMetadata);
this.parseContext = context;
this.extractAllAlternatives = context.get(OfficeParserConfig.class).getExtractAllAlternativesFromMSG();
try {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
index 6b3d08c8b..2f3f267ef 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
@@ -83,11 +83,8 @@ public class WordExtractor extends AbstractPOIFSExtractor {
private final Deque<FormattingUtils.Tag> formattingState = new ArrayDeque<>();
- private final Metadata metadata;
-
public WordExtractor(ParseContext context, Metadata metadata) {
- super(context);
- this.metadata = metadata;
+ super(context, metadata);
}
private static int countParagraphs(Range... ranges) {
@@ -239,7 +236,7 @@ public class WordExtractor extends AbstractPOIFSExtractor {
return;
}
for (SavedByEntry sbe : savedByTable.getEntries()) {
- metadata.add(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, sbe.getSaveLocation());
+ parentMetadata.add(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, sbe.getSaveLocation());
}
}