You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2017/01/13 13:47:01 UTC
tika git commit: TIKA-2134 -- handle missing parts more robustly
Repository: tika
Updated Branches:
refs/heads/master 8eb7d352f -> 526fc08f2
TIKA-2134 -- handle missing parts more robustly
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/526fc08f
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/526fc08f
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/526fc08f
Branch: refs/heads/master
Commit: 526fc08f2a0a4c44ea191a10748ca9ef0e834cb3
Parents: 8eb7d35
Author: tballison <ta...@mitre.org>
Authored: Fri Jan 13 08:46:54 2017 -0500
Committer: tballison <ta...@mitre.org>
Committed: Fri Jan 13 08:46:54 2017 -0500
----------------------------------------------------------------------
.../microsoft/ooxml/AbstractOOXMLExtractor.java | 4 ++++
.../ooxml/XSSFExcelExtractorDecorator.java | 16 +++++++++++++---
2 files changed, 17 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/526fc08f/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
index 9c12fc5..426092e 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
@@ -181,6 +181,10 @@ public abstract class AbstractOOXMLExtractor implements OOXMLExtractor {
Set<String> seen = new HashSet<>();
try {
for (PackagePart source : getMainDocumentParts()) {
+ if (source == null) {
+ //parts can go missing; silently ignore -- TIKA-2134
+ continue;
+ }
for (PackageRelationship rel : source.getRelationships()) {
try {
handleEmbeddedPart(source, rel, handler, metadata, seen);
http://git-wip-us.apache.org/repos/asf/tika/blob/526fc08f/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
index 0f6957c..45a6a84 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
@@ -159,7 +159,13 @@ public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor {
for (String footer : sheetExtractor.footers) {
extractHeaderFooter(footer, xhtml);
}
- processShapes(iter.getShapes(), xhtml);
+ List<XSSFShape> shapes = null;
+ try {
+ shapes = iter.getShapes();
+ } catch (NullPointerException e) {
+ //missing shape part: getShapes() may throw an NPE; shapes stays null -- TIKA-2134
+ }
+ processShapes(shapes, xhtml);
//for now dump sheet hyperlinks at bottom of page
//consider a double-pass of the inputstream to reunite hyperlinks with cells/textboxes
@@ -176,8 +182,12 @@ public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor {
for (PackageRelationship rel : sheetPart.getRelationshipsByType(XSSFRelation.DRAWINGS.getRelation())) {
if (rel.getTargetMode() == TargetMode.INTERNAL) {
PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
- for (PackageRelationship drawRel : rel.getPackage()
- .getPart(relName)
+ PackagePart part = rel.getPackage().getPart(relName);
+ //parts can go missing, and Excel quietly ignores missing images -- TIKA-2134
+ if (part == null) {
+ continue;
+ }
+ for (PackageRelationship drawRel : part
.getRelationshipsByType(XSSFRelation.SHEET_HYPERLINKS.getRelation())) {
drawingHyperlinks.put(drawRel.getId(), drawRel.getTargetURI().toString());
}