You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/04/27 13:01:34 UTC
[tika] branch branch_1x updated: TIKA-3741 -- fix regression in handling embedded exceptions in ppt
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_1x by this push:
new 88bff551f TIKA-3741 -- fix regression in handling embedded exceptions in ppt
88bff551f is described below
commit 88bff551fd05a3d7193291dcd3a98af56f38471a
Author: tallison <ta...@apache.org>
AuthorDate: Wed Apr 27 09:01:18 2022 -0400
TIKA-3741 -- fix regression in handling embedded exceptions in ppt
---
.../tika/parser/microsoft/HSLFExtractor.java | 67 ++++++++++++----------
.../parser/microsoft/PowerPointParserTest.java | 8 +--
2 files changed, 40 insertions(+), 35 deletions(-)
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
index 356c47e6a..50a19938d 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
@@ -503,44 +503,49 @@ public class HSLFExtractor extends AbstractPOIFSExtractor {
InputStream dataStream = null;
try {
dataStream = data.getInputStream();
+ } catch (SecurityException e) {
+ throw e;
} catch (Exception e) {
EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
continue;
}
- try (TikaInputStream stream = TikaInputStream.get(dataStream)) {
- String mediaType = null;
- if ("Excel.Chart.8".equals(oleShape.getProgId())) {
- mediaType = "application/vnd.ms-excel";
- } else {
- MediaType mt = getTikaConfig().getDetector().detect(stream, new Metadata());
- mediaType = mt.toString();
- }
- if (mediaType.equals("application/x-tika-msoffice-embedded; format=comp_obj")
- || mediaType.equals("application/x-tika-msoffice")) {
- POIFSFileSystem poifs = null;
-
- try {
- poifs = new POIFSFileSystem(new CloseShieldInputStream(stream));
- } catch (RuntimeException e) {
- throw new IOExceptionWithCause(e);
- }
- try {
- handleEmbeddedOfficeDoc(poifs.getRoot(), objID, xhtml);
- } finally {
- if (poifs != null) {
- poifs.close();
- }
- }
- } else {
- handleEmbeddedResource(
- stream, objID, objID,
- mediaType, xhtml, false);
- }
- } catch (IOException e) {
- EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
+ handleData(objID, oleShape.getProgId(), dataStream, xhtml);
+ }
+ }
+ }
+ }
+
+ private void handleData(String objID, String progId, InputStream dataStream,
+ XHTMLContentHandler xhtml) {
+
+ try (TikaInputStream stream = TikaInputStream.get(dataStream)) {
+ String mediaType = null;
+ if ("Excel.Chart.8".equals(progId)) {
+ mediaType = "application/vnd.ms-excel";
+ } else {
+ MediaType mt = getTikaConfig().getDetector().detect(stream, new Metadata());
+ mediaType = mt.toString();
+ }
+ if (mediaType.equals("application/x-tika-msoffice-embedded; format=comp_obj")
+ || mediaType.equals("application/x-tika-msoffice")) {
+ POIFSFileSystem poifs = new POIFSFileSystem(new CloseShieldInputStream(stream));
+
+ try {
+ handleEmbeddedOfficeDoc(poifs.getRoot(), objID, xhtml);
+ } finally {
+ if (poifs != null) {
+ poifs.close();
}
}
+ } else {
+ handleEmbeddedResource(
+ stream, objID, objID,
+ mediaType, xhtml, false);
}
+ } catch (SecurityException e) {
+ throw e;
+ } catch (Exception e) {
+ EmbeddedDocumentUtil.recordException(e, parentMetadata);
}
}
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
index 658c0d572..ce60154bc 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
@@ -327,15 +327,15 @@ public class PowerPointParserTest extends TikaTest {
XMLResult r = getXML("testPPT_skipBadCompressedObject.ppt");
assertContains("NASA Human", r.xml);
assertEquals(2,
- r.metadata.getValues(TikaCoreProperties.TIKA_META_EXCEPTION_EMBEDDED_STREAM).length);
+ r.metadata.getValues(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING).length);
assertContains("incorrect data check",
- r.metadata.get(TikaCoreProperties.TIKA_META_EXCEPTION_EMBEDDED_STREAM));
+ r.metadata.get(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING));
List<Metadata> metadataList = getRecursiveMetadata("testPPT_skipBadCompressedObject.ppt");
assertEquals(2,
- metadataList.get(0).getValues(TikaCoreProperties.TIKA_META_EXCEPTION_EMBEDDED_STREAM).length);
+ metadataList.get(0).getValues(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING).length);
assertContains("incorrect data check",
- metadataList.get(0).get(TikaCoreProperties.TIKA_META_EXCEPTION_EMBEDDED_STREAM));
+ metadataList.get(0).get(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING));
}
@Test(expected = EncryptedDocumentException.class)