You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/08/08 17:21:39 UTC
[tika] branch master updated: TIKA-2706 -- store exceptions from macroreader in child metadata
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new da26f02 TIKA-2706 -- store exceptions from macroreader in child metadata
da26f02 is described below
commit da26f02bf60ea3ef59bfa5b67930925d8dcd2e77
Author: TALLISON <ta...@apache.org>
AuthorDate: Wed Aug 8 13:21:28 2018 -0400
TIKA-2706 -- store exceptions from macroreader in child metadata
---
.../org/apache/tika/parser/microsoft/OfficeParser.java | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
index fab72cc..e418dfe 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
@@ -313,8 +313,8 @@ public class OfficeParser extends AbstractOfficeParser {
* @throws IOException on IOException if it occurs during the extraction of the embedded doc
* @throws SAXException on SAXException for writing to xhtml
*/
- public static void extractMacros(NPOIFSFileSystem fs, ContentHandler xhtml, EmbeddedDocumentExtractor
- embeddedDocumentExtractor) throws IOException, SAXException {
+ public static void extractMacros(NPOIFSFileSystem fs, ContentHandler xhtml,
+ EmbeddedDocumentExtractor embeddedDocumentExtractor) throws IOException, SAXException {
VBAMacroReader reader = null;
Map<String, String> macros = null;
@@ -322,7 +322,18 @@ public class OfficeParser extends AbstractOfficeParser {
reader = new VBAMacroReader(fs);
macros = reader.readMacros();
} catch (Exception e) {
- //swallow
+ if (e instanceof SecurityException) {
+ throw e;
+ }
+ Metadata m = new Metadata();
+ m.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE, TikaCoreProperties.EmbeddedResourceType.MACRO.toString());
+ m.set(Metadata.CONTENT_TYPE, "text/x-vbasic");
+ EmbeddedDocumentUtil.recordException(e, m);
+ if (embeddedDocumentExtractor.shouldParseEmbedded(m)) {
+ embeddedDocumentExtractor.parseEmbedded(
+ //pass in space character so that we don't trigger a zero-byte exception
+ new ByteArrayInputStream(new byte[]{'\u0020'}), xhtml, m, true);
+ }
return;
}
for (Map.Entry<String, String> e : macros.entrySet()) {