You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/08/08 17:21:39 UTC

[tika] branch master updated: TIKA-2706 -- store exceptions from macroreader in child metadata

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/master by this push:
     new da26f02  TIKA-2706 -- store exceptions from macroreader in child metadata
da26f02 is described below

commit da26f02bf60ea3ef59bfa5b67930925d8dcd2e77
Author: TALLISON <ta...@apache.org>
AuthorDate: Wed Aug 8 13:21:28 2018 -0400

    TIKA-2706 -- store exceptions from macroreader in child metadata
---
 .../org/apache/tika/parser/microsoft/OfficeParser.java  | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
index fab72cc..e418dfe 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
@@ -313,8 +313,8 @@ public class OfficeParser extends AbstractOfficeParser {
      * @throws IOException on IOException if it occurs during the extraction of the embedded doc
      * @throws SAXException on SAXException for writing to xhtml
      */
-    public static void extractMacros(NPOIFSFileSystem fs, ContentHandler xhtml, EmbeddedDocumentExtractor
-            embeddedDocumentExtractor)  throws IOException, SAXException {
+    public static void extractMacros(NPOIFSFileSystem fs, ContentHandler xhtml,
+                                     EmbeddedDocumentExtractor embeddedDocumentExtractor)  throws IOException, SAXException {
 
         VBAMacroReader reader = null;
         Map<String, String> macros = null;
@@ -322,7 +322,18 @@ public class OfficeParser extends AbstractOfficeParser {
             reader = new VBAMacroReader(fs);
             macros = reader.readMacros();
         } catch (Exception e) {
-            //swallow
+            if (e instanceof SecurityException) {
+                throw e;
+            }
+            Metadata m = new Metadata();
+            m.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE, TikaCoreProperties.EmbeddedResourceType.MACRO.toString());
+            m.set(Metadata.CONTENT_TYPE, "text/x-vbasic");
+            EmbeddedDocumentUtil.recordException(e, m);
+            if (embeddedDocumentExtractor.shouldParseEmbedded(m)) {
+                embeddedDocumentExtractor.parseEmbedded(
+                        //pass in space character so that we don't trigger a zero-byte exception
+                        new ByteArrayInputStream(new byte[]{'\u0020'}), xhtml, m, true);
+            }
             return;
         }
         for (Map.Entry<String, String> e : macros.entrySet()) {