You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/08/13 13:56:48 UTC

[tika] branch main updated: TIKA-3159 and TIKA-3161 -- improve macro handling in odf and flat odf

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 518bd27  TIKA-3159 and TIKA-3161 -- improve macro handling in odf and flat odf
518bd27 is described below

commit 518bd27aadf3d311db031bbd305795e0dac10b8f
Author: tallison <ta...@apache.org>
AuthorDate: Thu Aug 13 09:56:28 2020 -0400

    TIKA-3159 and TIKA-3161 -- improve macro handling in odf and flat odf
---
 .../parser/odf/FlatOpenDocumentMacroHandler.java   |  16 +++--
 .../tika/parser/odf/FlatOpenDocumentParser.java    |  46 ++++++++++----
 .../tika/parser/odf/OpenDocumentMacroHandler.java  |  19 +++---
 .../apache/tika/parser/odf/OpenDocumentParser.java |  67 +++++++++++---------
 .../org/apache/tika/parser/odf/ODFParserTest.java  |  69 +++++++++++++++++----
 .../apache/tika/parser/odf/tika-config-macros.xml  |  32 ++++++++++
 .../test/resources/test-documents/testODTMacro.odt | Bin 29912 -> 30809 bytes
 7 files changed, 178 insertions(+), 71 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/odf/FlatOpenDocumentMacroHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/odf/FlatOpenDocumentMacroHandler.java
index fefc824..6c83ea3 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/odf/FlatOpenDocumentMacroHandler.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/odf/FlatOpenDocumentMacroHandler.java
@@ -33,7 +33,9 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
 
-
+/**
+ * Handler for macros in flat open documents
+ */
 class FlatOpenDocumentMacroHandler extends ContentHandlerDecorator {
 
     static String MODULE = "module";
@@ -80,10 +82,17 @@ class FlatOpenDocumentMacroHandler extends ContentHandlerDecorator {
                 handleMacro();
             } catch (IOException e) {
                 throw new SAXException(e);
+            } finally {
+                resetMacroState();
             }
         }
     }
 
+    protected void resetMacroState() {
+        macroBuffer.setLength(0);
+        macroName = null;
+        inMacro = false;
+    }
     protected void handleMacro() throws IOException, SAXException {
 
         byte[] bytes = macroBuffer.toString().getBytes(StandardCharsets.UTF_8);
@@ -98,11 +107,6 @@ class FlatOpenDocumentMacroHandler extends ContentHandlerDecorator {
         embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
                 TikaCoreProperties.EmbeddedResourceType.MACRO.toString());
 
-        //reset state before parse
-        macroBuffer.setLength(0);
-        macroName = null;
-        inMacro = false;
-
         if (embeddedDocumentExtractor.shouldParseEmbedded(embeddedMetadata)) {
             try (InputStream is = TikaInputStream.get(bytes)) {
                 embeddedDocumentExtractor.parseEmbedded(
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/odf/FlatOpenDocumentParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/odf/FlatOpenDocumentParser.java
index 04c7cd5..6fde124 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/odf/FlatOpenDocumentParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/odf/FlatOpenDocumentParser.java
@@ -17,6 +17,7 @@
 package org.apache.tika.parser.odf;
 
 import org.apache.commons.io.input.CloseShieldInputStream;
+import org.apache.tika.config.Field;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -58,6 +59,8 @@ public class FlatOpenDocumentParser extends AbstractParser {
                     FLAT_OD, FLAT_ODT, FLAT_ODP, FLAT_ODS
                   )));
 
+    private boolean extractMacros = false;
+
     @Override
     public Set<MediaType> getSupportedTypes(ParseContext context) {
         return SUPPORTED_TYPES;
@@ -88,9 +91,14 @@ public class FlatOpenDocumentParser extends AbstractParser {
         }
     }
 
+    @Field
+    public void setExtractMacros(boolean extractMacros) {
+        this.extractMacros = extractMacros;
+    }
+
     private ContentHandler getContentHandler(ContentHandler handler,
                                              Metadata metadata, ParseContext context) {
-        return new FlatOpenDocumentParserHandler(handler, metadata, context);
+        return new FlatOpenDocumentParserHandler(handler, metadata, context, extractMacros);
     }
 
     private static class FlatOpenDocumentParserHandler extends ContentHandlerDecorator {
@@ -99,8 +107,6 @@ public class FlatOpenDocumentParser extends AbstractParser {
         private static final String SCRIPTS = "scripts";
         private static final String DOCUMENT = "document";
 
-        private final Metadata metadata;
-        private final ParseContext parseContext;
 
         private final ContentHandler defaultHandler = new DefaultHandler();
 
@@ -111,13 +117,31 @@ public class FlatOpenDocumentParser extends AbstractParser {
         private ContentHandler currentHandler = defaultHandler;
 
         private MediaType detectedType = null;
+        private final boolean extractMacros;
 
-        private FlatOpenDocumentParserHandler(ContentHandler baseHandler, Metadata metadata, ParseContext parseContext) {
-            this.metadata = metadata;
-            this.parseContext = parseContext;
-            this.bodyHandler = new OpenDocumentBodyHandler(new NSNormalizerContentHandler(baseHandler), parseContext);
-            this.metadataHandler = OpenDocumentMetaParser.getContentHandler(metadata, parseContext);
-            this.macroHandler = new FlatOpenDocumentMacroHandler(baseHandler, parseContext);
+        private FlatOpenDocumentParserHandler(ContentHandler baseHandler,
+                                              Metadata metadata,
+                                              ParseContext parseContext, boolean extractMacros) {
+            this.extractMacros = extractMacros;
+
+            this.bodyHandler =
+                    new OfflineContentHandler(
+                            new OpenDocumentBodyHandler(
+                                    new NSNormalizerContentHandler(baseHandler), parseContext));
+
+            this.metadataHandler = new OfflineContentHandler(
+                    new NSNormalizerContentHandler(
+                            OpenDocumentMetaParser.getContentHandler(metadata, parseContext)
+                    )
+            );
+
+            if (extractMacros) {
+                this.macroHandler = new OfflineContentHandler(
+                        new FlatOpenDocumentMacroHandler(baseHandler, parseContext)
+                );
+            } else {
+                this.macroHandler = null;
+            }
         }
 
         MediaType getDetectedType() {
@@ -132,7 +156,7 @@ public class FlatOpenDocumentParser extends AbstractParser {
                 currentHandler = metadataHandler;
             } else if (BODY.equals(localName)) {
                 currentHandler = bodyHandler;
-            } else if (SCRIPTS.equals(localName)) {
+            } else if (extractMacros && SCRIPTS.equals(localName)) {
                 currentHandler = macroHandler;
             }
 
@@ -165,7 +189,7 @@ public class FlatOpenDocumentParser extends AbstractParser {
                 currentHandler = defaultHandler;
             } else if (BODY.equals(localName)) {
                 currentHandler = defaultHandler;
-            } else if (SCRIPTS.equals(localName)) {
+            } else if (extractMacros && SCRIPTS.equals(localName)) {
                 currentHandler = defaultHandler;
             }
             currentHandler.endElement(namespaceURI, localName, qName);
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMacroHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMacroHandler.java
index 79c11ab..e60cce0 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMacroHandler.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMacroHandler.java
@@ -16,22 +16,13 @@
  */
 package org.apache.tika.parser.odf;
 
-import org.apache.commons.lang3.StringUtils;
-import org.apache.tika.extractor.EmbeddedDocumentExtractor;
-import org.apache.tika.extractor.EmbeddedDocumentUtil;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.ParseContext;
-import org.apache.tika.sax.ContentHandlerDecorator;
 import org.apache.tika.utils.XMLReaderUtils;
 import org.xml.sax.Attributes;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
 import java.io.IOException;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
 
 
 class OpenDocumentMacroHandler extends FlatOpenDocumentMacroHandler {
@@ -44,8 +35,11 @@ class OpenDocumentMacroHandler extends FlatOpenDocumentMacroHandler {
     public void startElement(
             String namespaceURI, String localName, String qName,
             Attributes attrs) throws SAXException {
-        inMacro = true;
-        macroName = XMLReaderUtils.getAttrValue(NAME, attrs);
+        //in the compressed odf, there should only be one element in this file.
+        if (MODULE.equalsIgnoreCase(localName)) {
+            inMacro = true;
+            macroName = XMLReaderUtils.getAttrValue(NAME, attrs);
+        }
     }
 
 
@@ -57,6 +51,9 @@ class OpenDocumentMacroHandler extends FlatOpenDocumentMacroHandler {
                 handleMacro();
             } catch (IOException e) {
                 throw new SAXException(e);
+            } finally {
+                //this shouldn't be necessary in the compressed odf files
+                resetMacroState();
             }
         }
     }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
index a750a9b..d645d10 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
@@ -32,6 +32,8 @@ import java.util.zip.ZipInputStream;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.io.input.CloseShieldInputStream;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.tika.config.Field;
+import org.apache.tika.detect.XmlRootExtractor;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.extractor.EmbeddedDocumentExtractor;
 import org.apache.tika.extractor.EmbeddedDocumentUtil;
@@ -123,6 +125,8 @@ public class OpenDocumentParser extends AbstractParser {
         return SUPPORTED_TYPES;
     }
 
+    private boolean extractMacros = false;
+
     public void parse(
             InputStream stream, ContentHandler baseHandler,
             Metadata metadata, ParseContext context)
@@ -148,7 +152,7 @@ public class OpenDocumentParser extends AbstractParser {
 
         // Prepare to handle the content
         XHTMLContentHandler xhtml = new XHTMLContentHandler(baseHandler, metadata);
-
+        xhtml.startDocument();
         // As we don't know which of the metadata or the content
         //  we'll hit first, catch the endDocument call initially
         EndDocumentShieldingContentHandler handler =
@@ -176,6 +180,11 @@ public class OpenDocumentParser extends AbstractParser {
         }
     }
 
+    @Field
+    public void setExtractMacros(boolean extractMacros) {
+        this.extractMacros = extractMacros;
+    }
+
     private void handleZipStream(ZipInputStream zipStream, Metadata metadata, ParseContext context, EndDocumentShieldingContentHandler handler) throws IOException, TikaException, SAXException {
         ZipEntry entry = zipStream.getNextEntry();
         if (entry == null) {
@@ -211,7 +220,6 @@ public class OpenDocumentParser extends AbstractParser {
                                 ParseContext context, ContentHandler handler)
             throws IOException, SAXException, TikaException {
         if (entry == null) return;
-
         if (entry.getName().equals("mimetype")) {
             String type = IOUtils.toString(zip, UTF_8);
             metadata.set(Metadata.CONTENT_TYPE, type);
@@ -236,18 +244,15 @@ public class OpenDocumentParser extends AbstractParser {
             //scrape everything under Thumbnails/ and Pictures/
             if (embeddedName.contains("Thumbnails/") ||
                     embeddedName.contains("Pictures/")) {
-                if (ignoreScriptFile(embeddedName)) {
-                    return;
-                }
 
                 EmbeddedDocumentExtractor embeddedDocumentExtractor =
                         EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);
                 Metadata embeddedMetadata = new Metadata();
                 embeddedMetadata.set(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, entry.getName());
-                /* if (embeddedName.startsWith("Thumbnails/")) {
+                if (embeddedName.startsWith("Thumbnails/")) {
                     embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
-                            TikaCoreProperties.EmbeddedResourceType.THUMBNAIL);
-                }*/
+                            TikaCoreProperties.EmbeddedResourceType.THUMBNAIL.toString());
+                }
                 if (embeddedName.contains("Pictures/")) {
                     embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
                             TikaCoreProperties.EmbeddedResourceType.INLINE.toString());
@@ -257,36 +262,35 @@ public class OpenDocumentParser extends AbstractParser {
                     embeddedDocumentExtractor.parseEmbedded(zip,
                             new EmbeddedContentHandler(handler), embeddedMetadata, false);
                 }
-            } else if (embeddedName.contains("Basic/")) {
-                Metadata embeddedMetadata = new Metadata();
-                embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
-                        TikaCoreProperties.EmbeddedResourceType.MACRO.toString());
-                String name = getMacroName(embeddedName);
-                if (!StringUtils.isAllBlank(name)) {
-                    embeddedMetadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, name);
-                }
-                handler = new OpenDocumentMacroHandler(handler, context);
-                XMLReaderUtils.parseSAX(
-                        new CloseShieldInputStream(zip),
-                        new OfflineContentHandler(new EmbeddedContentHandler(
-                                handler)), context);
+            } else if (extractMacros && embeddedName.contains("Basic/")) {
+                //process all files under Basic/; let maybeHandleMacro figure
+                //out if it is a macro or not
+                maybeHandleMacro(zip, embeddedName, handler, context);
             }
 
         }
     }
 
-    private String getMacroName(String embeddedName) {
-
-        if (embeddedName == null) {
-            return null;
+    private void maybeHandleMacro(InputStream is, String embeddedName,
+                                  ContentHandler handler, ParseContext context)
+            throws TikaException, IOException, SAXException {
+        //should probably run XMLRootExtractor on the inputstream
+        //or read the macro manifest for the names of the macros
+        //rather than relying on the script file name
+        if (ignoreScriptFile(embeddedName)) {
+            return;
         }
-        int lastSlash = embeddedName.lastIndexOf("/");
-        if (lastSlash > -1) {
-            return embeddedName.substring(lastSlash+1).replaceFirst("\\.xml$", "");
-        }
-        return null;
+        Metadata embeddedMetadata = new Metadata();
+        embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
+                TikaCoreProperties.EmbeddedResourceType.MACRO.toString());
+        handler = new OpenDocumentMacroHandler(handler, context);
+        XMLReaderUtils.parseSAX(
+                new CloseShieldInputStream(is),
+                new OfflineContentHandler(new EmbeddedContentHandler(
+                        handler)), context);
     }
 
+
     private boolean ignoreScriptFile(String embeddedName) {
         if (embeddedName.contains("Basic/")) {
             if (embeddedName.contains("script-lb.xml")) {
@@ -294,6 +298,9 @@ public class OpenDocumentParser extends AbstractParser {
             } else if (embeddedName.contains("script-lc.xml")) {
                 return true;
             }
+        } else {
+            //shouldn't ever get here, but if it isn't under Basic/, ignore it
+            return true;
         }
         return false;
     }
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
index 5f98b6c..1d5217d 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
@@ -25,6 +25,8 @@ import java.util.Arrays;
 import java.util.List;
 
 import org.apache.tika.TikaTest;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.Office;
@@ -32,14 +34,17 @@ import org.apache.tika.metadata.OfficeOpenXMLCore;
 import org.apache.tika.metadata.OfficeOpenXMLExtended;
 import org.apache.tika.metadata.PagedText;
 import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.EmptyParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.opendocument.OpenOfficeParser;
 import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
 import org.apache.tika.sax.BodyContentHandler;
+import org.junit.BeforeClass;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
 
 public class ODFParserTest extends TikaTest {
     /**
@@ -53,6 +58,14 @@ public class ODFParserTest extends TikaTest {
        };
     }
 
+    private static Parser MACRO_PARSER;
+
+    @BeforeClass
+    public static void setUp() throws IOException, TikaException, SAXException {
+        MACRO_PARSER = new AutoDetectParser(new TikaConfig
+                (ODFParserTest.class.getResourceAsStream("tika-config-macros.xml")));
+    }
+
     @Test
     public void testOO3() throws Exception {
        for (Parser parser : getParsers()) {
@@ -393,8 +406,8 @@ public class ODFParserTest extends TikaTest {
 
     @Test
     public void testMacroODT() throws Exception {
-        List<Metadata> metadataList = getRecursiveMetadata("testODTMacro.odt");
-        assertEquals(4, metadataList.size());
+        List<Metadata> metadataList = getRecursiveMetadata("testODTMacro.odt", MACRO_PARSER);
+        assertEquals(5, metadataList.size());
         Metadata parent = metadataList.get(0);
 
         assertContains("<p>Hello dear user,</p>",
@@ -403,22 +416,44 @@ public class ODFParserTest extends TikaTest {
                 parent.get(Metadata.CONTENT_TYPE));
 
         //make sure metadata came through
-        assertEquals("LibreOffice/6.4.3.2$MacOSX_X86_64 LibreOffice_project/747b5d0ebf89f41c860ec2a39efd7cb15b54f2d8",
+        assertEquals("LibreOffice/6.4.4.2$Linux_X86_64 LibreOffice_project/40$Build-2",
                 parent.get("generator"));
         assertEquals(1, parent.getInt(PagedText.N_PAGES).intValue());
 
-        Metadata macro = metadataList.get(1);
-        assertEquals("MACRO", macro.get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE_KEY));
-        assertContains("If WsGQFM Or 2 Then", macro.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
-        assertEquals("test", macro.get(TikaCoreProperties.RESOURCE_NAME_KEY));
+        Metadata macro1 = metadataList.get(1);
+        assertEquals("MACRO", macro1.get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE_KEY));
+        assertContains("If WsGQFM Or 2 Then", macro1.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
+        assertEquals("test", macro1.get(TikaCoreProperties.RESOURCE_NAME_KEY));
 
-        Metadata image = metadataList.get(2);
+        Metadata macro2 = metadataList.get(2);
+        assertEquals("MACRO", macro2.get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE_KEY));
+        assertContains("If WsGQFM Or 1 Then", macro2.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
+        assertEquals("test2", macro2.get(TikaCoreProperties.RESOURCE_NAME_KEY));
+
+        Metadata image = metadataList.get(3);
         assertEquals("image/png", image.get(Metadata.CONTENT_TYPE));
     }
 
     @Test
+    public void testMacroODTandXMLHandler() throws Exception {
+        String xml = getXML("testODTMacro.odt", MACRO_PARSER).xml;
+        assertContains("Hello dear user", xml);
+        assertContains("If WsGQFM Or 1", xml);
+        assertContains("If WsGQFM Or 2 Then", xml);
+    }
+
+    @Test
+    public void testMacroODTandXMLHandlerDefault() throws Exception {
+        //test to make sure that macros aren't extracted by the default AutoDetectParser
+        String xml = getXML("testODTMacro.odt").xml;
+        assertContains("Hello dear user", xml);
+        assertNotContained("If WsGQFM Or 1", xml);
+        assertNotContained("If WsGQFM Or 2 Then", xml);
+    }
+
+    @Test
     public void testMacroODS() throws Exception {
-        List<Metadata> metadataList = getRecursiveMetadata("testODSMacro.ods");
+        List<Metadata> metadataList = getRecursiveMetadata("testODSMacro.ods", MACRO_PARSER);
         assertEquals(4, metadataList.size());
         Metadata parent = metadataList.get(0);
 
@@ -438,7 +473,7 @@ public class ODFParserTest extends TikaTest {
 
     @Test
     public void testMacroODP() throws Exception {
-        List<Metadata> metadataList = getRecursiveMetadata("testODPMacro.odp");
+        List<Metadata> metadataList = getRecursiveMetadata("testODPMacro.odp", MACRO_PARSER);
         assertEquals(3, metadataList.size());
         Metadata parent = metadataList.get(0);
 
@@ -462,7 +497,7 @@ public class ODFParserTest extends TikaTest {
 
     @Test
     public void testMacroFODT() throws Exception {
-        List<Metadata> metadataList = getRecursiveMetadata("testODTMacro.fodt");
+        List<Metadata> metadataList = getRecursiveMetadata("testODTMacro.fodt", MACRO_PARSER);
         assertEquals(3, metadataList.size());
         Metadata parent = metadataList.get(0);
 
@@ -485,9 +520,17 @@ public class ODFParserTest extends TikaTest {
         assertEquals("image/png", image.get(Metadata.CONTENT_TYPE));
     }
 
+
+    @Test
+    public void testMacroFODTandXMLOutput() throws Exception {
+        String xml = getXML("testODTMacro.fodt", MACRO_PARSER).xml;
+        assertContains("Hello dear user", xml);
+        assertContains("If WsGQFM Or 2", xml);
+    }
+
     @Test
     public void testMacroFODS() throws Exception {
-        List<Metadata> metadataList = getRecursiveMetadata("testODSMacro.fods");
+        List<Metadata> metadataList = getRecursiveMetadata("testODSMacro.fods", MACRO_PARSER);
         assertEquals(3, metadataList.size());
         Metadata parent = metadataList.get(0);
 
@@ -507,7 +550,7 @@ public class ODFParserTest extends TikaTest {
 
     @Test
     public void testMacroFODP() throws Exception {
-        List<Metadata> metadataList = getRecursiveMetadata("testODPMacro.fodp");
+        List<Metadata> metadataList = getRecursiveMetadata("testODPMacro.fodp", MACRO_PARSER);
         assertEquals(2, metadataList.size());
         Metadata parent = metadataList.get(0);
 
diff --git a/tika-parsers/src/test/resources/org/apache/tika/parser/odf/tika-config-macros.xml b/tika-parsers/src/test/resources/org/apache/tika/parser/odf/tika-config-macros.xml
new file mode 100644
index 0000000..a448d1d
--- /dev/null
+++ b/tika-parsers/src/test/resources/org/apache/tika/parser/odf/tika-config-macros.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+    <parsers>
+        <parser class="org.apache.tika.parser.DefaultParser"/>
+        <parser class="org.apache.tika.parser.odf.OpenDocumentParser">
+            <params>
+                <param name="extractMacros" type="bool">true</param>
+            </params>
+        </parser>
+        <parser class="org.apache.tika.parser.odf.FlatOpenDocumentParser">
+            <params>
+                <param name="extractMacros" type="bool">true</param>
+            </params>
+        </parser>
+    </parsers>
+</properties>
diff --git a/tika-parsers/src/test/resources/test-documents/testODTMacro.odt b/tika-parsers/src/test/resources/test-documents/testODTMacro.odt
index 6309e97..5ec2d25 100644
Binary files a/tika-parsers/src/test/resources/test-documents/testODTMacro.odt and b/tika-parsers/src/test/resources/test-documents/testODTMacro.odt differ