You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/08/13 13:56:48 UTC
[tika] branch main updated: TIKA-3159 and TIKA-3161 -- improve macro handling in odf and flat odf
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 518bd27 TIKA-3159 and TIKA-3161 -- improve macro handling in odf and flat odf
518bd27 is described below
commit 518bd27aadf3d311db031bbd305795e0dac10b8f
Author: tallison <ta...@apache.org>
AuthorDate: Thu Aug 13 09:56:28 2020 -0400
TIKA-3159 and TIKA-3161 -- improve macro handling in odf and flat odf
---
.../parser/odf/FlatOpenDocumentMacroHandler.java | 16 +++--
.../tika/parser/odf/FlatOpenDocumentParser.java | 46 ++++++++++----
.../tika/parser/odf/OpenDocumentMacroHandler.java | 19 +++---
.../apache/tika/parser/odf/OpenDocumentParser.java | 67 +++++++++++---------
.../org/apache/tika/parser/odf/ODFParserTest.java | 69 +++++++++++++++++----
.../apache/tika/parser/odf/tika-config-macros.xml | 32 ++++++++++
.../test/resources/test-documents/testODTMacro.odt | Bin 29912 -> 30809 bytes
7 files changed, 178 insertions(+), 71 deletions(-)
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/odf/FlatOpenDocumentMacroHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/odf/FlatOpenDocumentMacroHandler.java
index fefc824..6c83ea3 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/odf/FlatOpenDocumentMacroHandler.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/odf/FlatOpenDocumentMacroHandler.java
@@ -33,7 +33,9 @@ import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
-
+/**
+ * Handler for macros in flat open documents
+ */
class FlatOpenDocumentMacroHandler extends ContentHandlerDecorator {
static String MODULE = "module";
@@ -80,10 +82,17 @@ class FlatOpenDocumentMacroHandler extends ContentHandlerDecorator {
handleMacro();
} catch (IOException e) {
throw new SAXException(e);
+ } finally {
+ resetMacroState();
}
}
}
+ protected void resetMacroState() {
+ macroBuffer.setLength(0);
+ macroName = null;
+ inMacro = false;
+ }
protected void handleMacro() throws IOException, SAXException {
byte[] bytes = macroBuffer.toString().getBytes(StandardCharsets.UTF_8);
@@ -98,11 +107,6 @@ class FlatOpenDocumentMacroHandler extends ContentHandlerDecorator {
embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
TikaCoreProperties.EmbeddedResourceType.MACRO.toString());
- //reset state before parse
- macroBuffer.setLength(0);
- macroName = null;
- inMacro = false;
-
if (embeddedDocumentExtractor.shouldParseEmbedded(embeddedMetadata)) {
try (InputStream is = TikaInputStream.get(bytes)) {
embeddedDocumentExtractor.parseEmbedded(
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/odf/FlatOpenDocumentParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/odf/FlatOpenDocumentParser.java
index 04c7cd5..6fde124 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/odf/FlatOpenDocumentParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/odf/FlatOpenDocumentParser.java
@@ -17,6 +17,7 @@
package org.apache.tika.parser.odf;
import org.apache.commons.io.input.CloseShieldInputStream;
+import org.apache.tika.config.Field;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
@@ -58,6 +59,8 @@ public class FlatOpenDocumentParser extends AbstractParser {
FLAT_OD, FLAT_ODT, FLAT_ODP, FLAT_ODS
)));
+ private boolean extractMacros = false;
+
@Override
public Set<MediaType> getSupportedTypes(ParseContext context) {
return SUPPORTED_TYPES;
@@ -88,9 +91,14 @@ public class FlatOpenDocumentParser extends AbstractParser {
}
}
+ @Field
+ public void setExtractMacros(boolean extractMacros) {
+ this.extractMacros = extractMacros;
+ }
+
private ContentHandler getContentHandler(ContentHandler handler,
Metadata metadata, ParseContext context) {
- return new FlatOpenDocumentParserHandler(handler, metadata, context);
+ return new FlatOpenDocumentParserHandler(handler, metadata, context, extractMacros);
}
private static class FlatOpenDocumentParserHandler extends ContentHandlerDecorator {
@@ -99,8 +107,6 @@ public class FlatOpenDocumentParser extends AbstractParser {
private static final String SCRIPTS = "scripts";
private static final String DOCUMENT = "document";
- private final Metadata metadata;
- private final ParseContext parseContext;
private final ContentHandler defaultHandler = new DefaultHandler();
@@ -111,13 +117,31 @@ public class FlatOpenDocumentParser extends AbstractParser {
private ContentHandler currentHandler = defaultHandler;
private MediaType detectedType = null;
+ private final boolean extractMacros;
- private FlatOpenDocumentParserHandler(ContentHandler baseHandler, Metadata metadata, ParseContext parseContext) {
- this.metadata = metadata;
- this.parseContext = parseContext;
- this.bodyHandler = new OpenDocumentBodyHandler(new NSNormalizerContentHandler(baseHandler), parseContext);
- this.metadataHandler = OpenDocumentMetaParser.getContentHandler(metadata, parseContext);
- this.macroHandler = new FlatOpenDocumentMacroHandler(baseHandler, parseContext);
+ private FlatOpenDocumentParserHandler(ContentHandler baseHandler,
+ Metadata metadata,
+ ParseContext parseContext, boolean extractMacros) {
+ this.extractMacros = extractMacros;
+
+ this.bodyHandler =
+ new OfflineContentHandler(
+ new OpenDocumentBodyHandler(
+ new NSNormalizerContentHandler(baseHandler), parseContext));
+
+ this.metadataHandler = new OfflineContentHandler(
+ new NSNormalizerContentHandler(
+ OpenDocumentMetaParser.getContentHandler(metadata, parseContext)
+ )
+ );
+
+ if (extractMacros) {
+ this.macroHandler = new OfflineContentHandler(
+ new FlatOpenDocumentMacroHandler(baseHandler, parseContext)
+ );
+ } else {
+ this.macroHandler = null;
+ }
}
MediaType getDetectedType() {
@@ -132,7 +156,7 @@ public class FlatOpenDocumentParser extends AbstractParser {
currentHandler = metadataHandler;
} else if (BODY.equals(localName)) {
currentHandler = bodyHandler;
- } else if (SCRIPTS.equals(localName)) {
+ } else if (extractMacros && SCRIPTS.equals(localName)) {
currentHandler = macroHandler;
}
@@ -165,7 +189,7 @@ public class FlatOpenDocumentParser extends AbstractParser {
currentHandler = defaultHandler;
} else if (BODY.equals(localName)) {
currentHandler = defaultHandler;
- } else if (SCRIPTS.equals(localName)) {
+ } else if (extractMacros && SCRIPTS.equals(localName)) {
currentHandler = defaultHandler;
}
currentHandler.endElement(namespaceURI, localName, qName);
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMacroHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMacroHandler.java
index 79c11ab..e60cce0 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMacroHandler.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMacroHandler.java
@@ -16,22 +16,13 @@
*/
package org.apache.tika.parser.odf;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.tika.extractor.EmbeddedDocumentExtractor;
-import org.apache.tika.extractor.EmbeddedDocumentUtil;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.sax.ContentHandlerDecorator;
import org.apache.tika.utils.XMLReaderUtils;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import java.io.IOException;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
class OpenDocumentMacroHandler extends FlatOpenDocumentMacroHandler {
@@ -44,8 +35,11 @@ class OpenDocumentMacroHandler extends FlatOpenDocumentMacroHandler {
public void startElement(
String namespaceURI, String localName, String qName,
Attributes attrs) throws SAXException {
- inMacro = true;
- macroName = XMLReaderUtils.getAttrValue(NAME, attrs);
+ //in the compressed odf, there should only be one element in this file.
+ if (MODULE.equalsIgnoreCase(localName)) {
+ inMacro = true;
+ macroName = XMLReaderUtils.getAttrValue(NAME, attrs);
+ }
}
@@ -57,6 +51,9 @@ class OpenDocumentMacroHandler extends FlatOpenDocumentMacroHandler {
handleMacro();
} catch (IOException e) {
throw new SAXException(e);
+ } finally {
+ //this shouldn't be necessary in the compressed odf files
+ resetMacroState();
}
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
index a750a9b..d645d10 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
@@ -32,6 +32,8 @@ import java.util.zip.ZipInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.commons.lang3.StringUtils;
+import org.apache.tika.config.Field;
+import org.apache.tika.detect.XmlRootExtractor;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.EmbeddedDocumentUtil;
@@ -123,6 +125,8 @@ public class OpenDocumentParser extends AbstractParser {
return SUPPORTED_TYPES;
}
+ private boolean extractMacros = false;
+
public void parse(
InputStream stream, ContentHandler baseHandler,
Metadata metadata, ParseContext context)
@@ -148,7 +152,7 @@ public class OpenDocumentParser extends AbstractParser {
// Prepare to handle the content
XHTMLContentHandler xhtml = new XHTMLContentHandler(baseHandler, metadata);
-
+ xhtml.startDocument();
// As we don't know which of the metadata or the content
// we'll hit first, catch the endDocument call initially
EndDocumentShieldingContentHandler handler =
@@ -176,6 +180,11 @@ public class OpenDocumentParser extends AbstractParser {
}
}
+ @Field
+ public void setExtractMacros(boolean extractMacros) {
+ this.extractMacros = extractMacros;
+ }
+
private void handleZipStream(ZipInputStream zipStream, Metadata metadata, ParseContext context, EndDocumentShieldingContentHandler handler) throws IOException, TikaException, SAXException {
ZipEntry entry = zipStream.getNextEntry();
if (entry == null) {
@@ -211,7 +220,6 @@ public class OpenDocumentParser extends AbstractParser {
ParseContext context, ContentHandler handler)
throws IOException, SAXException, TikaException {
if (entry == null) return;
-
if (entry.getName().equals("mimetype")) {
String type = IOUtils.toString(zip, UTF_8);
metadata.set(Metadata.CONTENT_TYPE, type);
@@ -236,18 +244,15 @@ public class OpenDocumentParser extends AbstractParser {
//scrape everything under Thumbnails/ and Pictures/
if (embeddedName.contains("Thumbnails/") ||
embeddedName.contains("Pictures/")) {
- if (ignoreScriptFile(embeddedName)) {
- return;
- }
EmbeddedDocumentExtractor embeddedDocumentExtractor =
EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);
Metadata embeddedMetadata = new Metadata();
embeddedMetadata.set(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, entry.getName());
- /* if (embeddedName.startsWith("Thumbnails/")) {
+ if (embeddedName.startsWith("Thumbnails/")) {
embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
- TikaCoreProperties.EmbeddedResourceType.THUMBNAIL);
- }*/
+ TikaCoreProperties.EmbeddedResourceType.THUMBNAIL.toString());
+ }
if (embeddedName.contains("Pictures/")) {
embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
TikaCoreProperties.EmbeddedResourceType.INLINE.toString());
@@ -257,36 +262,35 @@ public class OpenDocumentParser extends AbstractParser {
embeddedDocumentExtractor.parseEmbedded(zip,
new EmbeddedContentHandler(handler), embeddedMetadata, false);
}
- } else if (embeddedName.contains("Basic/")) {
- Metadata embeddedMetadata = new Metadata();
- embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
- TikaCoreProperties.EmbeddedResourceType.MACRO.toString());
- String name = getMacroName(embeddedName);
- if (!StringUtils.isAllBlank(name)) {
- embeddedMetadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, name);
- }
- handler = new OpenDocumentMacroHandler(handler, context);
- XMLReaderUtils.parseSAX(
- new CloseShieldInputStream(zip),
- new OfflineContentHandler(new EmbeddedContentHandler(
- handler)), context);
+ } else if (extractMacros && embeddedName.contains("Basic/")) {
+ //process all files under Basic/; let maybeHandleMacro figure
+ //out if it is a macro or not
+ maybeHandleMacro(zip, embeddedName, handler, context);
}
}
}
- private String getMacroName(String embeddedName) {
-
- if (embeddedName == null) {
- return null;
+ private void maybeHandleMacro(InputStream is, String embeddedName,
+ ContentHandler handler, ParseContext context)
+ throws TikaException, IOException, SAXException {
+ //should probably run XMLRootExtractor on the inputstream
+ //or read the macro manifest for the names of the macros
+ //rather than relying on the script file name
+ if (ignoreScriptFile(embeddedName)) {
+ return;
}
- int lastSlash = embeddedName.lastIndexOf("/");
- if (lastSlash > -1) {
- return embeddedName.substring(lastSlash+1).replaceFirst("\\.xml$", "");
- }
- return null;
+ Metadata embeddedMetadata = new Metadata();
+ embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
+ TikaCoreProperties.EmbeddedResourceType.MACRO.toString());
+ handler = new OpenDocumentMacroHandler(handler, context);
+ XMLReaderUtils.parseSAX(
+ new CloseShieldInputStream(is),
+ new OfflineContentHandler(new EmbeddedContentHandler(
+ handler)), context);
}
+
private boolean ignoreScriptFile(String embeddedName) {
if (embeddedName.contains("Basic/")) {
if (embeddedName.contains("script-lb.xml")) {
@@ -294,6 +298,9 @@ public class OpenDocumentParser extends AbstractParser {
} else if (embeddedName.contains("script-lc.xml")) {
return true;
}
+ } else {
+ //shouldn't ever get here, but if it isn't under Basic/, ignore it
+ return true;
}
return false;
}
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
index 5f98b6c..1d5217d 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
@@ -25,6 +25,8 @@ import java.util.Arrays;
import java.util.List;
import org.apache.tika.TikaTest;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
@@ -32,14 +34,17 @@ import org.apache.tika.metadata.OfficeOpenXMLCore;
import org.apache.tika.metadata.OfficeOpenXMLExtended;
import org.apache.tika.metadata.PagedText;
import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.EmptyParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.opendocument.OpenOfficeParser;
import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
import org.apache.tika.sax.BodyContentHandler;
+import org.junit.BeforeClass;
import org.junit.Test;
import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
public class ODFParserTest extends TikaTest {
/**
@@ -53,6 +58,14 @@ public class ODFParserTest extends TikaTest {
};
}
+ private static Parser MACRO_PARSER;
+
+ @BeforeClass
+ public static void setUp() throws IOException, TikaException, SAXException {
+ MACRO_PARSER = new AutoDetectParser(new TikaConfig
+ (ODFParserTest.class.getResourceAsStream("tika-config-macros.xml")));
+ }
+
@Test
public void testOO3() throws Exception {
for (Parser parser : getParsers()) {
@@ -393,8 +406,8 @@ public class ODFParserTest extends TikaTest {
@Test
public void testMacroODT() throws Exception {
- List<Metadata> metadataList = getRecursiveMetadata("testODTMacro.odt");
- assertEquals(4, metadataList.size());
+ List<Metadata> metadataList = getRecursiveMetadata("testODTMacro.odt", MACRO_PARSER);
+ assertEquals(5, metadataList.size());
Metadata parent = metadataList.get(0);
assertContains("<p>Hello dear user,</p>",
@@ -403,22 +416,44 @@ public class ODFParserTest extends TikaTest {
parent.get(Metadata.CONTENT_TYPE));
//make sure metadata came through
- assertEquals("LibreOffice/6.4.3.2$MacOSX_X86_64 LibreOffice_project/747b5d0ebf89f41c860ec2a39efd7cb15b54f2d8",
+ assertEquals("LibreOffice/6.4.4.2$Linux_X86_64 LibreOffice_project/40$Build-2",
parent.get("generator"));
assertEquals(1, parent.getInt(PagedText.N_PAGES).intValue());
- Metadata macro = metadataList.get(1);
- assertEquals("MACRO", macro.get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE_KEY));
- assertContains("If WsGQFM Or 2 Then", macro.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
- assertEquals("test", macro.get(TikaCoreProperties.RESOURCE_NAME_KEY));
+ Metadata macro1 = metadataList.get(1);
+ assertEquals("MACRO", macro1.get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE_KEY));
+ assertContains("If WsGQFM Or 2 Then", macro1.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
+ assertEquals("test", macro1.get(TikaCoreProperties.RESOURCE_NAME_KEY));
- Metadata image = metadataList.get(2);
+ Metadata macro2 = metadataList.get(2);
+ assertEquals("MACRO", macro2.get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE_KEY));
+ assertContains("If WsGQFM Or 1 Then", macro2.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
+ assertEquals("test2", macro2.get(TikaCoreProperties.RESOURCE_NAME_KEY));
+
+ Metadata image = metadataList.get(3);
assertEquals("image/png", image.get(Metadata.CONTENT_TYPE));
}
@Test
+ public void testMacroODTandXMLHandler() throws Exception {
+ String xml = getXML("testODTMacro.odt", MACRO_PARSER).xml;
+ assertContains("Hello dear user", xml);
+ assertContains("If WsGQFM Or 1", xml);
+ assertContains("If WsGQFM Or 2 Then", xml);
+ }
+
+ @Test
+ public void testMacroODTandXMLHandlerDefault() throws Exception {
+ //test to make sure that macros aren't extracted by the default AutoDetectParser
+ String xml = getXML("testODTMacro.odt").xml;
+ assertContains("Hello dear user", xml);
+ assertNotContained("If WsGQFM Or 1", xml);
+ assertNotContained("If WsGQFM Or 2 Then", xml);
+ }
+
+ @Test
public void testMacroODS() throws Exception {
- List<Metadata> metadataList = getRecursiveMetadata("testODSMacro.ods");
+ List<Metadata> metadataList = getRecursiveMetadata("testODSMacro.ods", MACRO_PARSER);
assertEquals(4, metadataList.size());
Metadata parent = metadataList.get(0);
@@ -438,7 +473,7 @@ public class ODFParserTest extends TikaTest {
@Test
public void testMacroODP() throws Exception {
- List<Metadata> metadataList = getRecursiveMetadata("testODPMacro.odp");
+ List<Metadata> metadataList = getRecursiveMetadata("testODPMacro.odp", MACRO_PARSER);
assertEquals(3, metadataList.size());
Metadata parent = metadataList.get(0);
@@ -462,7 +497,7 @@ public class ODFParserTest extends TikaTest {
@Test
public void testMacroFODT() throws Exception {
- List<Metadata> metadataList = getRecursiveMetadata("testODTMacro.fodt");
+ List<Metadata> metadataList = getRecursiveMetadata("testODTMacro.fodt", MACRO_PARSER);
assertEquals(3, metadataList.size());
Metadata parent = metadataList.get(0);
@@ -485,9 +520,17 @@ public class ODFParserTest extends TikaTest {
assertEquals("image/png", image.get(Metadata.CONTENT_TYPE));
}
+
+ @Test
+ public void testMacroFODTandXMLOutput() throws Exception {
+ String xml = getXML("testODTMacro.fodt", MACRO_PARSER).xml;
+ assertContains("Hello dear user", xml);
+ assertContains("If WsGQFM Or 2", xml);
+ }
+
@Test
public void testMacroFODS() throws Exception {
- List<Metadata> metadataList = getRecursiveMetadata("testODSMacro.fods");
+ List<Metadata> metadataList = getRecursiveMetadata("testODSMacro.fods", MACRO_PARSER);
assertEquals(3, metadataList.size());
Metadata parent = metadataList.get(0);
@@ -507,7 +550,7 @@ public class ODFParserTest extends TikaTest {
@Test
public void testMacroFODP() throws Exception {
- List<Metadata> metadataList = getRecursiveMetadata("testODPMacro.fodp");
+ List<Metadata> metadataList = getRecursiveMetadata("testODPMacro.fodp", MACRO_PARSER);
assertEquals(2, metadataList.size());
Metadata parent = metadataList.get(0);
diff --git a/tika-parsers/src/test/resources/org/apache/tika/parser/odf/tika-config-macros.xml b/tika-parsers/src/test/resources/org/apache/tika/parser/odf/tika-config-macros.xml
new file mode 100644
index 0000000..a448d1d
--- /dev/null
+++ b/tika-parsers/src/test/resources/org/apache/tika/parser/odf/tika-config-macros.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<properties>
+ <parsers>
+ <parser class="org.apache.tika.parser.DefaultParser"/>
+ <parser class="org.apache.tika.parser.odf.OpenDocumentParser">
+ <params>
+ <param name="extractMacros" type="bool">true</param>
+ </params>
+ </parser>
+ <parser class="org.apache.tika.parser.odf.FlatOpenDocumentParser">
+ <params>
+ <param name="extractMacros" type="bool">true</param>
+ </params>
+ </parser>
+ </parsers>
+</properties>
diff --git a/tika-parsers/src/test/resources/test-documents/testODTMacro.odt b/tika-parsers/src/test/resources/test-documents/testODTMacro.odt
index 6309e97..5ec2d25 100644
Binary files a/tika-parsers/src/test/resources/test-documents/testODTMacro.odt and b/tika-parsers/src/test/resources/test-documents/testODTMacro.odt differ