You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2017/04/27 17:02:50 UTC
[tika] 02/02: TIKA-2346 OfficeParserConfig control extraction from shapes from DOCX
This is an automated email from the ASF dual-hosted git repository.
nick pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 0876aa909ffb77dfbd384ebe2f5de0a873ab489a
Author: Nick Burch <ni...@gagravarr.org>
AuthorDate: Thu Apr 27 18:02:06 2017 +0100
TIKA-2346 OfficeParserConfig control extraction from shapes from DOCX
---
.../tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
index a9eb93f..39a72c6 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
@@ -290,8 +290,10 @@ public class XWPFWordExtractorDecorator extends AbstractOOXMLExtractor {
}
// Also extract any paragraphs embedded in text boxes:
- for (XmlObject embeddedParagraph : paragraph.getCTP().selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' declare namespace wps='http://schemas.microsoft.com/office/word/2010/wordprocessingShape' .//*/wps:txbx/w:txbxContent/w:p")) {
- extractParagraph(new XWPFParagraph(CTP.Factory.parse(embeddedParagraph.xmlText()), paragraph.getBody()), listManager, xhtml);
+ if (config.getIncludeShapeBasedContent()) {
+ for (XmlObject embeddedParagraph : paragraph.getCTP().selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' declare namespace wps='http://schemas.microsoft.com/office/word/2010/wordprocessingShape' .//*/wps:txbx/w:txbxContent/w:p")) {
+ extractParagraph(new XWPFParagraph(CTP.Factory.parse(embeddedParagraph.xmlText()), paragraph.getBody()), listManager, xhtml);
+ }
}
// Finish this paragraph
--
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.