You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2011/04/05 10:48:07 UTC
svn commit: r1088924 - in /tika/trunk/tika-parsers/src:
main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
test/resources/test-documents/NullHeader.docx
Author: maxcom
Date: Tue Apr 5 08:48:06 2011
New Revision: 1088924
URL: http://svn.apache.org/viewvc?rev=1088924&view=rev
Log:
TIKA-633: NPE in XWPFWordExtractorDecorator.extractHeaders
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/NullHeader.docx (with props)
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java?rev=1088924&r1=1088923&r2=1088924&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java Tue Apr 5 08:48:06 2011
@@ -54,13 +54,17 @@ public class XWPFWordExtractorDecorator
XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
// headers
- extractHeaders(xhtml, hfPolicy);
+ if (hfPolicy!=null) {
+ extractHeaders(xhtml, hfPolicy);
+ }
// process text in the order that it occurs in
extractIBodyText(document, xhtml);
// then all document tables
- extractFooters(xhtml, hfPolicy);
+ if (hfPolicy!=null) {
+ extractFooters(xhtml, hfPolicy);
+ }
}
private void extractIBodyText(IBody bodyElement, XHTMLContentHandler xhtml)
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java?rev=1088924&r1=1088923&r2=1088924&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java Tue Apr 5 08:48:06 2011
@@ -430,4 +430,22 @@ public class OOXMLParserTest extends Tes
}
}
+ /**
+ * Test docx without headers
+ * TIKA-633
+ */
+ public void testNullHeaders() throws Exception {
+ Parser parser = new AutoDetectParser();
+ Metadata metadata = new Metadata();
+ ContentHandler handler = new BodyContentHandler();
+ ParseContext context = new ParseContext();
+
+ InputStream input = OOXMLParserTest.class.getResourceAsStream("/test-documents/NullHeader.docx");
+ try {
+ parser.parse(TikaInputStream.get(input), handler, metadata, context);
+ assertFalse(handler.toString().isEmpty());
+ } finally {
+ input.close();
+ }
+ }
}
Added: tika/trunk/tika-parsers/src/test/resources/test-documents/NullHeader.docx
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/NullHeader.docx?rev=1088924&view=auto
==============================================================================
Binary file - no diff available.
Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/NullHeader.docx
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream