You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2011/04/05 10:48:07 UTC

svn commit: r1088924 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java test/resources/test-documents/NullHeader.docx

Author: maxcom
Date: Tue Apr  5 08:48:06 2011
New Revision: 1088924

URL: http://svn.apache.org/viewvc?rev=1088924&view=rev
Log:
TIKA-633: NPE in XWPFWordExtractorDecorator.extractHeaders

Added:
    tika/trunk/tika-parsers/src/test/resources/test-documents/NullHeader.docx   (with props)
Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java?rev=1088924&r1=1088923&r2=1088924&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java Tue Apr  5 08:48:06 2011
@@ -54,13 +54,17 @@ public class XWPFWordExtractorDecorator 
         XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
 
         // headers
-        extractHeaders(xhtml, hfPolicy);
+        if (hfPolicy!=null) {
+            extractHeaders(xhtml, hfPolicy);
+        }
 
         // process text in the order that it occurs in
         extractIBodyText(document, xhtml);
 
         // then all document tables
-        extractFooters(xhtml, hfPolicy);
+        if (hfPolicy!=null) {
+            extractFooters(xhtml, hfPolicy);
+        }
     }
 
     private void extractIBodyText(IBody bodyElement, XHTMLContentHandler xhtml)

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java?rev=1088924&r1=1088923&r2=1088924&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java Tue Apr  5 08:48:06 2011
@@ -430,4 +430,22 @@ public class OOXMLParserTest extends Tes
         }
     }
 
+    /**
+     * Regression test for TIKA-633: parsing NullHeader.docx (a docx without
+     * headers) must complete and must yield non-empty extracted text.
+     */
+    public void testNullHeaders() throws Exception {
+        Parser parser = new AutoDetectParser();
+        Metadata metadata = new Metadata();
+        ContentHandler handler = new BodyContentHandler();
+        ParseContext context = new ParseContext();
+
+        InputStream input = OOXMLParserTest.class.getResourceAsStream("/test-documents/NullHeader.docx");
+        try {
+            parser.parse(TikaInputStream.get(input), handler, metadata, context);
+            assertFalse(handler.toString().isEmpty());
+        } finally {
+            input.close(); // NOTE(review): input is null if the test resource is missing - confirm it is packaged
+        }
+    }
 }

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/NullHeader.docx
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/NullHeader.docx?rev=1088924&view=auto
==============================================================================
Binary file - no diff available.

Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/NullHeader.docx
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream