You are viewing a plain text version of this content. The link to the canonical version was not preserved in this plain-text copy.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/07/06 13:22:20 UTC

[tika] branch master updated: TIKA-2675 -- OpenDocumentParser should fail on invalid zip via Sebastian Nagel and PR-240.

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/master by this push:
     new c9a81a4  TIKA-2675 -- OpenDocumentParser should fail on invalid zip via Sebastian Nagel and PR-240.
c9a81a4 is described below

commit c9a81a400ee10e9342bbfe718d62f0b0d6c7944f
Author: TALLISON <ta...@apache.org>
AuthorDate: Fri Jul 6 09:22:04 2018 -0400

    TIKA-2675 -- OpenDocumentParser should fail on invalid zip via
    Sebastian Nagel and PR-240.
---
 .../apache/tika/parser/odf/OpenDocumentParser.java |  7 +++++--
 .../org/apache/tika/parser/odf/ODFParserTest.java  | 23 ++++++++++++++++++++++
 .../test-documents/testODTnotaZipFile.odt          |  1 +
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
index c8aa65e..86ac3cf 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
@@ -174,10 +174,13 @@ public class OpenDocumentParser extends AbstractParser {
 
     private void handleZipStream(ZipInputStream zipStream, Metadata metadata, ParseContext context, EndDocumentShieldingContentHandler handler) throws IOException, TikaException, SAXException {
         ZipEntry entry = zipStream.getNextEntry();
-        while (entry != null) {
+        if (entry == null) {
+            throw new IOException("No entries found in ZipInputStream");
+        }
+        do {
             handleZipEntry(entry, zipStream, metadata, context, handler);
             entry = zipStream.getNextEntry();
-        }
+        } while (entry != null);
     }
 
     private void handleZipFile(ZipFile zipFile, Metadata metadata,
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
index 7b93271..3b8048c 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
@@ -19,6 +19,7 @@ package org.apache.tika.parser.odf;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
+import java.io.IOException;
 import java.io.InputStream;
 import java.util.Arrays;
 import java.util.List;
@@ -367,6 +368,28 @@ public class ODFParserTest extends TikaTest {
         assertEquals(3, metadataList.size());
     }
 
+    @Test(expected = IOException.class)
+    public void testInvalidFromStream() throws Exception {
+        try (InputStream is = this.getClass().getResource(
+                "/test-documents/testODTnotaZipFile.odt").openStream()) {
+            OpenDocumentParser parser = new OpenDocumentParser();
+            Metadata metadata = new Metadata();
+            ContentHandler handler = new BodyContentHandler();
+            parser.parse(is, handler, metadata, new ParseContext());
+        }
+    }
+
+    @Test(expected = IOException.class)
+    public void testInvalidFromFile() throws Exception {
+        try (TikaInputStream tis = TikaInputStream.get(this.getClass().getResource(
+                "/test-documents/testODTnotaZipFile.odt"))) {
+            OpenDocumentParser parser = new OpenDocumentParser();
+            Metadata metadata = new Metadata();
+            ContentHandler handler = new BodyContentHandler();
+            parser.parse(tis, handler, metadata, new ParseContext());
+        }
+    }
+
     private ParseContext getNonRecursingParseContext() {
         ParseContext parseContext = new ParseContext();
         parseContext.set(Parser.class, new EmptyParser());
diff --git a/tika-parsers/src/test/resources/test-documents/testODTnotaZipFile.odt b/tika-parsers/src/test/resources/test-documents/testODTnotaZipFile.odt
new file mode 100644
index 0000000..9c1d376
--- /dev/null
+++ b/tika-parsers/src/test/resources/test-documents/testODTnotaZipFile.odt
@@ -0,0 +1 @@
+This is not a zip file!