You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/05/22 04:27:02 UTC

svn commit: r1125861 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/pkg/PackageExtractor.java test/java/org/apache/tika/parser/pkg/ZipParserTest.java test/resources/test-documents/moby.zip

Author: jukka
Date: Sun May 22 02:27:01 2011
New Revision: 1125861

URL: http://svn.apache.org/viewvc?rev=1125861&view=rev
Log:
TIKA-346: ZipParser throws "invalid compression method" error for some archives

Added:
    tika/trunk/tika-parsers/src/test/resources/test-documents/moby.zip
Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java?rev=1125861&r1=1125860&r2=1125861&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java Sun May 22 02:27:01 2011
@@ -156,14 +156,18 @@ class PackageExtractor {
             ArchiveEntry entry = archive.getNextEntry();
             while (entry != null) {
                 if (!entry.isDirectory()) {
-                    Metadata entrydata = new Metadata();
                     String name = entry.getName();
-                    if (name != null && name.length() > 0) {
-                        entrydata.set(Metadata.RESOURCE_NAME_KEY, name);
-                    }
 
-                    if (extractor.shouldParseEmbedded(entrydata)) {
-                        extractor.parseEmbedded(archive, xhtml, entrydata, true);
+                    if (archive.canReadEntryData(entry)) {
+                        Metadata entrydata = new Metadata();
+                        if (name != null && name.length() > 0) {
+                            entrydata.set(Metadata.RESOURCE_NAME_KEY, name);
+                        }
+                        if (extractor.shouldParseEmbedded(entrydata)) {
+                            extractor.parseEmbedded(archive, xhtml, entrydata, true);
+                        }
+                    } else if (name != null && name.length() > 0) {
+                        xhtml.element("p", name);
                     }
                 }
                 entry = archive.getNextEntry();

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java?rev=1125861&r1=1125860&r2=1125861&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java Sun May 22 02:27:01 2011
@@ -18,6 +18,7 @@ package org.apache.tika.parser.pkg;
 
 import java.io.InputStream;
 
+import org.apache.tika.Tika;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.Parser;
@@ -101,4 +102,19 @@ public class ZipParserTest extends Abstr
           assertNull(type);
        }
     }
+
+    /**
+     * Test case for the ability of the ZIP parser to extract the name of
+     * a ZIP entry even if the content of the entry is unreadable due to an
+     * unsupported compression method.
+     *
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-346">TIKA-346</a>
+     */
+    public void testUnsupportedZipCompressionMethod() throws Exception {
+        String content = new Tika().parseToString(
+                ZipParserTest.class.getResourceAsStream(
+                        "/test-documents/moby.zip"));
+        assertTrue(content.contains("README"));
+    }
+
 }

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/moby.zip
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/moby.zip?rev=1125861&view=auto
==============================================================================
Files tika/trunk/tika-parsers/src/test/resources/test-documents/moby.zip (added) and tika/trunk/tika-parsers/src/test/resources/test-documents/moby.zip Sun May 22 02:27:01 2011 differ