You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/05/22 04:27:02 UTC
svn commit: r1125861 - in /tika/trunk/tika-parsers/src:
main/java/org/apache/tika/parser/pkg/PackageExtractor.java
test/java/org/apache/tika/parser/pkg/ZipParserTest.java
test/resources/test-documents/moby.zip
Author: jukka
Date: Sun May 22 02:27:01 2011
New Revision: 1125861
URL: http://svn.apache.org/viewvc?rev=1125861&view=rev
Log:
TIKA-346: ZipParser throws "invalid compression method" error for some archives
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/moby.zip
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java?rev=1125861&r1=1125860&r2=1125861&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java Sun May 22 02:27:01 2011
@@ -156,14 +156,18 @@ class PackageExtractor {
ArchiveEntry entry = archive.getNextEntry();
while (entry != null) {
if (!entry.isDirectory()) {
- Metadata entrydata = new Metadata();
String name = entry.getName();
- if (name != null && name.length() > 0) {
- entrydata.set(Metadata.RESOURCE_NAME_KEY, name);
- }
- if (extractor.shouldParseEmbedded(entrydata)) {
- extractor.parseEmbedded(archive, xhtml, entrydata, true);
+ if (archive.canReadEntryData(entry)) {
+ Metadata entrydata = new Metadata();
+ if (name != null && name.length() > 0) {
+ entrydata.set(Metadata.RESOURCE_NAME_KEY, name);
+ }
+ if (extractor.shouldParseEmbedded(entrydata)) {
+ extractor.parseEmbedded(archive, xhtml, entrydata, true);
+ }
+ } else if (name != null && name.length() > 0) {
+ xhtml.element("p", name);
}
}
entry = archive.getNextEntry();
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java?rev=1125861&r1=1125860&r2=1125861&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java Sun May 22 02:27:01 2011
@@ -18,6 +18,7 @@ package org.apache.tika.parser.pkg;
import java.io.InputStream;
+import org.apache.tika.Tika;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.Parser;
@@ -101,4 +102,19 @@ public class ZipParserTest extends Abstr
assertNull(type);
}
}
+
+ /**
+ * Test case for the ability of the ZIP parser to extract the name of
+ * a ZIP entry even if the content of the entry is unreadable due to an
+ * unsupported compression method.
+ *
+ * @see <a href="https://issues.apache.org/jira/browse/TIKA-346">TIKA-346</a>
+ */
+ public void testUnsupportedZipCompressionMethod() throws Exception {
+ String content = new Tika().parseToString(
+ ZipParserTest.class.getResourceAsStream(
+ "/test-documents/moby.zip"));
+ assertTrue(content.contains("README"));
+ }
+
}
Added: tika/trunk/tika-parsers/src/test/resources/test-documents/moby.zip
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/moby.zip?rev=1125861&view=auto
==============================================================================
Files tika/trunk/tika-parsers/src/test/resources/test-documents/moby.zip (added) and tika/trunk/tika-parsers/src/test/resources/test-documents/moby.zip Sun May 22 02:27:01 2011 differ