You are viewing a plain text version of this content. (The canonical link to the original message was not preserved in this plain-text copy.)
Posted to commits@tika.apache.org by ju...@apache.org on 2014/04/18 16:11:06 UTC
svn commit: r1588474 - in /tika/trunk/tika-parsers/src:
main/java/org/apache/tika/parser/pkg/PackageParser.java
test/java/org/apache/tika/parser/pkg/ZipParserTest.java
Author: jukka
Date: Fri Apr 18 14:11:06 2014
New Revision: 1588474
URL: http://svn.apache.org/r1588474
Log:
TIKA-936: encoding of ZipArchiveInputStream
Allow a custom ArchiveStreamFactory instance to be passed through the ParseContext
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java?rev=1588474&r1=1588473&r2=1588474&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java Fri Apr 18 14:11:06 2014
@@ -117,7 +117,8 @@ public class PackageParser extends Abstr
ArchiveInputStream ais;
try {
- ArchiveStreamFactory factory = new ArchiveStreamFactory();
+ ArchiveStreamFactory factory = context.get(
+ ArchiveStreamFactory.class, new ArchiveStreamFactory());
ais = factory.createArchiveInputStream(stream);
} catch (StreamingNotSupportedException sne) {
// Most archive formats work on streams, but a few need files
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java?rev=1588474&r1=1588473&r2=1588474&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java Fri Apr 18 14:11:06 2014
@@ -24,8 +24,11 @@ import java.io.InputStream;
import java.util.HashSet;
import java.util.Set;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.tika.Tika;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
@@ -33,6 +36,7 @@ import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.junit.Test;
import org.xml.sax.ContentHandler;
+import org.xml.sax.helpers.DefaultHandler;
/**
* Test case for parsing zip files.
@@ -171,4 +175,30 @@ public class ZipParserTest extends Abstr
assertTrue(relIDs.allRelIDs.contains("test1.txt"));
assertTrue(relIDs.allRelIDs.contains("test2.txt"));
}
+
+ @Test // TIKA-936
+ public void testCustomEncoding() throws Exception {
+ ArchiveStreamFactory factory = new ArchiveStreamFactory();
+ factory.setEntryEncoding("SJIS");
+ trackingContext.set(ArchiveStreamFactory.class, factory);
+
+ InputStream stream = TikaInputStream.get(Base64.decodeBase64(
+ "UEsDBBQAAAAIAI+CvUCDo3+zIgAAACgAAAAOAAAAk/qWe4zqg4GDgi50"
+ + "eHRr2tj0qulsc2pzRHN609Gm7Y1OvFxNYLHJv6ZV97yCiQEAUEsBAh"
+ + "QLFAAAAAgAj4K9QIOjf7MiAAAAKAAAAA4AAAAAAAAAAAAgAAAAAAAA"
+ + "AJP6lnuM6oOBg4IudHh0UEsFBgAAAAABAAEAPAAAAE4AAAAAAA=="));
+ try {
+ autoDetectParser.parse(
+ stream, new DefaultHandler(),
+ new Metadata(), trackingContext);
+ } finally {
+ stream.close();
+ }
+
+ assertEquals(1, tracker.filenames.size());
+ assertEquals(
+ "\u65E5\u672C\u8A9E\u30E1\u30E2.txt",
+ tracker.filenames.get(0));
+ }
+
}