You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2014/04/18 16:11:06 UTC

svn commit: r1588474 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/pkg/PackageParser.java test/java/org/apache/tika/parser/pkg/ZipParserTest.java

Author: jukka
Date: Fri Apr 18 14:11:06 2014
New Revision: 1588474

URL: http://svn.apache.org/r1588474
Log:
TIKA-936: encoding of ZipArchiveInputStream

Allow a custom ArchiveStreamFactory instance to be passed through the ParseContext

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java?rev=1588474&r1=1588473&r2=1588474&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java Fri Apr 18 14:11:06 2014
@@ -117,7 +117,8 @@ public class PackageParser extends Abstr
 
         ArchiveInputStream ais;
         try {
-            ArchiveStreamFactory factory = new ArchiveStreamFactory();
+            ArchiveStreamFactory factory = context.get(
+                    ArchiveStreamFactory.class, new ArchiveStreamFactory());
             ais = factory.createArchiveInputStream(stream);
         } catch (StreamingNotSupportedException sne) {
             // Most archive formats work on streams, but a few need files

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java?rev=1588474&r1=1588473&r2=1588474&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java Fri Apr 18 14:11:06 2014
@@ -24,8 +24,11 @@ import java.io.InputStream;
 import java.util.HashSet;
 import java.util.Set;
 
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.compress.archivers.ArchiveStreamFactory;
 import org.apache.tika.Tika;
 import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
@@ -33,6 +36,7 @@ import org.apache.tika.parser.Parser;
 import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
+import org.xml.sax.helpers.DefaultHandler;
 
 /**
  * Test case for parsing zip files.
@@ -171,4 +175,30 @@ public class ZipParserTest extends Abstr
         assertTrue(relIDs.allRelIDs.contains("test1.txt"));
         assertTrue(relIDs.allRelIDs.contains("test2.txt"));
     }
+
+    @Test // TIKA-936
+    public void testCustomEncoding() throws Exception {
+        ArchiveStreamFactory factory = new ArchiveStreamFactory();
+        factory.setEntryEncoding("SJIS");
+        trackingContext.set(ArchiveStreamFactory.class, factory);
+
+        InputStream stream = TikaInputStream.get(Base64.decodeBase64(
+                "UEsDBBQAAAAIAI+CvUCDo3+zIgAAACgAAAAOAAAAk/qWe4zqg4GDgi50"
+                + "eHRr2tj0qulsc2pzRHN609Gm7Y1OvFxNYLHJv6ZV97yCiQEAUEsBAh"
+                + "QLFAAAAAgAj4K9QIOjf7MiAAAAKAAAAA4AAAAAAAAAAAAgAAAAAAAA"
+                + "AJP6lnuM6oOBg4IudHh0UEsFBgAAAAABAAEAPAAAAE4AAAAAAA=="));
+        try {
+            autoDetectParser.parse(
+                    stream, new DefaultHandler(),
+                    new Metadata(), trackingContext);
+        } finally {
+            stream.close();
+        }
+
+        assertEquals(1, tracker.filenames.size());
+        assertEquals(
+                "\u65E5\u672C\u8A9E\u30E1\u30E2.txt",
+                tracker.filenames.get(0));
+    }
+
 }