You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2012/04/05 15:39:25 UTC
svn commit: r1309854 - in /tika/trunk/tika-parsers/src:
main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
test/java/org/apache/tika/detect/TestContainerAwareDetector.java
Author: nick
Date: Thu Apr 5 13:39:25 2012
New Revision: 1309854
URL: http://svn.apache.org/viewvc?rev=1309854&view=rev
Log:
TIKA-890 Container Aware detection of JAR derived types such as WAR, EAR and APK, with tests
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java?rev=1309854&r1=1309853&r2=1309854&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java Thu Apr 5 13:39:25 2012
@@ -77,10 +77,11 @@ public class ZipContainerDetector implem
if (type == null) {
type = detectIWork(zip);
}
+ if (type == null) {
+ type = detectJar(zip);
+ }
if (type != null) {
return type;
- } else if (zip.getEntry("META-INF/MANIFEST.MF") != null) {
- return MediaType.application("java-archive");
}
} finally {
// TODO: shouldn't we record the open
@@ -191,4 +192,33 @@ public class ZipContainerDetector implem
return null;
}
}
+ /** Maps a ZIP's marker entries to a JAR-family media type (APK, WAR, EAR or plain JAR); returns null when none are present. */
+ private static MediaType detectJar(ZipFile zip) {
+ if (zip.getEntry("META-INF/MANIFEST.MF") != null) {
+ // A manifest entry means this is a JAR, or a format built on top of JAR
+
+ // Android APK: identified by its AndroidManifest.xml entry
+ if (zip.getEntry("AndroidManifest.xml") != null) {
+ return MediaType.application("vnd.android.package-archive");
+ }
+ // NOTE(review): the WEB-INF/ check needs an explicit directory entry if getEntry is exact-match - confirm
+ // WAR is identified by a WEB-INF/ entry, EAR by META-INF/application.xml
+ if (zip.getEntry("WEB-INF/") != null) {
+ return MediaType.application("x-tika-java-web-archive");
+ }
+ if (zip.getEntry("META-INF/application.xml") != null) {
+ return MediaType.application("x-tika-java-enterprise-archive");
+ }
+
+ // No more specific marker was found, so report a plain JAR
+ return MediaType.application("java-archive");
+ } else {
+ // No JAR manifest: some Android APKs omit it, so still check for AndroidManifest.xml
+ if (zip.getEntry("AndroidManifest.xml") != null) {
+ return MediaType.application("vnd.android.package-archive");
+ }
+
+ return null;
+ }
+ }
}
\ No newline at end of file
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java?rev=1309854&r1=1309853&r2=1309854&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java Thu Apr 5 13:39:25 2012
@@ -281,7 +281,12 @@ public class TestContainerAwareDetector
public void testDetectZip() throws Exception {
assertTypeByData("test-documents.zip", "application/zip");
assertTypeByData("test-zip-of-zip.zip", "application/zip");
+
+ // JAR based formats
assertTypeByData("testJAR.jar", "application/java-archive");
+ assertTypeByData("testWAR.war", "application/x-tika-java-web-archive");
+ assertTypeByData("testEAR.ear", "application/x-tika-java-enterprise-archive");
+ assertTypeByData("testAPK.apk", "application/vnd.android.package-archive");
}
private TikaInputStream getTruncatedFile(String name, int n)