You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2012/04/05 15:39:25 UTC

svn commit: r1309854 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java test/java/org/apache/tika/detect/TestContainerAwareDetector.java

Author: nick
Date: Thu Apr  5 13:39:25 2012
New Revision: 1309854

URL: http://svn.apache.org/viewvc?rev=1309854&view=rev
Log:
TIKA-890 Container Aware detection of JAR derived types such as WAR, EAR and APK, with tests

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java?rev=1309854&r1=1309853&r2=1309854&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java Thu Apr  5 13:39:25 2012
@@ -77,10 +77,11 @@ public class ZipContainerDetector implem
                     if (type == null) {
                         type = detectIWork(zip);
                     }
+                    if (type == null) {
+                        type = detectJar(zip); 
+                    }
                     if (type != null) {
                         return type;
-                    } else if (zip.getEntry("META-INF/MANIFEST.MF") != null) {
-                        return MediaType.application("java-archive");
                     }
                 } finally {
                     // TODO: shouldn't we record the open
@@ -191,4 +192,33 @@ public class ZipContainerDetector implem
             return null;
         }
     }
+    
+    /**
+     * Identifies Jar archives and the formats built on top of them
+     * (Android APK, WAR, EAR) from well-known entry names.
+     *
+     * @param zip the opened zip container to inspect
+     * @return the matching media type, or null if the container is
+     *         neither a Jar derivative nor an APK
+     */
+    private static MediaType detectJar(ZipFile zip) {
+       // An Android APK is recognised with or without the default
+       // Manifest, as some APKs omit it
+       if (zip.getEntry("AndroidManifest.xml") != null) {
+          return MediaType.application("vnd.android.package-archive");
+       }
+       // Without a Manifest it isn't a Jar, or anything based on Jar
+       if (zip.getEntry("META-INF/MANIFEST.MF") == null) {
+          return null;
+       }
+       // WAR and EAR are told apart by their own marker entries
+       if (zip.getEntry("WEB-INF/") != null) {
+          return MediaType.application("x-tika-java-web-archive");
+       }
+       if (zip.getEntry("META-INF/application.xml") != null) {
+          return MediaType.application("x-tika-java-enterprise-archive");
+       }
+       // Anything else with a Manifest is a regular Jar Archive
+       return MediaType.application("java-archive");
+    }
 }
\ No newline at end of file

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java?rev=1309854&r1=1309853&r2=1309854&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java Thu Apr  5 13:39:25 2012
@@ -281,7 +281,12 @@ public class TestContainerAwareDetector 
     public void testDetectZip() throws Exception {
         assertTypeByData("test-documents.zip", "application/zip");
         assertTypeByData("test-zip-of-zip.zip", "application/zip");
+        
+        // JAR based formats: a plain JAR plus the WAR, EAR and APK types
         assertTypeByData("testJAR.jar", "application/java-archive");
+        assertTypeByData("testWAR.war", "application/x-tika-java-web-archive");
+        assertTypeByData("testEAR.ear", "application/x-tika-java-enterprise-archive");
+        assertTypeByData("testAPK.apk", "application/vnd.android.package-archive");
     }
 
     private TikaInputStream getTruncatedFile(String name, int n)