You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2013/06/19 01:11:38 UTC

svn commit: r1494366 - in /tika/trunk: tika-core/src/main/resources/org/apache/tika/mime/ tika-parsers/src/main/java/org/apache/tika/parser/pkg/ tika-parsers/src/test/java/org/apache/tika/detect/

Author: nick
Date: Tue Jun 18 23:11:38 2013
New Revision: 1494366

URL: http://svn.apache.org/r1494366
Log:
Mimetype, Zip container detector and unit test for the Apple IPA format. Original logic by Paul Brinich, from TIKA-1136

Modified:
    tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java

Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1494366&r1=1494365&r2=1494366&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Tue Jun 18 23:11:38 2013
@@ -2798,6 +2798,12 @@
     <glob pattern="*.iso"/>
   </mime-type>
 
+  <mime-type type="application/x-itunes-ipa">
+    <sub-class-of type="application/zip"/>
+    <_comment>Apple iOS IPA AppStore file</_comment>
+    <glob pattern="*.ipa"/>
+  </mime-type>
+
   <mime-type type="application/x-java-jnlp-file">
     <glob pattern="*.jnlp"/>
   </mime-type>

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java?rev=1494366&r1=1494365&r2=1494366&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java Tue Jun 18 23:11:38 2013
@@ -20,6 +20,9 @@ import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.Enumeration;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
 import java.util.regex.Pattern;
 
 import org.apache.commons.compress.archivers.ArchiveException;
@@ -142,6 +145,9 @@ public class ZipContainerDetector implem
                 if (type == null) {
                     type = detectKmz(zip);
                 }
+                if (type == null) {
+                    type = detectIpa(zip);
+                }
                 if (type != null) {
                     return type;
                 }
@@ -311,4 +317,44 @@ public class ZipContainerDetector implem
         }
     }
 
+    /**
+     * Zip entry name patterns for IPA detection: each one must match at least one entry name in the archive
+     */
+    private static HashSet<Pattern> ipaEntryPatterns = new HashSet<Pattern>() {
+        private static final long serialVersionUID = 6545295886322115362L;
+        { // instance initializer of the anonymous HashSet subclass
+           add(Pattern.compile("^Payload/$"));
+           add(Pattern.compile("^Payload/.*\\.app/$"));
+           add(Pattern.compile("^Payload/.*\\.app/_CodeSignature/$"));
+           add(Pattern.compile("^Payload/.*\\.app/_CodeSignature/CodeResources$"));
+           add(Pattern.compile("^Payload/.*\\.app/CodeResources$"));
+           add(Pattern.compile("^Payload/.*\\.app/Info\\.plist$"));
+           add(Pattern.compile("^Payload/.*\\.app/PkgInfo$"));
+           add(Pattern.compile("^Payload/.*\\.app/ResourceRules\\.plist$"));
+    }};
+    @SuppressWarnings("unchecked") // clone() returns Object, hence the unchecked cast to Set<Pattern> below
+    private static MediaType detectIpa(ZipFile zip) {
+        // Note - consider generalising this logic if another format needs to match many regexps
+        Set<Pattern> tmpPatterns = (Set<Pattern>)ipaEntryPatterns.clone(); // private copy: matched patterns are removed from it
+        
+        Enumeration<ZipArchiveEntry> entries = zip.getEntries();
+        while (entries.hasMoreElements()) {
+            ZipArchiveEntry entry = entries.nextElement();
+            String name = entry.getName();
+            // Remove every still-outstanding pattern that this entry name fully matches
+            Iterator<Pattern> ip = tmpPatterns.iterator();
+            while (ip.hasNext()) {
+                if (ip.next().matcher(name).matches()) {
+                    ip.remove();
+                }
+            }
+            if (tmpPatterns.isEmpty()) {
+                // Every required pattern has been matched by some entry, so stop scanning early
+                return MediaType.application("x-itunes-ipa");
+            }
+        }
+        
+        // All entries were scanned but at least one pattern never matched: not an IPA
+        return null;
+    }
 }

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java?rev=1494366&r1=1494365&r2=1494366&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java Tue Jun 18 23:11:38 2013
@@ -290,6 +290,11 @@ public class TestContainerAwareDetector 
        assertTypeByData("testKMZ.kmz", "application/vnd.google-earth.kmz");
     }
     
+    public void testDetectIPA() throws Exception {
+        assertTypeByNameAndData("testIPA.ipa", "application/x-itunes-ipa"); // presumably detects from file name plus content - helper not shown here, confirm
+        assertTypeByData("testIPA.ipa", "application/x-itunes-ipa"); // presumably detects from content alone - helper not shown here, confirm
+     }
+     
     public void testDetectZip() throws Exception {
         assertTypeByData("test-documents.zip", "application/zip");
         assertTypeByData("test-zip-of-zip.zip", "application/zip");