You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2013/06/19 01:11:38 UTC
svn commit: r1494366 - in /tika/trunk:
tika-core/src/main/resources/org/apache/tika/mime/
tika-parsers/src/main/java/org/apache/tika/parser/pkg/
tika-parsers/src/test/java/org/apache/tika/detect/
Author: nick
Date: Tue Jun 18 23:11:38 2013
New Revision: 1494366
URL: http://svn.apache.org/r1494366
Log:
Add a mimetype, Zip container detection and a unit test for the Apple IPA format. Original logic by Paul Brinich, from TIKA-1136.
Modified:
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1494366&r1=1494365&r2=1494366&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Tue Jun 18 23:11:38 2013
@@ -2798,6 +2798,12 @@
<glob pattern="*.iso"/>
</mime-type>
+ <mime-type type="application/x-itunes-ipa">
+ <sub-class-of type="application/zip"/>
+ <_comment>Apple iOS IPA AppStore file</_comment>
+ <glob pattern="*.ipa"/>
+ </mime-type>
+
<mime-type type="application/x-java-jnlp-file">
<glob pattern="*.jnlp"/>
</mime-type>
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java?rev=1494366&r1=1494365&r2=1494366&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java Tue Jun 18 23:11:38 2013
@@ -20,6 +20,9 @@ import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Enumeration;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.compress.archivers.ArchiveException;
@@ -142,6 +145,9 @@ public class ZipContainerDetector implem
if (type == null) {
type = detectKmz(zip);
}
+ if (type == null) {
+ type = detectIpa(zip);
+ }
if (type != null) {
return type;
}
@@ -311,4 +317,44 @@ public class ZipContainerDetector implem
}
}
+    /**
+     * Entry name patterns that identify an Apple iOS IPA archive. To be
+     * considered as an IPA file, a zip needs to match all of these, ie each
+     * pattern must be matched by the name of at least one entry.
+     * <p>
+     * The set is filled once by the static initialiser below and is not
+     * modified afterwards; {@link #detectIpa(ZipFile)} works on its own copy.
+     * <p>
+     * NOTE(review): several patterns end in "/", so they can only be matched
+     * by explicit directory entries - confirm that every IPA we want to
+     * detect actually stores those entries.
+     */
+    private static final Set<Pattern> ipaEntryPatterns = new HashSet<Pattern>();
+    static {
+        ipaEntryPatterns.add(Pattern.compile("^Payload/$"));
+        ipaEntryPatterns.add(Pattern.compile("^Payload/.*\\.app/$"));
+        ipaEntryPatterns.add(Pattern.compile("^Payload/.*\\.app/_CodeSignature/$"));
+        ipaEntryPatterns.add(Pattern.compile("^Payload/.*\\.app/_CodeSignature/CodeResources$"));
+        ipaEntryPatterns.add(Pattern.compile("^Payload/.*\\.app/CodeResources$"));
+        ipaEntryPatterns.add(Pattern.compile("^Payload/.*\\.app/Info\\.plist$"));
+        ipaEntryPatterns.add(Pattern.compile("^Payload/.*\\.app/PkgInfo$"));
+        ipaEntryPatterns.add(Pattern.compile("^Payload/.*\\.app/ResourceRules\\.plist$"));
+    }
+
+    /**
+     * Checks the entry names of the zip against {@link #ipaEntryPatterns},
+     * stopping as soon as every required pattern has been seen.
+     *
+     * @param zip the zip file whose entry names are inspected
+     * @return <code>application/x-itunes-ipa</code> if every required pattern
+     *         was matched by some entry name, otherwise <code>null</code>
+     */
+    private static MediaType detectIpa(ZipFile zip) {
+        // Note - consider generalising this logic, if another format needs many regexp matching
+        // Work on a private copy, as matched patterns get removed from it below.
+        // The copy constructor avoids clone() and its unchecked cast.
+        Set<Pattern> tmpPatterns = new HashSet<Pattern>(ipaEntryPatterns);
+
+        Enumeration<ZipArchiveEntry> entries = zip.getEntries();
+        while (entries.hasMoreElements()) {
+            ZipArchiveEntry entry = entries.nextElement();
+            String name = entry.getName();
+
+            // Drop every still-outstanding pattern that this entry satisfies
+            Iterator<Pattern> ip = tmpPatterns.iterator();
+            while (ip.hasNext()) {
+                if (ip.next().matcher(name).matches()) {
+                    ip.remove();
+                }
+            }
+            if (tmpPatterns.isEmpty()) {
+                // We've found everything we need to find
+                return MediaType.application("x-itunes-ipa");
+            }
+        }
+
+        // If we get here, not all required entries were found
+        return null;
+    }
}
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java?rev=1494366&r1=1494365&r2=1494366&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java Tue Jun 18 23:11:38 2013
@@ -290,6 +290,11 @@ public class TestContainerAwareDetector
assertTypeByData("testKMZ.kmz", "application/vnd.google-earth.kmz");
}
+    /**
+     * The IPA sample file must be reported as application/x-itunes-ipa by
+     * both assertTypeByNameAndData and assertTypeByData.
+     */
+    public void testDetectIPA() throws Exception {
+        final String ipaFile = "testIPA.ipa";
+        final String ipaType = "application/x-itunes-ipa";
+
+        assertTypeByNameAndData(ipaFile, ipaType);
+        assertTypeByData(ipaFile, ipaType);
+    }
+
public void testDetectZip() throws Exception {
assertTypeByData("test-documents.zip", "application/zip");
assertTypeByData("test-zip-of-zip.zip", "application/zip");