You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2016/03/06 15:34:15 UTC

[2/2] tika git commit: TIKA-1890 Mime magic for CAB files, and unit tests for detection

TIKA-1890 Mime magic for CAB files, and unit tests for detection


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/f7d3097f
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/f7d3097f
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/f7d3097f

Branch: refs/heads/master
Commit: f7d3097fb6581d989195b51bb2bc4302ad9bf24a
Parents: b878281
Author: Nick Burch <ni...@gagravarr.org>
Authored: Sun Mar 6 14:33:54 2016 +0000
Committer: Nick Burch <ni...@gagravarr.org>
Committed: Sun Mar 6 14:33:54 2016 +0000

----------------------------------------------------------------------
 .../src/main/resources/org/apache/tika/mime/tika-mimetypes.xml    | 3 +++
 .../src/test/java/org/apache/tika/mime/TestMimeTypes.java         | 2 ++
 2 files changed, 5 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/f7d3097f/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index bcf4fee..a4e0588 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -1517,6 +1517,9 @@
   </mime-type>
   <mime-type type="application/vnd.ms-asf"/>
   <mime-type type="application/vnd.ms-cab-compressed">
+    <magic priority="50">
+      <match value="MSCF\000\000\000\000" type="string" offset="0"/>
+    </magic>
     <glob pattern="*.cab"/>
   </mime-type>
 

http://git-wip-us.apache.org/repos/asf/tika/blob/f7d3097f/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index 67a749e..57198ad 100644
--- a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -353,6 +353,7 @@ public class TestMimeTypes {
        assertTypeByName("application/x-tar",  "test.tar");
        assertTypeByName("application/gzip", "test.tgz"); // See GZIP, not tar contents of it
        assertTypeByName("application/x-cpio", "test.cpio");
+       assertTypeByName("application/vnd.ms-cab-compressed", "test.cab");
        
        // TODO Add an example .deb and .udeb, then check these
        
@@ -363,6 +364,7 @@ public class TestMimeTypes {
        assertTypeByData("application/x-gtar",  "test-documents.tar"); // GNU TAR
        assertTypeByData("application/gzip", "test-documents.tgz"); // See GZIP, not tar contents of it
        assertTypeByData("application/x-cpio", "test-documents.cpio");
+       assertTypeByData("application/vnd.ms-cab-compressed", "test-documents.cab");
        
        // For spanned zip files, the .zip file doesn't have the header, it's the other parts
        assertTypeByData("application/octet-stream", "test-documents-spanned.zip");