You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2014/10/29 20:23:10 UTC

svn commit: r1635263 - in /tika/trunk: tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java tika-parsers/src/test/resources/test-documents/testJAR_with_PEHDR.jar

Author: nick
Date: Wed Oct 29 19:23:10 2014
New Revision: 1635263

URL: http://svn.apache.org/r1635263
Log:
TIKA-1461 PE files must also have the MZ header at the start, so tweak magic and add positive and negative mime magic detection tests for it

Added:
    tika/trunk/tika-parsers/src/test/resources/test-documents/testJAR_with_PEHDR.jar   (with props)
Modified:
    tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java

Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1635263&r1=1635262&r2=1635263&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Wed Oct 29 19:23:10 2014
@@ -3040,9 +3040,14 @@
     <sub-class-of type="application/x-msdownload"/>
     <magic priority="55">
       <!-- Technically the header offset is stored at 0x3c, and isn't a -->
-      <!-- constant, but it's almost always set to start at 0x80 or 0xf0 -->
-      <match value="PE\000\000" type="string" offset="128"/>
-      <match value="PE\000\000" type="string" offset="240"/>
+      <!-- constant, but it's almost always set to start at 0x80, 0xb0, -->
+      <!-- 0xd0 or 0xf0. Will always have the MZ MS-DOS header too. -->
+      <match value="MZ" type="string" offset="0">
+         <match value="PE\000\000" type="string" offset="128"/>
+         <match value="PE\000\000" type="string" offset="176"/>
+         <match value="PE\000\000" type="string" offset="208"/>
+         <match value="PE\000\000" type="string" offset="240"/>
+      </match>
     </magic>
   </mime-type>
   <!-- the PE header should be PEx00x00 then a two byte machine type -->

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=1635263&r1=1635262&r2=1635263&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java Wed Oct 29 19:23:10 2014
@@ -742,6 +742,21 @@ public class TestMimeTypes {
     }
     
     @Test
+    public void testWindowsEXE() throws Exception {
+        assertTypeByName("application/x-msdownload", "x.dll");
+        assertTypeByName("application/x-ms-installer", "x.msi");
+        assertTypeByName("application/x-dosexec", "x.exe");
+        
+        assertTypeByData("application/x-msdownload; format=pe", "testTinyPE.exe");
+        assertTypeByNameAndData("application/x-msdownload; format=pe", "testTinyPE.exe");
+        
+        // A jar file with part of a PE header, but not a full one
+        //  should still be detected as a zip or jar (without/with name)
+        assertTypeByData("application/zip", "testJAR_with_PEHDR.jar");
+        assertTypeByNameAndData("application/java-archive", "testJAR_with_PEHDR.jar");
+    }
+    
+    @Test
     public void testMatroskaDetection() throws Exception {
         assertType("video/x-matroska", "testMKV.mkv");
         // TODO: Need custom detector data detection, see TIKA-1180

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testJAR_with_PEHDR.jar
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testJAR_with_PEHDR.jar?rev=1635263&view=auto
==============================================================================
Binary file - no diff available.

Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/testJAR_with_PEHDR.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream