You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2014/10/29 20:23:10 UTC
svn commit: r1635263 - in /tika/trunk:
tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
tika-parsers/src/test/resources/test-documents/testJAR_with_PEHDR.jar
Author: nick
Date: Wed Oct 29 19:23:10 2014
New Revision: 1635263
URL: http://svn.apache.org/r1635263
Log:
TIKA-1461 PE files must also have the MZ header at the start, so tweak magic and add positive and negative mime magic detection tests for it
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/testJAR_with_PEHDR.jar (with props)
Modified:
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1635263&r1=1635262&r2=1635263&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Wed Oct 29 19:23:10 2014
@@ -3040,9 +3040,14 @@
<sub-class-of type="application/x-msdownload"/>
<magic priority="55">
<!-- Technically the header offset is stored at 0x3c, and isn't a -->
- <!-- constant, but it's almost always set to start at 0x80 or 0xf0 -->
- <match value="PE\000\000" type="string" offset="128"/>
- <match value="PE\000\000" type="string" offset="240"/>
+ <!-- constant, but it's almost always set to start at 0x80, 0xb0, -->
+ <!-- 0xd0 or 0xf0. Will always have the MZ MS-DOS header too. -->
+ <match value="MZ" type="string" offset="0">
+ <match value="PE\000\000" type="string" offset="128"/>
+ <match value="PE\000\000" type="string" offset="176"/>
+ <match value="PE\000\000" type="string" offset="208"/>
+ <match value="PE\000\000" type="string" offset="240"/>
+ </match>
</magic>
</mime-type>
<!-- the PE header should be PEx00x00 then a two byte machine type -->
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=1635263&r1=1635262&r2=1635263&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java Wed Oct 29 19:23:10 2014
@@ -742,6 +742,21 @@ public class TestMimeTypes {
}
@Test
+ public void testWindowsEXE() throws Exception {
+ assertTypeByName("application/x-msdownload", "x.dll");
+ assertTypeByName("application/x-ms-installer", "x.msi");
+ assertTypeByName("application/x-dosexec", "x.exe");
+
+ assertTypeByData("application/x-msdownload; format=pe", "testTinyPE.exe");
+ assertTypeByNameAndData("application/x-msdownload; format=pe", "testTinyPE.exe");
+
+ // A jar file with part of a PE header, but not a full one
+ // should still be detected as a zip or jar (without/with name)
+ assertTypeByData("application/zip", "testJAR_with_PEHDR.jar");
+ assertTypeByNameAndData("application/java-archive", "testJAR_with_PEHDR.jar");
+ }
+
+ @Test
public void testMatroskaDetection() throws Exception {
assertType("video/x-matroska", "testMKV.mkv");
// TODO: Need custom detector data detection, see TIKA-1180
Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testJAR_with_PEHDR.jar
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testJAR_with_PEHDR.jar?rev=1635263&view=auto
==============================================================================
Binary file - no diff available.
Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/testJAR_with_PEHDR.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream