You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2014/05/21 16:52:00 UTC

svn commit: r1596590 - in /tika/trunk: tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java

Author: nick
Date: Wed May 21 14:52:00 2014
New Revision: 1596590

URL: http://svn.apache.org/r1596590
Log:
Container formats with specific, low-false-positive magic matches need a slightly higher priority, so that they don't accidentally end up being matched based on the contents of the container near the start of the file. Partly solves TIKA-1292. This closes GitHub pull request #6.

Modified:
    tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java

Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1596590&r1=1596589&r2=1596590&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Wed May 21 14:52:00 2014
@@ -2839,7 +2839,7 @@
 
   <mime-type type="application/x-gtar">
     <_comment>GNU tar Compressed File Archive (GNU Tape Archive)</_comment>
-    <magic priority="40">
+    <magic priority="50">
       <!-- GNU tar archive -->
       <match value="ustar  \0" type="string" offset="257" />
     </magic>
@@ -2856,7 +2856,7 @@
     <alias type="application/gzip-compressed"/>
     <alias type="application/x-gzip-compressed"/>
     <alias type="gzip/document"/>
-    <magic priority="40">
+    <magic priority="45">
       <match value="\037\213" type="string" offset="0" />
       <match value="\x1f\x8b" type="string" offset="0" />
     </magic>
@@ -3536,7 +3536,7 @@
     <tika:link>http://en.wikipedia.org/wiki/ZIP_(file_format)</tika:link>
     <tika:uti>com.pkware.zip-archive</tika:uti>
     <alias type="application/x-zip-compressed"/>
-    <magic priority="40">
+    <magic priority="50">
       <match value="PK\003\004" type="string" offset="0"/>
       <match value="PK\005\006" type="string" offset="0"/>
       <match value="PK\x07\x08" type="string" offset="0"/>

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java?rev=1596590&r1=1596589&r2=1596590&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java Wed May 21 14:52:00 2014
@@ -338,9 +338,8 @@ public class TestContainerAwareDetector 
         assertTypeByData("testAPK.apk", "application/vnd.android.package-archive");
         
         // JAR with HTML files in it
-        // TODO Fix TIKA-1292 and enable this test
-//        assertTypeByNameAndData("testJAR_with_HTML.jar", "testJAR_with_HTML.jar",
-//                                "application/java-archive", "application/java-archive");
+        assertTypeByNameAndData("testJAR_with_HTML.jar", "testJAR_with_HTML.jar",
+                                "application/java-archive", "application/java-archive");
     }
 
     private TikaInputStream getTruncatedFile(String name, int n)