You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/02/03 22:11:24 UTC

svn commit: r740449 - in /lucene/tika/trunk/src: main/resources/mime/tika-mimetypes.xml test/java/org/apache/tika/mime/TestMimeTypes.java

Author: jukka
Date: Tue Feb  3 21:11:22 2009
New Revision: 740449

URL: http://svn.apache.org/viewvc?rev=740449&view=rev
Log:
TIKA-192: Add glob and magic patterns for image types

Improved GIF type information, plus relevant tests.

Cleaned up JPEG and TIFF type information.

Modified:
    lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
    lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java

Modified: lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml?rev=740449&r1=740448&r2=740449&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml (original)
+++ lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml Tue Feb  3 21:11:22 2009
@@ -647,18 +647,6 @@
     <alias type="audio/x-realaudio" />
   </mime-type>
 
-  <mime-type type="image/tiff">
-    <comment>Tagged Image File Format</comment>
-    <magic priority="50">
-      <!-- MM.* = Big endian (M=Motorola) and 0x002a in big endian    -->
-      <match value="0x4d4d002a" type="string" offset="0" />
-      <!-- II*. = Little endian (I=Intel) and 0x002a in little endian -->
-      <match value="0x49492a00" type="string" offset="0" />
-    </magic>
-    <glob pattern="*.tiff" />
-    <glob pattern="*.tif" />
-  </mime-type>
-
   <mime-type type="message/rfc822">
     <magic priority="50">
       <match type="string" value="Relay-Version:" offset="0" />
@@ -780,6 +768,8 @@
   </mime-type>
 
   <mime-type type="image/jpeg">
+    <acronym>JPEG</acronym>
+    <comment>Joint Photographic Experts Group</comment>
     <magic priority="50">
       <!-- FFD8 is the SOI (Start Of Image) marker.              -->
       <!-- It is followed by another marker that starts with FF. -->
@@ -1013,16 +1003,24 @@
     </magic>
     <glob pattern="*.ra" />
   </mime-type>
+
   <mime-type type="image/gif">
+    <acronym>GIF</acronym>
+    <comment>Graphics Interchange Format</comment>
     <magic priority="50">
-      <match value="GIF8" type="string" offset="0" />
+      <match value="GIF87a" type="string" offset="0" />
+      <match value="GIF89a" type="string" offset="0" />
     </magic>
     <glob pattern="*.gif" />
   </mime-type>
 
   <mime-type type="image/tiff">
+    <acronym>TIFF</acronym>
+    <comment>Tagged Image File Format</comment>
     <magic priority="50">
+      <!-- MM.* = Big endian (M=Motorola) and 0x002a in big endian    -->
       <match value="MM\x00\x2a" type="string" offset="0" />
+      <!-- II*. = Little endian (I=Intel) and 0x002a in little endian -->
       <match value="II\x2a\x00" type="string" offset="0" />
     </magic>
     <glob pattern="*.tiff" />

Modified: lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=740449&r1=740448&r2=740449&view=diff
==============================================================================
--- lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java (original)
+++ lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java Tue Feb  3 21:11:22 2009
@@ -96,7 +96,9 @@
         assertType("image/jpeg", "testJPEG.jpg");
         assertTypeByData("image/jpeg", "testJPEG.jpg");
         assertTypeByName("image/jpeg", "x.jpg");
+        assertTypeByName("image/jpeg", "x.JPG");
         assertTypeByName("image/jpeg", "x.jpeg");
+        assertTypeByName("image/jpeg", "x.JPEG");
         assertTypeByName("image/jpeg", "x.jpe");
         assertTypeByName("image/jpeg", "x.jif");
         assertTypeByName("image/jpeg", "x.jfif");
@@ -108,6 +110,14 @@
         assertTypeByData("image/tiff", "testTIFF.tif");
         assertTypeByName("image/tiff", "x.tiff");
         assertTypeByName("image/tiff", "x.tif");
+        assertTypeByName("image/tiff", "x.TIF");
+    }
+
+    public void testGifDetection() throws Exception {
+        assertType("image/gif", "testGIF.gif");
+        assertTypeByData("image/gif", "testGIF.gif");
+        assertTypeByName("image/gif", "x.gif");
+        assertTypeByName("image/gif", "x.GIF");
     }
 
     /**