You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/02/03 22:11:24 UTC
svn commit: r740449 - in /lucene/tika/trunk/src:
main/resources/mime/tika-mimetypes.xml
test/java/org/apache/tika/mime/TestMimeTypes.java
Author: jukka
Date: Tue Feb 3 21:11:22 2009
New Revision: 740449
URL: http://svn.apache.org/viewvc?rev=740449&view=rev
Log:
TIKA-192: Add glob and magic patterns for image types
Improved GIF type information, plus relevant tests.
Cleaned up JPEG and TIFF type information.
Modified:
lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java
Modified: lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml?rev=740449&r1=740448&r2=740449&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml (original)
+++ lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml Tue Feb 3 21:11:22 2009
@@ -647,18 +647,6 @@
<alias type="audio/x-realaudio" />
</mime-type>
- <mime-type type="image/tiff">
- <comment>Tagged Image File Format</comment>
- <magic priority="50">
- <!-- MM.* = Big endian (M=Motorola) and 0x002a in big endian -->
- <match value="0x4d4d002a" type="string" offset="0" />
- <!-- II*. = Little endian (I=Intel) and 0x002a in little endian -->
- <match value="0x49492a00" type="string" offset="0" />
- </magic>
- <glob pattern="*.tiff" />
- <glob pattern="*.tif" />
- </mime-type>
-
<mime-type type="message/rfc822">
<magic priority="50">
<match type="string" value="Relay-Version:" offset="0" />
@@ -780,6 +768,8 @@
</mime-type>
<mime-type type="image/jpeg">
+ <acronym>JPEG</acronym>
+ <comment>Joint Photographic Experts Group</comment>
<magic priority="50">
<!-- FFD8 is the SOI (Start Of Image) marker. -->
<!-- It is followed by another marker that starts with FF. -->
@@ -1013,16 +1003,24 @@
</magic>
<glob pattern="*.ra" />
</mime-type>
+
<mime-type type="image/gif">
+ <acronym>GIF</acronym>
+ <comment>Graphics Interchange Format</comment>
<magic priority="50">
- <match value="GIF8" type="string" offset="0" />
+ <match value="GIF87a" type="string" offset="0" />
+ <match value="GIF89a" type="string" offset="0" />
</magic>
<glob pattern="*.gif" />
</mime-type>
<mime-type type="image/tiff">
+ <acronym>TIFF</acronym>
+ <comment>Tagged Image File Format</comment>
<magic priority="50">
+ <!-- MM.* = Big endian (M=Motorola) and 0x002a in big endian -->
<match value="MM\x00\x2a" type="string" offset="0" />
+ <!-- II*. = Little endian (I=Intel) and 0x002a in little endian -->
<match value="II\x2a\x00" type="string" offset="0" />
</magic>
<glob pattern="*.tiff" />
Modified: lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=740449&r1=740448&r2=740449&view=diff
==============================================================================
--- lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java (original)
+++ lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java Tue Feb 3 21:11:22 2009
@@ -96,7 +96,9 @@
assertType("image/jpeg", "testJPEG.jpg");
assertTypeByData("image/jpeg", "testJPEG.jpg");
assertTypeByName("image/jpeg", "x.jpg");
+ assertTypeByName("image/jpeg", "x.JPG");
assertTypeByName("image/jpeg", "x.jpeg");
+ assertTypeByName("image/jpeg", "x.JPEG");
assertTypeByName("image/jpeg", "x.jpe");
assertTypeByName("image/jpeg", "x.jif");
assertTypeByName("image/jpeg", "x.jfif");
@@ -108,6 +110,14 @@
assertTypeByData("image/tiff", "testTIFF.tif");
assertTypeByName("image/tiff", "x.tiff");
assertTypeByName("image/tiff", "x.tif");
+ assertTypeByName("image/tiff", "x.TIF");
+ }
+
+ public void testGifDetection() throws Exception {
+ assertType("image/gif", "testGIF.gif");
+ assertTypeByData("image/gif", "testGIF.gif");
+ assertTypeByName("image/gif", "x.gif");
+ assertTypeByName("image/gif", "x.GIF");
}
/**