You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/02/03 22:00:39 UTC

svn commit: r740440 - in /lucene/tika/trunk/src: main/resources/mime/tika-mimetypes.xml test/java/org/apache/tika/mime/TestMimeTypes.java

Author: jukka
Date: Tue Feb  3 21:00:38 2009
New Revision: 740440

URL: http://svn.apache.org/viewvc?rev=740440&view=rev
Log:
TIKA-192: Add glob and magic patterns for image types

Improved TIFF type information, plus relevant tests.

Modified:
    lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
    lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java

Modified: lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml?rev=740440&r1=740439&r2=740440&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml (original)
+++ lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml Tue Feb  3 21:00:38 2009
@@ -648,10 +648,15 @@
   </mime-type>
 
   <mime-type type="image/tiff">
+    <comment>Tagged Image File Format</comment>
     <magic priority="50">
-      <match value="0x4d4d2a00" type="string" offset="0" />
+      <!-- MM.* = Big endian (M=Motorola) and 0x002a in big endian    -->
+      <match value="0x4d4d002a" type="string" offset="0" />
+      <!-- II*. = Little endian (I=Intel) and 0x002a in little endian -->
       <match value="0x49492a00" type="string" offset="0" />
     </magic>
+    <glob pattern="*.tiff" />
+    <glob pattern="*.tif" />
   </mime-type>
 
   <mime-type type="message/rfc822">
@@ -1014,6 +1019,7 @@
     </magic>
     <glob pattern="*.gif" />
   </mime-type>
+
   <mime-type type="image/tiff">
     <magic priority="50">
       <match value="MM\x00\x2a" type="string" offset="0" />
@@ -1022,6 +1028,7 @@
     <glob pattern="*.tiff" />
     <glob pattern="*.tif" />
   </mime-type>
+
   <mime-type type="image/x-ms-bmp">
     <glob pattern="*.bmp" />
   </mime-type>

Modified: lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=740440&r1=740439&r2=740440&view=diff
==============================================================================
--- lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java (original)
+++ lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java Tue Feb  3 21:00:38 2009
@@ -90,7 +90,11 @@
         assertTypeByName("application/zip", "x.zip");
         assertTypeByName("application/vnd.oasis.opendocument.text", "x.odt");
         assertTypeByName("application/octet-stream", "x.xyz");
+    }
 
+    public void testJpegDetection() throws Exception {
+        assertType("image/jpeg", "testJPEG.jpg");
+        assertTypeByData("image/jpeg", "testJPEG.jpg");
         assertTypeByName("image/jpeg", "x.jpg");
         assertTypeByName("image/jpeg", "x.jpeg");
         assertTypeByName("image/jpeg", "x.jpe");
@@ -99,6 +103,13 @@
         assertTypeByName("image/jpeg", "x.jfi");
     }
 
+    public void testTiffDetection() throws Exception {
+        assertType("image/tiff", "testTIFF.tif");
+        assertTypeByData("image/tiff", "testTIFF.tif");
+        assertTypeByName("image/tiff", "x.tiff");
+        assertTypeByName("image/tiff", "x.tif");
+    }
+
     /**
      * Tests MimeTypes.getMimeType(URL), which examines both the byte header
      * and, if necessary, the URL's extension.
@@ -146,4 +157,17 @@
         metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
         assertEquals(expected, repo.detect(null, metadata).toString());
     }
+
+    private void assertTypeByData(String expected, String filename)
+            throws IOException {
+        InputStream stream = TestMimeTypes.class.getResourceAsStream(
+                "/test-documents/" + filename);
+        try {
+            Metadata metadata = new Metadata();
+            assertEquals(expected, repo.detect(stream, metadata).toString());
+        } finally {
+            stream.close();
+        }
+    }
+
 }