You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/02/03 22:00:39 UTC
svn commit: r740440 - in /lucene/tika/trunk/src:
main/resources/mime/tika-mimetypes.xml
test/java/org/apache/tika/mime/TestMimeTypes.java
Author: jukka
Date: Tue Feb 3 21:00:38 2009
New Revision: 740440
URL: http://svn.apache.org/viewvc?rev=740440&view=rev
Log:
TIKA-192: Add glob and magic patterns for image types
Improved TIFF type information, plus relevant tests.
Modified:
lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java
Modified: lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml?rev=740440&r1=740439&r2=740440&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml (original)
+++ lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml Tue Feb 3 21:00:38 2009
@@ -648,10 +648,15 @@
</mime-type>
<mime-type type="image/tiff">
+ <comment>Tagged Image File Format</comment>
<magic priority="50">
- <match value="0x4d4d2a00" type="string" offset="0" />
+ <!-- MM.* = Big endian (M=Motorola) and 0x002a in big endian -->
+ <match value="0x4d4d002a" type="string" offset="0" />
+ <!-- II*. = Little endian (I=Intel) and 0x002a in little endian -->
<match value="0x49492a00" type="string" offset="0" />
</magic>
+ <glob pattern="*.tiff" />
+ <glob pattern="*.tif" />
</mime-type>
<mime-type type="message/rfc822">
@@ -1014,6 +1019,7 @@
</magic>
<glob pattern="*.gif" />
</mime-type>
+
<mime-type type="image/tiff">
<magic priority="50">
<match value="MM\x00\x2a" type="string" offset="0" />
@@ -1022,6 +1028,7 @@
<glob pattern="*.tiff" />
<glob pattern="*.tif" />
</mime-type>
+
<mime-type type="image/x-ms-bmp">
<glob pattern="*.bmp" />
</mime-type>
Modified: lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=740440&r1=740439&r2=740440&view=diff
==============================================================================
--- lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java (original)
+++ lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java Tue Feb 3 21:00:38 2009
@@ -90,7 +90,11 @@
assertTypeByName("application/zip", "x.zip");
assertTypeByName("application/vnd.oasis.opendocument.text", "x.odt");
assertTypeByName("application/octet-stream", "x.xyz");
+ }
+ public void testJpegDetection() throws Exception {
+ assertType("image/jpeg", "testJPEG.jpg");
+ assertTypeByData("image/jpeg", "testJPEG.jpg");
assertTypeByName("image/jpeg", "x.jpg");
assertTypeByName("image/jpeg", "x.jpeg");
assertTypeByName("image/jpeg", "x.jpe");
@@ -99,6 +103,13 @@
assertTypeByName("image/jpeg", "x.jfi");
}
+ public void testTiffDetection() throws Exception {
+ assertType("image/tiff", "testTIFF.tif");
+ assertTypeByData("image/tiff", "testTIFF.tif");
+ assertTypeByName("image/tiff", "x.tiff");
+ assertTypeByName("image/tiff", "x.tif");
+ }
+
/**
* Tests MimeTypes.getMimeType(URL), which examines both the byte header
* and, if necessary, the URL's extension.
@@ -146,4 +157,17 @@
metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
assertEquals(expected, repo.detect(null, metadata).toString());
}
+
+ private void assertTypeByData(String expected, String filename)
+ throws IOException {
+ InputStream stream = TestMimeTypes.class.getResourceAsStream(
+ "/test-documents/" + filename);
+ try {
+ Metadata metadata = new Metadata();
+ assertEquals(expected, repo.detect(stream, metadata).toString());
+ } finally {
+ stream.close();
+ }
+ }
+
}