You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2017/01/23 18:39:26 UTC

[1/3] tika git commit: TIKA-2250 As of RFC7903, the official mime type for BMP is now the one without the x- prefix

Repository: tika
Updated Branches:
  refs/heads/master 4cc15e2a3 -> 90bf4f6e4


TIKA-2250 As of RFC7903, the official mime type for BMP is now the one without the x- prefix


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/847156ac
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/847156ac
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/847156ac

Branch: refs/heads/master
Commit: 847156ac0f5fa7d4cc06964198359cf594b66d50
Parents: 4cc15e2
Author: Nick Burch <ni...@gagravarr.org>
Authored: Mon Jan 23 18:20:44 2017 +0000
Committer: Nick Burch <ni...@gagravarr.org>
Committed: Mon Jan 23 18:20:44 2017 +0000

----------------------------------------------------------------------
 .../org/apache/tika/mime/tika-mimetypes.xml         |  5 +++--
 .../java/org/apache/tika/TikaDetectionTest.java     |  3 ++-
 .../org/apache/tika/mime/MimeTypesReaderTest.java   |  2 +-
 .../org/apache/tika/parser/CompositeParserTest.java |  4 ++--
 .../org/apache/tika/parser/image/ImageParser.java   | 16 ++++++++--------
 .../apache/tika/parser/ocr/TesseractOCRParser.java  |  2 +-
 .../java/org/apache/tika/mime/TestMimeTypes.java    | 12 ++++++------
 .../apache/tika/parser/AutoDetectParserTest.java    |  2 +-
 .../org/apache/tika/server/TikaMimeTypesTest.java   | 13 +++++++------
 9 files changed, 31 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/847156ac/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 10a7c15..27b3e99 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -4852,8 +4852,9 @@
     <glob pattern="*.xyz"/>
   </mime-type>
 
-  <mime-type type="image/x-ms-bmp">
-    <alias type="image/bmp"/>
+  <mime-type type="image/bmp">
+    <alias type="image/x-bmp"/>
+    <alias type="image/x-ms-bmp"/>
     <acronym>BMP</acronym>
     <_comment>Windows bitmap</_comment>
     <tika:link>http://en.wikipedia.org/wiki/BMP_file_format</tika:link>

http://git-wip-us.apache.org/repos/asf/tika/blob/847156ac/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java b/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
index b82f010..ea8faf5 100644
--- a/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
+++ b/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
@@ -682,7 +682,8 @@ public class TikaDetectionTest {
         assertEquals("chemical/x-cml", tika.detect("x.cml"));
         assertEquals("chemical/x-csml", tika.detect("x.csml"));
         assertEquals("chemical/x-xyz", tika.detect("x.xyz"));
-        assertEquals("image/x-ms-bmp", tika.detect("x.bmp"));
+        // Differ from httpd - bmp was properly registered in RFC 7903
+        //assertEquals("image/x-ms-bmp", tika.detect("x.bmp"));
         assertEquals("image/cgm", tika.detect("x.cgm"));
         assertEquals("image/g3fax", tika.detect("x.g3"));
         assertEquals("image/gif", tika.detect("x.gif"));

http://git-wip-us.apache.org/repos/asf/tika/blob/847156ac/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java b/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
index a5681b3..bddaf1a 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
@@ -131,7 +131,7 @@ public class MimeTypesReaderTest {
      */
     @Test
     public void testReadExtendedMetadata() throws Exception {
-        MimeType mime = this.mimeTypes.forName("image/x-ms-bmp");
+        MimeType mime = this.mimeTypes.forName("image/bmp");
         assertEquals("BMP", mime.getAcronym());
         assertEquals("com.microsoft.bmp", mime.getUniformTypeIdentifier());
         assertEquals("http://en.wikipedia.org/wiki/BMP_file_format", 

http://git-wip-us.apache.org/repos/asf/tika/blob/847156ac/tika-core/src/test/java/org/apache/tika/parser/CompositeParserTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/CompositeParserTest.java b/tika-core/src/test/java/org/apache/tika/parser/CompositeParserTest.java
index 6a2d52d..c320eae 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/CompositeParserTest.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/CompositeParserTest.java
@@ -81,7 +81,7 @@ public class CompositeParserTest {
 
     @Test
     public void testMimeTypeAliases() throws Exception {
-       MediaType bmpCanonical = MediaType.image("x-ms-bmp");
+       MediaType bmpCanonical = MediaType.image("bmp");
        Map<String,String> bmpCanonicalMetadata = new HashMap<String, String>();
        bmpCanonicalMetadata.put("BMP", "True");
        bmpCanonicalMetadata.put("Canonical", "True");
@@ -90,7 +90,7 @@ public class CompositeParserTest {
              bmpCanonicalMetadata, null
        );
        
-       MediaType bmpAlias = MediaType.image("bmp");
+       MediaType bmpAlias = MediaType.image("x-ms-bmp");
        Map<String,String> bmpAliasMetadata = new HashMap<String, String>();
        bmpAliasMetadata.put("BMP", "True");
        bmpAliasMetadata.put("Alias", "True");

http://git-wip-us.apache.org/repos/asf/tika/blob/847156ac/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
index 321e9e9..236d2d0 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
@@ -54,15 +54,15 @@ public class ImageParser extends AbstractParser {
 
     private static final Logger LOGGER = Logger.getLogger(ImageParser.class.getName());
 
-    private static final MediaType CANONICAL_BMP_TYPE = MediaType.image("x-ms-bmp");
-    private static final MediaType JAVA_BMP_TYPE = MediaType.image("bmp");
+    private static final MediaType MAIN_BMP_TYPE = MediaType.image("bmp");
+    private static final MediaType OLD_BMP_TYPE = MediaType.image("x-ms-bmp");
 
     private static final Set<MediaType> TMP_SUPPORTED;
 
     static {
         TMP_SUPPORTED = new HashSet<MediaType>(Arrays.asList(
-                CANONICAL_BMP_TYPE,
-                JAVA_BMP_TYPE,
+                MAIN_BMP_TYPE,
+                OLD_BMP_TYPE,
                 MediaType.image("gif"),
                 MediaType.image("png"),
                 MediaType.image("vnd.wap.wbmp"),
@@ -171,10 +171,10 @@ public class ImageParser extends AbstractParser {
             throws IOException, SAXException, TikaException {
         String type = metadata.get(Metadata.CONTENT_TYPE);
         if (type != null) {
-            // Java has a different idea of the BMP mime type to
-            //  what the canonical one is, fix this up.
-            if (CANONICAL_BMP_TYPE.toString().equals(type)) {
-                type = JAVA_BMP_TYPE.toString();
+            // If the old (pre-RFC7903) BMP mime type is given,
+            //  fix it up to the new one, so Java is happy
+            if (OLD_BMP_TYPE.toString().equals(type)) {
+                type = MAIN_BMP_TYPE.toString();
             }
 
             try {

http://git-wip-us.apache.org/repos/asf/tika/blob/847156ac/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index 46a3f55..cbbbcf2 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -94,7 +94,7 @@ public class TesseractOCRParser extends AbstractParser {
     private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(
             new HashSet<MediaType>(Arrays.asList(new MediaType[] {
                     MediaType.image("png"), MediaType.image("jpeg"), MediaType.image("tiff"),
-                    MediaType.image("x-ms-bmp"), MediaType.image("gif"), MediaType.image("jp2"),
+                    MediaType.image("bmp"), MediaType.image("gif"), MediaType.image("jp2"),
                     MediaType.image("jpx"), MediaType.image("x-portable-pixmap")
             })));
     private static Map<String,Boolean> TESSERACT_PRESENT = new HashMap<String, Boolean>();

http://git-wip-us.apache.org/repos/asf/tika/blob/847156ac/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index b384cda..904007d 100644
--- a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -470,12 +470,12 @@ public class TestMimeTypes {
 
     @Test
     public void testBmpDetection() throws Exception {
-        assertType("image/x-ms-bmp", "testBMP.bmp");
-        assertTypeByData("image/x-ms-bmp", "testBMP.bmp");
-        assertTypeByName("image/x-ms-bmp", "x.bmp");
-        assertTypeByName("image/x-ms-bmp", "x.BMP");
-        assertTypeByName("image/x-ms-bmp", "x.dib");
-        assertTypeByName("image/x-ms-bmp", "x.DIB");
+        assertType("image/bmp", "testBMP.bmp");
+        assertTypeByData("image/bmp", "testBMP.bmp");
+        assertTypeByName("image/bmp", "x.bmp");
+        assertTypeByName("image/bmp", "x.BMP");
+        assertTypeByName("image/bmp", "x.dib");
+        assertTypeByName("image/bmp", "x.DIB");
         //false positive check -- contains part of BMP signature
         assertType("text/plain", "testBMPfp.txt");
     }

http://git-wip-us.apache.org/repos/asf/tika/blob/847156ac/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
index 91b054e..817308f 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
@@ -64,7 +64,7 @@ public class AutoDetectParserTest {
     private static final String WORD       = "application/msword";
     private static final String XML        = "application/xml";
     private static final String RSS        = "application/rss+xml";
-    private static final String BMP        = "image/x-ms-bmp";
+    private static final String BMP        = "image/bmp";
     private static final String GIF        = "image/gif";
     private static final String JPEG       = "image/jpeg";
     private static final String PNG        = "image/png";

http://git-wip-us.apache.org/repos/asf/tika/blob/847156ac/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java
----------------------------------------------------------------------
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java b/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java
index 929dbc5..b0b47fc 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java
@@ -63,7 +63,7 @@ public class TikaMimeTypesTest extends CXFTestBase {
 
         assertContains("supertype: video/ogg", text);
 
-        assertContains("alias:     image/bmp", text);
+        assertContains("alias:     image/x-ms-bmp", text);
     }
 
     @Test
@@ -84,7 +84,7 @@ public class TikaMimeTypesTest extends CXFTestBase {
 
         assertContains("Super Type: <a href=\"#video/ogg\">video/ogg", text);
 
-        assertContains("Alias: image/bmp", text);
+        assertContains("Alias: image/x-ms-bmp", text);
     }
 
     @Test
@@ -102,13 +102,14 @@ public class TikaMimeTypesTest extends CXFTestBase {
         assertEquals(true, json.containsKey("text/plain"));
         assertEquals(true, json.containsKey("application/xml"));
         assertEquals(true, json.containsKey("video/x-ogm"));
-        assertEquals(true, json.containsKey("image/x-ms-bmp"));
+        assertEquals(true, json.containsKey("image/bmp"));
 
-        Map<String, Object> bmp = json.get("image/x-ms-bmp");
+        Map<String, Object> bmp = json.get("image/bmp");
         assertEquals(true, bmp.containsKey("alias"));
         Object[] aliases = (Object[]) bmp.get("alias");
-        assertEquals(1, aliases.length);
-        assertEquals("image/bmp", aliases[0]);
+        assertEquals(2, aliases.length);
+        assertEquals("image/x-bmp", aliases[0]);
+        assertEquals("image/x-ms-bmp", aliases[1]);
 
         String whichParser = bmp.get("parser").toString();
         assertTrue("Which parser", whichParser.equals("org.apache.tika.parser.ocr.TesseractOCRParser") ||


[2/3] tika git commit: TIKA-2250 As of RFC7903, the official mime type for WMF is now an image one and without the x- prefix

Posted by ni...@apache.org.
TIKA-2250 As of RFC7903, the official mime type for WMF is now an image one and without the x- prefix


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/e6c0082e
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/e6c0082e
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/e6c0082e

Branch: refs/heads/master
Commit: e6c0082e41143a01f0bf646a8a8b6c06a85ca239
Parents: 847156a
Author: Nick Burch <ni...@gagravarr.org>
Authored: Mon Jan 23 18:27:02 2017 +0000
Committer: Nick Burch <ni...@gagravarr.org>
Committed: Mon Jan 23 18:27:02 2017 +0000

----------------------------------------------------------------------
 .../org/apache/tika/mime/tika-mimetypes.xml     | 22 +++++++++++---------
 .../java/org/apache/tika/TikaDetectionTest.java |  3 ++-
 .../tika/parser/microsoft/HSLFExtractor.java    |  2 +-
 .../org/apache/tika/mime/TestMimeTypes.java     |  6 +++---
 .../AbstractPOIContainerExtractionTest.java     |  2 +-
 .../apache/tika/parser/rtf/RTFParserTest.java   |  2 +-
 6 files changed, 20 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/e6c0082e/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 27b3e99..939f4cb 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -3703,16 +3703,6 @@
     <glob pattern="*.m13"/>
     <glob pattern="*.m14"/>
   </mime-type>
-  <mime-type type="application/x-msmetafile">
-    <alias type="image/x-wmf"/>
-    <acronym>WMF</acronym>
-    <_comment>Windows Metafile</_comment>
-    <glob pattern="*.wmf"/>
-    <magic priority="50">
-      <match value="0xd7cdc69a0000" type="string" offset="0"/>
-      <match value="0x010009000003" type="string" offset="0"/>
-    </magic>
-  </mime-type>
   <mime-type type="application/x-msmoney">
     <glob pattern="*.mny"/>
     <magic priority="60">
@@ -5204,6 +5194,18 @@
     <glob pattern="*.webp"/>
   </mime-type>
 
+  <mime-type type="image/wmf">
+    <alias type="image/x-wmf"/>
+    <alias type="application/x-msmetafile"/>
+    <acronym>WMF</acronym>
+    <_comment>Windows Metafile</_comment>
+    <glob pattern="*.wmf"/>
+    <magic priority="50">
+      <match value="0xd7cdc69a0000" type="string" offset="0"/>
+      <match value="0x010009000003" type="string" offset="0"/>
+    </magic>
+  </mime-type>
+
   <mime-type type="image/vnd.xiff">
     <glob pattern="*.xif"/>
   </mime-type>

http://git-wip-us.apache.org/repos/asf/tika/blob/e6c0082e/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java b/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
index ea8faf5..cf242cf 100644
--- a/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
+++ b/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
@@ -590,7 +590,8 @@ public class TikaDetectionTest {
         assertEquals("application/x-msmediaview", tika.detect("x.mvb"));
         assertEquals("application/x-msmediaview", tika.detect("x.m13"));
         assertEquals("application/x-msmediaview", tika.detect("x.m14"));
-        assertEquals("application/x-msmetafile", tika.detect("x.wmf"));
+        // Differ from httpd - wmf was properly registered in RFC 7903
+        //assertEquals("application/x-msmetafile", tika.detect("x.wmf"));
         assertEquals("application/x-msmoney", tika.detect("x.mny"));
         assertEquals("application/x-mspublisher", tika.detect("x.pub"));
         assertEquals("application/x-msschedule", tika.detect("x.scd"));

http://git-wip-us.apache.org/repos/asf/tika/blob/e6c0082e/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
index 6fc949e..8457ec3 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
@@ -330,7 +330,7 @@ public class HSLFExtractor extends AbstractPOIFSExtractor {
                     mediaType = "application/x-emf";
                     break;
                 case WMF:
-                    mediaType = "application/x-msmetafile";
+                    mediaType = "image/wmf";
                     break;
                 case DIB:
                     mediaType = "image/bmp";

http://git-wip-us.apache.org/repos/asf/tika/blob/e6c0082e/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index 904007d..4c5dd7c 100644
--- a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -630,9 +630,9 @@ public class TestMimeTypes {
 
     @Test
     public void testWmfDetection() throws Exception {
-        assertTypeByName("application/x-msmetafile", "x.wmf");
-        assertTypeByData("application/x-msmetafile", "testWMF.wmf");
-        assertTypeByName("application/x-msmetafile", "x.WMF");
+        assertTypeByName("image/wmf", "x.wmf");
+        assertTypeByData("image/wmf", "testWMF.wmf");
+        assertTypeByName("image/wmf", "x.WMF");
 
         assertTypeByName("application/x-emf", "x.emf");
         assertTypeByData("application/x-emf","testEMF.emf");

http://git-wip-us.apache.org/repos/asf/tika/blob/e6c0082e/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
index f454446..1a2940d 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
@@ -46,7 +46,7 @@ public abstract class AbstractPOIContainerExtractionTest extends TikaTest {
     public static final MediaType TYPE_GIF = MediaType.image("gif");
     public static final MediaType TYPE_PNG = MediaType.image("png");
     public static final MediaType TYPE_EMF = MediaType.application("x-emf");
-    public static final MediaType TYPE_WMF = MediaType.application("x-msmetafile");
+    public static final MediaType TYPE_WMF = MediaType.image("wmf");
 
     protected static TikaInputStream getTestFile(String filename) throws Exception {
         URL input = AbstractPOIContainerExtractionTest.class.getResource(

http://git-wip-us.apache.org/repos/asf/tika/blob/e6c0082e/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
index bb42361..dc473a1 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
@@ -503,7 +503,7 @@ public class RTFParserTest extends TikaTest {
     public void testEmbeddedLinkedDocument() throws Exception {
         Set<MediaType> skipTypes = new HashSet<MediaType>();
         skipTypes.add(MediaType.parse("application/x-emf"));
-        skipTypes.add(MediaType.parse("application/x-msmetafile"));
+        skipTypes.add(MediaType.parse("image/wmf"));
 
         TrackingHandler tracker = new TrackingHandler(skipTypes);
         try (TikaInputStream tis = TikaInputStream.get(getResourceAsStream("/test-documents/testRTFEmbeddedLink.rtf"))) {


[3/3] tika git commit: TIKA-2250 As of RFC7903, the official mime type for EMF is now an image one and without the x- prefix

Posted by ni...@apache.org.
TIKA-2250 As of RFC7903, the official mime type for EMF is now an image one and without the x- prefix


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/90bf4f6e
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/90bf4f6e
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/90bf4f6e

Branch: refs/heads/master
Commit: 90bf4f6e4c645240b36ded6973eb64961312fc0a
Parents: e6c0082
Author: Nick Burch <ni...@gagravarr.org>
Authored: Mon Jan 23 18:31:49 2017 +0000
Committer: Nick Burch <ni...@gagravarr.org>
Committed: Mon Jan 23 18:31:49 2017 +0000

----------------------------------------------------------------------
 CHANGES.txt                                     |  3 +++
 .../org/apache/tika/mime/tika-mimetypes.xml     | 26 +++++++++++---------
 .../tika/parser/microsoft/HSLFExtractor.java    |  2 +-
 .../org/apache/tika/mime/TestMimeTypes.java     |  7 +++---
 .../AbstractPOIContainerExtractionTest.java     |  2 +-
 .../apache/tika/parser/pdf/PDFParserTest.java   |  2 +-
 .../apache/tika/parser/rtf/RTFParserTest.java   |  2 +-
 7 files changed, 25 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/90bf4f6e/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index b44dcf6..4256c69 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,8 @@
 Release 1.15 - ??
 
+  * Official mime types for BMP, EMF and WMF have been registered with
+    IANA, so switch to these (image/bmp image/emf image/wmf) (TIKA-2250)
+
   * Be more parsimonious with BufferedInputStreams via Josh Hight
     (TIKA-2244).
 

http://git-wip-us.apache.org/repos/asf/tika/blob/90bf4f6e/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 939f4cb..854de62 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -3268,18 +3268,6 @@
     <glob pattern="*.exe"/>
   </mime-type>
 
-  <mime-type type="application/x-emf">
-    <acronym>EMF</acronym>
-    <_comment>Extended Metafile</_comment>
-    <tika:link>https://msdn.microsoft.com/en-us/library/cc230711.aspx</tika:link>
-    <glob pattern="*.emf"/>
-    <magic priority="50">
-      <match value="0x01000000" type="string" offset="0">
-        <match value="0x464D4520" type="little32" offset="40"/>
-      </match>
-    </magic>
-  </mime-type>
-
   <mime-type type="application/x-erdas-hfa">
     <magic priority="50">
       <match value="EHFA_HEADER_TAG" type="string" offset="0" />
@@ -4891,6 +4879,20 @@
     <glob pattern="*.cgm"/>
   </mime-type>
 
+  <mime-type type="image/emf">
+    <alias type="image/x-emf"/>
+    <alias type="application/x-emf"/>
+    <acronym>EMF</acronym>
+    <_comment>Enhanced Metafile</_comment>
+    <tika:link>https://msdn.microsoft.com/en-us/library/cc230711.aspx</tika:link>
+    <glob pattern="*.emf"/>
+    <magic priority="50">
+      <match value="0x01000000" type="string" offset="0">
+        <match value="0x464D4520" type="little32" offset="40"/>
+      </match>
+    </magic>
+  </mime-type>
+
   <mime-type type="image/example"/>
 
   <mime-type type="image/fits">

http://git-wip-us.apache.org/repos/asf/tika/blob/90bf4f6e/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
index 8457ec3..c05fda0 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
@@ -327,7 +327,7 @@ public class HSLFExtractor extends AbstractPOIFSExtractor {
 
             switch (pic.getType()) {
                 case EMF:
-                    mediaType = "application/x-emf";
+                    mediaType = "image/emf";
                     break;
                 case WMF:
                     mediaType = "image/wmf";

http://git-wip-us.apache.org/repos/asf/tika/blob/90bf4f6e/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index 4c5dd7c..40d938e 100644
--- a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -634,9 +634,10 @@ public class TestMimeTypes {
         assertTypeByData("image/wmf", "testWMF.wmf");
         assertTypeByName("image/wmf", "x.WMF");
 
-        assertTypeByName("application/x-emf", "x.emf");
-        assertTypeByData("application/x-emf","testEMF.emf");
-        assertTypeByName("application/x-emf", "x.EMF");
+        assertTypeByName("image/emf", "x.emf");
+        assertTypeByData("image/emf", "testEMF.emf");
+        assertTypeByName("image/emf", "x.EMF");
+
         // TODO: Need a test wmz file
         assertTypeByName("application/x-ms-wmz", "x.wmz");
         assertTypeByName("application/x-ms-wmz", "x.WMZ");

http://git-wip-us.apache.org/repos/asf/tika/blob/90bf4f6e/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
index 1a2940d..86657b1 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
@@ -45,7 +45,7 @@ public abstract class AbstractPOIContainerExtractionTest extends TikaTest {
     public static final MediaType TYPE_JPG = MediaType.image("jpeg");
     public static final MediaType TYPE_GIF = MediaType.image("gif");
     public static final MediaType TYPE_PNG = MediaType.image("png");
-    public static final MediaType TYPE_EMF = MediaType.application("x-emf");
+    public static final MediaType TYPE_EMF = MediaType.image("emf");
     public static final MediaType TYPE_WMF = MediaType.image("wmf");
 
     protected static TikaInputStream getTestFile(String filename) throws Exception {

http://git-wip-us.apache.org/repos/asf/tika/blob/90bf4f6e/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index b017457..51fb9c9 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -73,7 +73,7 @@ import org.xml.sax.ContentHandler;
 public class PDFParserTest extends TikaTest {
 
     public static final MediaType TYPE_TEXT = MediaType.TEXT_PLAIN;
-    public static final MediaType TYPE_EMF = MediaType.application("x-emf");
+    public static final MediaType TYPE_EMF = MediaType.image("emf");
     public static final MediaType TYPE_PDF = MediaType.application("pdf");
     public static final MediaType TYPE_DOCX = MediaType.application("vnd.openxmlformats-officedocument.wordprocessingml.document");
     public static final MediaType TYPE_DOC = MediaType.application("msword");

http://git-wip-us.apache.org/repos/asf/tika/blob/90bf4f6e/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
index dc473a1..68388b5 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
@@ -502,7 +502,7 @@ public class RTFParserTest extends TikaTest {
     @Test
     public void testEmbeddedLinkedDocument() throws Exception {
         Set<MediaType> skipTypes = new HashSet<MediaType>();
-        skipTypes.add(MediaType.parse("application/x-emf"));
+        skipTypes.add(MediaType.parse("image/emf"));
         skipTypes.add(MediaType.parse("image/wmf"));
 
         TrackingHandler tracker = new TrackingHandler(skipTypes);