You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2017/01/23 18:39:26 UTC
[1/3] tika git commit: TIKA-2250 As of RFC7903, the official mime type for BMP is now the one without the x- prefix
Repository: tika
Updated Branches:
refs/heads/master 4cc15e2a3 -> 90bf4f6e4
TIKA-2250 As of RFC7903, the official mime type for BMP is now the one without the x- prefix
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/847156ac
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/847156ac
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/847156ac
Branch: refs/heads/master
Commit: 847156ac0f5fa7d4cc06964198359cf594b66d50
Parents: 4cc15e2
Author: Nick Burch <ni...@gagravarr.org>
Authored: Mon Jan 23 18:20:44 2017 +0000
Committer: Nick Burch <ni...@gagravarr.org>
Committed: Mon Jan 23 18:20:44 2017 +0000
----------------------------------------------------------------------
.../org/apache/tika/mime/tika-mimetypes.xml | 5 +++--
.../java/org/apache/tika/TikaDetectionTest.java | 3 ++-
.../org/apache/tika/mime/MimeTypesReaderTest.java | 2 +-
.../org/apache/tika/parser/CompositeParserTest.java | 4 ++--
.../org/apache/tika/parser/image/ImageParser.java | 16 ++++++++--------
.../apache/tika/parser/ocr/TesseractOCRParser.java | 2 +-
.../java/org/apache/tika/mime/TestMimeTypes.java | 12 ++++++------
.../apache/tika/parser/AutoDetectParserTest.java | 2 +-
.../org/apache/tika/server/TikaMimeTypesTest.java | 13 +++++++------
9 files changed, 31 insertions(+), 28 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/847156ac/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 10a7c15..27b3e99 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -4852,8 +4852,9 @@
<glob pattern="*.xyz"/>
</mime-type>
- <mime-type type="image/x-ms-bmp">
- <alias type="image/bmp"/>
+ <mime-type type="image/bmp">
+ <alias type="image/x-bmp"/>
+ <alias type="image/x-ms-bmp"/>
<acronym>BMP</acronym>
<_comment>Windows bitmap</_comment>
<tika:link>http://en.wikipedia.org/wiki/BMP_file_format</tika:link>
http://git-wip-us.apache.org/repos/asf/tika/blob/847156ac/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java b/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
index b82f010..ea8faf5 100644
--- a/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
+++ b/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
@@ -682,7 +682,8 @@ public class TikaDetectionTest {
assertEquals("chemical/x-cml", tika.detect("x.cml"));
assertEquals("chemical/x-csml", tika.detect("x.csml"));
assertEquals("chemical/x-xyz", tika.detect("x.xyz"));
- assertEquals("image/x-ms-bmp", tika.detect("x.bmp"));
+ // Differ from httpd - bmp was properly registered in RFC 7903
+ //assertEquals("image/x-ms-bmp", tika.detect("x.bmp"));
assertEquals("image/cgm", tika.detect("x.cgm"));
assertEquals("image/g3fax", tika.detect("x.g3"));
assertEquals("image/gif", tika.detect("x.gif"));
http://git-wip-us.apache.org/repos/asf/tika/blob/847156ac/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java b/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
index a5681b3..bddaf1a 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
@@ -131,7 +131,7 @@ public class MimeTypesReaderTest {
*/
@Test
public void testReadExtendedMetadata() throws Exception {
- MimeType mime = this.mimeTypes.forName("image/x-ms-bmp");
+ MimeType mime = this.mimeTypes.forName("image/bmp");
assertEquals("BMP", mime.getAcronym());
assertEquals("com.microsoft.bmp", mime.getUniformTypeIdentifier());
assertEquals("http://en.wikipedia.org/wiki/BMP_file_format",
http://git-wip-us.apache.org/repos/asf/tika/blob/847156ac/tika-core/src/test/java/org/apache/tika/parser/CompositeParserTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/CompositeParserTest.java b/tika-core/src/test/java/org/apache/tika/parser/CompositeParserTest.java
index 6a2d52d..c320eae 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/CompositeParserTest.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/CompositeParserTest.java
@@ -81,7 +81,7 @@ public class CompositeParserTest {
@Test
public void testMimeTypeAliases() throws Exception {
- MediaType bmpCanonical = MediaType.image("x-ms-bmp");
+ MediaType bmpCanonical = MediaType.image("bmp");
Map<String,String> bmpCanonicalMetadata = new HashMap<String, String>();
bmpCanonicalMetadata.put("BMP", "True");
bmpCanonicalMetadata.put("Canonical", "True");
@@ -90,7 +90,7 @@ public class CompositeParserTest {
bmpCanonicalMetadata, null
);
- MediaType bmpAlias = MediaType.image("bmp");
+ MediaType bmpAlias = MediaType.image("x-ms-bmp");
Map<String,String> bmpAliasMetadata = new HashMap<String, String>();
bmpAliasMetadata.put("BMP", "True");
bmpAliasMetadata.put("Alias", "True");
http://git-wip-us.apache.org/repos/asf/tika/blob/847156ac/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
index 321e9e9..236d2d0 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
@@ -54,15 +54,15 @@ public class ImageParser extends AbstractParser {
private static final Logger LOGGER = Logger.getLogger(ImageParser.class.getName());
- private static final MediaType CANONICAL_BMP_TYPE = MediaType.image("x-ms-bmp");
- private static final MediaType JAVA_BMP_TYPE = MediaType.image("bmp");
+ private static final MediaType MAIN_BMP_TYPE = MediaType.image("bmp");
+ private static final MediaType OLD_BMP_TYPE = MediaType.image("x-ms-bmp");
private static final Set<MediaType> TMP_SUPPORTED;
static {
TMP_SUPPORTED = new HashSet<MediaType>(Arrays.asList(
- CANONICAL_BMP_TYPE,
- JAVA_BMP_TYPE,
+ MAIN_BMP_TYPE,
+ OLD_BMP_TYPE,
MediaType.image("gif"),
MediaType.image("png"),
MediaType.image("vnd.wap.wbmp"),
@@ -171,10 +171,10 @@ public class ImageParser extends AbstractParser {
throws IOException, SAXException, TikaException {
String type = metadata.get(Metadata.CONTENT_TYPE);
if (type != null) {
- // Java has a different idea of the BMP mime type to
- // what the canonical one is, fix this up.
- if (CANONICAL_BMP_TYPE.toString().equals(type)) {
- type = JAVA_BMP_TYPE.toString();
+ // If the old (pre-RFC7903) BMP mime type is given,
+ // fix it up to the new one, so Java is happy
+ if (OLD_BMP_TYPE.toString().equals(type)) {
+ type = MAIN_BMP_TYPE.toString();
}
try {
http://git-wip-us.apache.org/repos/asf/tika/blob/847156ac/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index 46a3f55..cbbbcf2 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -94,7 +94,7 @@ public class TesseractOCRParser extends AbstractParser {
private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(
new HashSet<MediaType>(Arrays.asList(new MediaType[] {
MediaType.image("png"), MediaType.image("jpeg"), MediaType.image("tiff"),
- MediaType.image("x-ms-bmp"), MediaType.image("gif"), MediaType.image("jp2"),
+ MediaType.image("bmp"), MediaType.image("gif"), MediaType.image("jp2"),
MediaType.image("jpx"), MediaType.image("x-portable-pixmap")
})));
private static Map<String,Boolean> TESSERACT_PRESENT = new HashMap<String, Boolean>();
http://git-wip-us.apache.org/repos/asf/tika/blob/847156ac/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index b384cda..904007d 100644
--- a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -470,12 +470,12 @@ public class TestMimeTypes {
@Test
public void testBmpDetection() throws Exception {
- assertType("image/x-ms-bmp", "testBMP.bmp");
- assertTypeByData("image/x-ms-bmp", "testBMP.bmp");
- assertTypeByName("image/x-ms-bmp", "x.bmp");
- assertTypeByName("image/x-ms-bmp", "x.BMP");
- assertTypeByName("image/x-ms-bmp", "x.dib");
- assertTypeByName("image/x-ms-bmp", "x.DIB");
+ assertType("image/bmp", "testBMP.bmp");
+ assertTypeByData("image/bmp", "testBMP.bmp");
+ assertTypeByName("image/bmp", "x.bmp");
+ assertTypeByName("image/bmp", "x.BMP");
+ assertTypeByName("image/bmp", "x.dib");
+ assertTypeByName("image/bmp", "x.DIB");
//false positive check -- contains part of BMP signature
assertType("text/plain", "testBMPfp.txt");
}
http://git-wip-us.apache.org/repos/asf/tika/blob/847156ac/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
index 91b054e..817308f 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
@@ -64,7 +64,7 @@ public class AutoDetectParserTest {
private static final String WORD = "application/msword";
private static final String XML = "application/xml";
private static final String RSS = "application/rss+xml";
- private static final String BMP = "image/x-ms-bmp";
+ private static final String BMP = "image/bmp";
private static final String GIF = "image/gif";
private static final String JPEG = "image/jpeg";
private static final String PNG = "image/png";
http://git-wip-us.apache.org/repos/asf/tika/blob/847156ac/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java
----------------------------------------------------------------------
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java b/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java
index 929dbc5..b0b47fc 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java
@@ -63,7 +63,7 @@ public class TikaMimeTypesTest extends CXFTestBase {
assertContains("supertype: video/ogg", text);
- assertContains("alias: image/bmp", text);
+ assertContains("alias: image/x-ms-bmp", text);
}
@Test
@@ -84,7 +84,7 @@ public class TikaMimeTypesTest extends CXFTestBase {
assertContains("Super Type: <a href=\"#video/ogg\">video/ogg", text);
- assertContains("Alias: image/bmp", text);
+ assertContains("Alias: image/x-ms-bmp", text);
}
@Test
@@ -102,13 +102,14 @@ public class TikaMimeTypesTest extends CXFTestBase {
assertEquals(true, json.containsKey("text/plain"));
assertEquals(true, json.containsKey("application/xml"));
assertEquals(true, json.containsKey("video/x-ogm"));
- assertEquals(true, json.containsKey("image/x-ms-bmp"));
+ assertEquals(true, json.containsKey("image/bmp"));
- Map<String, Object> bmp = json.get("image/x-ms-bmp");
+ Map<String, Object> bmp = json.get("image/bmp");
assertEquals(true, bmp.containsKey("alias"));
Object[] aliases = (Object[]) bmp.get("alias");
- assertEquals(1, aliases.length);
- assertEquals("image/bmp", aliases[0]);
+ assertEquals(2, aliases.length);
+ assertEquals("image/x-bmp", aliases[0]);
+ assertEquals("image/x-ms-bmp", aliases[1]);
String whichParser = bmp.get("parser").toString();
assertTrue("Which parser", whichParser.equals("org.apache.tika.parser.ocr.TesseractOCRParser") ||
[2/3] tika git commit: TIKA-2250 As of RFC7903, the official mime type for WMF is now an image one and without the x- prefix
Posted by ni...@apache.org.
TIKA-2250 As of RFC7903, the official mime type for WMF is now an image one and without the x- prefix
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/e6c0082e
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/e6c0082e
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/e6c0082e
Branch: refs/heads/master
Commit: e6c0082e41143a01f0bf646a8a8b6c06a85ca239
Parents: 847156a
Author: Nick Burch <ni...@gagravarr.org>
Authored: Mon Jan 23 18:27:02 2017 +0000
Committer: Nick Burch <ni...@gagravarr.org>
Committed: Mon Jan 23 18:27:02 2017 +0000
----------------------------------------------------------------------
.../org/apache/tika/mime/tika-mimetypes.xml | 22 +++++++++++---------
.../java/org/apache/tika/TikaDetectionTest.java | 3 ++-
.../tika/parser/microsoft/HSLFExtractor.java | 2 +-
.../org/apache/tika/mime/TestMimeTypes.java | 6 +++---
.../AbstractPOIContainerExtractionTest.java | 2 +-
.../apache/tika/parser/rtf/RTFParserTest.java | 2 +-
6 files changed, 20 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/e6c0082e/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 27b3e99..939f4cb 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -3703,16 +3703,6 @@
<glob pattern="*.m13"/>
<glob pattern="*.m14"/>
</mime-type>
- <mime-type type="application/x-msmetafile">
- <alias type="image/x-wmf"/>
- <acronym>WMF</acronym>
- <_comment>Windows Metafile</_comment>
- <glob pattern="*.wmf"/>
- <magic priority="50">
- <match value="0xd7cdc69a0000" type="string" offset="0"/>
- <match value="0x010009000003" type="string" offset="0"/>
- </magic>
- </mime-type>
<mime-type type="application/x-msmoney">
<glob pattern="*.mny"/>
<magic priority="60">
@@ -5204,6 +5194,18 @@
<glob pattern="*.webp"/>
</mime-type>
+ <mime-type type="image/wmf">
+ <alias type="image/x-wmf"/>
+ <alias type="application/x-msmetafile"/>
+ <acronym>WMF</acronym>
+ <_comment>Windows Metafile</_comment>
+ <glob pattern="*.wmf"/>
+ <magic priority="50">
+ <match value="0xd7cdc69a0000" type="string" offset="0"/>
+ <match value="0x010009000003" type="string" offset="0"/>
+ </magic>
+ </mime-type>
+
<mime-type type="image/vnd.xiff">
<glob pattern="*.xif"/>
</mime-type>
http://git-wip-us.apache.org/repos/asf/tika/blob/e6c0082e/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java b/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
index ea8faf5..cf242cf 100644
--- a/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
+++ b/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
@@ -590,7 +590,8 @@ public class TikaDetectionTest {
assertEquals("application/x-msmediaview", tika.detect("x.mvb"));
assertEquals("application/x-msmediaview", tika.detect("x.m13"));
assertEquals("application/x-msmediaview", tika.detect("x.m14"));
- assertEquals("application/x-msmetafile", tika.detect("x.wmf"));
+ // Differ from httpd - wmf was properly registered in RFC 7903
+ //assertEquals("application/x-msmetafile", tika.detect("x.wmf"));
assertEquals("application/x-msmoney", tika.detect("x.mny"));
assertEquals("application/x-mspublisher", tika.detect("x.pub"));
assertEquals("application/x-msschedule", tika.detect("x.scd"));
http://git-wip-us.apache.org/repos/asf/tika/blob/e6c0082e/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
index 6fc949e..8457ec3 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
@@ -330,7 +330,7 @@ public class HSLFExtractor extends AbstractPOIFSExtractor {
mediaType = "application/x-emf";
break;
case WMF:
- mediaType = "application/x-msmetafile";
+ mediaType = "image/wmf";
break;
case DIB:
mediaType = "image/bmp";
http://git-wip-us.apache.org/repos/asf/tika/blob/e6c0082e/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index 904007d..4c5dd7c 100644
--- a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -630,9 +630,9 @@ public class TestMimeTypes {
@Test
public void testWmfDetection() throws Exception {
- assertTypeByName("application/x-msmetafile", "x.wmf");
- assertTypeByData("application/x-msmetafile", "testWMF.wmf");
- assertTypeByName("application/x-msmetafile", "x.WMF");
+ assertTypeByName("image/wmf", "x.wmf");
+ assertTypeByData("image/wmf", "testWMF.wmf");
+ assertTypeByName("image/wmf", "x.WMF");
assertTypeByName("application/x-emf", "x.emf");
assertTypeByData("application/x-emf","testEMF.emf");
http://git-wip-us.apache.org/repos/asf/tika/blob/e6c0082e/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
index f454446..1a2940d 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
@@ -46,7 +46,7 @@ public abstract class AbstractPOIContainerExtractionTest extends TikaTest {
public static final MediaType TYPE_GIF = MediaType.image("gif");
public static final MediaType TYPE_PNG = MediaType.image("png");
public static final MediaType TYPE_EMF = MediaType.application("x-emf");
- public static final MediaType TYPE_WMF = MediaType.application("x-msmetafile");
+ public static final MediaType TYPE_WMF = MediaType.image("wmf");
protected static TikaInputStream getTestFile(String filename) throws Exception {
URL input = AbstractPOIContainerExtractionTest.class.getResource(
http://git-wip-us.apache.org/repos/asf/tika/blob/e6c0082e/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
index bb42361..dc473a1 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
@@ -503,7 +503,7 @@ public class RTFParserTest extends TikaTest {
public void testEmbeddedLinkedDocument() throws Exception {
Set<MediaType> skipTypes = new HashSet<MediaType>();
skipTypes.add(MediaType.parse("application/x-emf"));
- skipTypes.add(MediaType.parse("application/x-msmetafile"));
+ skipTypes.add(MediaType.parse("image/wmf"));
TrackingHandler tracker = new TrackingHandler(skipTypes);
try (TikaInputStream tis = TikaInputStream.get(getResourceAsStream("/test-documents/testRTFEmbeddedLink.rtf"))) {
[3/3] tika git commit: TIKA-2250 As of RFC7903, the official mime type for EMF is now an image one and without the x- prefix
Posted by ni...@apache.org.
TIKA-2250 As of RFC7903, the official mime type for EMF is now an image one and without the x- prefix
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/90bf4f6e
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/90bf4f6e
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/90bf4f6e
Branch: refs/heads/master
Commit: 90bf4f6e4c645240b36ded6973eb64961312fc0a
Parents: e6c0082
Author: Nick Burch <ni...@gagravarr.org>
Authored: Mon Jan 23 18:31:49 2017 +0000
Committer: Nick Burch <ni...@gagravarr.org>
Committed: Mon Jan 23 18:31:49 2017 +0000
----------------------------------------------------------------------
CHANGES.txt | 3 +++
.../org/apache/tika/mime/tika-mimetypes.xml | 26 +++++++++++---------
.../tika/parser/microsoft/HSLFExtractor.java | 2 +-
.../org/apache/tika/mime/TestMimeTypes.java | 7 +++---
.../AbstractPOIContainerExtractionTest.java | 2 +-
.../apache/tika/parser/pdf/PDFParserTest.java | 2 +-
.../apache/tika/parser/rtf/RTFParserTest.java | 2 +-
7 files changed, 25 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/90bf4f6e/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index b44dcf6..4256c69 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,8 @@
Release 1.15 - ??
+ * Official mime types for BMP, EMF and WMF have been registered with
+ IANA, so switch to these (image/bmp image/emf image/wmf) (TIKA-2250)
+
* Be more parsimonious with BufferedInputStreams via Josh Hight
(TIKA-2244).
http://git-wip-us.apache.org/repos/asf/tika/blob/90bf4f6e/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 939f4cb..854de62 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -3268,18 +3268,6 @@
<glob pattern="*.exe"/>
</mime-type>
- <mime-type type="application/x-emf">
- <acronym>EMF</acronym>
- <_comment>Extended Metafile</_comment>
- <tika:link>https://msdn.microsoft.com/en-us/library/cc230711.aspx</tika:link>
- <glob pattern="*.emf"/>
- <magic priority="50">
- <match value="0x01000000" type="string" offset="0">
- <match value="0x464D4520" type="little32" offset="40"/>
- </match>
- </magic>
- </mime-type>
-
<mime-type type="application/x-erdas-hfa">
<magic priority="50">
<match value="EHFA_HEADER_TAG" type="string" offset="0" />
@@ -4891,6 +4879,20 @@
<glob pattern="*.cgm"/>
</mime-type>
+ <mime-type type="image/emf">
+ <alias type="image/x-emf"/>
+ <alias type="application/x-emf"/>
+ <acronym>EMF</acronym>
+ <_comment>Enhanced Metafile</_comment>
+ <tika:link>https://msdn.microsoft.com/en-us/library/cc230711.aspx</tika:link>
+ <glob pattern="*.emf"/>
+ <magic priority="50">
+ <match value="0x01000000" type="string" offset="0">
+ <match value="0x464D4520" type="little32" offset="40"/>
+ </match>
+ </magic>
+ </mime-type>
+
<mime-type type="image/example"/>
<mime-type type="image/fits">
http://git-wip-us.apache.org/repos/asf/tika/blob/90bf4f6e/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
index 8457ec3..c05fda0 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
@@ -327,7 +327,7 @@ public class HSLFExtractor extends AbstractPOIFSExtractor {
switch (pic.getType()) {
case EMF:
- mediaType = "application/x-emf";
+ mediaType = "image/emf";
break;
case WMF:
mediaType = "image/wmf";
http://git-wip-us.apache.org/repos/asf/tika/blob/90bf4f6e/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index 4c5dd7c..40d938e 100644
--- a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -634,9 +634,10 @@ public class TestMimeTypes {
assertTypeByData("image/wmf", "testWMF.wmf");
assertTypeByName("image/wmf", "x.WMF");
- assertTypeByName("application/x-emf", "x.emf");
- assertTypeByData("application/x-emf","testEMF.emf");
- assertTypeByName("application/x-emf", "x.EMF");
+ assertTypeByName("image/emf", "x.emf");
+ assertTypeByData("image/emf", "testEMF.emf");
+ assertTypeByName("image/emf", "x.EMF");
+
// TODO: Need a test wmz file
assertTypeByName("application/x-ms-wmz", "x.wmz");
assertTypeByName("application/x-ms-wmz", "x.WMZ");
http://git-wip-us.apache.org/repos/asf/tika/blob/90bf4f6e/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
index 1a2940d..86657b1 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
@@ -45,7 +45,7 @@ public abstract class AbstractPOIContainerExtractionTest extends TikaTest {
public static final MediaType TYPE_JPG = MediaType.image("jpeg");
public static final MediaType TYPE_GIF = MediaType.image("gif");
public static final MediaType TYPE_PNG = MediaType.image("png");
- public static final MediaType TYPE_EMF = MediaType.application("x-emf");
+ public static final MediaType TYPE_EMF = MediaType.image("emf");
public static final MediaType TYPE_WMF = MediaType.image("wmf");
protected static TikaInputStream getTestFile(String filename) throws Exception {
http://git-wip-us.apache.org/repos/asf/tika/blob/90bf4f6e/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index b017457..51fb9c9 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -73,7 +73,7 @@ import org.xml.sax.ContentHandler;
public class PDFParserTest extends TikaTest {
public static final MediaType TYPE_TEXT = MediaType.TEXT_PLAIN;
- public static final MediaType TYPE_EMF = MediaType.application("x-emf");
+ public static final MediaType TYPE_EMF = MediaType.image("emf");
public static final MediaType TYPE_PDF = MediaType.application("pdf");
public static final MediaType TYPE_DOCX = MediaType.application("vnd.openxmlformats-officedocument.wordprocessingml.document");
public static final MediaType TYPE_DOC = MediaType.application("msword");
http://git-wip-us.apache.org/repos/asf/tika/blob/90bf4f6e/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
index dc473a1..68388b5 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
@@ -502,7 +502,7 @@ public class RTFParserTest extends TikaTest {
@Test
public void testEmbeddedLinkedDocument() throws Exception {
Set<MediaType> skipTypes = new HashSet<MediaType>();
- skipTypes.add(MediaType.parse("application/x-emf"));
+ skipTypes.add(MediaType.parse("image/emf"));
skipTypes.add(MediaType.parse("image/wmf"));
TrackingHandler tracker = new TrackingHandler(skipTypes);