You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2017/01/13 16:55:55 UTC
tika git commit: TIKA-2238 -- add mime detection for embedded MSEquation files
Repository: tika
Updated Branches:
refs/heads/master 526fc08f2 -> c9639bd19
TIKA-2238 -- add mime detection for embedded MSEquation files
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/c9639bd1
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/c9639bd1
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/c9639bd1
Branch: refs/heads/master
Commit: c9639bd19d23ecf5b1a1c3907d6ed1874d84d736
Parents: 526fc08
Author: tballison <ta...@mitre.org>
Authored: Fri Jan 13 11:55:46 2017 -0500
Committer: tballison <ta...@mitre.org>
Committed: Fri Jan 13 11:55:46 2017 -0500
----------------------------------------------------------------------
.../parser/microsoft/POIFSContainerDetector.java | 8 ++++++++
.../microsoft/POIContainerExtractionTest.java | 8 ++++++++
.../testMSEquation-govdos-863534.doc | Bin 0 -> 30720 bytes
3 files changed, 16 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/c9639bd1/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
index 992692f..703f269 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
@@ -72,6 +72,12 @@ public class POIFSContainerDetector implements Detector {
* Graph/Charts embedded in PowerPoint and Excel
*/
public static final MediaType MS_GRAPH_CHART = application("vnd.ms-graph");
+
+ /**
+ * Equation embedded in Office docs
+ */
+ public static final MediaType MS_EQUATION = application("vnd.ms-equation");
+
/**
* Microsoft Excel
*/
@@ -300,6 +306,8 @@ public class POIFSContainerDetector implements Detector {
}
} else if (names.contains("NativeContent_MAIN")) {
return new MediaType(QUATTROPRO, "version", "9"); // .qpw
+ } else if (names.contains("Equation Native")) {
+ return MS_EQUATION;
} else {
for (String name : names) {
if (name.startsWith("__substg1.0_")) {
http://git-wip-us.apache.org/repos/asf/tika/blob/c9639bd1/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
index b59bb00..1a40479 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
@@ -379,4 +379,12 @@ public class POIContainerExtractionTest extends AbstractPOIContainerExtractionTe
assertTrue("didn't find chart in "+suffix, found);
}
}
+
+ @Test
+ public void testEmbeddedEquation() throws Exception {
+ //file derives from govdocs1 863534.doc
+ List<Metadata> metadataList = getRecursiveMetadata("testMSEquation-govdos-863534.doc");
+ assertEquals(3, metadataList.size());
+ assertEquals("application/vnd.ms-equation", metadataList.get(2).get(Metadata.CONTENT_TYPE));
+ }
}
http://git-wip-us.apache.org/repos/asf/tika/blob/c9639bd1/tika-parsers/src/test/resources/test-documents/testMSEquation-govdos-863534.doc
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/resources/test-documents/testMSEquation-govdos-863534.doc b/tika-parsers/src/test/resources/test-documents/testMSEquation-govdos-863534.doc
new file mode 100644
index 0000000..bede30e
Binary files /dev/null and b/tika-parsers/src/test/resources/test-documents/testMSEquation-govdos-863534.doc differ