You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2017/01/13 16:55:55 UTC

tika git commit: TIKA-2238 -- add mime detection for embedded MSEquation files

Repository: tika
Updated Branches:
  refs/heads/master 526fc08f2 -> c9639bd19


TIKA-2238 -- add mime detection for embedded MSEquation files


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/c9639bd1
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/c9639bd1
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/c9639bd1

Branch: refs/heads/master
Commit: c9639bd19d23ecf5b1a1c3907d6ed1874d84d736
Parents: 526fc08
Author: tballison <ta...@mitre.org>
Authored: Fri Jan 13 11:55:46 2017 -0500
Committer: tballison <ta...@mitre.org>
Committed: Fri Jan 13 11:55:46 2017 -0500

----------------------------------------------------------------------
 .../parser/microsoft/POIFSContainerDetector.java   |   8 ++++++++
 .../microsoft/POIContainerExtractionTest.java      |   8 ++++++++
 .../testMSEquation-govdos-863534.doc               | Bin 0 -> 30720 bytes
 3 files changed, 16 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/c9639bd1/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
index 992692f..703f269 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
@@ -72,6 +72,12 @@ public class POIFSContainerDetector implements Detector {
      * Graph/Charts embedded in PowerPoint and Excel
      */
     public static final MediaType MS_GRAPH_CHART = application("vnd.ms-graph");
+
+    /**
+     * Equation embedded in Office docs
+     */
+    public static final MediaType MS_EQUATION = application("vnd.ms-equation");
+
     /**
      * Microsoft Excel
      */
@@ -300,6 +306,8 @@ public class POIFSContainerDetector implements Detector {
                 }
             } else if (names.contains("NativeContent_MAIN")) {
                 return new MediaType(QUATTROPRO, "version", "9"); // .qpw
+            } else if (names.contains("Equation Native")) {
+                return MS_EQUATION;
             } else {
                 for (String name : names) {
                     if (name.startsWith("__substg1.0_")) {

http://git-wip-us.apache.org/repos/asf/tika/blob/c9639bd1/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
index b59bb00..1a40479 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
@@ -379,4 +379,12 @@ public class POIContainerExtractionTest extends AbstractPOIContainerExtractionTe
             assertTrue("didn't find chart in "+suffix, found);
         }
     }
+
+    @Test
+    public void testEmbeddedEquation() throws Exception {
+        //file derives from govdocs1 863534.doc
+        List<Metadata> metadataList = getRecursiveMetadata("testMSEquation-govdos-863534.doc");
+        assertEquals(3, metadataList.size());
+        assertEquals("application/vnd.ms-equation", metadataList.get(2).get(Metadata.CONTENT_TYPE));
+    }
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/c9639bd1/tika-parsers/src/test/resources/test-documents/testMSEquation-govdos-863534.doc
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/resources/test-documents/testMSEquation-govdos-863534.doc b/tika-parsers/src/test/resources/test-documents/testMSEquation-govdos-863534.doc
new file mode 100644
index 0000000..bede30e
Binary files /dev/null and b/tika-parsers/src/test/resources/test-documents/testMSEquation-govdos-863534.doc differ