You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2024/03/25 19:00:21 UTC

(tika) branch TIKA-4224 created (now 2a5a8266b)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch TIKA-4224
in repository https://gitbox.apache.org/repos/asf/tika.git


      at 2a5a8266b TIKA-4224 -- add detection for 3mf

This branch includes the following new commits:

     new 2a5a8266b TIKA-4224 -- add detection for 3mf

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



(tika) 01/01: TIKA-4224 -- add detection for 3mf

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4224
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 2a5a8266b3b78a5ea013353a7bec4010e02adcda
Author: tallison <ta...@apache.org>
AuthorDate: Mon Mar 25 15:00:07 2024 -0400

    TIKA-4224 -- add detection for 3mf
---
 .../org/apache/tika/mime/tika-mimetypes.xml        |   6 +++
 .../detect/microsoft/ooxml/OPCPackageDetector.java |  47 +++++++++++++--------
 .../tika/detect/TestContainerAwareDetector.java    |   5 +++
 .../src/test/resources/test-documents/test3mf.3mf  | Bin 0 -> 28243 bytes
 4 files changed, 41 insertions(+), 17 deletions(-)

diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 7176332ef..f6e974946 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -2065,6 +2065,12 @@
     <glob pattern="*.ost"/>
   </mime-type>
 
+  <mime-type type="application/vnd.ms-package.3dmanufacturing-3dmodel+xml">
+    <tika:link>https://en.wikipedia.org/wiki/3D_Manufacturing_Format</tika:link>
+    <_comment>3D manufacturing format</_comment>
+    <glob pattern="*.3mf"/>
+  </mime-type>
+
   <mime-type type="application/vnd.ms-pki.seccat">
     <glob pattern="*.cat"/>
   </mime-type>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java
index cdef864e0..369ba475c 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java
@@ -88,6 +88,9 @@ public class OPCPackageDetector implements ZipContainerDetector {
             MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.template");
     static final MediaType XLAM = MediaType.application("vnd.ms-excel.addin.macroEnabled.12");
     static final MediaType XPS = MediaType.application("vnd.ms-xpsdocument");
+
+    static final MediaType THREE_MF = MediaType.application("vnd.ms-package.3dmanufacturing-3dmodel+xml");
+
     static final Set<String> OOXML_HINTS =
             fillSet("word/document.xml", "_rels/.rels", "[Content_Types].xml",
                     "ppt/presentation.xml", "ppt/slides/slide1.xml", "xl/workbook.xml",
@@ -100,6 +103,8 @@ public class OPCPackageDetector implements ZipContainerDetector {
             "http://schemas.openxps.org/oxps/v1.0/fixedrepresentation";
     private static final String STAR_OFFICE_6_WRITER = "application/vnd.sun.xml.writer";
 
+    private static final String THREE_MF_DOCUMENT =
+            "http://schemas.microsoft.com/3dmanufacturing/2013/01/3dmodel";
     static Map<String, MediaType> OOXML_CONTENT_TYPES = new ConcurrentHashMap<>();
 
     static {
@@ -153,29 +158,37 @@ public class OPCPackageDetector implements ZipContainerDetector {
         // Check for the normal Office core document
         PackageRelationshipCollection core =
                 pkg.getRelationshipsByType(PackageRelationshipTypes.CORE_DOCUMENT);
+
         // Otherwise check for some other Office core document types
         if (core.size() == 0) {
             core = pkg.getRelationshipsByType(PackageRelationshipTypes.STRICT_CORE_DOCUMENT);
-        }
-        if (core.size() == 0) {
-            core = pkg.getRelationshipsByType(PackageRelationshipTypes.VISIO_CORE_DOCUMENT);
-        }
-        if (core.size() == 0) {
-            core = pkg.getRelationshipsByType(XPS_DOCUMENT);
-            if (core.size() == 1) {
-                return MediaType.application("vnd.ms-xpsdocument");
+
+            if (core.size() == 0) {
+                core = pkg.getRelationshipsByType(PackageRelationshipTypes.VISIO_CORE_DOCUMENT);
             }
-            core = pkg.getRelationshipsByType(OPEN_XPS_DOCUMENT);
-            if (core.size() == 1) {
-                return MediaType.application("vnd.ms-xpsdocument");
+            if (core.size() == 0) {
+                core = pkg.getRelationshipsByType(XPS_DOCUMENT);
+                if (core.size() == 1) {
+                    return MediaType.application("vnd.ms-xpsdocument");
+                }
+                core = pkg.getRelationshipsByType(OPEN_XPS_DOCUMENT);
+                if (core.size() == 1) {
+                    return MediaType.application("vnd.ms-xpsdocument");
+                }
             }
-        }
 
-        if (core.size() == 0) {
-            core = pkg.getRelationshipsByType(
-                    "http://schemas.autodesk.com/dwfx/2007/relationships/documentsequence");
-            if (core.size() == 1) {
-                return MediaType.parse("model/vnd.dwfx+xps");
+            if (core.size() == 0) {
+                core = pkg.getRelationshipsByType(
+                        "http://schemas.autodesk.com/dwfx/2007/relationships/documentsequence");
+                if (core.size() == 1) {
+                    return MediaType.parse("model/vnd.dwfx+xps");
+                }
+            }
+            if (core.size() == 0) {
+                core = pkg.getRelationshipsByType(THREE_MF_DOCUMENT);
+                if (core.size() == 1) {
+                    return THREE_MF;
+                }
             }
         }
         // If we didn't find a single core document of any type, skip detection
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
index 9ad968b9c..d35df67bf 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
@@ -262,6 +262,11 @@ public class TestContainerAwareDetector extends MultiThreadedTikaTest {
         assertTypeByData("testODTnotaZipFile.odt", "text/plain");
     }
 
+    @Test
+    public void test3MF() throws Exception {
+        assertTypeByData("test3mf.3mf", "application/vnd.ms-package.3dmanufacturing-3dmodel+xml");
+        assertTypeByNameAndData("test3mf.3mf", "application/vnd.ms-package.3dmanufacturing-3dmodel+xml");
+    }
     @Test
     public void testODFDifferentOrder() throws Exception {
         //TIKA-3356
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/test3mf.3mf b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/test3mf.3mf
new file mode 100644
index 000000000..f7d0cf5a7
Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/test3mf.3mf differ