You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2024/03/25 21:06:50 UTC
(tika) branch main updated: TIKA-4224 -- add detection for 3mf (#1689)
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 3ffbc04f7 TIKA-4224 -- add detection for 3mf (#1689)
3ffbc04f7 is described below
commit 3ffbc04f7a1023aa8e6d5ea22d19feb2a7e61a8f
Author: Tim Allison <ta...@apache.org>
AuthorDate: Mon Mar 25 17:06:45 2024 -0400
TIKA-4224 -- add detection for 3mf (#1689)
---
.../org/apache/tika/mime/tika-mimetypes.xml | 6 +++
.../detect/microsoft/ooxml/OPCPackageDetector.java | 47 +++++++++++++--------
.../tika/detect/TestContainerAwareDetector.java | 5 +++
.../src/test/resources/test-documents/test3mf.3mf | Bin 0 -> 28243 bytes
4 files changed, 41 insertions(+), 17 deletions(-)
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index fed46b858..df483de9d 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -2065,6 +2065,12 @@
<glob pattern="*.ost"/>
</mime-type>
+ <mime-type type="application/vnd.ms-package.3dmanufacturing-3dmodel+xml">
+ <tika:link>https://en.wikipedia.org/wiki/3D_Manufacturing_Format</tika:link>
+ <_comment>3D manufacturing format</_comment>
+ <glob pattern="*.3mf"/>
+ </mime-type>
+
<mime-type type="application/vnd.ms-pki.seccat">
<glob pattern="*.cat"/>
</mime-type>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java
index cdef864e0..369ba475c 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java
@@ -88,6 +88,9 @@ public class OPCPackageDetector implements ZipContainerDetector {
MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.template");
static final MediaType XLAM = MediaType.application("vnd.ms-excel.addin.macroEnabled.12");
static final MediaType XPS = MediaType.application("vnd.ms-xpsdocument");
+
+ static final MediaType THREE_MF = MediaType.application("vnd.ms-package.3dmanufacturing-3dmodel+xml");
+
static final Set<String> OOXML_HINTS =
fillSet("word/document.xml", "_rels/.rels", "[Content_Types].xml",
"ppt/presentation.xml", "ppt/slides/slide1.xml", "xl/workbook.xml",
@@ -100,6 +103,8 @@ public class OPCPackageDetector implements ZipContainerDetector {
"http://schemas.openxps.org/oxps/v1.0/fixedrepresentation";
private static final String STAR_OFFICE_6_WRITER = "application/vnd.sun.xml.writer";
+ private static final String THREE_MF_DOCUMENT =
+ "http://schemas.microsoft.com/3dmanufacturing/2013/01/3dmodel";
static Map<String, MediaType> OOXML_CONTENT_TYPES = new ConcurrentHashMap<>();
static {
@@ -153,29 +158,37 @@ public class OPCPackageDetector implements ZipContainerDetector {
// Check for the normal Office core document
PackageRelationshipCollection core =
pkg.getRelationshipsByType(PackageRelationshipTypes.CORE_DOCUMENT);
+
// Otherwise check for some other Office core document types
if (core.size() == 0) {
core = pkg.getRelationshipsByType(PackageRelationshipTypes.STRICT_CORE_DOCUMENT);
- }
- if (core.size() == 0) {
- core = pkg.getRelationshipsByType(PackageRelationshipTypes.VISIO_CORE_DOCUMENT);
- }
- if (core.size() == 0) {
- core = pkg.getRelationshipsByType(XPS_DOCUMENT);
- if (core.size() == 1) {
- return MediaType.application("vnd.ms-xpsdocument");
+
+ if (core.size() == 0) {
+ core = pkg.getRelationshipsByType(PackageRelationshipTypes.VISIO_CORE_DOCUMENT);
}
- core = pkg.getRelationshipsByType(OPEN_XPS_DOCUMENT);
- if (core.size() == 1) {
- return MediaType.application("vnd.ms-xpsdocument");
+ if (core.size() == 0) {
+ core = pkg.getRelationshipsByType(XPS_DOCUMENT);
+ if (core.size() == 1) {
+ return MediaType.application("vnd.ms-xpsdocument");
+ }
+ core = pkg.getRelationshipsByType(OPEN_XPS_DOCUMENT);
+ if (core.size() == 1) {
+ return MediaType.application("vnd.ms-xpsdocument");
+ }
}
- }
- if (core.size() == 0) {
- core = pkg.getRelationshipsByType(
- "http://schemas.autodesk.com/dwfx/2007/relationships/documentsequence");
- if (core.size() == 1) {
- return MediaType.parse("model/vnd.dwfx+xps");
+ if (core.size() == 0) {
+ core = pkg.getRelationshipsByType(
+ "http://schemas.autodesk.com/dwfx/2007/relationships/documentsequence");
+ if (core.size() == 1) {
+ return MediaType.parse("model/vnd.dwfx+xps");
+ }
+ }
+ if (core.size() == 0) {
+ core = pkg.getRelationshipsByType(THREE_MF_DOCUMENT);
+ if (core.size() == 1) {
+ return THREE_MF;
+ }
}
}
// If we didn't find a single core document of any type, skip detection
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
index 9ad968b9c..d35df67bf 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
@@ -262,6 +262,11 @@ public class TestContainerAwareDetector extends MultiThreadedTikaTest {
assertTypeByData("testODTnotaZipFile.odt", "text/plain");
}
+ @Test
+ public void test3MF() throws Exception {
+ assertTypeByData("test3mf.3mf", "application/vnd.ms-package.3dmanufacturing-3dmodel+xml");
+ assertTypeByNameAndData("test3mf.3mf", "application/vnd.ms-package.3dmanufacturing-3dmodel+xml");
+ }
@Test
public void testODFDifferentOrder() throws Exception {
//TIKA-3356
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/test3mf.3mf b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/test3mf.3mf
new file mode 100644
index 000000000..f7d0cf5a7
Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/test3mf.3mf differ