You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2014/05/08 18:32:27 UTC
svn commit: r1593322 - in /tika/trunk:
tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
Author: nick
Date: Thu May 8 16:32:27 2014
New Revision: 1593322
URL: http://svn.apache.org/r1593322
Log:
Mimetype for the OPC based DWFX format, and detector support for it. TIKA-1204
Modified:
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1593322&r1=1593321&r2=1593322&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Thu May 8 16:32:27 2014
@@ -4609,6 +4609,12 @@
<mime-type type="model/vnd.dwf">
<glob pattern="*.dwf"/>
</mime-type>
+ <mime-type type="model/vnd.dwfx+xps">
+ <_comment>AutoCAD Design Web Format</_comment>
+ <glob pattern="*.dwfx"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
+ </mime-type>
+
<mime-type type="model/vnd.flatland.3dml"/>
<mime-type type="model/vnd.gdl">
<glob pattern="*.gdl"/>
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java?rev=1593322&r1=1593321&r2=1593322&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java Thu May 8 16:32:27 2014
@@ -134,7 +134,7 @@ public class ZipContainerDetector implem
try {
MediaType type = detectOpenDocument(zip);
if (type == null) {
- type = detectOfficeOpenXML(zip, tis);
+ type = detectOPCBased(zip, tis);
}
if (type == null) {
type = detectIWork(zip);
@@ -192,7 +192,7 @@ public class ZipContainerDetector implem
}
}
- private static MediaType detectOfficeOpenXML(ZipFile zip, TikaInputStream stream) {
+ private static MediaType detectOPCBased(ZipFile zip, TikaInputStream stream) {
try {
if (zip.getEntry("_rels/.rels") != null
|| zip.getEntry("[Content_Types].xml") != null) {
@@ -200,8 +200,16 @@ public class ZipContainerDetector implem
OPCPackage pkg = OPCPackage.open(stream.getFile().getPath(), PackageAccess.READ);
stream.setOpenContainer(pkg);
- // Detect based on the open OPC Package
- return detectOfficeOpenXML(pkg);
+ // Is it an OOXML format?
+ MediaType type = detectOfficeOpenXML(pkg);
+ if (type != null) return type;
+
+ // Is it an AutoCAD format?
+ type = detectAutoCADOPC(pkg);
+ if (type != null) return type;
+
+ // We don't know what it is, sorry
+ return null;
} else {
return null;
}
@@ -244,6 +252,19 @@ public class ZipContainerDetector implem
// Build the MediaType object and return
return MediaType.parse(docType);
}
+ /**
+ * Detects AutoCAD formats that live in OPC packaging.
+ * <p>
+ * Looks up the package-level relationships of type
+ * <code>http://schemas.autodesk.com/dwfx/2007/relationships/documentsequence</code>
+ * and reports DWFX only when exactly one such relationship is present.
+ *
+ * @param pkg the OPC package to inspect
+ * @return the <code>model/vnd.dwfx+xps</code> media type when exactly one
+ *         document sequence relationship is found, otherwise <code>null</code>
+ */
+ private static MediaType detectAutoCADOPC(OPCPackage pkg) {
+ PackageRelationshipCollection dwfxSeq =
+ pkg.getRelationshipsByType("http://schemas.autodesk.com/dwfx/2007/relationships/documentsequence");
+ if (dwfxSeq.size() == 1) {
+ return MediaType.parse("model/vnd.dwfx+xps");
+ } else {
+ // Zero or more than one document sequence relationship: not reported as DWFX
+ return null;
+ }
+ }
private static MediaType detectIWork(ZipFile zip) {
if (zip.getEntry(IWorkPackageParser.IWORK_COMMON_ENTRY) != null) {