You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2014/05/08 18:32:27 UTC

svn commit: r1593322 - in /tika/trunk: tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java

Author: nick
Date: Thu May  8 16:32:27 2014
New Revision: 1593322

URL: http://svn.apache.org/r1593322
Log:
Mimetype for the OPC based DWFX format, and detector support for it. TIKA-1204

Modified:
    tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java

Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1593322&r1=1593321&r2=1593322&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Thu May  8 16:32:27 2014
@@ -4609,6 +4609,12 @@
   <mime-type type="model/vnd.dwf">
     <glob pattern="*.dwf"/>
   </mime-type>
+  <mime-type type="model/vnd.dwfx+xps">
+    <_comment>AutoCAD Design Web Format</_comment>
+    <glob pattern="*.dwfx"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
   <mime-type type="model/vnd.flatland.3dml"/>
   <mime-type type="model/vnd.gdl">
     <glob pattern="*.gdl"/>

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java?rev=1593322&r1=1593321&r2=1593322&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java Thu May  8 16:32:27 2014
@@ -134,7 +134,7 @@ public class ZipContainerDetector implem
             try {
                 MediaType type = detectOpenDocument(zip);
                 if (type == null) {
-                    type = detectOfficeOpenXML(zip, tis);
+                    type = detectOPCBased(zip, tis);
                 }
                 if (type == null) {
                     type = detectIWork(zip);
@@ -192,7 +192,7 @@ public class ZipContainerDetector implem
         }
     }
 
-    private static MediaType detectOfficeOpenXML(ZipFile zip, TikaInputStream stream) {
+    private static MediaType detectOPCBased(ZipFile zip, TikaInputStream stream) {
         try {
             if (zip.getEntry("_rels/.rels") != null
                     || zip.getEntry("[Content_Types].xml") != null) {
@@ -200,8 +200,16 @@ public class ZipContainerDetector implem
                 OPCPackage pkg = OPCPackage.open(stream.getFile().getPath(), PackageAccess.READ);
                 stream.setOpenContainer(pkg);
 
-                // Detect based on the open OPC Package
-                return detectOfficeOpenXML(pkg);
+                // Is it an OOXML format?
+                MediaType type = detectOfficeOpenXML(pkg);
+                if (type != null) return type;
+                
+                // Is it an AutoCAD format?
+                type = detectAutoCADOPC(pkg);
+                if (type != null) return type;
+                
+                // We don't know what it is, sorry
+                return null;
             } else {
                 return null;
             }
@@ -244,6 +252,19 @@ public class ZipContainerDetector implem
         // Build the MediaType object and return
         return MediaType.parse(docType);
     }
+    /**
+     * Detects AutoCAD formats that live in OPC packaging: yields model/vnd.dwfx+xps when the package has exactly one DWFX document sequence relationship, otherwise null
+     */
+    private static MediaType detectAutoCADOPC(OPCPackage pkg) {
+        PackageRelationshipCollection dwfxSeq = 
+                pkg.getRelationshipsByType("http://schemas.autodesk.com/dwfx/2007/relationships/documentsequence");
+        if (dwfxSeq.size() == 1) {
+            return MediaType.parse("model/vnd.dwfx+xps");
+        } else {
+            // Zero or several DWFX document sequence relationships: not reported as an AutoCAD package
+            return null;
+        }
+    }
 
     private static MediaType detectIWork(ZipFile zip) {
         if (zip.getEntry(IWorkPackageParser.IWORK_COMMON_ENTRY) != null) {