You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2010/11/26 17:52:15 UTC

svn commit: r1039460 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/detect/ZipContainerDetector.java test/java/org/apache/tika/detect/TestContainerAwareDetector.java

Author: nick
Date: Fri Nov 26 16:52:14 2010
New Revision: 1039460

URL: http://svn.apache.org/viewvc?rev=1039460&view=rev
Log:
When detecting macro-enabled OOXML files, return the media type in the same format as used in mimetypes.xml. Adds unit tests for a few of these. (TIKA-560)

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/detect/ZipContainerDetector.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/detect/ZipContainerDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/detect/ZipContainerDetector.java?rev=1039460&r1=1039459&r2=1039460&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/detect/ZipContainerDetector.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/detect/ZipContainerDetector.java Fri Nov 26 16:52:14 2010
@@ -83,6 +83,13 @@ public class ZipContainerDetector implem
 
                 // Turn that into the type of the overall document
                 String docType = coreType.substring(0, coreType.lastIndexOf('.'));
+                
+                // The Macro Enabled formats are a little special
+                if(docType.toLowerCase().endsWith("macroenabled")) {
+                   docType = docType.toLowerCase() + ".12";
+                }
+                
+                // Build the MediaType object and return
                 return fromString(docType);
              } catch(InvalidFormatException e) {
                 throw new IOException("Office Open XML File detected, but corrupted - " + e.getMessage());

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java?rev=1039460&r1=1039459&r2=1039460&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java Fri Nov 26 16:52:14 2010
@@ -169,6 +169,25 @@ public class TestContainerAwareDetector 
               "TikaInputStream should still have a file",
               tis.hasFile()
         );
+        
+        // Check some of the less common OOXML types
+        input = getTestDoc("testPPT.pptm");
+        assertEquals(
+              MediaType.application("vnd.ms-powerpoint.presentation.macroenabled.12"),
+              d.detect(input, new Metadata())
+        );
+        
+        input = getTestDoc("testPPT.ppsx");
+        assertEquals(
+              MediaType.application("vnd.openxmlformats-officedocument.presentationml.slideshow"),
+              d.detect(input, new Metadata())
+        );
+
+        input = getTestDoc("testPPT.ppsm");
+        assertEquals(
+              MediaType.application("vnd.ms-powerpoint.slideshow.macroEnabled.12"),
+              d.detect(input, new Metadata())
+        );
     }
     
     public void testDetectIWork() throws Exception {