You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2010/11/26 17:52:15 UTC
svn commit: r1039460 - in /tika/trunk/tika-parsers/src:
main/java/org/apache/tika/detect/ZipContainerDetector.java
test/java/org/apache/tika/detect/TestContainerAwareDetector.java
Author: nick
Date: Fri Nov 26 16:52:14 2010
New Revision: 1039460
URL: http://svn.apache.org/viewvc?rev=1039460&view=rev
Log:
When detecting macro-enabled OOXML files, return the media type in the same format as used in mimetypes.xml. Adds unit tests for a few of these types. (TIKA-560)
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/detect/ZipContainerDetector.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/detect/ZipContainerDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/detect/ZipContainerDetector.java?rev=1039460&r1=1039459&r2=1039460&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/detect/ZipContainerDetector.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/detect/ZipContainerDetector.java Fri Nov 26 16:52:14 2010
@@ -83,6 +83,13 @@ public class ZipContainerDetector implem
// Turn that into the type of the overall document
String docType = coreType.substring(0, coreType.lastIndexOf('.'));
+
+ // Macro-enabled formats are special: lower-case the type and append the ".12" suffix
+ if(docType.toLowerCase().endsWith("macroenabled")) {
+ docType = docType.toLowerCase() + ".12";
+ }
+
+ // Build the MediaType object and return
return fromString(docType);
} catch(InvalidFormatException e) {
throw new IOException("Office Open XML File detected, but corrupted - " + e.getMessage());
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java?rev=1039460&r1=1039459&r2=1039460&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java Fri Nov 26 16:52:14 2010
@@ -169,6 +169,25 @@ public class TestContainerAwareDetector
"TikaInputStream should still have a file",
tis.hasFile()
);
+
+ // Check some of the less common OOXML types
+ input = getTestDoc("testPPT.pptm");
+ assertEquals(
+ MediaType.application("vnd.ms-powerpoint.presentation.macroenabled.12"),
+ d.detect(input, new Metadata())
+ );
+
+ input = getTestDoc("testPPT.ppsx");
+ assertEquals(
+ MediaType.application("vnd.openxmlformats-officedocument.presentationml.slideshow"),
+ d.detect(input, new Metadata())
+ );
+
+ input = getTestDoc("testPPT.ppsm");
+ assertEquals(
+ MediaType.application("vnd.ms-powerpoint.slideshow.macroEnabled.12"),
+ d.detect(input, new Metadata())
+ );
}
public void testDetectIWork() throws Exception {