You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2012/05/17 00:05:14 UTC
svn commit: r1339390 - in /tika/trunk/tika-parsers/src:
main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
Author: nick
Date: Wed May 16 22:05:14 2012
New Revision: 1339390
URL: http://svn.apache.org/viewvc?rev=1339390&view=rev
Log:
TIKA-916 Correctly bail out early for .xps and .thmx files, which are an unsupported variant of PPTX, plus tests
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java?rev=1339390&r1=1339389&r2=1339390&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java Wed May 16 22:05:14 2012
@@ -76,7 +76,7 @@ public class OOXMLExtractorFactory {
// Get the type, and ensure it's one we handle
MediaType type = ZipContainerDetector.detectOfficeOpenXML(pkg);
- if (type != null && OOXMLParser.UNSUPPORTED_OOXML_TYPES.contains(type)) {
+ if (type == null || OOXMLParser.UNSUPPORTED_OOXML_TYPES.contains(type)) {
// Not a supported type, delegate to Empty Parser
EmptyParser.INSTANCE.parse(stream, baseHandler, metadata, context);
return;
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java?rev=1339390&r1=1339389&r2=1339390&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java Wed May 16 22:05:14 2012
@@ -151,11 +151,11 @@ public class OOXMLParserTest extends Tik
* such as presentation, macro-enabled etc
*/
public void testPowerPoint() throws Exception {
- String[] extensions = new String[] {
- "pptx", "pptm", "ppsm", "ppsx", "potm"
- //"thmx", // TIKA-418: Will be supported in POI 3.7 beta 2
- //"xps" // TIKA-418: Not yet supported by POI
- };
+ String[] extensions = new String[] {
+ "pptx", "pptm", "ppsm", "ppsx", "potm"
+ //"thmx", // TIKA-418: Will be supported in POI 3.7 beta 2
+ //"xps" // TIKA-418: Not yet supported by POI
+ };
String[] mimeTypes = new String[] {
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
@@ -216,7 +216,46 @@ public class OOXMLParserTest extends Tik
} finally {
input.close();
}
- }
+ }
+ }
+
+ /**
+ * For the PowerPoint formats we don't currently support, ensure that
+ * we don't break either
+ */
+ public void testUnsupportedPowerPoint() throws Exception {
+ String[] extensions = new String[] { "xps", "thmx" };
+ String[] mimeTypes = new String[] {
+ "application/vnd.ms-xpsdocument",
+ "application/vnd.openxmlformats-officedocument" // Is this right?
+ };
+
+ for (int i=0; i<extensions.length; i++) {
+ String extension = extensions[i];
+ String filename = "testPPT." + extension;
+ String mimetype = mimeTypes[i];
+
+ Parser parser = new AutoDetectParser();
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
+ ContentHandler handler = new BodyContentHandler();
+ ParseContext context = new ParseContext();
+
+ InputStream input = getTestDocument(filename);
+ try {
+ parser.parse(input, handler, metadata, context);
+
+ // Should get the metadata
+ assertEquals(
+ "Mime-type checking for " + filename,
+ mimeTypes[i],
+ metadata.get(Metadata.CONTENT_TYPE));
+
+ // But that's about it
+ } finally {
+ input.close();
+ }
+ }
}
/**