You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2012/05/17 00:05:14 UTC

svn commit: r1339390 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java

Author: nick
Date: Wed May 16 22:05:14 2012
New Revision: 1339390

URL: http://svn.apache.org/viewvc?rev=1339390&view=rev
Log:
TIKA-916 Correctly bail out early for .xps and .thmx files, which are unsupported variants of PPTX, plus tests

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java?rev=1339390&r1=1339389&r2=1339390&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java Wed May 16 22:05:14 2012
@@ -76,7 +76,7 @@ public class OOXMLExtractorFactory {
             
             // Get the type, and ensure it's one we handle
             MediaType type = ZipContainerDetector.detectOfficeOpenXML(pkg);
-            if (type != null && OOXMLParser.UNSUPPORTED_OOXML_TYPES.contains(type)) {
+            if (type == null || OOXMLParser.UNSUPPORTED_OOXML_TYPES.contains(type)) {
                // Not a supported type, delegate to Empty Parser 
                EmptyParser.INSTANCE.parse(stream, baseHandler, metadata, context);
                return;

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java?rev=1339390&r1=1339389&r2=1339390&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java Wed May 16 22:05:14 2012
@@ -151,11 +151,11 @@ public class OOXMLParserTest extends Tik
      *  such as presentation, macro-enabled etc
      */
     public void testPowerPoint() throws Exception {
-	String[] extensions = new String[] {
-		"pptx", "pptm", "ppsm", "ppsx", "potm"
-		//"thmx", // TIKA-418: Will be supported in POI 3.7 beta 2 
-		//"xps" // TIKA-418: Not yet supported by POI
-	};
+       String[] extensions = new String[] {
+             "pptx", "pptm", "ppsm", "ppsx", "potm"
+             //"thmx", // TIKA-418: Will be supported in POI 3.7 beta 2 
+             //"xps" // TIKA-418: Not yet supported by POI
+       };
 
         String[] mimeTypes = new String[] {
                 "application/vnd.openxmlformats-officedocument.presentationml.presentation",
@@ -216,7 +216,46 @@ public class OOXMLParserTest extends Tik
             } finally {
                 input.close();
             }
-	}
+        }
+    }
+    
+    /**
+     * Checks that the unsupported PowerPoint-family formats (.xps, .thmx)
+     *  parse without error and still report the expected content type.
+     */
+    public void testUnsupportedPowerPoint() throws Exception {
+       String[] extensions = new String[] { "xps", "thmx" };
+       String[] mimeTypes = new String[] {
+             "application/vnd.ms-xpsdocument",
+             "application/vnd.openxmlformats-officedocument" // TODO confirm this is right for .thmx
+       };
+
+       for (int i=0; i<extensions.length; i++) {
+          String extension = extensions[i];
+          String filename = "testPPT." + extension;
+          String mimetype = mimeTypes[i];
+
+          Parser parser = new AutoDetectParser();
+          Metadata metadata = new Metadata();
+          metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
+          ContentHandler handler = new BodyContentHandler();
+          ParseContext context = new ParseContext();
+
+          InputStream input = getTestDocument(filename);
+          try {
+              parser.parse(input, handler, metadata, context);
+
+              // Should get the metadata
+              assertEquals(
+                    "Mime-type checking for " + filename,
+                    mimetype,
+                    metadata.get(Metadata.CONTENT_TYPE));
+
+              // But that's about it: no text content is checked for these formats
+          } finally {
+             input.close();
+          }
+       }
     }
     
     /**