You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2011/04/27 11:16:17 UTC

svn commit: r1097047 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java test/resources/test-documents/jxl.xls

Author: maxcom
Date: Wed Apr 27 09:16:16 2011
New Revision: 1097047

URL: http://svn.apache.org/viewvc?rev=1097047&view=rev
Log:
Office: SummaryExtractor: do not fail on files without a property stream (the original faulty file was generated by the Java Excel API library)

Added:
    tika/trunk/tika-parsers/src/test/resources/test-documents/jxl.xls   (with props)
Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java?rev=1097047&r1=1097046&r2=1097047&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java Wed Apr 27 09:16:16 2011
@@ -75,7 +75,7 @@ class SummaryExtractor {
         } catch (FileNotFoundException e) {
             // entry does not exist, just skip it
         } catch (NoPropertySetStreamException e) {
-            throw new TikaException("Not a HPSF document", e);
+            // no property stream, just skip it
         } catch (UnexpectedPropertySetTypeException e) {
             throw new TikaException("Unexpected HPSF document", e);
         } catch (MarkUnsupportedException e) {

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java?rev=1097047&r1=1097046&r2=1097047&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java Wed Apr 27 09:16:16 2011
@@ -165,4 +165,25 @@ public class ExcelParserTest extends Tes
             input.close();
         }
     }
+
+    public void testJXL() throws Exception {
+        InputStream input = ExcelParserTest.class.getResourceAsStream(
+                "/test-documents/jxl.xls");
+        try {
+            Metadata metadata = new Metadata();
+            ContentHandler handler = new BodyContentHandler(-1);
+            ParseContext context = new ParseContext();
+            context.set(Locale.class, Locale.US);
+            new OfficeParser().parse(input, handler, metadata, context);
+
+            assertEquals(
+                    "application/vnd.ms-excel",
+                    metadata.get(Metadata.CONTENT_TYPE));
+            String content = handler.toString();
+            assertTrue(content.contains("Number Formats"));
+        } finally {
+            input.close();
+        }
+    }
+
 }

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/jxl.xls
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/jxl.xls?rev=1097047&view=auto
==============================================================================
Binary file - no diff available.

Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/jxl.xls
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream