You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2014/11/30 18:19:45 UTC

svn commit: r1642570 - /tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java

Author: nick
Date: Sun Nov 30 17:19:45 2014
New Revision: 1642570

URL: http://svn.apache.org/r1642570
Log:
Add a TODO for TIKA-1490

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java?rev=1642570&r1=1642569&r2=1642570&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java Sun Nov 30 17:19:45 2014
@@ -96,6 +96,7 @@ public class ExcelExtractor extends Abst
     private boolean listenForAllRecords = false;
     
     private static final String WORKBOOK_ENTRY = "Workbook";
+    private static final String BOOK_ENTRY = "Book";
 
     public ExcelExtractor(ParseContext context) {
         super(context);
@@ -143,8 +144,15 @@ public class ExcelExtractor extends Abst
             DirectoryNode root, XHTMLContentHandler xhtml,
             Locale locale) throws IOException, SAXException, TikaException {
         if (! root.hasEntry(WORKBOOK_ENTRY)) {
-           // Corrupt file / very old file, just skip
-           return;
+            if (root.hasEntry(BOOK_ENTRY)) {
+                // Excel 5 / Excel 95 file
+                // Records are in a different structure, so they need a
+                //  different parser to process them
+                // TODO Call one, see TIKA-1490
+            } else {
+               // Corrupt file / very old file, just skip text extraction
+               return;
+            }
         }
        
         TikaHSSFListener listener = new TikaHSSFListener(xhtml, locale, this);
@@ -610,5 +618,4 @@ public class ExcelExtractor extends Abst
         }
 
     }
-
 }