You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2014/11/30 18:19:45 UTC
svn commit: r1642570 -
/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
Author: nick
Date: Sun Nov 30 17:19:45 2014
New Revision: 1642570
URL: http://svn.apache.org/r1642570
Log:
Add a TODO for TIKA-1490
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java?rev=1642570&r1=1642569&r2=1642570&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java Sun Nov 30 17:19:45 2014
@@ -96,6 +96,7 @@ public class ExcelExtractor extends Abst
private boolean listenForAllRecords = false;
private static final String WORKBOOK_ENTRY = "Workbook";
+ private static final String BOOK_ENTRY = "Book";
public ExcelExtractor(ParseContext context) {
super(context);
@@ -143,8 +144,15 @@ public class ExcelExtractor extends Abst
DirectoryNode root, XHTMLContentHandler xhtml,
Locale locale) throws IOException, SAXException, TikaException {
if (! root.hasEntry(WORKBOOK_ENTRY)) {
- // Corrupt file / very old file, just skip
- return;
+ if (root.hasEntry(BOOK_ENTRY)) {
+ // Excel 5 / Excel 95 file
+ // Records are in a different structure, so a different
+ // parser is needed to process them
+ // TODO Call such a parser, see TIKA-1490
+ } else {
+ // Corrupt file / very old file, just skip text extraction
+ return;
+ }
}
TikaHSSFListener listener = new TikaHSSFListener(xhtml, locale, this);
@@ -610,5 +618,4 @@ public class ExcelExtractor extends Abst
}
}
-
}