You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by dm...@apache.org on 2010/01/08 19:45:12 UTC
svn commit: r897292 - /lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
Author: dmeikle
Date: Fri Jan 8 18:45:12 2010
New Revision: 897292
URL: http://svn.apache.org/viewvc?rev=897292&view=rev
Log:
TIKA-103: Corrected XSSFExcelExtractorDecorator to use document style table.
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java?rev=897292&r1=897291&r2=897292&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java Fri Jan 8 18:45:12 2010
@@ -21,6 +21,7 @@
import java.util.Locale;
import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.Comment;
@@ -29,6 +30,7 @@
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
import org.apache.poi.xssf.usermodel.XSSFCell;
+import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.tika.sax.XHTMLContentHandler;
@@ -85,11 +87,18 @@
xhtml.characters(cell.getRichStringCellValue()
.getString());
} else if (type == Cell.CELL_TYPE_NUMERIC) {
- CellStyle style = cell.getCellStyle();
- xhtml.characters(
- formatter.formatRawCellContents(cell.getNumericCellValue(),
- style.getIndex(),
- style.getDataFormatString()));
+ // Get Cell Style Information from Document Style Table
+ XSSFCellStyle style = document.getCellStyleAt(cell.getCellStyle().getIndex());
+ short formatIndex = style.getDataFormat();
+ String formatString = style.getDataFormatString();
+ if (formatString == null) {
+ formatString = BuiltinFormats.getBuiltinFormat(formatIndex);
+ }
+
+ xhtml.characters(
+ formatter.formatRawCellContents(cell.getNumericCellValue(),
+ formatIndex,
+ formatString));
} else {
XSSFCell xc = (XSSFCell) cell;
String rawValue = xc.getRawValue();