You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2013/09/26 16:01:18 UTC
svn commit: r1526498 - in /tika/trunk/tika-parsers/src:
main/java/org/apache/tika/parser/microsoft/ooxml/
test/java/org/apache/tika/parser/microsoft/ooxml/
test/resources/test-documents/
Author: tallison
Date: Thu Sep 26 14:01:17 2013
New Revision: 1526498
URL: http://svn.apache.org/r1526498
Log:
TIKA-1100: extract text from text boxes in XLSX files; modified XSSFExcelExtractorDecorator and added a test in OOXMLParserTest
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/testEXCEL_textbox.xlsx (with props)
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java?rev=1526498&r1=1526497&r2=1526498&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java Thu Sep 26 14:01:17 2013
@@ -46,6 +46,8 @@ import org.apache.poi.xssf.model.Comment
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.apache.poi.xssf.usermodel.XSSFRelation;
+import org.apache.poi.xssf.usermodel.XSSFShape;
+import org.apache.poi.xssf.usermodel.XSSFSimpleShape;
import org.apache.poi.xssf.usermodel.helpers.HeaderFooterHelper;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
@@ -118,6 +120,7 @@ public class XSSFExcelExtractorDecorator
while (iter.hasNext()) {
InputStream stream = iter.next();
sheetParts.add(iter.getSheetPart());
+
SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(xhtml, iter.getSheetComments());
// Start, and output the sheet name
@@ -142,7 +145,7 @@ public class XSSFExcelExtractorDecorator
for(String footer : sheetExtractor.footers) {
extractHeaderFooter(footer, xhtml);
}
-
+ processShapes(iter.getShapes(), xhtml);
// All done with this sheet
xhtml.endElement("div");
}
@@ -157,6 +160,20 @@ public class XSSFExcelExtractorDecorator
}
}
+ private void processShapes(List<XSSFShape> shapes, XHTMLContentHandler xhtml) throws SAXException {
+ if (shapes == null){
+ return;
+ }
+ for (XSSFShape shape : shapes){
+ if (shape instanceof XSSFSimpleShape){
+ String sText = ((XSSFSimpleShape)shape).getText();
+ if (sText != null && sText.length() > 0){
+ xhtml.element("p", sText);
+ }
+ }
+ }
+ }
+
public void processSheet(
SheetContentsHandler sheetContentsExtractor,
StylesTable styles,
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java?rev=1526498&r1=1526497&r2=1526498&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java Thu Sep 26 14:01:17 2013
@@ -988,4 +988,16 @@ public class OOXMLParserTest extends Tik
input.close();
}
}
+
+ //TIKA-1100:
+ public void testExcelTextBox() throws Exception {
+ Metadata metadata = new Metadata();
+ ContentHandler handler = new BodyContentHandler();
+ ParseContext context = new ParseContext();
+ InputStream input = getTestDocument("testEXCEL_textbox.xlsx");
+ parser.parse(input, handler, metadata, context);
+ String content = handler.toString();
+ assertContains("some autoshape", content);
+ }
+
}
Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testEXCEL_textbox.xlsx
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testEXCEL_textbox.xlsx?rev=1526498&view=auto
==============================================================================
Binary file - no diff available.
Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/testEXCEL_textbox.xlsx
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream