You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ta...@apache.org on 2013/08/08 16:04:07 UTC
svn commit: r1511789 - in /poi/trunk/src/ooxml:
java/org/apache/poi/xssf/eventusermodel/
java/org/apache/poi/xssf/extractor/ java/org/apache/poi/xssf/usermodel/
testcases/org/apache/poi/xssf/eventusermodel/
testcases/org/apache/poi/xssf/extractor/
Author: tallison
Date: Thu Aug 8 14:04:07 2013
New Revision: 1511789
URL: http://svn.apache.org/r1511789
Log:
55347 - integrate textbox text extraction with Excel extractors
Modified:
poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java
poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java
poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFDrawing.java
poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFReader.java
poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java
poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java?rev=1511789&r1=1511788&r2=1511789&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java Thu Aug 8 14:04:07 2013
@@ -21,6 +21,8 @@ import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
import java.util.Map;
import org.apache.poi.POIXMLException;
@@ -37,7 +39,9 @@ import org.apache.poi.xssf.model.Comment
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.model.ThemesTable;
+import org.apache.poi.xssf.usermodel.XSSFDrawing;
import org.apache.poi.xssf.usermodel.XSSFRelation;
+import org.apache.poi.xssf.usermodel.XSSFShape;
import org.apache.xmlbeans.XmlException;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
@@ -273,6 +277,35 @@ public class XSSFReader {
return null;
}
+ /**
+ * Returns the shapes of every drawing part related to this sheet.
+ *
+ * @return the shapes found (an empty list when the sheet has no drawing
+ * relationship), or <code>null</code> if an XmlException,
+ * InvalidFormatException or IOException occurs while reading them
+ */
+ public List<XSSFShape> getShapes() {
+ PackagePart sheetPkg = getSheetPart();
+ List<XSSFShape> shapes= new LinkedList<XSSFShape>();
+ // Walk every drawing relationship of the sheet part and collect its shapes
+ try {
+ PackageRelationshipCollection drawingsList = sheetPkg.getRelationshipsByType(XSSFRelation.DRAWINGS.getRelation());
+ for (int i = 0; i < drawingsList.size(); i++){
+ PackageRelationship drawings = drawingsList.getRelationship(i);
+ PackagePartName drawingsName = PackagingURIHelper.createPartName(drawings.getTargetURI());
+ PackagePart drawingsPart = sheetPkg.getPackage().getPart(drawingsName);
+ XSSFDrawing drawing = new XSSFDrawing(drawingsPart, drawings);
+ for (XSSFShape shape : drawing.getShapes()){
+ shapes.add(shape);
+ }
+ }
+ } catch (XmlException e){
+ return null;
+ } catch (InvalidFormatException e) {
+ return null;
+ } catch (IOException e) {
+ return null;
+ }
+ return shapes;
+ }
+
public PackagePart getSheetPart() {
String sheetId = ctSheet.getId();
return sheetMap.get(sheetId);
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java?rev=1511789&r1=1511788&r2=1511789&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java Thu Aug 8 14:04:07 2013
@@ -18,6 +18,7 @@ package org.apache.poi.xssf.extractor;
import java.io.IOException;
import java.io.InputStream;
+import java.util.List;
import java.util.Locale;
import javax.xml.parsers.ParserConfigurationException;
@@ -37,6 +38,8 @@ import org.apache.poi.xssf.eventusermode
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
+import org.apache.poi.xssf.usermodel.XSSFShape;
+import org.apache.poi.xssf.usermodel.XSSFSimpleShape;
import org.apache.xmlbeans.XmlException;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
@@ -54,6 +57,7 @@ public class XSSFEventBasedExcelExtracto
private Locale locale;
private boolean includeSheetNames = true;
private boolean formulasNotResults = false;
+ private boolean includeTextBoxes = true;
public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
this(OPCPackage.open(path));
@@ -89,6 +93,14 @@ public class XSSFEventBasedExcelExtracto
public void setFormulasNotResults(boolean formulasNotResults) {
this.formulasNotResults = formulasNotResults;
}
+
+ /**
+ * Should text from textboxes be included? Default is true
+ *
+ * @param includeTextBoxes <code>true</code> to append the text of each sheet's
+ * simple shapes after that sheet's cell text, <code>false</code> to leave it out
+ */
+
+ public void setIncludeTextBoxes(boolean includeTextBoxes) {
+ this.includeTextBoxes = includeTextBoxes;
+ }
public void setLocale(Locale locale) {
this.locale = locale;
@@ -175,6 +187,9 @@ public class XSSFEventBasedExcelExtracto
text.append('\n');
}
processSheet(sheetExtractor, styles, strings, stream);
+ if (includeTextBoxes){
+ processShapes(iter.getShapes(), text);
+ }
stream.close();
}
@@ -191,7 +206,20 @@ public class XSSFEventBasedExcelExtracto
}
}
- @Override
+ /**
+  * Appends the text of every {@link XSSFSimpleShape} in <code>shapes</code>
+  * to <code>text</code>, one shape per line.
+  *
+  * @param shapes shapes to read; when <code>null</code> nothing is appended
+  * @param text buffer that receives each non-empty shape text followed by a newline
+  */
+ private void processShapes(List<XSSFShape> shapes, StringBuffer text) {
+     if (shapes == null){
+         return;
+     }
+     for (XSSFShape candidate : shapes){
+         // only simple shapes expose text here; skip everything else
+         if (!(candidate instanceof XSSFSimpleShape)){
+             continue;
+         }
+         String shapeText = ((XSSFSimpleShape)candidate).getText();
+         if (shapeText == null || shapeText.length() == 0){
+             continue;
+         }
+         text.append(shapeText);
+         text.append('\n');
+     }
+ }
+ @Override
public void close() throws IOException {
if (container != null) {
container.close();
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java?rev=1511789&r1=1511788&r2=1511789&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java Thu Aug 8 14:04:07 2013
@@ -31,8 +31,11 @@ import org.apache.poi.ss.usermodel.DataF
import org.apache.poi.ss.usermodel.HeaderFooter;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFCell;
+import org.apache.poi.xssf.usermodel.XSSFDrawing;
import org.apache.poi.xssf.usermodel.XSSFRelation;
+import org.apache.poi.xssf.usermodel.XSSFShape;
import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFSimpleShape;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.xmlbeans.XmlException;
@@ -52,6 +55,7 @@ public class XSSFExcelExtractor extends
private boolean formulasNotResults = false;
private boolean includeCellComments = false;
private boolean includeHeadersFooters = true;
+ private boolean includeTextBoxes = true;
/**
* @deprecated Use {@link #XSSFExcelExtractor(org.apache.poi.openxml4j.opc.OPCPackage)} instead.
@@ -104,6 +108,13 @@ public class XSSFExcelExtractor extends
this.includeHeadersFooters = includeHeadersFooters;
}
/**
+ * Should text within textboxes be included? Default is true
+ * @param includeTextBoxes <code>true</code> to append the text of each sheet's
+ * simple shapes to the extracted text, <code>false</code> to leave it out
+ */
+ public void setIncludeTextBoxes(boolean includeTextBoxes){
+ this.includeTextBoxes = includeTextBoxes;
+ }
+ /**
* What Locale should be used for formatting numbers (based
* on the styles applied to the cells)
*/
@@ -180,7 +191,20 @@ public class XSSFExcelExtractor extends
}
text.append("\n");
}
-
+
+ // add textboxes
+ if (includeTextBoxes){
+     // NOTE(review): createDrawingPatriarch() looks like it may create a drawing
+     // when the sheet has none - confirm that text extraction is not expected
+     // to leave the workbook unmodified.
+     XSSFDrawing drawing = sheet.createDrawingPatriarch();
+     for (XSSFShape shape : drawing.getShapes()){
+         if (shape instanceof XSSFSimpleShape){
+             String boxText = ((XSSFSimpleShape)shape).getText();
+             // Guard against a null text the same way the event-based
+             // extractor does, instead of risking a NullPointerException.
+             if (boxText != null && boxText.length() > 0){
+                 text.append(boxText);
+                 text.append('\n');
+             }
+         }
+     }
+ }
// Finally footer(s), if present
if(includeHeadersFooters) {
text.append(
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFDrawing.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFDrawing.java?rev=1511789&r1=1511788&r2=1511789&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFDrawing.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFDrawing.java Thu Aug 8 14:04:07 2013
@@ -76,7 +76,7 @@ public final class XSSFDrawing extends P
* @param rel the package relationship holding this drawing,
* the relationship type must be http://schemas.openxmlformats.org/officeDocument/2006/relationships/drawing
*/
- protected XSSFDrawing(PackagePart part, PackageRelationship rel) throws IOException, XmlException {
+ public XSSFDrawing(PackagePart part, PackageRelationship rel) throws IOException, XmlException {
super(part, rel);
XmlOptions options = new XmlOptions(DEFAULT_XML_OPTIONS);
//Removing root element
Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFReader.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFReader.java?rev=1511789&r1=1511788&r2=1511789&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFReader.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFReader.java Thu Aug 8 14:04:07 2013
@@ -19,6 +19,7 @@ package org.apache.poi.xssf.eventusermod
import java.io.InputStream;
import java.util.Iterator;
+import java.util.List;
import junit.framework.TestCase;
@@ -27,6 +28,8 @@ import org.apache.poi.util.IOUtils;
import org.apache.poi.xssf.XSSFTestDataSamples;
import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
+import org.apache.poi.xssf.usermodel.XSSFShape;
+import org.apache.poi.xssf.usermodel.XSSFSimpleShape;
import org.apache.poi.POIDataSamples;
/**
@@ -164,4 +167,33 @@ public final class TestXSSFReader extend
stream.close();
}
}
+ /**
+  * Checks that the text-box lines of WithTextBox.xlsx can be read through
+  * the sheet iterator's getShapes().
+  * @throws Exception if the sample package cannot be opened or read
+  */
+ public void testShapes() throws Exception{
+     OPCPackage samplePackage = XSSFTestDataSamples.openSamplePackage("WithTextBox.xlsx");
+     XSSFReader reader = new XSSFReader(samplePackage);
+     XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator)reader.getSheetsData();
+
+     StringBuilder collected = new StringBuilder();
+     while (sheets.hasNext()) {
+         // advance to the next sheet; only its shapes are inspected
+         sheets.next();
+         List<XSSFShape> sheetShapes = sheets.getShapes();
+         if (sheetShapes == null) {
+             continue;
+         }
+         for (XSSFShape sheetShape : sheetShapes) {
+             if (sheetShape instanceof XSSFSimpleShape) {
+                 collected.append(((XSSFSimpleShape)sheetShape).getText()).append('\n');
+             }
+         }
+     }
+     String text = collected.toString();
+     assertTrue(text.contains("Line 1"));
+     assertTrue(text.contains("Line 2"));
+     assertTrue(text.contains("Line 3"));
+ }
}
Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java?rev=1511789&r1=1511788&r2=1511789&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java Thu Aug 8 14:04:07 2013
@@ -17,6 +17,7 @@
package org.apache.poi.xssf.extractor;
+import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -25,7 +26,11 @@ import junit.framework.TestCase;
import org.apache.poi.POITextExtractor;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.XSSFTestDataSamples;
+import org.apache.poi.xssf.eventusermodel.XSSFReader;
+import org.apache.poi.xssf.usermodel.XSSFShape;
+import org.apache.poi.xssf.usermodel.XSSFSimpleShape;
/**
* Tests for {@link XSSFEventBasedExcelExtractor}
@@ -167,4 +172,19 @@ public class TestXSSFEventBasedExcelExtr
ole2Extractor.close();
ooxmlExtractor.close();
}
+
+ /**
+  * Test text extraction from text box through the event-based extractor.
+  * @throws Exception if the sample file cannot be opened or read
+  */
+ public void testShapes() throws Exception{
+     XSSFEventBasedExcelExtractor ooxmlExtractor = getExtractor("WithTextBox.xlsx");
+     try {
+         String text = ooxmlExtractor.getText();
+
+         assertTrue(text.indexOf("Line 1") > -1);
+         assertTrue(text.indexOf("Line 2") > -1);
+         assertTrue(text.indexOf("Line 3") > -1);
+     } finally {
+         // close the extractor even when an assertion fails, as the preceding test closes its extractors
+         ooxmlExtractor.close();
+     }
+ }
}
Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java?rev=1511789&r1=1511788&r2=1511789&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java Thu Aug 8 14:04:07 2013
@@ -211,4 +211,16 @@ public class TestXSSFExcelExtractor exte
extractor.close();
}
+ /**
+  * Simple test for text box text
+  * @throws IOException if the sample file cannot be read or the extractor cannot be closed
+  */
+ public void testTextBoxes() throws IOException {
+     XSSFExcelExtractor extractor = getExtractor("WithTextBox.xlsx");
+     try {
+         extractor.setFormulasNotResults(true);
+         String text = extractor.getText();
+         assertTrue(text.indexOf("Line 1") > -1);
+         assertTrue(text.indexOf("Line 2") > -1);
+         assertTrue(text.indexOf("Line 3") > -1);
+     } finally {
+         // close the extractor even when an assertion fails, as the preceding test closes its extractor
+         extractor.close();
+     }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org