You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ta...@apache.org on 2013/08/08 16:04:07 UTC

svn commit: r1511789 - in /poi/trunk/src/ooxml: java/org/apache/poi/xssf/eventusermodel/ java/org/apache/poi/xssf/extractor/ java/org/apache/poi/xssf/usermodel/ testcases/org/apache/poi/xssf/eventusermodel/ testcases/org/apache/poi/xssf/extractor/

Author: tallison
Date: Thu Aug  8 14:04:07 2013
New Revision: 1511789

URL: http://svn.apache.org/r1511789
Log:
55347 - integrate textbox text extraction with Excel extractors

Modified:
    poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java
    poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
    poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java
    poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFDrawing.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFReader.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java?rev=1511789&r1=1511788&r2=1511789&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java Thu Aug  8 14:04:07 2013
@@ -21,6 +21,8 @@ import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.poi.POIXMLException;
@@ -37,7 +39,9 @@ import org.apache.poi.xssf.model.Comment
 import org.apache.poi.xssf.model.SharedStringsTable;
 import org.apache.poi.xssf.model.StylesTable;
 import org.apache.poi.xssf.model.ThemesTable;
+import org.apache.poi.xssf.usermodel.XSSFDrawing;
 import org.apache.poi.xssf.usermodel.XSSFRelation;
+import org.apache.poi.xssf.usermodel.XSSFShape;
 import org.apache.xmlbeans.XmlException;
 import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
 import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
@@ -273,6 +277,35 @@ public class XSSFReader {
            return null;
         }
         
+        /**
+         * Returns the shapes of every drawing related to this sheet: an empty list if there are
+         * none, or null if an XmlException, InvalidFormatException or IOException occurs
+         */
+        public List<XSSFShape> getShapes() {
+            PackagePart sheetPkg = getSheetPart();
+            List<XSSFShape> shapes= new LinkedList<XSSFShape>();
+           // Walk every drawings relationship of the sheet part and collect its shapes
+           try {
+              PackageRelationshipCollection drawingsList = sheetPkg.getRelationshipsByType(XSSFRelation.DRAWINGS.getRelation());
+              for (int i = 0; i < drawingsList.size(); i++){
+                  PackageRelationship drawings = drawingsList.getRelationship(i);
+                  PackagePartName drawingsName = PackagingURIHelper.createPartName(drawings.getTargetURI());
+                  PackagePart drawingsPart = sheetPkg.getPackage().getPart(drawingsName);
+                  XSSFDrawing drawing = new XSSFDrawing(drawingsPart, drawings);
+                  for (XSSFShape shape : drawing.getShapes()){
+                      shapes.add(shape);
+                  }
+              }
+           } catch (XmlException e){
+               return null;
+           } catch (InvalidFormatException e) {  
+              return null;
+           } catch (IOException e) {
+              return null;
+           }
+           return shapes;
+        }
+        
         public PackagePart getSheetPart() {
            String sheetId = ctSheet.getId();
            return sheetMap.get(sheetId);

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java?rev=1511789&r1=1511788&r2=1511789&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java Thu Aug  8 14:04:07 2013
@@ -18,6 +18,7 @@ package org.apache.poi.xssf.extractor;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.List;
 import java.util.Locale;
 
 import javax.xml.parsers.ParserConfigurationException;
@@ -37,6 +38,8 @@ import org.apache.poi.xssf.eventusermode
 import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
 import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
 import org.apache.poi.xssf.model.StylesTable;
+import org.apache.poi.xssf.usermodel.XSSFShape;
+import org.apache.poi.xssf.usermodel.XSSFSimpleShape;
 import org.apache.xmlbeans.XmlException;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.InputSource;
@@ -54,6 +57,7 @@ public class XSSFEventBasedExcelExtracto
    private Locale locale;
 	private boolean includeSheetNames = true;
 	private boolean formulasNotResults = false;
+	private boolean includeTextBoxes = true;
 
 	public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
 		this(OPCPackage.open(path));
@@ -89,6 +93,14 @@ public class XSSFEventBasedExcelExtracto
 	public void setFormulasNotResults(boolean formulasNotResults) {
 		this.formulasNotResults = formulasNotResults;
 	}
+
+	/**
+     * Should text from textboxes be included? Default is true
+     */
+
+	public void setIncludeTextBoxes(boolean includeTextBoxes) {
+	    this.includeTextBoxes = includeTextBoxes;
+	}
 	
 	public void setLocale(Locale locale) {
 	   this.locale = locale;
@@ -175,6 +187,9 @@ public class XSSFEventBasedExcelExtracto
                  text.append('\n');
               }
               processSheet(sheetExtractor, styles, strings, stream);
+              if (includeTextBoxes){
+                  processShapes(iter.getShapes(), text);
+              }
               stream.close();
           }
           
@@ -191,7 +206,20 @@ public class XSSFEventBasedExcelExtracto
        }
    }
    
-	@Override
+    private void processShapes(List<XSSFShape> shapes, StringBuffer text) { // appends the text of each simple shape to 'text', one shape per line
+        if (shapes == null){ // a null list is tolerated: there is nothing to append
+            return;
+        }
+        for (XSSFShape shape : shapes){
+            if (shape instanceof XSSFSimpleShape){ // only simple shapes are read for text here; other shape types are skipped
+                String sText = ((XSSFSimpleShape)shape).getText();
+                if (sText != null && sText.length() > 0){ // skip shapes without text so no blank lines are emitted
+                    text.append(sText).append('\n');
+                }
+            }
+        }
+    }
+    @Override
 	public void close() throws IOException {
 		if (container != null) {
 			container.close();

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java?rev=1511789&r1=1511788&r2=1511789&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java Thu Aug  8 14:04:07 2013
@@ -31,8 +31,11 @@ import org.apache.poi.ss.usermodel.DataF
 import org.apache.poi.ss.usermodel.HeaderFooter;
 import org.apache.poi.ss.usermodel.Row;
 import org.apache.poi.xssf.usermodel.XSSFCell;
+import org.apache.poi.xssf.usermodel.XSSFDrawing;
 import org.apache.poi.xssf.usermodel.XSSFRelation;
+import org.apache.poi.xssf.usermodel.XSSFShape;
 import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFSimpleShape;
 import org.apache.poi.xssf.usermodel.XSSFWorkbook;
 import org.apache.xmlbeans.XmlException;
 
@@ -52,6 +55,7 @@ public class XSSFExcelExtractor extends 
     private boolean formulasNotResults = false;
     private boolean includeCellComments = false;
     private boolean includeHeadersFooters = true;
+    private boolean includeTextBoxes = true;
 
     /**
      * @deprecated  Use {@link #XSSFExcelExtractor(org.apache.poi.openxml4j.opc.OPCPackage)} instead.
@@ -104,6 +108,13 @@ public class XSSFExcelExtractor extends 
         this.includeHeadersFooters = includeHeadersFooters;
     }
     /**
+     * Should text within textboxes be included? Default is true
+     * @param includeTextBoxes true to include text box text, false to omit it
+     */
+    public void setIncludeTextBoxes(boolean includeTextBoxes){
+        this.includeTextBoxes = includeTextBoxes;
+    }
+    /**
      * What Locale should be used for formatting numbers (based
      *  on the styles applied to the cells)
      */
@@ -180,7 +191,20 @@ public class XSSFExcelExtractor extends 
                 }
                 text.append("\n");
             }
-
+            
+            // add textboxes
+            if (includeTextBoxes){
+                XSSFDrawing drawing = sheet.createDrawingPatriarch();
+                for (XSSFShape shape : drawing.getShapes()){
+                    if (shape instanceof XSSFSimpleShape){
+                        String boxText = ((XSSFSimpleShape)shape).getText();
+                        if (boxText.length() > 0){
+                            text.append(boxText);
+                            text.append('\n');
+                        }
+                    }
+                }
+            }
             // Finally footer(s), if present
             if(includeHeadersFooters) {
                 text.append(

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFDrawing.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFDrawing.java?rev=1511789&r1=1511788&r2=1511789&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFDrawing.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFDrawing.java Thu Aug  8 14:04:07 2013
@@ -76,7 +76,7 @@ public final class XSSFDrawing extends P
      * @param rel  the package relationship holding this drawing,
      * the relationship type must be http://schemas.openxmlformats.org/officeDocument/2006/relationships/drawing
      */
-    protected XSSFDrawing(PackagePart part, PackageRelationship rel) throws IOException, XmlException {
+    public XSSFDrawing(PackagePart part, PackageRelationship rel) throws IOException, XmlException {
         super(part, rel);
         XmlOptions options  = new XmlOptions(DEFAULT_XML_OPTIONS);
         //Removing root element

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFReader.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFReader.java?rev=1511789&r1=1511788&r2=1511789&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFReader.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFReader.java Thu Aug  8 14:04:07 2013
@@ -19,6 +19,7 @@ package org.apache.poi.xssf.eventusermod
 
 import java.io.InputStream;
 import java.util.Iterator;
+import java.util.List;
 
 import junit.framework.TestCase;
 
@@ -27,6 +28,8 @@ import org.apache.poi.util.IOUtils;
 import org.apache.poi.xssf.XSSFTestDataSamples;
 import org.apache.poi.xssf.model.CommentsTable;
 import org.apache.poi.xssf.usermodel.XSSFRichTextString;
+import org.apache.poi.xssf.usermodel.XSSFShape;
+import org.apache.poi.xssf.usermodel.XSSFSimpleShape;
 import org.apache.poi.POIDataSamples;
 
 /**
@@ -164,4 +167,33 @@ public final class TestXSSFReader extend
           stream.close();
       }
    }
+   /**
+    * Test text extraction from text box using getShapes()
+    * @throws Exception
+    */
+   public void testShapes() throws Exception{
+       OPCPackage pkg =  XSSFTestDataSamples.openSamplePackage("WithTextBox.xlsx");
+       XSSFReader r = new XSSFReader(pkg);
+       XSSFReader.SheetIterator it = (XSSFReader.SheetIterator)r.getSheetsData();
+       
+       StringBuilder sb = new StringBuilder();
+       while(it.hasNext())
+       {    
+          it.next();
+          List<XSSFShape> shapes = it.getShapes();
+          if (shapes != null){
+              for (XSSFShape shape : shapes){
+                  if (shape instanceof XSSFSimpleShape){
+                      String t = ((XSSFSimpleShape)shape).getText();
+                      sb.append(t).append('\n');
+                  }
+              }
+          }
+       }
+       String text = sb.toString();
+       assertTrue(text.indexOf("Line 1") > -1);
+       assertTrue(text.indexOf("Line 2") > -1);
+       assertTrue(text.indexOf("Line 3") > -1);
+
+   }
 }

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java?rev=1511789&r1=1511788&r2=1511789&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java Thu Aug  8 14:04:07 2013
@@ -17,6 +17,7 @@
 
 package org.apache.poi.xssf.extractor;
 
+import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -25,7 +26,11 @@ import junit.framework.TestCase;
 import org.apache.poi.POITextExtractor;
 import org.apache.poi.hssf.HSSFTestDataSamples;
 import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.xssf.XSSFTestDataSamples;
+import org.apache.poi.xssf.eventusermodel.XSSFReader;
+import org.apache.poi.xssf.usermodel.XSSFShape;
+import org.apache.poi.xssf.usermodel.XSSFSimpleShape;
 
 /**
  * Tests for {@link XSSFEventBasedExcelExtractor}
@@ -167,4 +172,19 @@ public class TestXSSFEventBasedExcelExtr
 		ole2Extractor.close();
 		ooxmlExtractor.close();
 	}
+	
+	 /**
+	    * Test text extraction from text box using getShapes()
+	    * @throws Exception
+	    */
+    public void testShapes() throws Exception{
+	    XSSFEventBasedExcelExtractor ooxmlExtractor = getExtractor("WithTextBox.xlsx");
+	       
+	    String text = ooxmlExtractor.getText();
+
+	    assertTrue(text.indexOf("Line 1") > -1);
+	    assertTrue(text.indexOf("Line 2") > -1);
+	    assertTrue(text.indexOf("Line 3") > -1);
+
+    }
 }

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java?rev=1511789&r1=1511788&r2=1511789&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java Thu Aug  8 14:04:07 2013
@@ -211,4 +211,16 @@ public class TestXSSFExcelExtractor exte
       
       extractor.close();
 	}
+	/**
+	 * Simple test for text box text
+	 * @throws IOException
+	 */
+	public void testTextBoxes() throws IOException {
+	    XSSFExcelExtractor extractor = getExtractor("WithTextBox.xlsx");
+	    extractor.setFormulasNotResults(true);
+	    String text = extractor.getText();
+	    assertTrue(text.indexOf("Line 1") > -1);
+	    assertTrue(text.indexOf("Line 2") > -1);
+	    assertTrue(text.indexOf("Line 3") > -1);
+	}
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org