You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ma...@apache.org on 2010/01/11 15:27:55 UTC

svn commit: r897875 - in /poi/trunk: src/ooxml/java/org/apache/poi/xslf/extractor/ src/ooxml/java/org/apache/poi/xslf/usermodel/ src/ooxml/testcases/org/apache/poi/xslf/extractor/ test-data/slideshow/

Author: maxcom
Date: Mon Jan 11 14:27:54 2010
New Revision: 897875

URL: http://svn.apache.org/viewvc?rev=897875&view=rev
Log:
XSLF: text extraction from tables

Added:
    poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingParagraph.java
    poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTable.java
    poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableCell.java
    poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableRow.java
    poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextBody.java
    poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFCommonSlideData.java
    poi/trunk/test-data/slideshow/present1.pptx   (with props)
Modified:
    poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java
    poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java?rev=897875&r1=897874&r2=897875&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java Mon Jan 11 14:27:54 2010
@@ -16,28 +16,18 @@
 ==================================================================== */
 package org.apache.poi.xslf.extractor;
 
-import java.io.IOException;
-
 import org.apache.poi.POIXMLTextExtractor;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.xslf.XSLFSlideShow;
+import org.apache.poi.xslf.usermodel.DrawingParagraph;
 import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.poi.xslf.usermodel.XSLFCommonSlideData;
 import org.apache.poi.xslf.usermodel.XSLFSlide;
 import org.apache.xmlbeans.XmlException;
-import org.apache.xmlbeans.XmlObject;
-import org.apache.xmlbeans.XmlCursor;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTTextLineBreak;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
+import org.openxmlformats.schemas.presentationml.x2006.main.*;
+
+import java.io.IOException;
 
 public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
 	private XMLSlideShow slideshow;
@@ -110,7 +100,7 @@
 					slideshow._getXSLFSlideShow().getSlideComments(slideId);
 				
 				if(slideText) {
-					extractText(rawSlide.getCSld().getSpTree(), text);
+					extractText(slides[i].getCommonSlideData(), text);
 					
 					// Comments too for the slide
 					if(comments != null) {
@@ -123,8 +113,9 @@
 						}
 					}
 				}
+
 				if(notesText && notes != null) {
-					extractText(notes.getCSld().getSpTree(), text);
+					extractText(new XSLFCommonSlideData(notes.getCSld()), text);
 				}
 			} catch(Exception e) {
 				throw new RuntimeException(e);
@@ -134,31 +125,10 @@
 		return text.toString();
 	}
 	
-	private void extractText(CTGroupShape gs, StringBuffer text) {
-		CTShape[] shapes = gs.getSpArray();
-		for (int i = 0; i < shapes.length; i++) {
-			CTTextBody textBody =
-				shapes[i].getTxBody();
-			if(textBody != null) {
-				CTTextParagraph[] paras = 
-					textBody.getPArray();
-				for (int j = 0; j < paras.length; j++) {
-                    XmlCursor c = paras[j].newCursor();
-                    c.selectPath("./*");
-                    while (c.toNextSelection()) {
-                        XmlObject o = c.getObject();
-                        if(o instanceof CTRegularTextRun){
-                            CTRegularTextRun txrun = (CTRegularTextRun)o;
-                            text.append( txrun.getT() );
-                        } else if (o instanceof CTTextLineBreak){
-                            text.append('\n');
-                        }
-                    }
-                    
-					// End each paragraph with a new line
-					text.append("\n");
-				}
-			}
-		}
-	}
+	private void extractText(XSLFCommonSlideData data, StringBuffer text) {
+        for (DrawingParagraph p : data.getText()) {
+            text.append(p.getText());
+            text.append("\n");
+        }
+    }
 }

Added: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingParagraph.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingParagraph.java?rev=897875&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingParagraph.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingParagraph.java Mon Jan 11 14:27:54 2010
@@ -0,0 +1,33 @@
+package org.apache.poi.xslf.usermodel;
+
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextLineBreak;
+import org.apache.xmlbeans.XmlCursor;
+import org.apache.xmlbeans.XmlObject;
+
+public class DrawingParagraph {
+    private final CTTextParagraph p;
+
+    public DrawingParagraph(CTTextParagraph p) {
+        this.p = p;
+    }
+
+    public CharSequence getText() {
+        StringBuilder text = new StringBuilder();
+
+        XmlCursor c = p.newCursor();
+        c.selectPath("./*");
+        while (c.toNextSelection()) {
+            XmlObject o = c.getObject();
+            if (o instanceof CTRegularTextRun) {
+                CTRegularTextRun txrun = (CTRegularTextRun) o;
+                text.append(txrun.getT());
+            } else if (o instanceof CTTextLineBreak) {
+                text.append('\n');
+            }
+        }
+        
+        return text;
+    }
+}

Added: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTable.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTable.java?rev=897875&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTable.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTable.java Mon Jan 11 14:27:54 2010
@@ -0,0 +1,23 @@
+package org.apache.poi.xslf.usermodel;
+
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTable;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTableRow;
+
+public class DrawingTable {
+    private final CTTable table;
+
+    public DrawingTable(CTTable table) {
+        this.table = table;
+    }
+
+    public DrawingTableRow[] getRows() {
+        CTTableRow[] ctTableRows = table.getTrArray();
+        DrawingTableRow[] o = new DrawingTableRow[ctTableRows.length];
+
+        for (int i=0; i<o.length; i++) {
+            o[i] = new DrawingTableRow(ctTableRows[i]);
+        }
+
+        return o;
+    }
+}

Added: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableCell.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableCell.java?rev=897875&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableCell.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableCell.java Mon Jan 11 14:27:54 2010
@@ -0,0 +1,17 @@
+package org.apache.poi.xslf.usermodel;
+
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTableCell;
+
+public class DrawingTableCell {
+    private final CTTableCell cell;
+    private final DrawingTextBody drawingTextBody;
+
+    public DrawingTableCell(CTTableCell cell) {
+        this.cell = cell;
+        drawingTextBody = new DrawingTextBody(this.cell.getTxBody());
+    }
+
+    public DrawingTextBody getTextBody() {
+        return drawingTextBody;
+    }
+}

Added: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableRow.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableRow.java?rev=897875&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableRow.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableRow.java Mon Jan 11 14:27:54 2010
@@ -0,0 +1,23 @@
+package org.apache.poi.xslf.usermodel;
+
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTableRow;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTableCell;
+
+public class DrawingTableRow {
+    private final CTTableRow row;
+
+    public DrawingTableRow(CTTableRow row) {
+        this.row = row;
+    }
+
+    public DrawingTableCell[] getCells() {
+        CTTableCell[] ctTableCells = row.getTcArray();
+        DrawingTableCell[] o = new DrawingTableCell[ctTableCells.length];
+
+        for (int i=0; i<o.length; i++) {
+            o[i] = new DrawingTableCell(ctTableCells[i]);
+        }
+
+        return o;
+    }
+}
\ No newline at end of file

Added: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextBody.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextBody.java?rev=897875&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextBody.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextBody.java Mon Jan 11 14:27:54 2010
@@ -0,0 +1,23 @@
+package org.apache.poi.xslf.usermodel;
+
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
+
+public class DrawingTextBody {
+    private final CTTextBody textBody;
+
+    public DrawingTextBody(CTTextBody textBody) {
+        this.textBody = textBody;
+    }
+
+    public DrawingParagraph[] getParagraphs() {
+        CTTextParagraph[] pArray = textBody.getPArray();
+        DrawingParagraph[] o = new DrawingParagraph[pArray.length];
+
+        for (int i=0; i<o.length; i++) {
+            o[i] = new DrawingParagraph(pArray[i]);
+        }
+
+        return o;
+    }
+}

Added: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFCommonSlideData.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFCommonSlideData.java?rev=897875&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFCommonSlideData.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFCommonSlideData.java Mon Jan 11 14:27:54 2010
@@ -0,0 +1,67 @@
+package org.apache.poi.xslf.usermodel;
+
+import org.apache.xmlbeans.XmlCursor;
+import org.apache.xmlbeans.XmlObject;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTGraphicalObjectData;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTable;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTCommonSlideData;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTGraphicalObjectFrame;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+public class XSLFCommonSlideData {
+    private final CTCommonSlideData data;
+
+    public XSLFCommonSlideData(CTCommonSlideData data) {
+        this.data = data;
+    }
+
+    public List<DrawingParagraph> getText() {
+        CTGroupShape gs = data.getSpTree();
+
+        List<DrawingParagraph> out = new ArrayList<DrawingParagraph>();
+
+        CTShape[] shapes = gs.getSpArray();
+        for (int i = 0; i < shapes.length; i++) {
+            CTTextBody ctTextBody = shapes[i].getTxBody();
+            if (ctTextBody==null) {
+                continue;
+            }
+
+            DrawingTextBody textBody = new DrawingTextBody(ctTextBody);
+
+            out.addAll(Arrays.asList(textBody.getParagraphs()));
+        }
+
+        CTGraphicalObjectFrame[] graphicFrames = gs.getGraphicFrameArray();
+        for (CTGraphicalObjectFrame frame: graphicFrames) {
+            CTGraphicalObjectData data = frame.getGraphic().getGraphicData();
+            XmlCursor c = data.newCursor();
+            c.selectPath("./*");
+
+            while (c.toNextSelection()) {
+                XmlObject o = c.getObject();
+
+                if (o instanceof CTTable) {
+                    DrawingTable table = new DrawingTable((CTTable) o);
+
+                    for (DrawingTableRow row : table.getRows()) {
+                        for (DrawingTableCell cell : row.getCells()) {
+                            DrawingTextBody textBody = cell.getTextBody();
+
+                            out.addAll(Arrays.asList(textBody.getParagraphs()));
+                        }
+                    }
+                }
+            }
+        }
+
+        return out;
+    }
+
+}

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java?rev=897875&r1=897874&r2=897875&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java Mon Jan 11 14:27:54 2010
@@ -26,11 +26,13 @@
 public class XSLFSlide extends XSLFSheet implements Slide {
 	private CTSlide slide;
 	private CTSlideIdListEntry slideId;
+    private XSLFCommonSlideData data;
 	
 	public XSLFSlide(CTSlide slide, CTSlideIdListEntry slideId, SlideShow parent) {
 		super(parent);
 		this.slide = slide;
 		this.slideId = slideId;
+        this.data = new XSLFCommonSlideData(slide.getCSld());
 	}
 	
 	/**
@@ -88,4 +90,8 @@
 		// TODO Auto-generated method stub
 
 	}
+
+    public XSLFCommonSlideData getCommonSlideData() {
+        return data;
+    }
 }

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java?rev=897875&r1=897874&r2=897875&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java Mon Jan 11 14:27:54 2010
@@ -113,4 +113,17 @@
 		// Check comments are there
 		assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
 	}
+
+    public void testTable() throws Exception {
+        POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
+        xmlA = new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("present1.pptx")));
+        XSLFPowerPointExtractor extractor =
+            new XSLFPowerPointExtractor(xmlA);
+
+        String text = extractor.getText();
+        assertTrue(text.length() > 0);
+
+        // Check the expected word is in the extracted text
+        assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST"));
+    }
 }

Added: poi/trunk/test-data/slideshow/present1.pptx
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/slideshow/present1.pptx?rev=897875&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/test-data/slideshow/present1.pptx
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org