You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ma...@apache.org on 2010/01/11 15:27:55 UTC
svn commit: r897875 - in /poi/trunk:
src/ooxml/java/org/apache/poi/xslf/extractor/
src/ooxml/java/org/apache/poi/xslf/usermodel/
src/ooxml/testcases/org/apache/poi/xslf/extractor/ test-data/slideshow/
Author: maxcom
Date: Mon Jan 11 14:27:54 2010
New Revision: 897875
URL: http://svn.apache.org/viewvc?rev=897875&view=rev
Log:
XSLF: text extraction from tables
Added:
poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingParagraph.java
poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTable.java
poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableCell.java
poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableRow.java
poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextBody.java
poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFCommonSlideData.java
poi/trunk/test-data/slideshow/present1.pptx (with props)
Modified:
poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java
poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java
poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java?rev=897875&r1=897874&r2=897875&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java Mon Jan 11 14:27:54 2010
@@ -16,28 +16,18 @@
==================================================================== */
package org.apache.poi.xslf.extractor;
-import java.io.IOException;
-
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xslf.XSLFSlideShow;
+import org.apache.poi.xslf.usermodel.DrawingParagraph;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.poi.xslf.usermodel.XSLFCommonSlideData;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.xmlbeans.XmlException;
-import org.apache.xmlbeans.XmlObject;
-import org.apache.xmlbeans.XmlCursor;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTTextLineBreak;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
+import org.openxmlformats.schemas.presentationml.x2006.main.*;
+
+import java.io.IOException;
public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
private XMLSlideShow slideshow;
@@ -110,7 +100,7 @@
slideshow._getXSLFSlideShow().getSlideComments(slideId);
if(slideText) {
- extractText(rawSlide.getCSld().getSpTree(), text);
+ extractText(slides[i].getCommonSlideData(), text);
// Comments too for the slide
if(comments != null) {
@@ -123,8 +113,9 @@
}
}
}
+
if(notesText && notes != null) {
- extractText(notes.getCSld().getSpTree(), text);
+ extractText(new XSLFCommonSlideData(notes.getCSld()), text);
}
} catch(Exception e) {
throw new RuntimeException(e);
@@ -134,31 +125,10 @@
return text.toString();
}
- private void extractText(CTGroupShape gs, StringBuffer text) {
- CTShape[] shapes = gs.getSpArray();
- for (int i = 0; i < shapes.length; i++) {
- CTTextBody textBody =
- shapes[i].getTxBody();
- if(textBody != null) {
- CTTextParagraph[] paras =
- textBody.getPArray();
- for (int j = 0; j < paras.length; j++) {
- XmlCursor c = paras[j].newCursor();
- c.selectPath("./*");
- while (c.toNextSelection()) {
- XmlObject o = c.getObject();
- if(o instanceof CTRegularTextRun){
- CTRegularTextRun txrun = (CTRegularTextRun)o;
- text.append( txrun.getT() );
- } else if (o instanceof CTTextLineBreak){
- text.append('\n');
- }
- }
-
- // End each paragraph with a new line
- text.append("\n");
- }
- }
- }
- }
+ private void extractText(XSLFCommonSlideData data, StringBuffer text) {
+     // Emit every paragraph found in the slide data, each terminated by a newline.
+     for (DrawingParagraph paragraph : data.getText()) {
+         text.append(paragraph.getText()).append("\n");
+     }
+ }
}
Added: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingParagraph.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingParagraph.java?rev=897875&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingParagraph.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingParagraph.java Mon Jan 11 14:27:54 2010
@@ -0,0 +1,33 @@
+package org.apache.poi.xslf.usermodel;
+
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextLineBreak;
+import org.apache.xmlbeans.XmlCursor;
+import org.apache.xmlbeans.XmlObject;
+
+/**
+ * Wrapper around a DrawingML {@link CTTextParagraph} that renders the
+ * paragraph as plain text.
+ */
+public class DrawingParagraph {
+    private final CTTextParagraph p;
+
+    /**
+     * @param p the underlying XMLBeans paragraph the text is read from
+     */
+    public DrawingParagraph(CTTextParagraph p) {
+        this.p = p;
+    }
+
+    /**
+     * Collects the text of the paragraph's direct child elements in document
+     * order. Regular text runs contribute their text and line breaks
+     * contribute a single {@code '\n'}; every other child element is ignored.
+     *
+     * @return the paragraph text; no trailing newline is appended
+     */
+    public CharSequence getText() {
+        StringBuilder text = new StringBuilder();
+
+        XmlCursor c = p.newCursor();
+        try {
+            c.selectPath("./*");
+            while (c.toNextSelection()) {
+                XmlObject o = c.getObject();
+                if (o instanceof CTRegularTextRun) {
+                    text.append(((CTRegularTextRun) o).getT());
+                } else if (o instanceof CTTextLineBreak) {
+                    text.append('\n');
+                }
+            }
+        } finally {
+            // Release the cursor explicitly, even if the traversal throws.
+            c.dispose();
+        }
+
+        return text;
+    }
+}
Added: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTable.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTable.java?rev=897875&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTable.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTable.java Mon Jan 11 14:27:54 2010
@@ -0,0 +1,23 @@
+package org.apache.poi.xslf.usermodel;
+
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTable;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTableRow;
+
+/**
+ * Wrapper around a DrawingML {@link CTTable} giving access to its rows.
+ */
+public class DrawingTable {
+    private final CTTable table;
+
+    /**
+     * @param table the underlying XMLBeans table
+     */
+    public DrawingTable(CTTable table) {
+        this.table = table;
+    }
+
+    /**
+     * Wraps every row of the table, in the order returned by
+     * {@code getTrArray()}.
+     *
+     * @return a newly allocated array with one wrapper per table row
+     */
+    public DrawingTableRow[] getRows() {
+        CTTableRow[] xmlRows = table.getTrArray();
+        DrawingTableRow[] rows = new DrawingTableRow[xmlRows.length];
+
+        int next = 0;
+        for (CTTableRow xmlRow : xmlRows) {
+            rows[next++] = new DrawingTableRow(xmlRow);
+        }
+
+        return rows;
+    }
+}
Added: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableCell.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableCell.java?rev=897875&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableCell.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableCell.java Mon Jan 11 14:27:54 2010
@@ -0,0 +1,17 @@
+package org.apache.poi.xslf.usermodel;
+
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTableCell;
+
+/**
+ * Wrapper around a DrawingML {@link CTTableCell} giving access to the
+ * cell's text body.
+ */
+public class DrawingTableCell {
+    private final CTTableCell cell;
+    private final DrawingTextBody textBody;
+
+    /**
+     * Wraps the cell and, eagerly, whatever {@code getTxBody()} returns for it;
+     * the result of {@code getTxBody()} is passed on without any check.
+     *
+     * @param cell the underlying XMLBeans table cell
+     */
+    public DrawingTableCell(CTTableCell cell) {
+        this.cell = cell;
+        this.textBody = new DrawingTextBody(cell.getTxBody());
+    }
+
+    /**
+     * @return the text body wrapper created when this cell was constructed
+     */
+    public DrawingTextBody getTextBody() {
+        return textBody;
+    }
+}
Added: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableRow.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableRow.java?rev=897875&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableRow.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableRow.java Mon Jan 11 14:27:54 2010
@@ -0,0 +1,23 @@
+package org.apache.poi.xslf.usermodel;
+
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTableRow;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTableCell;
+
+/**
+ * Wrapper around a DrawingML {@link CTTableRow} giving access to its cells.
+ */
+public class DrawingTableRow {
+    private final CTTableRow row;
+
+    /**
+     * @param row the underlying XMLBeans table row
+     */
+    public DrawingTableRow(CTTableRow row) {
+        this.row = row;
+    }
+
+    /**
+     * Wraps every cell of the row, in the order returned by
+     * {@code getTcArray()}.
+     *
+     * @return a newly allocated array with one wrapper per table cell
+     */
+    public DrawingTableCell[] getCells() {
+        CTTableCell[] xmlCells = row.getTcArray();
+        DrawingTableCell[] cells = new DrawingTableCell[xmlCells.length];
+
+        int next = 0;
+        for (CTTableCell xmlCell : xmlCells) {
+            cells[next++] = new DrawingTableCell(xmlCell);
+        }
+
+        return cells;
+    }
+}
\ No newline at end of file
Added: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextBody.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextBody.java?rev=897875&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextBody.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextBody.java Mon Jan 11 14:27:54 2010
@@ -0,0 +1,23 @@
+package org.apache.poi.xslf.usermodel;
+
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
+
+/**
+ * Wrapper around a DrawingML {@link CTTextBody} giving access to its
+ * paragraphs.
+ */
+public class DrawingTextBody {
+    private final CTTextBody textBody;
+
+    /**
+     * @param textBody the underlying XMLBeans text body; may be {@code null},
+     *        in which case this wrapper reports no paragraphs
+     */
+    public DrawingTextBody(CTTextBody textBody) {
+        this.textBody = textBody;
+    }
+
+    /**
+     * Wraps every paragraph of the text body, in the order returned by
+     * {@code getPArray()}.
+     *
+     * @return a newly allocated array with one wrapper per paragraph; empty
+     *         (never {@code null}) when there is no underlying text body
+     */
+    public DrawingParagraph[] getParagraphs() {
+        // DrawingTableCell passes cell.getTxBody() through unchecked, so a
+        // null body can reach this point; report "no text" instead of
+        // failing with a NullPointerException.
+        if (textBody == null) {
+            return new DrawingParagraph[0];
+        }
+
+        CTTextParagraph[] pArray = textBody.getPArray();
+        DrawingParagraph[] o = new DrawingParagraph[pArray.length];
+
+        for (int i = 0; i < o.length; i++) {
+            o[i] = new DrawingParagraph(pArray[i]);
+        }
+
+        return o;
+    }
+}
Added: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFCommonSlideData.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFCommonSlideData.java?rev=897875&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFCommonSlideData.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFCommonSlideData.java Mon Jan 11 14:27:54 2010
@@ -0,0 +1,67 @@
+package org.apache.poi.xslf.usermodel;
+
+import org.apache.xmlbeans.XmlCursor;
+import org.apache.xmlbeans.XmlObject;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTGraphicalObjectData;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTable;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTCommonSlideData;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTGraphicalObjectFrame;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Wrapper around a PresentationML {@link CTCommonSlideData} that collects the
+ * text paragraphs found on the sheet.
+ */
+public class XSLFCommonSlideData {
+    private final CTCommonSlideData data;
+
+    /**
+     * @param data the underlying XMLBeans common slide data
+     */
+    public XSLFCommonSlideData(CTCommonSlideData data) {
+        this.data = data;
+    }
+
+    /**
+     * Gathers the paragraphs of the sheet: first those of every shape with a
+     * text body, then those of every table cell found inside a graphic frame.
+     * Only shapes and graphic frames that are direct children of the shape
+     * tree are examined.
+     *
+     * @return a newly created, modifiable list of paragraphs in the order
+     *         they were encountered
+     */
+    public List<DrawingParagraph> getText() {
+        CTGroupShape gs = data.getSpTree();
+
+        List<DrawingParagraph> out = new ArrayList<DrawingParagraph>();
+
+        for (CTShape shape : gs.getSpArray()) {
+            CTTextBody ctTextBody = shape.getTxBody();
+            if (ctTextBody == null) {
+                continue;
+            }
+
+            DrawingTextBody textBody = new DrawingTextBody(ctTextBody);
+
+            out.addAll(Arrays.asList(textBody.getParagraphs()));
+        }
+
+        for (CTGraphicalObjectFrame frame : gs.getGraphicFrameArray()) {
+            // Named graphicData so that it does not shadow the "data" field.
+            CTGraphicalObjectData graphicData = frame.getGraphic().getGraphicData();
+            XmlCursor c = graphicData.newCursor();
+            try {
+                c.selectPath("./*");
+
+                while (c.toNextSelection()) {
+                    XmlObject o = c.getObject();
+
+                    if (o instanceof CTTable) {
+                        addTableText((CTTable) o, out);
+                    }
+                }
+            } finally {
+                // Release the cursor explicitly, even if the traversal throws.
+                c.dispose();
+            }
+        }
+
+        return out;
+    }
+
+    /**
+     * Appends the paragraphs of every cell of the table to {@code out},
+     * row by row and cell by cell.
+     */
+    private static void addTableText(CTTable ctTable, List<DrawingParagraph> out) {
+        DrawingTable table = new DrawingTable(ctTable);
+
+        for (DrawingTableRow row : table.getRows()) {
+            for (DrawingTableCell cell : row.getCells()) {
+                DrawingTextBody textBody = cell.getTextBody();
+
+                out.addAll(Arrays.asList(textBody.getParagraphs()));
+            }
+        }
+    }
+
+}
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java?rev=897875&r1=897874&r2=897875&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java Mon Jan 11 14:27:54 2010
@@ -26,11 +26,13 @@
public class XSLFSlide extends XSLFSheet implements Slide {
private CTSlide slide;
private CTSlideIdListEntry slideId;
+ private XSLFCommonSlideData data;
public XSLFSlide(CTSlide slide, CTSlideIdListEntry slideId, SlideShow parent) {
super(parent);
this.slide = slide;
this.slideId = slideId;
+ // Wrap the slide's common slide data once here; getCommonSlideData() returns this instance
+ this.data = new XSLFCommonSlideData(slide.getCSld());
}
/**
@@ -88,4 +90,8 @@
// TODO Auto-generated method stub
}
+
+ /** Returns the wrapper around this slide's common slide data that was created in the constructor. */
+ public XSLFCommonSlideData getCommonSlideData() {
+ return data;
+ }
}
Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java?rev=897875&r1=897874&r2=897875&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java Mon Jan 11 14:27:54 2010
@@ -113,4 +113,17 @@
// Check comments are there
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
}
+
+ public void testTable() throws Exception {
+ POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
+ xmlA = new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("present1.pptx")));
+ XSLFPowerPointExtractor extractor =
+ new XSLFPowerPointExtractor(xmlA);
+
+ String text = extractor.getText();
+ assertTrue(text.length() > 0);
+
+ // Check the expected word was extracted (presumably it sits in a table in present1.pptx - confirm)
+ assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST"));
+ }
}
Added: poi/trunk/test-data/slideshow/present1.pptx
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/slideshow/present1.pptx?rev=897875&view=auto
==============================================================================
Binary file - no diff available.
Propchange: poi/trunk/test-data/slideshow/present1.pptx
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org