You are viewing a plain text version of this content. The hyperlink to the canonical version was not preserved in this plain text copy.
Posted to commits@poi.apache.org by ye...@apache.org on 2009/07/05 15:15:41 UTC
svn commit: r791241 - in /poi/trunk/src:
documentation/content/xdocs/status.xml
scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
Author: yegor
Date: Sun Jul 5 13:15:41 2009
New Revision: 791241
URL: http://svn.apache.org/viewvc?rev=791241&view=rev
Log:
Support for getting OLE object data in PowerPointExtractor, see Bugzilla 47456
Modified:
poi/trunk/src/documentation/content/xdocs/status.xml
poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
Modified: poi/trunk/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/trunk/src/documentation/content/xdocs/status.xml?rev=791241&r1=791240&r2=791241&view=diff
==============================================================================
--- poi/trunk/src/documentation/content/xdocs/status.xml (original)
+++ poi/trunk/src/documentation/content/xdocs/status.xml Sun Jul 5 13:15:41 2009
@@ -33,6 +33,7 @@
<changes>
<release version="3.5-beta7" date="2009-??-??">
+ <action dev="POI-DEVELOPERS" type="add">47456 - Support for getting OLE object data in PowerPointExtractor</action>
<action dev="POI-DEVELOPERS" type="fix">47411 - Explicitly set the 1900 date system when creating XSSF workbooks</action>
<action dev="POI-DEVELOPERS" type="add">47400 - Support fo text extraction of footnotes, endnotes and comments in HWPF</action>
<action dev="POI-DEVELOPERS" type="fix">47415 - Fixed PageSettingsBlock to allow multiple PLS records</action>
Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java?rev=791241&r1=791240&r2=791241&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java Sun Jul 5 13:15:41 2009
@@ -21,14 +21,12 @@
import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
+import java.util.List;
+import java.util.ArrayList;
import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.hslf.HSLFSlideShow;
-import org.apache.poi.hslf.model.Comment;
-import org.apache.poi.hslf.model.HeadersFooters;
-import org.apache.poi.hslf.model.Notes;
-import org.apache.poi.hslf.model.Slide;
-import org.apache.poi.hslf.model.TextRun;
+import org.apache.poi.hslf.model.*;
import org.apache.poi.hslf.usermodel.SlideShow;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@@ -151,7 +149,24 @@
return getText(false,true);
}
- /**
+ public List<OLEShape> getOLEShapes() {
+ List<OLEShape> list = new ArrayList<OLEShape>();
+
+ for (int i = 0; i < _slides.length; i++) {
+ Slide slide = _slides[i];
+
+ Shape[] shapes = slide.getShapes();
+ for (int j = 0; j < shapes.length; j++) {
+ if (shapes[j] instanceof OLEShape) {
+ list.add((OLEShape) shapes[j]);
+ }
+ }
+ }
+
+ return list;
+ }
+
+ /**
* Fetches text from the slideshow, be it slide text or note text.
* Because the final block of text in a TextRun normally have their
* last \n stripped, we add it back
Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java?rev=791241&r1=791240&r2=791241&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java (original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java Sun Jul 5 13:15:41 2009
@@ -18,11 +18,16 @@
package org.apache.poi.hslf.extractor;
import java.io.FileInputStream;
+import java.io.InputStream;
+import java.util.List;
import org.apache.poi.hslf.HSLFSlideShow;
+import org.apache.poi.hslf.model.OLEShape;
import org.apache.poi.hslf.usermodel.SlideShow;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.hwpf.HWPFDocument;
import junit.framework.TestCase;
@@ -167,51 +172,30 @@
/**
* A powerpoint file with embeded powerpoint files
- * TODO - figure out how to handle this, as ppt
- * appears to embed not as ole2 streams
*/
- public void DISABLEDtestExtractFromOwnEmbeded() throws Exception {
- String filename3 = pdirname + "/ppt_with_embeded.ppt";
- POIFSFileSystem fs = new POIFSFileSystem(
- new FileInputStream(filename3)
- );
- HSLFSlideShow ss;
-
- DirectoryNode dirA = (DirectoryNode)
- fs.getRoot().getEntry("MBD0000A3B6");
- DirectoryNode dirB = (DirectoryNode)
- fs.getRoot().getEntry("MBD0000A3B3");
-
- assertNotNull(dirA.getEntry("PowerPoint Document"));
- assertNotNull(dirB.getEntry("PowerPoint Document"));
-
- // Check the first file
- ss = new HSLFSlideShow(dirA, fs);
- ppe = new PowerPointExtractor(ss);
- assertEquals("Sample PowerPoint file\nThis is the 1st file\nNot much too it\n",
- ppe.getText(true, false)
- );
-
- // And the second
- ss = new HSLFSlideShow(dirB, fs);
- ppe = new PowerPointExtractor(ss);
- assertEquals("Sample PowerPoint file\nThis is the 2nd file\nNot much too it either\n",
- ppe.getText(true, false)
- );
-
-
- // Check the master doc two ways
- ss = new HSLFSlideShow(fs.getRoot(), fs);
- ppe = new PowerPointExtractor(ss);
- assertEquals("I have embeded files in me\n",
- ppe.getText(true, false)
- );
-
- ss = new HSLFSlideShow(fs);
- ppe = new PowerPointExtractor(ss);
- assertEquals("I have embeded files in me\n",
- ppe.getText(true, false)
- );
+ public void testExtractFromOwnEmbeded() throws Exception {
+ String path = pdirname + "/ppt_with_embeded.ppt";
+ ppe = new PowerPointExtractor(path);
+ List<OLEShape> shapes = ppe.getOLEShapes();
+ assertEquals("Expected 6 ole shapes in " + path, 6, shapes.size());
+ int num_ppt = 0, num_doc = 0, num_xls = 0;
+ for(OLEShape ole : shapes) {
+ String name = ole.getInstanceName();
+ InputStream data = ole.getObjectData().getData();
+ if ("Worksheet".equals(name)) {
+ HSSFWorkbook wb = new HSSFWorkbook(data);
+ num_xls++;
+ } else if ("Document".equals(name)) {
+ HWPFDocument doc = new HWPFDocument(data);
+ num_doc++;
+ } else if ("Presentation".equals(name)) {
+ num_ppt++;
+ SlideShow ppt = new SlideShow(data);
+ }
+ }
+ assertEquals("Expected 2 embedded Word Documents", 2, num_doc);
+ assertEquals("Expected 2 embedded Excel Spreadsheets", 2, num_xls);
+ assertEquals("Expected 2 embedded PowerPoint Presentations", 2, num_ppt);
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org