You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ye...@apache.org on 2009/07/05 15:15:41 UTC

svn commit: r791241 - in /poi/trunk/src: documentation/content/xdocs/status.xml scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java

Author: yegor
Date: Sun Jul  5 13:15:41 2009
New Revision: 791241

URL: http://svn.apache.org/viewvc?rev=791241&view=rev
Log:
Support for getting OLE object data in PowerPointExtractor, see Bugzilla 47456

Modified:
    poi/trunk/src/documentation/content/xdocs/status.xml
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java

Modified: poi/trunk/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/trunk/src/documentation/content/xdocs/status.xml?rev=791241&r1=791240&r2=791241&view=diff
==============================================================================
--- poi/trunk/src/documentation/content/xdocs/status.xml (original)
+++ poi/trunk/src/documentation/content/xdocs/status.xml Sun Jul  5 13:15:41 2009
@@ -33,6 +33,7 @@
 
     <changes>
         <release version="3.5-beta7" date="2009-??-??">
+           <action dev="POI-DEVELOPERS" type="add">47456 - Support for getting OLE object data in PowerPointExtractor</action>
            <action dev="POI-DEVELOPERS" type="fix">47411 - Explicitly set the 1900 date system when creating XSSF workbooks</action>
            <action dev="POI-DEVELOPERS" type="add">47400 - Support fo text extraction of footnotes, endnotes and comments in HWPF</action>
            <action dev="POI-DEVELOPERS" type="fix">47415 - Fixed PageSettingsBlock to allow multiple PLS records</action>

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java?rev=791241&r1=791240&r2=791241&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java Sun Jul  5 13:15:41 2009
@@ -21,14 +21,12 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.HashSet;
+import java.util.List;
+import java.util.ArrayList;
 
 import org.apache.poi.POIOLE2TextExtractor;
 import org.apache.poi.hslf.HSLFSlideShow;
-import org.apache.poi.hslf.model.Comment;
-import org.apache.poi.hslf.model.HeadersFooters;
-import org.apache.poi.hslf.model.Notes;
-import org.apache.poi.hslf.model.Slide;
-import org.apache.poi.hslf.model.TextRun;
+import org.apache.poi.hslf.model.*;
 import org.apache.poi.hslf.usermodel.SlideShow;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@@ -151,7 +149,24 @@
 		return getText(false,true);
 	}
 
-  /**
+    /**
+     * Collects the OLE shapes of every slide, following the order of _slides.
+     *
+     * @return the shapes that are instances of OLEShape; empty when there are none
+     */
+    public List<OLEShape> getOLEShapes() {
+        List<OLEShape> oleShapes = new ArrayList<OLEShape>();
+        for (Slide slide : _slides) {
+            for (Shape shape : slide.getShapes()) {
+                if (shape instanceof OLEShape) {
+                    oleShapes.add((OLEShape) shape);
+                }
+            }
+        }
+        return oleShapes;
+    }
+
+    /**
    * Fetches text from the slideshow, be it slide text or note text.
    * Because the final block of text in a TextRun normally have their
    *  last \n stripped, we add it back

Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java?rev=791241&r1=791240&r2=791241&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java (original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java Sun Jul  5 13:15:41 2009
@@ -18,11 +18,16 @@
 package org.apache.poi.hslf.extractor;
 
 import java.io.FileInputStream;
+import java.io.InputStream;
+import java.util.List;
 
 import org.apache.poi.hslf.HSLFSlideShow;
+import org.apache.poi.hslf.model.OLEShape;
 import org.apache.poi.hslf.usermodel.SlideShow;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.hwpf.HWPFDocument;
 
 import junit.framework.TestCase;
 
@@ -167,51 +172,30 @@
 
     /**
      * A powerpoint file with embeded powerpoint files
-     * TODO - figure out how to handle this, as ppt
-     *  appears to embed not as ole2 streams
      */
-    public void DISABLEDtestExtractFromOwnEmbeded() throws Exception {
-    	String filename3 = pdirname + "/ppt_with_embeded.ppt";
-    	POIFSFileSystem fs = new POIFSFileSystem(
-    			new FileInputStream(filename3)
-    	);
-    	HSLFSlideShow ss;
-    	
-    	DirectoryNode dirA = (DirectoryNode)
-    		fs.getRoot().getEntry("MBD0000A3B6");
-		DirectoryNode dirB = (DirectoryNode)
-			fs.getRoot().getEntry("MBD0000A3B3");
-		
-		assertNotNull(dirA.getEntry("PowerPoint Document"));
-		assertNotNull(dirB.getEntry("PowerPoint Document"));
-    	
-		// Check the first file
-    	ss = new HSLFSlideShow(dirA, fs);
-		ppe = new PowerPointExtractor(ss);
-		assertEquals("Sample PowerPoint file\nThis is the 1st file\nNot much too it\n",
-				ppe.getText(true, false)
-		);
-
-		// And the second
-    	ss = new HSLFSlideShow(dirB, fs);
-		ppe = new PowerPointExtractor(ss);
-		assertEquals("Sample PowerPoint file\nThis is the 2nd file\nNot much too it either\n",
-				ppe.getText(true, false)
-		);
-		
-		
-		// Check the master doc two ways
-    	ss = new HSLFSlideShow(fs.getRoot(), fs);
-		ppe = new PowerPointExtractor(ss);
-		assertEquals("I have embeded files in me\n",
-				ppe.getText(true, false)
-		);
-		
-    	ss = new HSLFSlideShow(fs);
-		ppe = new PowerPointExtractor(ss);
-		assertEquals("I have embeded files in me\n",
-				ppe.getText(true, false)
-		);
+    public void testExtractFromOwnEmbeded() throws Exception {
+        String path = pdirname + "/ppt_with_embeded.ppt";
+        ppe = new PowerPointExtractor(path);
+        List<OLEShape> shapes = ppe.getOLEShapes();
+        assertEquals("Expected 6 ole shapes in " + path, 6, shapes.size());
+        int numPpt = 0, numDoc = 0, numXls = 0;
+        for (OLEShape ole : shapes) {
+            String name = ole.getInstanceName();
+            InputStream data = ole.getObjectData().getData();
+            try {
+                // Each constructor is invoked only to check that the embedded
+                // data can be opened as that document type; a failure surfaces
+                // as an exception, so the resulting object is not kept.
+                if ("Worksheet".equals(name)) {
+                    new HSSFWorkbook(data);
+                    numXls++;
+                } else if ("Document".equals(name)) {
+                    new HWPFDocument(data);
+                    numDoc++;
+                } else if ("Presentation".equals(name)) {
+                    numPpt++;
+                    new SlideShow(data);
+                }
+            } finally {
+                // Release the embedded object's stream even when parsing fails.
+                if (data != null) {
+                    data.close();
+                }
+            }
+        }
+        assertEquals("Expected 2 embedded Word Documents", 2, numDoc);
+        assertEquals("Expected 2 embedded Excel Spreadsheets", 2, numXls);
+        assertEquals("Expected 2 embedded PowerPoint Presentations", 2, numPpt);
     }
     
     /**



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org