You are viewing a plain text version of this content. The canonical link for it was a hyperlink that is not preserved in this text copy.
Posted to commits@poi.apache.org by ye...@apache.org on 2009/11/12 08:07:41 UTC

svn commit: r835271 - in /poi/trunk: src/documentation/content/xdocs/ src/scratchpad/src/org/apache/poi/hslf/extractor/ src/scratchpad/testcases/org/apache/poi/hslf/extractor/ test-data/slideshow/

Author: yegor
Date: Thu Nov 12 07:07:41 2009
New Revision: 835271

URL: http://svn.apache.org/viewvc?rev=835271&view=rev
Log:
support for text extraction from PPT master slides, see Bugzilla 48161

Added:
    poi/trunk/test-data/slideshow/master_text.ppt   (with props)
Modified:
    poi/trunk/src/documentation/content/xdocs/status.xml
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java

Modified: poi/trunk/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/trunk/src/documentation/content/xdocs/status.xml?rev=835271&r1=835270&r2=835271&view=diff
==============================================================================
--- poi/trunk/src/documentation/content/xdocs/status.xml (original)
+++ poi/trunk/src/documentation/content/xdocs/status.xml Thu Nov 12 07:07:41 2009
@@ -34,6 +34,7 @@
 
     <changes>
         <release version="3.6-beta1" date="2009-??-??">
+           <action dev="POI-DEVELOPERS" type="add">48161 - support for text extraction from PPT master slides</action>
            <action dev="POI-DEVELOPERS" type="add">47970 - added a method to set arabic mode in HSSFSheet</action>
            <action dev="POI-DEVELOPERS" type="fix">48134 - release system resources when using Picture.resize()</action>
            <action dev="POI-DEVELOPERS" type="fix">48087 - avoid NPE in XSSFChartSheet  when calling methods of the superclass</action>

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java?rev=835271&r1=835270&r2=835271&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java Thu Nov 12 07:07:41 2009
@@ -45,6 +45,7 @@
 	private boolean _slidesByDefault = true;
 	private boolean _notesByDefault = false;
 	private boolean _commentsByDefault = false;
+    private boolean _masterByDefault = false;
 
 	/**
 	 * Basic extractor. Returns all the text, and optionally all the notes
@@ -58,6 +59,8 @@
 
 		boolean notes = false;
 		boolean comments = false;
+        boolean master = true;
+        
 		String file;
 		if (args.length > 1) {
 			notes = true;
@@ -70,7 +73,7 @@
 		}
 
 		PowerPointExtractor ppe = new PowerPointExtractor(file);
-		System.out.println(ppe.getText(true, notes, comments));
+		System.out.println(ppe.getText(true, notes, comments, master));
 	}
 
 	/**
@@ -137,12 +140,19 @@
 		this._commentsByDefault = commentsByDefault;
 	}
 
+    /**
+     * Should a call to getText() return text from master? Default is no
+     */
+    public void setMasterByDefault(boolean masterByDefault) {
+        this._masterByDefault = masterByDefault;
+    }
+
 	/**
 	 * Fetches all the slide text from the slideshow, but not the notes, unless
 	 * you've called setSlidesByDefault() and setNotesByDefault() to change this
 	 */
 	public String getText() {
-		return getText(_slidesByDefault, _notesByDefault, _commentsByDefault);
+		return getText(_slidesByDefault, _notesByDefault, _commentsByDefault, _masterByDefault);
 	}
 
 	/**
@@ -178,14 +188,20 @@
 	 * @param getNoteText fetch note text
 	 */
 	public String getText(boolean getSlideText, boolean getNoteText) {
-		return getText(getSlideText, getNoteText, _commentsByDefault);
+		return getText(getSlideText, getNoteText, _commentsByDefault, _masterByDefault);
 	}
 
-	public String getText(boolean getSlideText, boolean getNoteText, boolean getCommentText) {
+	public String getText(boolean getSlideText, boolean getNoteText, boolean getCommentText, boolean getMasterText) {
 		StringBuffer ret = new StringBuffer();
 
 		if (getSlideText) {
-			for (int i = 0; i < _slides.length; i++) {
+            if (getMasterText) {
+                for (SlideMaster master : _show.getSlidesMasters()) {
+                    textRunsToText(ret, master.getTextRuns());
+                }
+            }
+
+            for (int i = 0; i < _slides.length; i++) {
 				Slide slide = _slides[i];
 
 				// Slide header, if set
@@ -195,19 +211,9 @@
 				}
 
 				// Slide text
-				TextRun[] runs = slide.getTextRuns();
-				for (int j = 0; j < runs.length; j++) {
-					TextRun run = runs[j];
-					if (run != null) {
-						String text = run.getText();
-						ret.append(text);
-						if (!text.endsWith("\n")) {
-							ret.append("\n");
-						}
-					}
-				}
+                textRunsToText(ret, slide.getTextRuns());
 
-				// Slide footer, if set
+                // Slide footer, if set
 				if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
 					ret.append(hf.getFooterText() + "\n");
 				}
@@ -249,17 +255,7 @@
 				}
 
 				// Notes text
-				TextRun[] runs = notes.getTextRuns();
-				if (runs != null && runs.length > 0) {
-					for (int j = 0; j < runs.length; j++) {
-						TextRun run = runs[j];
-						String text = run.getText();
-						ret.append(text);
-						if (!text.endsWith("\n")) {
-							ret.append("\n");
-						}
-					}
-				}
+                textRunsToText(ret, notes.getTextRuns());
 
 				// Repeat the notes footer, if set
 				if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
@@ -270,4 +266,21 @@
 
 		return ret.toString();
 	}
+
+    private void textRunsToText(StringBuffer ret, TextRun[] runs) {
+        if (runs==null) {
+            return;
+        }
+
+        for (int j = 0; j < runs.length; j++) {
+            TextRun run = runs[j];
+            if (run != null) {
+                String text = run.getText();
+                ret.append(text);
+                if (!text.endsWith("\n")) {
+                    ret.append("\n");
+                }
+            }
+        }
+    }
 }

Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java?rev=835271&r1=835270&r2=835271&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java (original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java Thu Nov 12 07:07:41 2009
@@ -48,7 +48,6 @@
     //private String pdirname;
 
     protected void setUp() throws Exception {
-
 		ppe = new PowerPointExtractor(slTests.openResourceAsStream("basic_test_ppt_file.ppt"));
 		ppe2 = new PowerPointExtractor(slTests.openResourceAsStream("with_textbox.ppt"));
     }
@@ -63,7 +62,7 @@
 		
 		// 1 page example with text boxes
 		sheetText = ppe2.getText();
-		expectText = "Hello, World!!!\nI am just a poor boy\nThis is Times New Roman\nPlain Text \n"; 
+		expectText = "Hello, World!!!\nI am just a poor boy\nThis is Times New Roman\nPlain Text \n";
 
 		ensureTwoStringsTheSame(expectText, sheetText);
     }
@@ -112,7 +111,7 @@
 	 */
 	public void testMissingCoreRecords() throws Exception {
 		ppe = new PowerPointExtractor(slTests.openResourceAsStream("missing_core_records.ppt"));
-		
+
 		String text = ppe.getText(true, false);
 		String nText = ppe.getNotes();
 
@@ -265,4 +264,13 @@
 	private static boolean contains(String text, String searchString) {
 		return text.indexOf(searchString) >=0;
 	}
+
+    public void testMasterText() throws Exception {
+		ppe = new PowerPointExtractor(slTests.openResourceAsStream("master_text.ppt"));
+        ppe.setMasterByDefault(true);
+
+		String text = ppe.getText();
+		assertTrue(text.contains("Master Header Text"));
+    }
+
 }

Added: poi/trunk/test-data/slideshow/master_text.ppt
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/slideshow/master_text.ppt?rev=835271&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/test-data/slideshow/master_text.ppt
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org