You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ki...@apache.org on 2016/10/08 18:08:26 UTC
svn commit: r1763927 - in /poi: site/src/documentation/content/xdocs/
trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/
trunk/src/scratchpad/src/org/apache/poi/hslf/model/
trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/
Author: kiwiwings
Date: Sat Oct 8 18:08:25 2016
New Revision: 1763927
URL: http://svn.apache.org/viewvc?rev=1763927&view=rev
Log:
Bug 60003 - Regression: HSLF Powerpoint text extractor from footer of master slide
Modified:
poi/site/src/documentation/content/xdocs/status.xml
poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
poi/trunk/src/scratchpad/src/org/apache/poi/hslf/model/HSLFMetroShape.java
poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
Modified: poi/site/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/site/src/documentation/content/xdocs/status.xml?rev=1763927&r1=1763926&r2=1763927&view=diff
==============================================================================
--- poi/site/src/documentation/content/xdocs/status.xml (original)
+++ poi/site/src/documentation/content/xdocs/status.xml Sat Oct 8 18:08:25 2016
@@ -40,6 +40,7 @@
</devs>
<release version="3.16-beta1" date="2016-11-??">
+ <action dev="PD" type="add" fixes-bug="60003">Regression: HSLF Powerpoint text extractor from footer of master slide</action>
<action dev="PD" type="add" fixes-bug="60226">ClassLoader workaround for OSGI when processing OOXML files</action>
<action dev="PD" type="add" fixes-bug="60187">SS Common: support BorderStyle enums in RegionUtil</action>
<action dev="PD" type="add" fixes-bug="59857">Password protected files with "Microsoft Enhanced Cryptographic Provider v1.0"</action>
Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java?rev=1763927&r1=1763926&r2=1763927&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java Sat Oct 8 18:08:25 2016
@@ -17,21 +17,43 @@
package org.apache.poi.hslf.extractor;
-import java.io.*;
-import java.util.*;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
import org.apache.poi.POIOLE2TextExtractor;
-import org.apache.poi.hslf.model.*;
-import org.apache.poi.hslf.usermodel.*;
-import org.apache.poi.poifs.filesystem.*;
+import org.apache.poi.hslf.model.Comment;
+import org.apache.poi.hslf.model.HSLFMetroShape;
+import org.apache.poi.hslf.model.HeadersFooters;
+import org.apache.poi.hslf.model.OLEShape;
+import org.apache.poi.hslf.usermodel.HSLFMasterSheet;
+import org.apache.poi.hslf.usermodel.HSLFNotes;
+import org.apache.poi.hslf.usermodel.HSLFShape;
+import org.apache.poi.hslf.usermodel.HSLFSlide;
+import org.apache.poi.hslf.usermodel.HSLFSlideMaster;
+import org.apache.poi.hslf.usermodel.HSLFSlideShow;
+import org.apache.poi.hslf.usermodel.HSLFSlideShowImpl;
+import org.apache.poi.hslf.usermodel.HSLFTable;
+import org.apache.poi.hslf.usermodel.HSLFTableCell;
+import org.apache.poi.hslf.usermodel.HSLFTextParagraph;
+import org.apache.poi.hslf.usermodel.HSLFTextShape;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
/**
* This class can be used to extract text from a PowerPoint file. Can optionally
* also get the notes from one.
- *
- * @author Nick Burch
*/
public final class PowerPointExtractor extends POIOLE2TextExtractor {
+ private static final POILogger LOG = POILogFactory.getLogger(PowerPointExtractor.class);
+
private final HSLFSlideShowImpl _hslfshow;
private final HSLFSlideShow _show;
private final List<HSLFSlide> _slides;
@@ -207,20 +229,27 @@ public final class PowerPointExtractor e
for (HSLFSlideMaster master : _show.getSlideMasters()) {
for(HSLFShape sh : master.getShapes()){
if(sh instanceof HSLFTextShape){
- if(HSLFMasterSheet.isPlaceholder(sh)) {
- // don't bother about boiler
- // plate text on master
- // sheets
+ HSLFTextShape hsh = (HSLFTextShape)sh;
+ final String text = hsh.getText();
+ if (text == null || "".equals(text) || "*".equals(text)) {
continue;
}
- HSLFTextShape tsh = (HSLFTextShape)sh;
- String text = tsh.getText();
- if (text != null){
- ret.append(text);
- if (!text.endsWith("\n")) {
- ret.append("\n");
+
+ if (HSLFMasterSheet.isPlaceholder(sh)) {
+ // check for metro shape of complex placeholder
+ boolean isMetro = new HSLFMetroShape<HSLFShape>(sh).hasMetroBlob();
+
+ if (!isMetro) {
+ // don't bother about boiler plate text on master sheets
+ LOG.log(POILogger.INFO, "Ignoring boiler plate (placeholder) text on slide master:", text);
+ continue;
}
}
+
+ ret.append(text);
+ if (!text.endsWith("\n")) {
+ ret.append("\n");
+ }
}
}
}
Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/model/HSLFMetroShape.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/model/HSLFMetroShape.java?rev=1763927&r1=1763926&r2=1763927&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/model/HSLFMetroShape.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/model/HSLFMetroShape.java Sat Oct 8 18:08:25 2016
@@ -47,14 +47,20 @@ public class HSLFMetroShape<T extends Sh
* @return the bytes of the metro blob, which are bytes of an OPCPackage, i.e. a zip stream
*/
public byte[] getMetroBytes() {
+ EscherComplexProperty ep = getMetroProp();
+ return (ep == null) ? null : ep.getComplexData();
+ }
+
+ /**
+ * @return {@code true} if there's a metro blob to extract, {@code false} otherwise
+ */
+ public boolean hasMetroBlob() {
+ return getMetroProp() != null;
+ }
+
+ private EscherComplexProperty getMetroProp() {
AbstractEscherOptRecord opt = shape.getEscherChild(EscherTertiaryOptRecord.RECORD_ID);
- if (opt != null) {
- EscherComplexProperty ep = (EscherComplexProperty)opt.lookup(EscherProperties.GROUPSHAPE__METROBLOB);
- if (ep != null) {
- return ep.getComplexData();
- }
- }
- return null;
+ return (opt == null) ? null : (EscherComplexProperty)opt.lookup(EscherProperties.GROUPSHAPE__METROBLOB);
}
/**
Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java?rev=1763927&r1=1763926&r2=1763927&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java (original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java Sat Oct 8 18:08:25 2016
@@ -431,5 +431,19 @@ public final class TestExtractor {
String target = "this\tText\tis\twithin\ta\n"+
"table\t1\t2\t3\t4";
assertTrue(text.contains(target));
- }
+ }
+
+ // bug 60003
+ @Test
+ public void testExtractMasterSlideFooterText() throws Exception {
+ HSLFSlideShowImpl hslf = new HSLFSlideShowImpl(slTests.openResourceAsStream("60003.ppt"));
+ ppe.close();
+
+ ppe = new PowerPointExtractor(hslf);
+ ppe.setMasterByDefault(true);
+
+ String text = ppe.getText();
+ assertContains(text, "Prague");
+ hslf.close();
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org