You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ki...@apache.org on 2016/10/08 18:08:26 UTC

svn commit: r1763927 - in /poi: site/src/documentation/content/xdocs/ trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/ trunk/src/scratchpad/src/org/apache/poi/hslf/model/ trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/

Author: kiwiwings
Date: Sat Oct  8 18:08:25 2016
New Revision: 1763927

URL: http://svn.apache.org/viewvc?rev=1763927&view=rev
Log:
Bug 60003 - Regression: HSLF Powerpoint text extractor from footer of master slide

Modified:
    poi/site/src/documentation/content/xdocs/status.xml
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/model/HSLFMetroShape.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java

Modified: poi/site/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/site/src/documentation/content/xdocs/status.xml?rev=1763927&r1=1763926&r2=1763927&view=diff
==============================================================================
--- poi/site/src/documentation/content/xdocs/status.xml (original)
+++ poi/site/src/documentation/content/xdocs/status.xml Sat Oct  8 18:08:25 2016
@@ -40,6 +40,7 @@
     </devs>
 
     <release version="3.16-beta1" date="2016-11-??">
+        <action dev="PD" type="add" fixes-bug="60003">Regression: HSLF Powerpoint text extractor from footer of master slide</action>
         <action dev="PD" type="add" fixes-bug="60226">ClassLoader workaround for OSGI when processing OOXML files</action>
         <action dev="PD" type="add" fixes-bug="60187">SS Common: support BorderStyle enums in RegionUtil</action>
         <action dev="PD" type="add" fixes-bug="59857">Password protected files with "Microsoft Enhanced Cryptographic Provider v1.0"</action>

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java?rev=1763927&r1=1763926&r2=1763927&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java Sat Oct  8 18:08:25 2016
@@ -17,21 +17,43 @@
 
 package org.apache.poi.hslf.extractor;
 
-import java.io.*;
-import java.util.*;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
 
 import org.apache.poi.POIOLE2TextExtractor;
-import org.apache.poi.hslf.model.*;
-import org.apache.poi.hslf.usermodel.*;
-import org.apache.poi.poifs.filesystem.*;
+import org.apache.poi.hslf.model.Comment;
+import org.apache.poi.hslf.model.HSLFMetroShape;
+import org.apache.poi.hslf.model.HeadersFooters;
+import org.apache.poi.hslf.model.OLEShape;
+import org.apache.poi.hslf.usermodel.HSLFMasterSheet;
+import org.apache.poi.hslf.usermodel.HSLFNotes;
+import org.apache.poi.hslf.usermodel.HSLFShape;
+import org.apache.poi.hslf.usermodel.HSLFSlide;
+import org.apache.poi.hslf.usermodel.HSLFSlideMaster;
+import org.apache.poi.hslf.usermodel.HSLFSlideShow;
+import org.apache.poi.hslf.usermodel.HSLFSlideShowImpl;
+import org.apache.poi.hslf.usermodel.HSLFTable;
+import org.apache.poi.hslf.usermodel.HSLFTableCell;
+import org.apache.poi.hslf.usermodel.HSLFTextParagraph;
+import org.apache.poi.hslf.usermodel.HSLFTextShape;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
 
 /**
  * This class can be used to extract text from a PowerPoint file. Can optionally
  * also get the notes from one.
- *
- * @author Nick Burch
  */
 public final class PowerPointExtractor extends POIOLE2TextExtractor {
+   private static final POILogger LOG = POILogFactory.getLogger(PowerPointExtractor.class);
+    
    private final HSLFSlideShowImpl _hslfshow;
    private final HSLFSlideShow _show;
    private final List<HSLFSlide> _slides;
@@ -207,20 +229,27 @@ public final class PowerPointExtractor e
                 for (HSLFSlideMaster master : _show.getSlideMasters()) {
                     for(HSLFShape sh : master.getShapes()){
                         if(sh instanceof HSLFTextShape){
-                            if(HSLFMasterSheet.isPlaceholder(sh)) {
-                                // don't bother about boiler
-                                // plate text on master
-                                // sheets
+                            HSLFTextShape hsh = (HSLFTextShape)sh;
+                            final String text = hsh.getText();
+                            if (text == null || "".equals(text) || "*".equals(text)) {
                                 continue;
                             }
-                            HSLFTextShape tsh = (HSLFTextShape)sh;
-                            String text = tsh.getText();
-                            if (text != null){
-                                ret.append(text);
-                                if (!text.endsWith("\n")) {
-                                    ret.append("\n");
+                            
+                            if (HSLFMasterSheet.isPlaceholder(sh)) {
+                                // check for metro shape of complex placeholder
+                                boolean isMetro = new HSLFMetroShape<HSLFShape>(sh).hasMetroBlob();
+                                
+                                if (!isMetro) {
+                                    // don't bother about boiler plate text on master sheets
+                                    LOG.log(POILogger.INFO, "Ignoring boiler plate (placeholder) text on slide master:", text);
+                                    continue;
                                 }
                             }
+                            
+                            ret.append(text);
+                            if (!text.endsWith("\n")) {
+                                ret.append("\n");
+                            }
                         }
                     }
                 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/model/HSLFMetroShape.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/model/HSLFMetroShape.java?rev=1763927&r1=1763926&r2=1763927&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/model/HSLFMetroShape.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/model/HSLFMetroShape.java Sat Oct  8 18:08:25 2016
@@ -47,14 +47,20 @@ public class HSLFMetroShape<T extends Sh
      * @return the bytes of the metro blob, which are bytes of an OPCPackage, i.e. a zip stream 
      */
     public byte[] getMetroBytes() {
+        EscherComplexProperty ep = getMetroProp();
+        return (ep == null) ? null : ep.getComplexData();
+    }
+
+    /**
+     * @return {@code true} if there's a metro blob to extract
+     */
+    public boolean hasMetroBlob() {
+        return getMetroProp() != null;
+    }
+    
+    private EscherComplexProperty getMetroProp() {
         AbstractEscherOptRecord opt = shape.getEscherChild(EscherTertiaryOptRecord.RECORD_ID);
-        if (opt != null) {
-            EscherComplexProperty ep = (EscherComplexProperty)opt.lookup(EscherProperties.GROUPSHAPE__METROBLOB);
-            if (ep != null) {
-                return ep.getComplexData();
-            }
-        }
-        return null;
+        return (opt == null) ? null : (EscherComplexProperty)opt.lookup(EscherProperties.GROUPSHAPE__METROBLOB);
     }
     
     /**

Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java?rev=1763927&r1=1763926&r2=1763927&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java (original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java Sat Oct  8 18:08:25 2016
@@ -431,5 +431,19 @@ public final class TestExtractor {
         String target = "this\tText\tis\twithin\ta\n"+
                 "table\t1\t2\t3\t4";
         assertTrue(text.contains(target));
-    }    
+    }
+
+    // bug 60003
+    @Test
+    public void testExtractMasterSlideFooterText() throws Exception {
+        HSLFSlideShowImpl hslf = new HSLFSlideShowImpl(slTests.openResourceAsStream("60003.ppt"));
+        ppe.close();
+
+        ppe = new PowerPointExtractor(hslf);
+        ppe.setMasterByDefault(true);
+      
+        String text = ppe.getText();
+        assertContains(text, "Prague");
+        hslf.close();
+    }
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org