You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ki...@apache.org on 2015/06/25 01:34:04 UTC

svn commit: r1687398 - in /poi: site/src/documentation/content/xdocs/ trunk/src/scratchpad/src/org/apache/poi/hslf/ trunk/src/scratchpad/src/org/apache/poi/hslf/blip/ trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/ trunk/src/scratchpad/testcase...

Author: kiwiwings
Date: Wed Jun 24 23:34:03 2015
New Revision: 1687398

URL: http://svn.apache.org/r1687398
Log:
Bug 54332 - WMF extraction failing in Tika for older PowerPoint Files

Added:
    poi/trunk/test-data/slideshow/54332a.ppt   (with props)
    poi/trunk/test-data/slideshow/54332b.ppt   (with props)
Modified:
    poi/site/src/documentation/content/xdocs/status.xml
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/Bitmap.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/DIB.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/EMF.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/JPEG.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/Metafile.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/PICT.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/PNG.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/WMF.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/PictureData.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/model/TestPicture.java

Modified: poi/site/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/site/src/documentation/content/xdocs/status.xml?rev=1687398&r1=1687397&r2=1687398&view=diff
==============================================================================
--- poi/site/src/documentation/content/xdocs/status.xml (original)
+++ poi/site/src/documentation/content/xdocs/status.xml Wed Jun 24 23:34:03 2015
@@ -39,6 +39,7 @@
     </devs>
 
     <release version="3.13-beta1" date="2015-??-??">
+        <action dev="PD" type="fix" fixes-bug="54332">WMF extraction failing in Tika for older PowerPoint Files</action>
         <action dev="PD" type="add" fixes-bug="56865">Limit number of bytes (by counting them) while opening office docs</action>
         <action dev="PD" type="add" fixes-bug="50090">zip bomb prevention</action>
         <action dev="PD" type="fix" fixes-bug="58069">Biff8RC4 xorShort returns wrong value for unsigned shorts</action>

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java?rev=1687398&r1=1687397&r2=1687398&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java Wed Jun 24 23:34:03 2015
@@ -401,6 +401,7 @@ public final class HSLFSlideShow extends
 				// Build the PictureData object from the data
 				try {
 					PictureData pict = PictureData.create(type - 0xF018);
+					pict.setSignature(signature);
 
                     // Copy the data, ready to pass to PictureData
                     byte[] imgdata = new byte[imgsize];

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/Bitmap.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/Bitmap.java?rev=1687398&r1=1687397&r2=1687398&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/Bitmap.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/Bitmap.java Wed Jun 24 23:34:03 2015
@@ -32,15 +32,18 @@ public abstract  class Bitmap extends Pi
 
     public byte[] getData(){
         byte[] rawdata = getRawData();
-        byte[] imgdata = new byte[rawdata.length-17];
-        System.arraycopy(rawdata, 17, imgdata, 0, imgdata.length);
+        int prefixLen = 16*uidInstanceCount+1;
+        byte[] imgdata = new byte[rawdata.length-prefixLen];
+        System.arraycopy(rawdata, prefixLen, imgdata, 0, imgdata.length);
         return imgdata;
     }
 
     public void setData(byte[] data) throws IOException {
         ByteArrayOutputStream out = new ByteArrayOutputStream();
-        byte[] checksum = getChecksum(data);
-        out.write(checksum);
+        for (int i=0; i<uidInstanceCount; i++) {
+            byte[] checksum = getChecksum(data);
+            out.write(checksum);
+        }
         out.write(0);
         out.write(data);
 

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/DIB.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/DIB.java?rev=1687398&r1=1687397&r2=1687398&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/DIB.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/DIB.java Wed Jun 24 23:34:03 2015
@@ -31,7 +31,7 @@ public final class DIB extends Bitmap {
     /**
      * Size of the BITMAPFILEHEADER structure preceding the actual DIB bytes
      */
-    public static final int HEADER_SIZE = 14;
+    private static final int HEADER_SIZE = 14;
 
     /**
      * @return type of  this picture
@@ -42,13 +42,29 @@ public final class DIB extends Bitmap {
     }
 
     /**
-     * DIB signature is <code>0x7A80</code>
+     * DIB signature is {@code 0x7A80} or {@code 0x7A90}
      *
-     * @return DIB signature (<code>0x7A80</code>)
+     * @return DIB signature ({@code 0x7A80} or {@code 0x7A90})
      */
     public int getSignature(){
-        return 0x7A80;
+        return (uidInstanceCount == 1 ? 0x7A80 : 0x7A90);
     }
+
+    /**
+     * Sets the DIB signature - either {@code 0x7A80} or {@code 0x7A90}
+     */
+    public void setSignature(int signature) {
+        switch (signature) {
+            case 0x7A80:
+                uidInstanceCount = 1;
+                break;
+            case 0x7A90:
+                uidInstanceCount = 2;
+                break;
+            default:
+                throw new IllegalArgumentException(signature+" is not a valid instance/signature value for DIB");
+        }        
+    }    
     
     public byte[] getData(){
         return addBMPHeader ( super.getData() );

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/EMF.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/EMF.java?rev=1687398&r1=1687397&r2=1687398&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/EMF.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/EMF.java Wed Jun 24 23:34:03 2015
@@ -84,11 +84,27 @@ public final class EMF extends Metafile
     }
 
     /**
-     * EMF signature is <code>0x3D40</code>
+     * EMF signature is {@code 0x3D40} or {@code 0x3D50}
      *
-     * @return EMF signature (<code>0x3D40</code>)
+     * @return EMF signature ({@code 0x3D40} or {@code 0x3D50})
      */
-    public int getSignature(){
-        return 0x3D40;
+    public int getSignature() {
+        return (uidInstanceCount == 1 ? 0x3D40 : 0x3D50);
+    }
+    
+    /**
+     * Sets the EMF signature - either {@code 0x3D40} or {@code 0x3D50}
+     */
+    public void setSignature(int signature) {
+        switch (signature) {
+            case 0x3D40:
+                uidInstanceCount = 1;
+                break;
+            case 0x3D50:
+                uidInstanceCount = 2;
+                break;
+            default:
+                throw new IllegalArgumentException(signature+" is not a valid instance/signature value for EMF");
+        }        
     }
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/JPEG.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/JPEG.java?rev=1687398&r1=1687397&r2=1687398&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/JPEG.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/JPEG.java Wed Jun 24 23:34:03 2015
@@ -26,6 +26,10 @@ import org.apache.poi.hslf.model.Picture
  */
 public final class JPEG extends Bitmap {
 
+    public enum ColorSpace { rgb, cymk };
+    
+    private ColorSpace colorSpace = ColorSpace.rgb;
+    
     /**
      * @return type of  this picture
      * @see  org.apache.poi.hslf.model.Picture#JPEG
@@ -34,12 +38,48 @@ public final class JPEG extends Bitmap {
         return Picture.JPEG;
     }
 
+    public ColorSpace getColorSpace() {
+        return colorSpace;
+    }
+    
+    public void setColorSpace(ColorSpace colorSpace) {
+        this.colorSpace = colorSpace;
+    }
+    
     /**
-     * JPEG signature is <code>0x46A0</code>
+     * JPEG signature is one of {@code 0x46A0, 0x46B0, 0x6E20, 0x6E30} 
      *
-     * @return JPEG signature (<code>0x46A0</code>)
+     * @return JPEG signature ({@code 0x46A0, 0x46B0, 0x6E20, 0x6E30})
      */
     public int getSignature(){
-        return 0x46A0;
+        return (colorSpace == ColorSpace.rgb)
+            ? (uidInstanceCount == 1 ? 0x46A0 :  0x46B0)
+            : (uidInstanceCount == 1 ? 0x6E20 :  0x6E30);
     }
+    
+    /**
+     * Sets the JPEG signature - one of {@code 0x46A0, 0x46B0, 0x6E20, 0x6E30}
+     */
+    public void setSignature(int signature) {
+        switch (signature) {
+            case 0x46A0:
+                uidInstanceCount = 1;
+                colorSpace = ColorSpace.rgb;
+                break;
+            case 0x46B0:
+                uidInstanceCount = 2;
+                colorSpace = ColorSpace.rgb;
+                break;
+            case 0x6E20:
+                uidInstanceCount = 1;
+                colorSpace = ColorSpace.cymk;
+                break;
+            case 0x6E30:
+                uidInstanceCount = 2;
+                colorSpace = ColorSpace.cymk;
+                break;
+            default:
+                throw new IllegalArgumentException(signature+" is not a valid instance/signature value for JPEG");
+        }        
+    }    
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/Metafile.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/Metafile.java?rev=1687398&r1=1687397&r2=1687398&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/Metafile.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/Metafile.java Wed Jun 24 23:34:03 2015
@@ -86,8 +86,8 @@ public abstract class Metafile extends P
 
             zipsize = LittleEndian.getInt(data, pos); pos += LittleEndian.INT_SIZE;
 
-            compression = LittleEndian.getUnsignedByte(data, pos); pos++;
-            filter = LittleEndian.getUnsignedByte(data, pos); pos++;
+            compression = LittleEndian.getUByte(data, pos); pos++;
+            filter = LittleEndian.getUByte(data, pos); pos++;
         }
 
         public void write(OutputStream out) throws IOException {

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/PICT.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/PICT.java?rev=1687398&r1=1687397&r2=1687398&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/PICT.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/PICT.java Wed Jun 24 23:34:03 2015
@@ -33,10 +33,6 @@ import org.apache.poi.hslf.model.Shape;
  */
 public final class PICT extends Metafile {
 
-    public PICT(){
-        super();
-    }
-
     /**
      * Extract compressed PICT data from a ppt
      */
@@ -46,7 +42,7 @@ public final class PICT extends Metafile
             byte[] macheader = new byte[512];
             ByteArrayOutputStream out = new ByteArrayOutputStream();
             out.write(macheader);
-            int pos = CHECKSUM_SIZE;
+            int pos = CHECKSUM_SIZE*uidInstanceCount;
             byte[] pict;
             try {
                 pict = read(rawdata, pos);
@@ -109,12 +105,27 @@ public final class PICT extends Metafile
     }
 
     /**
-     * PICT signature is <code>0x5430</code>
+     * PICT signature is {@code 0x5420} or {@code 0x5430}
      *
-     * @return PICT signature (<code>0x5430</code>)
+     * @return PICT signature ({@code 0x5420} or {@code 0x5430})
      */
     public int getSignature(){
-        return 0x5430;
+        return (uidInstanceCount == 1 ? 0x5420 : 0x5430);
     }
 
+    /**
+     * Sets the PICT signature - either {@code 0x5420} or {@code 0x5430}
+     */
+    public void setSignature(int signature) {
+        switch (signature) {
+            case 0x5420:
+                uidInstanceCount = 1;
+                break;
+            case 0x5430:
+                uidInstanceCount = 2;
+                break;
+            default:
+                throw new IllegalArgumentException(signature+" is not a valid instance/signature value for PICT");
+        }        
+    }
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/PNG.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/PNG.java?rev=1687398&r1=1687397&r2=1687398&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/PNG.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/PNG.java Wed Jun 24 23:34:03 2015
@@ -17,14 +17,8 @@
 
 package org.apache.poi.hslf.blip;
 
-import org.apache.poi.util.PngUtils;
 import org.apache.poi.hslf.model.Picture;
-import org.apache.poi.hslf.exceptions.HSLFException;
-
-import javax.imageio.ImageIO;
-import java.awt.image.BufferedImage;
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
+import org.apache.poi.util.PngUtils;
 
 /**
  * Represents a PNG picture data in a PPT file
@@ -59,11 +53,27 @@ public final class PNG extends Bitmap {
     }
 
     /**
-     * PNG signature is <code>0x6E00</code>
+     * PNG signature is {@code 0x6E00} or {@code 0x6E10}
      *
-     * @return PNG signature (<code>0x6E00</code>)
+     * @return PNG signature ({@code 0x6E00} or {@code 0x6E10})
      */
     public int getSignature(){
-        return 0x6E00;
+        return (uidInstanceCount == 1 ? 0x6E00 : 0x6E10);
+    }
+    
+    /**
+     * Sets the PNG signature - either {@code 0x6E00} or {@code 0x6E10}
+     */
+    public void setSignature(int signature) {
+        switch (signature) {
+            case 0x6E00:
+                uidInstanceCount = 1;
+                break;
+            case 0x6E10:
+                uidInstanceCount = 2;
+                break;
+            default:
+                throw new IllegalArgumentException(signature+" is not a valid instance/signature value for PNG");
+        }        
     }
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/WMF.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/WMF.java?rev=1687398&r1=1687397&r2=1687398&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/WMF.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/blip/WMF.java Wed Jun 24 23:34:03 2015
@@ -43,8 +43,8 @@ public final class WMF extends Metafile
             ByteArrayOutputStream out = new ByteArrayOutputStream();
             InputStream is = new ByteArrayInputStream( rawdata );
             Header header = new Header();
-            header.read(rawdata, CHECKSUM_SIZE);
-            is.skip(header.getSize() + CHECKSUM_SIZE);
+            header.read(rawdata, CHECKSUM_SIZE*uidInstanceCount);
+            is.skip(header.getSize() + CHECKSUM_SIZE*uidInstanceCount);
 
             AldusHeader aldus = new AldusHeader();
             aldus.left = header.bounds.x;
@@ -84,7 +84,9 @@ public final class WMF extends Metafile
 
         byte[] checksum = getChecksum(data);
         ByteArrayOutputStream out = new ByteArrayOutputStream();
-        out.write(checksum);
+        for (int i=0; i<uidInstanceCount; i++) {
+            out.write(checksum);
+        }
         header.write(out);
         out.write(compressed);
 
@@ -99,12 +101,27 @@ public final class WMF extends Metafile
     }
 
     /**
-     * WMF signature is <code>0x2160</code>
+     * WMF signature is either {@code 0x2160} or {@code 0x2170}
      */
     public int getSignature(){
-        return 0x2160;
+        return (uidInstanceCount == 1 ? 0x2160 : 0x2170);
     }
 
+    /**
+     * Sets the WMF signature - either {@code 0x2160} or {@code 0x2170}
+     */
+    public void setSignature(int signature) {
+        switch (signature) {
+            case 0x2160:
+                uidInstanceCount = 1;
+                break;
+            case 0x2170:
+                uidInstanceCount = 2;
+                break;
+            default:
+                throw new IllegalArgumentException(signature+" is not a valid instance/signature value for WMF");
+        }
+    }
 
     /**
      * Aldus Placeable Metafile header - 22 byte structure before WMF data.

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/PictureData.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/PictureData.java?rev=1687398&r1=1687397&r2=1687398&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/PictureData.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/PictureData.java Wed Jun 24 23:34:03 2015
@@ -55,11 +55,17 @@ public abstract class PictureData {
     * Binary data of the picture
     */
     private byte[] rawdata;
+    
     /**
      * The offset to the picture in the stream
      */
     protected int offset;
-
+    
+    /**
+     * The instance type/signature defines whether one or two UID instances will be included
+     */
+    protected int uidInstanceCount = 1;
+    
     /**
      * Returns type of this picture.
      * Must be one of the static constants defined in the <code>Picture</code> class.
@@ -82,8 +88,17 @@ public abstract class PictureData {
     /**
      * Blip signature.
      */
-    protected abstract int getSignature();
+    public abstract int getSignature();
+    
+    public abstract void setSignature(int signature);
 
+    /**
+     * The instance type/signature defines whether one or two UID instances will be included
+     */
+    protected int getUIDInstanceCount() {
+        return uidInstanceCount;
+    }
+    
     protected static final ImagePainter[] painters = new ImagePainter[8];
     static {
         PictureData.setImagePainter(Picture.PNG, new BitmapPainter());

Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/model/TestPicture.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/model/TestPicture.java?rev=1687398&r1=1687397&r2=1687398&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/model/TestPicture.java (original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/model/TestPicture.java Wed Jun 24 23:34:03 2015
@@ -131,18 +131,37 @@ public final class TestPicture {
                 null            // EMF
         };
 
-        for (int i = 0; i < pictures.length; i++) {
-            BufferedImage image = ImageIO.read(new ByteArrayInputStream(pictures[i].getData()));
-
-            if (pictures[i].getType() != Picture.WMF && pictures[i].getType() != Picture.EMF) {
-                assertNotNull(image);
-
-                int[] dimensions = expectedSizes[i];
-                assertEquals(dimensions[0], image.getWidth());
-                assertEquals(dimensions[1], image.getHeight());
+        int i=0;
+        for (PictureData pd : pictures) {
+            BufferedImage image = ImageIO.read(new ByteArrayInputStream(pd.getData()));
+            switch (pd.getType()) {
+                case Picture.WMF:
+                case Picture.EMF:
+                    break;
+                default:
+                    assertNotNull(image);
+                    int[] dimensions = expectedSizes[i];
+                    assertEquals(dimensions[0], image.getWidth());
+                    assertEquals(dimensions[1], image.getHeight());
+                    break;
             }
+            i++;
         }
     }
+    
+    @Test
+    public void bug54332() throws Exception {
+        HSLFSlideShow hss = new HSLFSlideShow(_slTests.openResourceAsStream("54332a.ppt")); // TIKA-1046
+
+        PictureData[] pictures = hss.getPictures();
+        assertEquals(1, pictures.length);
+        assertEquals(102352, pictures[0].getData().length);
+        
+        hss = new HSLFSlideShow(_slTests.openResourceAsStream("54332b.ppt")); // TIKA-1612
+        pictures = hss.getPictures();
+        assertEquals(1, pictures.length);
+        assertEquals(55830, pictures[0].getData().length);
+    }
 
     @Test
     @Ignore("Just for visual validation - antialiasing is different on various systems")

Added: poi/trunk/test-data/slideshow/54332a.ppt
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/slideshow/54332a.ppt?rev=1687398&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/test-data/slideshow/54332a.ppt
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: poi/trunk/test-data/slideshow/54332b.ppt
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/slideshow/54332b.ppt?rev=1687398&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/test-data/slideshow/54332b.ppt
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org