You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2018/03/24 12:33:28 UTC

svn commit: r1827641 - in /pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter: FlateFilter.java LZWFilter.java Predictor.java

Author: tilman
Date: Sat Mar 24 12:33:28 2018
New Revision: 1827641

URL: http://svn.apache.org/viewvc?rev=1827641&view=rev
Log:
PDFBOX-4151: avoid double memory usage with predictor, by Itai Shaked

Modified:
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/Predictor.java

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java?rev=1827641&r1=1827640&r2=1827641&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java Sat Mar 24 12:33:28 2018
@@ -46,34 +46,12 @@ final class FlateFilter extends Filter
     public DecodeResult decode(InputStream encoded, OutputStream decoded,
                                          COSDictionary parameters, int index) throws IOException
     {
-        int predictor = -1;
-
         final COSDictionary decodeParams = getDecodeParams(parameters, index);
-        if (decodeParams != null)
-        {
-            predictor = decodeParams.getInt(COSName.PREDICTOR);
-        }
 
         try
         {
-            if (predictor > 1)
-            {
-                int colors = Math.min(decodeParams.getInt(COSName.COLORS, 1), 32);
-                int bitsPerPixel = decodeParams.getInt(COSName.BITS_PER_COMPONENT, 8);
-                int columns = decodeParams.getInt(COSName.COLUMNS, 1);
-                ByteArrayOutputStream baos = new ByteArrayOutputStream();
-                decompress(encoded, baos);
-                ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
-                Predictor.decodePredictor(predictor, colors, bitsPerPixel, columns, bais, decoded);
-                decoded.flush();
-                baos.reset();
-                bais.reset();
-            }
-            else
-            {
-                decompress(encoded, decoded);
-            }
-        } 
+            decompress(encoded, Predictor.wrapPredictor(decoded, decodeParams));
+        }
         catch (DataFormatException e)
         {
             // if the stream is corrupt a DataFormatException may occur
@@ -98,7 +76,7 @@ final class FlateFilter extends Filter
             // use nowrap mode to bypass zlib-header and checksum to avoid a DataFormatException
             Inflater inflater = new Inflater(true); 
             inflater.setInput(buf,0,read);
-            byte[] res = new byte[1024]; 
+            byte[] res = new byte[1024];
             boolean dataWritten = false;
             while (true) 
             { 

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java?rev=1827641&r1=1827640&r2=1827641&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java Sat Mar 24 12:33:28 2018
@@ -67,37 +67,15 @@ public class LZWFilter extends Filter
     public DecodeResult decode(InputStream encoded, OutputStream decoded,
             COSDictionary parameters, int index) throws IOException
     {
-        int predictor = -1;
-        int earlyChange = 1;
-
         COSDictionary decodeParams = getDecodeParams(parameters, index);
-        if (decodeParams != null)
-        {
-            predictor = decodeParams.getInt(COSName.PREDICTOR);
-            earlyChange = decodeParams.getInt(COSName.EARLY_CHANGE, 1);
-            if (earlyChange != 0 && earlyChange != 1)
-            {
-                earlyChange = 1;
-            }
-        }
-        if (predictor > 1)
-        {
-            @SuppressWarnings("null")
-            int colors = Math.min(decodeParams.getInt(COSName.COLORS, 1), 32);
-            int bitsPerPixel = decodeParams.getInt(COSName.BITS_PER_COMPONENT, 8);
-            int columns = decodeParams.getInt(COSName.COLUMNS, 1);
-            ByteArrayOutputStream baos = new ByteArrayOutputStream();
-            doLZWDecode(encoded, baos, earlyChange);
-            ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
-            Predictor.decodePredictor(predictor, colors, bitsPerPixel, columns, bais, decoded);
-            decoded.flush();
-            baos.reset();
-            bais.reset();
-        }
-        else
+        int earlyChange = decodeParams.getInt(COSName.EARLY_CHANGE, 1);
+
+        if (earlyChange != 0 && earlyChange != 1)
         {
-            doLZWDecode(encoded, decoded, earlyChange);
+            earlyChange = 1;
         }
+
+        doLZWDecode(encoded, Predictor.wrapPredictor(decoded, decodeParams), earlyChange);
         return new DecodeResult(parameters);
     }
 

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/Predictor.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/Predictor.java?rev=1827641&r1=1827640&r2=1827641&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/Predictor.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/Predictor.java Sat Mar 24 12:33:28 2018
@@ -15,10 +15,14 @@
  */
 package org.apache.pdfbox.filter;
 
+import java.io.FilterOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.util.Arrays;
 
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.io.IOUtils;
 
 /**
@@ -31,6 +35,173 @@ public final class Predictor
     private Predictor()
     {
     }
+
+    /**
+     * Decodes a single line of data in-place.
+     * @param predictor Predictor value for the current line
+     * @param colors Number of color components, from decode parameters.
+     * @param bitsPerComponent Number of bits per component, from decode parameters.
+     * @param columns Number of samples in a row, from decode parameters.
+     * @param actline Current (active) line to decode. Data will be decoded in-place,
+     *                i.e. - the contents of this buffer will be modified.
+     * @param lastline The previous decoded line. When decoding the first line, this
+     *                 parameter should be an empty byte array of the same length as
+     *                 <code>actline</code>.
+     */
+    static void decodePredictorRow(int predictor, int colors, int bitsPerComponent, int columns, byte[] actline, byte[] lastline)
+    {
+        if (predictor == 1)
+        {
+            // no prediction
+            return;
+        }
+        final int bitsPerPixel = colors * bitsPerComponent;
+        final int bytesPerPixel = (bitsPerPixel + 7) / 8;
+        final int rowlength = actline.length;
+        switch (predictor)
+        {
+            case 2:
+                // PRED TIFF SUB
+                if (bitsPerComponent == 8)
+                {
+                    // for 8 bits per component it is the same algorithm as PRED SUB of PNG format
+                    for (int p = bytesPerPixel; p < rowlength; p++)
+                    {
+                        int sub = actline[p] & 0xff;
+                        int left = actline[p - bytesPerPixel] & 0xff;
+                        actline[p] = (byte) (sub + left);
+                    }
+                    break;
+                }
+                if (bitsPerComponent == 16)
+                {
+                    for (int p = bytesPerPixel; p < rowlength; p += 2)
+                    {
+                        int sub = ((actline[p] & 0xff) << 8) + (actline[p + 1] & 0xff);
+                        int left = (((actline[p - bytesPerPixel] & 0xff) << 8)
+                                + (actline[p - bytesPerPixel + 1] & 0xff));
+                        actline[p] = (byte) (((sub + left) >> 8) & 0xff);
+                        actline[p + 1] = (byte) ((sub + left) & 0xff);
+                    }
+                    break;
+                }
+                if (bitsPerComponent == 1 && colors == 1)
+                {
+                    // bytesPerPixel cannot be used:
+                    // "A row shall occupy a whole number of bytes, rounded up if necessary.
+                    // Samples and their components shall be packed into bytes
+                    // from high-order to low-order bits."
+                    for (int p = 0; p < rowlength; p++)
+                    {
+                        for (int bit = 7; bit >= 0; --bit)
+                        {
+                            int sub = (actline[p] >> bit) & 1;
+                            if (p == 0 && bit == 7)
+                            {
+                                continue;
+                            }
+                            int left;
+                            if (bit == 7)
+                            {
+                                // use bit #0 from previous byte
+                                left = actline[p - 1] & 1;
+                            }
+                            else
+                            {
+                                // use "previous" bit
+                                left = (actline[p] >> (bit + 1)) & 1;
+                            }
+                            if (((sub + left) & 1) == 0)
+                            {
+                                // reset bit
+                                actline[p] = (byte) (actline[p] & ~(1 << bit));
+                            }
+                            else
+                            {
+                                // set bit
+                                actline[p] = (byte) (actline[p] | (1 << bit));
+                            }
+                        }
+                    }
+                    break;
+                }
+                // everything else, i.e. bpc 2 and 4, but has been tested for bpc 1 and 8 too
+                int elements = columns * colors;
+                for (int p = colors; p < elements; ++p)
+                {
+                    int bytePosSub = p * bitsPerComponent / 8;
+                    int bitPosSub = 8 - p * bitsPerComponent % 8 - bitsPerComponent;
+                    int bytePosLeft = (p - colors) * bitsPerComponent / 8;
+                    int bitPosLeft = 8 - (p - colors) * bitsPerComponent % 8 - bitsPerComponent;
+
+                    int sub = getBitSeq(actline[bytePosSub], bitPosSub, bitsPerComponent);
+                    int left = getBitSeq(actline[bytePosLeft], bitPosLeft, bitsPerComponent);
+                    actline[bytePosSub] = (byte) calcSetBitSeq(actline[bytePosSub], bitPosSub, bitsPerComponent, sub + left);
+                }
+                break;
+            case 10:
+                // PRED NONE
+                // do nothing
+                break;
+            case 11:
+                // PRED SUB
+                for (int p = bytesPerPixel; p < rowlength; p++)
+                {
+                    int sub = actline[p];
+                    int left = actline[p - bytesPerPixel];
+                    actline[p] = (byte) (sub + left);
+                }
+                break;
+            case 12:
+                // PRED UP
+                for (int p = 0; p < rowlength; p++)
+                {
+                    int up = actline[p] & 0xff;
+                    int prior = lastline[p] & 0xff;
+                    actline[p] = (byte) ((up + prior) & 0xff);
+                }
+                break;
+            case 13:
+                // PRED AVG
+                for (int p = 0; p < rowlength; p++)
+                {
+                    int avg = actline[p] & 0xff;
+                    int left = p - bytesPerPixel >= 0 ? actline[p - bytesPerPixel] & 0xff : 0;
+                    int up = lastline[p] & 0xff;
+                    actline[p] = (byte) ((avg + (left + up) / 2) & 0xff);
+                }
+                break;
+            case 14:
+                // PRED PAETH
+                for (int p = 0; p < rowlength; p++)
+                {
+                    int paeth = actline[p] & 0xff;
+                    int a = p - bytesPerPixel >= 0 ? actline[p - bytesPerPixel] & 0xff : 0;// left
+                    int b = lastline[p] & 0xff;// upper
+                    int c = p - bytesPerPixel >= 0 ? lastline[p - bytesPerPixel] & 0xff : 0;// upperleft
+                    int value = a + b - c;
+                    int absa = Math.abs(value - a);
+                    int absb = Math.abs(value - b);
+                    int absc = Math.abs(value - c);
+
+                    if (absa <= absb && absa <= absc)
+                    {
+                        actline[p] = (byte) ((paeth + a) & 0xff);
+                    }
+                    else if (absb <= absc)
+                    {
+                        actline[p] = (byte) ((paeth + b) & 0xff);
+                    }
+                    else
+                    {
+                        actline[p] = (byte) ((paeth + c) & 0xff);
+                    }
+                }
+                break;
+            default:
+                break;
+        }
+    }
     
     static void decodePredictor(int predictor, int colors, int bitsPerComponent, int columns, InputStream in, OutputStream out)
             throws IOException
@@ -43,9 +214,7 @@ public final class Predictor
         else
         {
             // calculate sizes
-            final int bitsPerPixel = colors * bitsPerComponent;
-            final int bytesPerPixel = (bitsPerPixel + 7) / 8;
-            final int rowlength = (columns * bitsPerPixel + 7) / 8;
+            final int rowlength = calculateRowLength(colors, bitsPerComponent, columns);
             byte[] actline = new byte[rowlength];
             byte[] lastline = new byte[rowlength];
 
@@ -74,155 +243,18 @@ public final class Predictor
                     offset += i;
                 }
 
-                // do prediction as specified in PNG-Specification 1.2
-                switch (linepredictor)
-                {
-                    case 2:
-                        // PRED TIFF SUB
-                        if (bitsPerComponent == 8)
-                        {
-                            // for 8 bits per component it is the same algorithm as PRED SUB of PNG format
-                            for (int p = bytesPerPixel; p < rowlength; p++)
-                            {
-                                int sub = actline[p] & 0xff;
-                                int left = actline[p - bytesPerPixel] & 0xff;
-                                actline[p] = (byte) (sub + left);
-                            }
-                            break;
-                        }
-                        if (bitsPerComponent == 16)
-                        {
-                            for (int p = bytesPerPixel; p < rowlength; p += 2)
-                            {
-                                int sub = ((actline[p] & 0xff) << 8) + (actline[p + 1] & 0xff);
-                                int left = (((actline[p - bytesPerPixel] & 0xff) << 8)
-                                        + (actline[p - bytesPerPixel + 1] & 0xff));
-                                actline[p] = (byte) (((sub + left) >> 8) & 0xff);
-                                actline[p + 1] = (byte) ((sub + left) & 0xff);
-                            }
-                            break;
-                        }
-                        if (bitsPerComponent == 1 && colors == 1)
-                        {
-                            // bytesPerPixel cannot be used:
-                            // "A row shall occupy a whole number of bytes, rounded up if necessary.
-                            // Samples and their components shall be packed into bytes 
-                            // from high-order to low-order bits."
-                            for (int p = 0; p < rowlength; p++)
-                            {
-                                for (int bit = 7; bit >= 0; --bit)
-                                {
-                                    int sub = (actline[p] >> bit) & 1;
-                                    if (p == 0 && bit == 7)
-                                    {
-                                        continue;
-                                    }
-                                    int left;
-                                    if (bit == 7)
-                                    {
-                                        // use bit #0 from previous byte
-                                        left = actline[p - 1] & 1;
-                                    }
-                                    else
-                                    {
-                                        // use "previous" bit
-                                        left = (actline[p] >> (bit + 1)) & 1;
-                                    }
-                                    if (((sub + left) & 1) == 0)
-                                    {
-                                        // reset bit
-                                        actline[p] = (byte) (actline[p] & ~(1 << bit));
-                                    }
-                                    else
-                                    {
-                                        // set bit
-                                        actline[p] = (byte) (actline[p] | (1 << bit));
-                                    }
-                                }
-                            }
-                            break;
-                        }
-                        // everything else, i.e. bpc 2 and 4, but has been tested for bpc 1 and 8 too
-                        int elements = columns * colors;
-                        for (int p = colors; p < elements; ++p)
-                        {
-                            int bytePosSub = p * bitsPerComponent / 8;
-                            int bitPosSub = 8 - p * bitsPerComponent % 8 - bitsPerComponent;
-                            int bytePosLeft = (p - colors) * bitsPerComponent / 8;
-                            int bitPosLeft = 8 - (p - colors) * bitsPerComponent % 8 - bitsPerComponent;
-
-                            int sub = getBitSeq(actline[bytePosSub], bitPosSub, bitsPerComponent);
-                            int left = getBitSeq(actline[bytePosLeft], bitPosLeft, bitsPerComponent);
-                            actline[bytePosSub] = (byte) calcSetBitSeq(actline[bytePosSub], bitPosSub, bitsPerComponent, sub + left);
-                        }
-                        break;
-                    case 10:
-                        // PRED NONE
-                        // do nothing
-                        break;
-                    case 11:
-                        // PRED SUB
-                        for (int p = bytesPerPixel; p < rowlength; p++)
-                        {
-                            int sub = actline[p];
-                            int left = actline[p - bytesPerPixel];
-                            actline[p] = (byte) (sub + left);
-                        }
-                        break;
-                    case 12:
-                        // PRED UP
-                        for (int p = 0; p < rowlength; p++)
-                        {
-                            int up = actline[p] & 0xff;
-                            int prior = lastline[p] & 0xff;
-                            actline[p] = (byte) ((up + prior) & 0xff);
-                        }
-                        break;
-                    case 13:
-                        // PRED AVG
-                        for (int p = 0; p < rowlength; p++)
-                        {
-                            int avg = actline[p] & 0xff;
-                            int left = p - bytesPerPixel >= 0 ? actline[p - bytesPerPixel] & 0xff : 0;
-                            int up = lastline[p] & 0xff;
-                            actline[p] = (byte) ((avg + (left + up) / 2) & 0xff);
-                        }
-                        break;
-                    case 14:
-                        // PRED PAETH
-                        for (int p = 0; p < rowlength; p++)
-                        {
-                            int paeth = actline[p] & 0xff;
-                            int a = p - bytesPerPixel >= 0 ? actline[p - bytesPerPixel] & 0xff : 0;// left
-                            int b = lastline[p] & 0xff;// upper
-                            int c = p - bytesPerPixel >= 0 ? lastline[p - bytesPerPixel] & 0xff : 0;// upperleft
-                            int value = a + b - c;
-                            int absa = Math.abs(value - a);
-                            int absb = Math.abs(value - b);
-                            int absc = Math.abs(value - c);
-
-                            if (absa <= absb && absa <= absc)
-                            {
-                                actline[p] = (byte) ((paeth + a) & 0xff);
-                            }
-                            else if (absb <= absc)
-                            {
-                                actline[p] = (byte) ((paeth + b) & 0xff);
-                            }
-                            else
-                            {
-                                actline[p] = (byte) ((paeth + c) & 0xff);
-                            }
-                        }
-                        break;
-                    default:
-                        break;
-                }
+                decodePredictorRow(linepredictor, colors, bitsPerComponent, columns, actline, lastline);
                 System.arraycopy(actline, 0, lastline, 0, rowlength);
                 out.write(actline);
             }
         }
     }
+
+    static int calculateRowLength(int colors, int bitsPerComponent, int columns)
+    {
+        final int bitsPerPixel = colors * bitsPerComponent;
+        return  (columns * bitsPerPixel + 7) / 8;
+    }
     
     // get value from bit interval from a byte
     static int getBitSeq(int by, int startBit, int bitSize)
@@ -240,4 +272,146 @@ public final class Predictor
         return (by & mask) | (truncatedVal << startBit);
     }
 
+    /**
+     * Wraps an <code>OutputStream</code> in a predictor decoding stream as necessary.
+     * If no predictor is specified by the parameters, the original stream is returned as is.
+     *
+     * @param out The stream to which decoded data should be written
+     * @param decodeParams Decode parameters for the stream
+     * @return An <code>OutputStream</code> is returned, which will write decoded data
+     * into the given stream. If no predictor is specified, the original stream is returned.
+     */
+    static OutputStream wrapPredictor(OutputStream out, COSDictionary decodeParams)
+    {
+        int predictor = decodeParams.getInt(COSName.PREDICTOR);
+        if (predictor > 1)
+        {
+            int colors = Math.min(decodeParams.getInt(COSName.COLORS, 1), 32);
+            int bitsPerPixel = decodeParams.getInt(COSName.BITS_PER_COMPONENT, 8);
+            int columns = decodeParams.getInt(COSName.COLUMNS, 1);
+
+            return new PredictorOutputStream(out, predictor, colors, bitsPerPixel, columns);
+        }
+        else
+        {
+            return out;
+        }
+    }
+
+    /**
+     * Output stream that implements predictor decoding. Data is buffered until a complete
+     * row is available, which is then decoded and written to the underlying stream.
+     * The previous row is retained for decoding the next row.
+     */
+    private static final class PredictorOutputStream extends FilterOutputStream
+    {
+        // current predictor type
+        private int predictor;
+        // image decode parameters
+        private final int colors, bitsPerComponent, columns, rowLength;
+        // PNG predictor (predictor>=10) means every row has a (potentially different)
+        // predictor value
+        private final boolean predictorPerRow;
+
+        // data buffers
+        private byte[] currentRow, lastRow;
+        // amount of data in the current row
+        private int currentRowData = 0;
+        // was the per-row predictor value read for the current row being processed
+        private boolean predictorRead = false;
+
+        PredictorOutputStream(OutputStream out, int predictor, int colors, int bitsPerComponent, int columns)
+        {
+            super(out);
+            this.predictor = predictor;
+            this.colors = colors;
+            this.bitsPerComponent = bitsPerComponent;
+            this.columns = columns;
+            this.rowLength = calculateRowLength(colors, bitsPerComponent, columns);
+            this.predictorPerRow = (predictor >= 10);
+            currentRow = new byte[rowLength];
+            lastRow = new byte[rowLength];
+        }
+
+        @Override
+        public void write(byte[] bytes) throws IOException
+        {
+            write(bytes, 0, bytes.length);
+        }
+
+        @Override
+        public void write(byte[] bytes, int off, int len) throws IOException
+        {
+            int maxOffset = off + len;
+            while (off < maxOffset)
+            {
+                if (predictorPerRow && currentRowData == 0 && !predictorRead)
+                {
+                    // PNG predictor; each row starts with predictor type (0, 1, 2, 3, 4)
+                    // read per line predictor, add 10 to treat value 0 as 10, 1 as 11, ...
+                    predictor = bytes[off] + 10;
+                    off++;
+                    predictorRead = true;
+                }
+                else
+                {
+                    int toRead = Math.min(rowLength - currentRowData, maxOffset - off);
+                    System.arraycopy(bytes, off, currentRow, currentRowData, toRead);
+                    currentRowData += toRead;
+                    off += toRead;
+
+                    // current row is filled, decode it, write it to underlying stream,
+                    // and reset the state.
+                    if (currentRowData == currentRow.length)
+                    {
+                        decodeAndWriteRow();
+                    }
+                }
+            }
+        }
+
+        private void decodeAndWriteRow() throws IOException
+        {
+            decodePredictorRow(predictor, colors, bitsPerComponent, columns, currentRow, lastRow);
+            out.write(currentRow);
+            flipRows();
+        }
+
+        /**
+         * Flips the row buffers (to avoid copying), and resets the current-row index
+         * and predictorRead flag
+         */
+        private void flipRows()
+        {
+            byte[] temp = lastRow;
+            lastRow = currentRow;
+            currentRow = temp;
+            currentRowData = 0;
+            predictorRead = false;
+        }
+
+        @Override
+        public void flush() throws IOException
+        {
+            // The last row is allowed to be incomplete, and should be completed with zeros.
+            if (currentRowData > 0)
+            {
+                Arrays.fill(currentRow, currentRowData, rowLength, (byte)0);
+                decodeAndWriteRow();
+            }
+            super.flush();
+        }
+
+        @Override
+        public void close() throws IOException
+        {
+            super.close();
+        }
+
+        @Override
+        public void write(int i) throws IOException
+        {
+            throw new UnsupportedOperationException("Not supported");
+        }
+    }
 }