You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2018/03/24 12:33:28 UTC
svn commit: r1827641 - in
/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter:
FlateFilter.java LZWFilter.java Predictor.java
Author: tilman
Date: Sat Mar 24 12:33:28 2018
New Revision: 1827641
URL: http://svn.apache.org/viewvc?rev=1827641&view=rev
Log:
PDFBOX-4151: avoid double memory usage with predictor, by Itai Shaked
Modified:
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/Predictor.java
Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java?rev=1827641&r1=1827640&r2=1827641&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java Sat Mar 24 12:33:28 2018
@@ -46,34 +46,12 @@ final class FlateFilter extends Filter
public DecodeResult decode(InputStream encoded, OutputStream decoded,
COSDictionary parameters, int index) throws IOException
{
- int predictor = -1;
-
final COSDictionary decodeParams = getDecodeParams(parameters, index);
- if (decodeParams != null)
- {
- predictor = decodeParams.getInt(COSName.PREDICTOR);
- }
try
{
- if (predictor > 1)
- {
- int colors = Math.min(decodeParams.getInt(COSName.COLORS, 1), 32);
- int bitsPerPixel = decodeParams.getInt(COSName.BITS_PER_COMPONENT, 8);
- int columns = decodeParams.getInt(COSName.COLUMNS, 1);
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- decompress(encoded, baos);
- ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
- Predictor.decodePredictor(predictor, colors, bitsPerPixel, columns, bais, decoded);
- decoded.flush();
- baos.reset();
- bais.reset();
- }
- else
- {
- decompress(encoded, decoded);
- }
- }
+ decompress(encoded, Predictor.wrapPredictor(decoded, decodeParams));
+ }
catch (DataFormatException e)
{
// if the stream is corrupt a DataFormatException may occur
@@ -98,7 +76,7 @@ final class FlateFilter extends Filter
// use nowrap mode to bypass zlib-header and checksum to avoid a DataFormatException
Inflater inflater = new Inflater(true);
inflater.setInput(buf,0,read);
- byte[] res = new byte[1024];
+ byte[] res = new byte[1024];
boolean dataWritten = false;
while (true)
{
Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java?rev=1827641&r1=1827640&r2=1827641&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java Sat Mar 24 12:33:28 2018
@@ -67,37 +67,15 @@ public class LZWFilter extends Filter
public DecodeResult decode(InputStream encoded, OutputStream decoded,
COSDictionary parameters, int index) throws IOException
{
- int predictor = -1;
- int earlyChange = 1;
-
COSDictionary decodeParams = getDecodeParams(parameters, index);
- if (decodeParams != null)
- {
- predictor = decodeParams.getInt(COSName.PREDICTOR);
- earlyChange = decodeParams.getInt(COSName.EARLY_CHANGE, 1);
- if (earlyChange != 0 && earlyChange != 1)
- {
- earlyChange = 1;
- }
- }
- if (predictor > 1)
- {
- @SuppressWarnings("null")
- int colors = Math.min(decodeParams.getInt(COSName.COLORS, 1), 32);
- int bitsPerPixel = decodeParams.getInt(COSName.BITS_PER_COMPONENT, 8);
- int columns = decodeParams.getInt(COSName.COLUMNS, 1);
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- doLZWDecode(encoded, baos, earlyChange);
- ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
- Predictor.decodePredictor(predictor, colors, bitsPerPixel, columns, bais, decoded);
- decoded.flush();
- baos.reset();
- bais.reset();
- }
- else
+ int earlyChange = decodeParams.getInt(COSName.EARLY_CHANGE, 1);
+
+ if (earlyChange != 0 && earlyChange != 1)
{
- doLZWDecode(encoded, decoded, earlyChange);
+ earlyChange = 1;
}
+
+ doLZWDecode(encoded, Predictor.wrapPredictor(decoded, decodeParams), earlyChange);
return new DecodeResult(parameters);
}
Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/Predictor.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/Predictor.java?rev=1827641&r1=1827640&r2=1827641&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/Predictor.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/Predictor.java Sat Mar 24 12:33:28 2018
@@ -15,10 +15,14 @@
*/
package org.apache.pdfbox.filter;
+import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.util.Arrays;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.io.IOUtils;
/**
@@ -31,6 +35,173 @@ public final class Predictor
private Predictor()
{
}
+
+ /**
+ * Decodes a single line of data in-place.
+ * @param predictor Predictor value for the current line
+ * @param colors Number of color components, from decode parameters.
+ * @param bitsPerComponent Number of bits per component, from decode parameters.
+ * @param columns Number of samples in a row, from decode parameters.
+ * @param actline Current (active) line to decode. Data will be decoded in-place,
+ * i.e. - the contents of this buffer will be modified.
+ * @param lastline The previous decoded line. When decoding the first line, this
+ * parameter should be a zero-filled byte array of the same length as
+ * <code>actline</code>.
+ */
+ static void decodePredictorRow(int predictor, int colors, int bitsPerComponent, int columns, byte[] actline, byte[] lastline)
+ {
+ if (predictor == 1)
+ {
+ // no prediction
+ return;
+ }
+ final int bitsPerPixel = colors * bitsPerComponent;
+ final int bytesPerPixel = (bitsPerPixel + 7) / 8;
+ final int rowlength = actline.length;
+ switch (predictor)
+ {
+ case 2:
+ // PRED TIFF SUB
+ if (bitsPerComponent == 8)
+ {
+ // for 8 bits per component it is the same algorithm as PRED SUB of PNG format
+ for (int p = bytesPerPixel; p < rowlength; p++)
+ {
+ int sub = actline[p] & 0xff;
+ int left = actline[p - bytesPerPixel] & 0xff;
+ actline[p] = (byte) (sub + left);
+ }
+ break;
+ }
+ if (bitsPerComponent == 16)
+ {
+ for (int p = bytesPerPixel; p < rowlength; p += 2)
+ {
+ int sub = ((actline[p] & 0xff) << 8) + (actline[p + 1] & 0xff);
+ int left = (((actline[p - bytesPerPixel] & 0xff) << 8)
+ + (actline[p - bytesPerPixel + 1] & 0xff));
+ actline[p] = (byte) (((sub + left) >> 8) & 0xff);
+ actline[p + 1] = (byte) ((sub + left) & 0xff);
+ }
+ break;
+ }
+ if (bitsPerComponent == 1 && colors == 1)
+ {
+ // bytesPerPixel cannot be used:
+ // "A row shall occupy a whole number of bytes, rounded up if necessary.
+ // Samples and their components shall be packed into bytes
+ // from high-order to low-order bits."
+ for (int p = 0; p < rowlength; p++)
+ {
+ for (int bit = 7; bit >= 0; --bit)
+ {
+ int sub = (actline[p] >> bit) & 1;
+ if (p == 0 && bit == 7)
+ {
+ continue;
+ }
+ int left;
+ if (bit == 7)
+ {
+ // use bit #0 from previous byte
+ left = actline[p - 1] & 1;
+ }
+ else
+ {
+ // use "previous" bit
+ left = (actline[p] >> (bit + 1)) & 1;
+ }
+ if (((sub + left) & 1) == 0)
+ {
+ // reset bit
+ actline[p] = (byte) (actline[p] & ~(1 << bit));
+ }
+ else
+ {
+ // set bit
+ actline[p] = (byte) (actline[p] | (1 << bit));
+ }
+ }
+ }
+ break;
+ }
+ // everything else, i.e. bpc 2 and 4, but has been tested for bpc 1 and 8 too
+ int elements = columns * colors;
+ for (int p = colors; p < elements; ++p)
+ {
+ int bytePosSub = p * bitsPerComponent / 8;
+ int bitPosSub = 8 - p * bitsPerComponent % 8 - bitsPerComponent;
+ int bytePosLeft = (p - colors) * bitsPerComponent / 8;
+ int bitPosLeft = 8 - (p - colors) * bitsPerComponent % 8 - bitsPerComponent;
+
+ int sub = getBitSeq(actline[bytePosSub], bitPosSub, bitsPerComponent);
+ int left = getBitSeq(actline[bytePosLeft], bitPosLeft, bitsPerComponent);
+ actline[bytePosSub] = (byte) calcSetBitSeq(actline[bytePosSub], bitPosSub, bitsPerComponent, sub + left);
+ }
+ break;
+ case 10:
+ // PRED NONE
+ // do nothing
+ break;
+ case 11:
+ // PRED SUB
+ for (int p = bytesPerPixel; p < rowlength; p++)
+ {
+ int sub = actline[p];
+ int left = actline[p - bytesPerPixel];
+ actline[p] = (byte) (sub + left);
+ }
+ break;
+ case 12:
+ // PRED UP
+ for (int p = 0; p < rowlength; p++)
+ {
+ int up = actline[p] & 0xff;
+ int prior = lastline[p] & 0xff;
+ actline[p] = (byte) ((up + prior) & 0xff);
+ }
+ break;
+ case 13:
+ // PRED AVG
+ for (int p = 0; p < rowlength; p++)
+ {
+ int avg = actline[p] & 0xff;
+ int left = p - bytesPerPixel >= 0 ? actline[p - bytesPerPixel] & 0xff : 0;
+ int up = lastline[p] & 0xff;
+ actline[p] = (byte) ((avg + (left + up) / 2) & 0xff);
+ }
+ break;
+ case 14:
+ // PRED PAETH
+ for (int p = 0; p < rowlength; p++)
+ {
+ int paeth = actline[p] & 0xff;
+ int a = p - bytesPerPixel >= 0 ? actline[p - bytesPerPixel] & 0xff : 0;// left
+ int b = lastline[p] & 0xff;// upper
+ int c = p - bytesPerPixel >= 0 ? lastline[p - bytesPerPixel] & 0xff : 0;// upperleft
+ int value = a + b - c;
+ int absa = Math.abs(value - a);
+ int absb = Math.abs(value - b);
+ int absc = Math.abs(value - c);
+
+ if (absa <= absb && absa <= absc)
+ {
+ actline[p] = (byte) ((paeth + a) & 0xff);
+ }
+ else if (absb <= absc)
+ {
+ actline[p] = (byte) ((paeth + b) & 0xff);
+ }
+ else
+ {
+ actline[p] = (byte) ((paeth + c) & 0xff);
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ }
static void decodePredictor(int predictor, int colors, int bitsPerComponent, int columns, InputStream in, OutputStream out)
throws IOException
@@ -43,9 +214,7 @@ public final class Predictor
else
{
// calculate sizes
- final int bitsPerPixel = colors * bitsPerComponent;
- final int bytesPerPixel = (bitsPerPixel + 7) / 8;
- final int rowlength = (columns * bitsPerPixel + 7) / 8;
+ final int rowlength = calculateRowLength(colors, bitsPerComponent, columns);
byte[] actline = new byte[rowlength];
byte[] lastline = new byte[rowlength];
@@ -74,155 +243,18 @@ public final class Predictor
offset += i;
}
- // do prediction as specified in PNG-Specification 1.2
- switch (linepredictor)
- {
- case 2:
- // PRED TIFF SUB
- if (bitsPerComponent == 8)
- {
- // for 8 bits per component it is the same algorithm as PRED SUB of PNG format
- for (int p = bytesPerPixel; p < rowlength; p++)
- {
- int sub = actline[p] & 0xff;
- int left = actline[p - bytesPerPixel] & 0xff;
- actline[p] = (byte) (sub + left);
- }
- break;
- }
- if (bitsPerComponent == 16)
- {
- for (int p = bytesPerPixel; p < rowlength; p += 2)
- {
- int sub = ((actline[p] & 0xff) << 8) + (actline[p + 1] & 0xff);
- int left = (((actline[p - bytesPerPixel] & 0xff) << 8)
- + (actline[p - bytesPerPixel + 1] & 0xff));
- actline[p] = (byte) (((sub + left) >> 8) & 0xff);
- actline[p + 1] = (byte) ((sub + left) & 0xff);
- }
- break;
- }
- if (bitsPerComponent == 1 && colors == 1)
- {
- // bytesPerPixel cannot be used:
- // "A row shall occupy a whole number of bytes, rounded up if necessary.
- // Samples and their components shall be packed into bytes
- // from high-order to low-order bits."
- for (int p = 0; p < rowlength; p++)
- {
- for (int bit = 7; bit >= 0; --bit)
- {
- int sub = (actline[p] >> bit) & 1;
- if (p == 0 && bit == 7)
- {
- continue;
- }
- int left;
- if (bit == 7)
- {
- // use bit #0 from previous byte
- left = actline[p - 1] & 1;
- }
- else
- {
- // use "previous" bit
- left = (actline[p] >> (bit + 1)) & 1;
- }
- if (((sub + left) & 1) == 0)
- {
- // reset bit
- actline[p] = (byte) (actline[p] & ~(1 << bit));
- }
- else
- {
- // set bit
- actline[p] = (byte) (actline[p] | (1 << bit));
- }
- }
- }
- break;
- }
- // everything else, i.e. bpc 2 and 4, but has been tested for bpc 1 and 8 too
- int elements = columns * colors;
- for (int p = colors; p < elements; ++p)
- {
- int bytePosSub = p * bitsPerComponent / 8;
- int bitPosSub = 8 - p * bitsPerComponent % 8 - bitsPerComponent;
- int bytePosLeft = (p - colors) * bitsPerComponent / 8;
- int bitPosLeft = 8 - (p - colors) * bitsPerComponent % 8 - bitsPerComponent;
-
- int sub = getBitSeq(actline[bytePosSub], bitPosSub, bitsPerComponent);
- int left = getBitSeq(actline[bytePosLeft], bitPosLeft, bitsPerComponent);
- actline[bytePosSub] = (byte) calcSetBitSeq(actline[bytePosSub], bitPosSub, bitsPerComponent, sub + left);
- }
- break;
- case 10:
- // PRED NONE
- // do nothing
- break;
- case 11:
- // PRED SUB
- for (int p = bytesPerPixel; p < rowlength; p++)
- {
- int sub = actline[p];
- int left = actline[p - bytesPerPixel];
- actline[p] = (byte) (sub + left);
- }
- break;
- case 12:
- // PRED UP
- for (int p = 0; p < rowlength; p++)
- {
- int up = actline[p] & 0xff;
- int prior = lastline[p] & 0xff;
- actline[p] = (byte) ((up + prior) & 0xff);
- }
- break;
- case 13:
- // PRED AVG
- for (int p = 0; p < rowlength; p++)
- {
- int avg = actline[p] & 0xff;
- int left = p - bytesPerPixel >= 0 ? actline[p - bytesPerPixel] & 0xff : 0;
- int up = lastline[p] & 0xff;
- actline[p] = (byte) ((avg + (left + up) / 2) & 0xff);
- }
- break;
- case 14:
- // PRED PAETH
- for (int p = 0; p < rowlength; p++)
- {
- int paeth = actline[p] & 0xff;
- int a = p - bytesPerPixel >= 0 ? actline[p - bytesPerPixel] & 0xff : 0;// left
- int b = lastline[p] & 0xff;// upper
- int c = p - bytesPerPixel >= 0 ? lastline[p - bytesPerPixel] & 0xff : 0;// upperleft
- int value = a + b - c;
- int absa = Math.abs(value - a);
- int absb = Math.abs(value - b);
- int absc = Math.abs(value - c);
-
- if (absa <= absb && absa <= absc)
- {
- actline[p] = (byte) ((paeth + a) & 0xff);
- }
- else if (absb <= absc)
- {
- actline[p] = (byte) ((paeth + b) & 0xff);
- }
- else
- {
- actline[p] = (byte) ((paeth + c) & 0xff);
- }
- }
- break;
- default:
- break;
- }
+ decodePredictorRow(linepredictor, colors, bitsPerComponent, columns, actline, lastline);
System.arraycopy(actline, 0, lastline, 0, rowlength);
out.write(actline);
}
}
}
+
+ static int calculateRowLength(int colors, int bitsPerComponent, int columns)
+ {
+ final int bitsPerPixel = colors * bitsPerComponent;
+ return (columns * bitsPerPixel + 7) / 8;
+ }
// get value from bit interval from a byte
static int getBitSeq(int by, int startBit, int bitSize)
@@ -240,4 +272,146 @@ public final class Predictor
return (by & mask) | (truncatedVal << startBit);
}
+ /**
+ * Wraps an <code>OutputStream</code> in a predictor decoding stream as necessary.
+ * If no predictor is specified by the parameters, the original stream is returned as is.
+ *
+ * @param out The stream to which decoded data should be written
+ * @param decodeParams Decode parameters for the stream
+ * @return An <code>OutputStream</code> is returned, which will write decoded data
+ * into the given stream. If no predictor is specified, the original stream is returned.
+ */
+ static OutputStream wrapPredictor(OutputStream out, COSDictionary decodeParams)
+ {
+ int predictor = decodeParams.getInt(COSName.PREDICTOR);
+ if (predictor > 1)
+ {
+ int colors = Math.min(decodeParams.getInt(COSName.COLORS, 1), 32);
+ int bitsPerPixel = decodeParams.getInt(COSName.BITS_PER_COMPONENT, 8);
+ int columns = decodeParams.getInt(COSName.COLUMNS, 1);
+
+ return new PredictorOutputStream(out, predictor, colors, bitsPerPixel, columns);
+ }
+ else
+ {
+ return out;
+ }
+ }
+
+ /**
+ * Output stream that implements predictor decoding. Data is buffered until a complete
+ * row is available, which is then decoded and written to the underlying stream.
+ * The previous row is retained for decoding the next row.
+ */
+ private static final class PredictorOutputStream extends FilterOutputStream
+ {
+ // current predictor type
+ private int predictor;
+ // image decode parameters
+ private final int colors, bitsPerComponent, columns, rowLength;
+ // PNG predictor (predictor>=10) means every row has a (potentially different)
+ // predictor value
+ private final boolean predictorPerRow;
+
+ // data buffers
+ private byte[] currentRow, lastRow;
+ // amount of data in the current row
+ private int currentRowData = 0;
+ // was the per-row predictor value read for the current row being processed
+ private boolean predictorRead = false;
+
+ PredictorOutputStream(OutputStream out, int predictor, int colors, int bitsPerComponent, int columns)
+ {
+ super(out);
+ this.predictor = predictor;
+ this.colors = colors;
+ this.bitsPerComponent = bitsPerComponent;
+ this.columns = columns;
+ this.rowLength = calculateRowLength(colors, bitsPerComponent, columns);
+ this.predictorPerRow = (predictor >= 10);
+ currentRow = new byte[rowLength];
+ lastRow = new byte[rowLength];
+ }
+
+ @Override
+ public void write(byte[] bytes) throws IOException
+ {
+ write(bytes, 0, bytes.length);
+ }
+
+ @Override
+ public void write(byte[] bytes, int off, int len) throws IOException
+ {
+ int maxOffset = off + len;
+ while (off < maxOffset)
+ {
+ if (predictorPerRow && currentRowData == 0 && !predictorRead)
+ {
+ // PNG predictor; each row starts with predictor type (0, 1, 2, 3, 4)
+ // read per line predictor, add 10 to treat value 0 as 10, 1 as 11, ...
+ predictor = bytes[off] + 10;
+ off++;
+ predictorRead = true;
+ }
+ else
+ {
+ int toRead = Math.min(rowLength - currentRowData, maxOffset - off);
+ System.arraycopy(bytes, off, currentRow, currentRowData, toRead);
+ currentRowData += toRead;
+ off += toRead;
+
+ // current row is filled, decode it, write it to underlying stream,
+ // and reset the state.
+ if (currentRowData == currentRow.length)
+ {
+ decodeAndWriteRow();
+ }
+ }
+ }
+ }
+
+ private void decodeAndWriteRow() throws IOException
+ {
+ decodePredictorRow(predictor, colors, bitsPerComponent, columns, currentRow, lastRow);
+ out.write(currentRow);
+ flipRows();
+ }
+
+ /**
+ * Flips the row buffers (to avoid copying), and resets the current-row index
+ * and predictorRead flag
+ */
+ private void flipRows()
+ {
+ byte[] temp = lastRow;
+ lastRow = currentRow;
+ currentRow = temp;
+ currentRowData = 0;
+ predictorRead = false;
+ }
+
+ @Override
+ public void flush() throws IOException
+ {
+ // The last row is allowed to be incomplete, and should be completed with zeros.
+ if (currentRowData > 0)
+ {
+ Arrays.fill(currentRow, currentRowData, rowLength, (byte)0);
+ decodeAndWriteRow();
+ }
+ super.flush();
+ }
+
+ @Override
+ public void close() throws IOException
+ {
+ super.close();
+ }
+
+ @Override
+ public void write(int i) throws IOException
+ {
+ throw new UnsupportedOperationException("Not supported");
+ }
+ }
}