You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2012/03/10 16:03:47 UTC
svn commit: r1299219 -
/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java
Author: lehmi
Date: Sat Mar 10 15:03:47 2012
New Revision: 1299219
URL: http://svn.apache.org/viewvc?rev=1299219&view=rev
Log:
PDFBOX-1232: use Inflater instead of InflaterInputStream to avoid an EOFException, as proposed by Dave Smith
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java?rev=1299219&r1=1299218&r2=1299219&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java Sat Mar 10 15:03:47 2012
@@ -21,10 +21,9 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
-import java.io.EOFException;
+import java.util.zip.DataFormatException;
import java.util.zip.DeflaterOutputStream;
-import java.util.zip.InflaterInputStream;
-import java.util.zip.ZipException;
+import java.util.zip.Inflater;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -32,6 +31,7 @@ import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
+import org.apache.tika.io.IOExceptionWithCause;
/**
* This is the used for the FlateDecode filter.
@@ -81,7 +81,6 @@ public class FlateFilter implements Filt
int colors = -1;
int bitsPerPixel = -1;
int columns = -1;
- InflaterInputStream decompressor = null;
ByteArrayInputStream bais = null;
ByteArrayOutputStream baos = null;
if (dict!=null)
@@ -97,87 +96,50 @@ public class FlateFilter implements Filt
try
{
- // Decompress data to temporary ByteArrayOutputStream
- decompressor = new InflaterInputStream(compressedData);
- int amountRead;
- int mayRead = compressedData.available();
-
- if (mayRead > 0)
+ baos = decompress(compressedData);
+ // Decode data using given predictor
+ if (predictor==-1 || predictor == 1 )
{
- byte[] buffer = new byte[Math.min(mayRead,BUFFER_SIZE)];
-
- // Decode data using given predictor
- if (predictor==-1 || predictor == 1 )
+ result.write(baos.toByteArray());
+ }
+ else
+ {
+ /*
+ * Reverting back to default values
+ */
+ if( colors == -1 )
{
- try
- {
- // decoding not needed
- while ((amountRead = decompressor.read(buffer, 0, Math.min(mayRead,BUFFER_SIZE))) != -1)
- {
- result.write(buffer, 0, amountRead);
- }
- }
- catch (ZipException exception)
- {
- // if the stream is corrupt an ZipException may occur
- LOG.error("FlateFilter: stop reading corrupt stream due to a ZipException");
- // re-throw the exception, caller has to handle it
- throw exception;
- }
- catch (EOFException exception)
- {
- // if the stream is corrupt an EOFException may occur
- LOG.error("FlateFilter: stop reading corrupt stream due to an EOFException");
- // re-throw the exception, caller has to handle it
- throw exception;
- }
+ colors = 1;
}
- else
+ if( bitsPerPixel == -1 )
{
- /*
- * Reverting back to default values
- */
- if( colors == -1 )
- {
- colors = 1;
- }
- if( bitsPerPixel == -1 )
- {
- bitsPerPixel = 8;
- }
- if( columns == -1 )
- {
- columns = 1;
- }
+ bitsPerPixel = 8;
+ }
+ if( columns == -1 )
+ {
+ columns = 1;
+ }
- baos = new ByteArrayOutputStream();
- while ((amountRead = decompressor.read(buffer, 0, Math.min(mayRead,BUFFER_SIZE))) != -1)
- {
- baos.write(buffer, 0, amountRead);
- }
- baos.flush();
+ // Copy data to ByteArrayInputStream for reading
+ bais = new ByteArrayInputStream(baos.toByteArray());
- // Copy data to ByteArrayInputStream for reading
- bais = new ByteArrayInputStream(baos.toByteArray());
- baos.close();
- baos = null;
-
- byte[] decodedData = decodePredictor(predictor, colors, bitsPerPixel, columns, bais);
- bais.close();
- bais = null;
+ byte[] decodedData = decodePredictor(predictor, colors, bitsPerPixel, columns, bais);
+ bais.close();
+ bais = null;
- result.write(decodedData);
- }
+ result.write(decodedData);
}
-
result.flush();
+ }
+ catch (DataFormatException exception)
+ {
+ // if the stream is corrupt a DataFormatException may occur
+ LOG.error("FlateFilter: stop reading corrupt stream due to a DataFormatException");
+ // re-throw the exception, caller has to handle it
+ throw new IOExceptionWithCause(exception);
}
finally
{
- if (decompressor != null)
- {
- decompressor.close();
- }
if (bais != null)
{
bais.close();
@@ -189,6 +151,38 @@ public class FlateFilter implements Filt
}
}
+ // Use Inflater instead of InflaterInputStream to avoid an EOFException caused by a possibly
+ // missing Z_STREAM_END marker, see PDFBOX-1232 for details
+ private ByteArrayOutputStream decompress(InputStream in) throws IOException, DataFormatException
+ {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ byte[] buf = new byte[2048];
+ int read = in.read(buf);
+ if(read > 0)
+ {
+ Inflater inflater = new Inflater();
+ inflater.setInput(buf,0,read);
+ byte[] res = new byte[2048];
+ while(true)
+ {
+ int resRead = inflater.inflate(res);
+ if(resRead != 0)
+ {
+ out.write(res,0,resRead);
+ continue;
+ }
+ if(inflater.finished() || inflater.needsDictionary() || in.available() == 0)
+ {
+ break;
+ }
+ read = in.read(buf);
+ inflater.setInput(buf,0,read);
+ }
+ }
+ out.close();
+ return out;
+ }
+
private byte[] decodePredictor(int predictor, int colors, int bitsPerComponent, int columns, InputStream data)
throws IOException
{