You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2014/10/16 19:06:53 UTC
svn commit: r1632387 - in
/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox:
cos/COSStream.java pdfparser/PDFParser.java
Author: tilman
Date: Thu Oct 16 17:06:52 2014
New Revision: 1632387
URL: http://svn.apache.org/r1632387
Log:
PDFBOX-2296: check and fix the length of all streams where /Length value has become known after reading
Modified:
pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java
pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java?rev=1632387&r1=1632386&r2=1632387&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java Thu Oct 16 17:06:52 2014
@@ -178,6 +178,33 @@ public class COSStream extends COSDictio
}
/**
+ * This will set the expected length of the encoded stream. Call this method
+ * if the previously set expected length is wrong, to avoid further trouble.
+ *
+ * @param length the expected length of the encoded stream.
+ */
+ public void setFilteredLength(long length)
+ {
+ filteredStream.setExpectedLength(COSInteger.get(length));
+ }
+
+ /**
+ * This will get the length of the data written in the encoded stream.
+ *
+ * @return the length of the data written in the encoded stream as long
+ *
+ * @throws IOException if an I/O error occurs while encoding the stream or determining its written length.
+ */
+ public long getFilteredLengthWritten() throws IOException
+ {
+ if (filteredStream == null)
+ {
+ doEncode();
+ }
+ return filteredStream.getLengthWritten();
+ }
+
+ /**
* This will get the logical content stream with none of the filters.
*
* @return the bytes of the logical (decoded) stream
Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1632387&r1=1632386&r2=1632387&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Thu Oct 16 17:06:52 2014
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
@@ -32,6 +33,7 @@ import org.apache.pdfbox.cos.COSDictiona
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.exceptions.WrappedIOException;
@@ -67,6 +69,11 @@ public class PDFParser extends BaseParse
* File.
*/
private List<ConflictObj> conflictList = new ArrayList<ConflictObj>();
+
+ /**
+ * A set of COSStream objects to check for length correctness
+ */
+ private final HashSet<COSStream> streamLengthCheckSet = new HashSet<COSStream>();
/** Collects all Xref/trailer objects and resolves them into single
* object using startxref reference.
@@ -239,6 +246,8 @@ public class PDFParser extends BaseParse
document.setTrailer( xrefTrailerResolver.getTrailer() );
document.addXRefTable( xrefTrailerResolver.getXrefTable() );
+ fixStreamsLength();
+
if( !document.isEncrypted() )
{
document.dereferenceObjectStreams();
@@ -274,6 +283,35 @@ public class PDFParser extends BaseParse
}
/**
+ * Check whether streams with previously unknown length have the correct
+ * length and fix that length if needed.
+ *
+ * @throws IOException if an I/O error occurs while determining the length of a stream.
+ */
+ private void fixStreamsLength() throws IOException
+ {
+ for (COSObject obj : document.getObjects())
+ {
+ if (obj.getObject() instanceof COSStream
+ && streamLengthCheckSet.contains((COSStream) obj.getObject()))
+ {
+ COSStream stream = (COSStream) obj.getObject();
+
+ long filteredLength = stream.getFilteredLength();
+ long filteredLengthWritten = stream.getFilteredLengthWritten();
+ if (Math.abs(filteredLength - filteredLengthWritten) > 2)
+ {
+ // adjust the length, but only if the difference is > 2,
+ // i.e. don't bother with CR LF differences
+ LOG.warn("/Length of " + obj + " corrected from " + filteredLength + " to " + filteredLengthWritten);
+ stream.setLong(COSName.LENGTH, filteredLengthWritten);
+ stream.setFilteredLength(filteredLengthWritten);
+ }
+ }
+ }
+ }
+
+ /**
* Skip to the start of the next object. This is used to recover
* from a corrupt object. This should handle all cases that parseObject
* supports. This assumes that the next object will
@@ -610,6 +648,19 @@ public class PDFParser extends BaseParse
// test for XRef type
final COSStream strmObj = (COSStream) pb;
+
+ // remember streams whose /Length is not a direct number (or is -1) to check them later
+ COSBase streamLength = strmObj.getItem(COSName.LENGTH);
+ int length = -1;
+ if (streamLength instanceof COSNumber)
+ {
+ length = ((COSNumber) streamLength).intValue();
+ }
+ if (length == -1)
+ {
+ streamLengthCheckSet.add(strmObj);
+ }
+
final COSName objectType = (COSName)strmObj.getItem( COSName.TYPE );
if( objectType != null && objectType.equals( COSName.XREF ) )
{