You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2014/10/16 19:06:53 UTC

svn commit: r1632387 - in /pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox: cos/COSStream.java pdfparser/PDFParser.java

Author: tilman
Date: Thu Oct 16 17:06:52 2014
New Revision: 1632387

URL: http://svn.apache.org/r1632387
Log:
PDFBOX-2296: check and fix the length of all streams where /Length value has become known after reading

Modified:
    pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java
    pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java

Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java?rev=1632387&r1=1632386&r2=1632387&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java Thu Oct 16 17:06:52 2014
@@ -178,6 +178,33 @@ public class COSStream extends COSDictio
     }
     
     /**
+     * This will set the expected length of the encoded stream. Call this method
+     * if the previously set expected length is wrong, to avoid further trouble.
+     * 
+     * @param length the expected length of the encoded stream.
+     */
+    public void setFilteredLength(long length)
+    {
+        filteredStream.setExpectedLength(COSInteger.get(length));
+    }
+
+    /**
+     * This will get the length of the data written in the encoded stream.
+     *
+     * @return the length of the data written in the encoded stream as long
+     *
+     * @throws IOException
+     */
+    public long getFilteredLengthWritten() throws IOException
+    {
+        if (filteredStream == null)
+        {
+            doEncode();
+        }
+        return filteredStream.getLengthWritten();
+    }
+ 
+    /**
      * This will get the logical content stream with none of the filters.
      *
      * @return the bytes of the logical (decoded) stream

Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1632387&r1=1632386&r2=1632387&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Thu Oct 16 17:06:52 2014
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.regex.Pattern;
@@ -32,6 +33,7 @@ import org.apache.pdfbox.cos.COSDictiona
 import org.apache.pdfbox.cos.COSDocument;
 import org.apache.pdfbox.cos.COSInteger;
 import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSNumber;
 import org.apache.pdfbox.cos.COSObject;
 import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.exceptions.WrappedIOException;
@@ -67,6 +69,11 @@ public class PDFParser extends BaseParse
      * File.
      */
     private List<ConflictObj> conflictList = new ArrayList<ConflictObj>();
+    
+    /**
+     * A set of COSStream objects to check for length correctness.
+     */
+    private final HashSet<COSStream> streamLengthCheckSet = new HashSet<COSStream>();
 
     /** Collects all Xref/trailer objects and resolves them into single
      *  object using startxref reference. 
@@ -239,6 +246,8 @@ public class PDFParser extends BaseParse
             document.setTrailer( xrefTrailerResolver.getTrailer() );
             document.addXRefTable( xrefTrailerResolver.getXrefTable() );
 
+            fixStreamsLength();
+
             if( !document.isEncrypted() )
             {
                 document.dereferenceObjectStreams();
@@ -274,6 +283,35 @@ public class PDFParser extends BaseParse
     }
 
     /**
+     * Check whether streams with previously unknown length have the correct
+     * length and fix that length if needed.
+     *
+     * @throws IOException
+     */
+    private void fixStreamsLength() throws IOException
+    {
+        for (COSObject obj : document.getObjects())
+        {
+            if (obj.getObject() instanceof COSStream
+                    && streamLengthCheckSet.contains((COSStream) obj.getObject()))
+            {
+                COSStream stream = (COSStream) obj.getObject();
+
+                long filteredLength = stream.getFilteredLength();
+                long filteredLengthWritten = stream.getFilteredLengthWritten();
+                if (Math.abs(filteredLength - filteredLengthWritten) > 2)
+                {
+                    // adjust the length, but only if the difference is > 2,
+                    // i.e. don't bother with CR LF differences
+                    LOG.warn("/Length of " + obj + " corrected from " + filteredLength + " to " + filteredLengthWritten);
+                    stream.setLong(COSName.LENGTH, filteredLengthWritten);
+                    stream.setFilteredLength(filteredLengthWritten);
+                }
+            }
+        }
+    }
+
+    /**
      * Skip to the start of the next object.  This is used to recover
      * from a corrupt object. This should handle all cases that parseObject
      * supports. This assumes that the next object will
@@ -610,6 +648,19 @@ public class PDFParser extends BaseParse
 
                     // test for XRef type
                     final COSStream strmObj = (COSStream) pb;
+                    
+                    // remember streams whose /Length is not a direct number, to check them later
+                    COSBase streamLength = strmObj.getItem(COSName.LENGTH);
+                    int length = -1;
+                    if (streamLength instanceof COSNumber)
+                    {
+                        length = ((COSNumber) streamLength).intValue();
+                    }
+                    if (length == -1)
+                    {
+                        streamLengthCheckSet.add(strmObj);
+                    }
+                    
                     final COSName objectType = (COSName)strmObj.getItem( COSName.TYPE );
                     if( objectType != null && objectType.equals( COSName.XREF ) )
                     {