You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text export.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2020/05/17 15:43:43 UTC

svn commit: r1877861 - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/cos/ main/java/org/apache/pdfbox/pdfparser/ test/java/org/apache/pdfbox/pdfparser/

Author: lehmi
Date: Sun May 17 15:43:42 2020
New Revision: 1877861

URL: http://svn.apache.org/viewvc?rev=1877861&view=rev
Log:
PDFBOX-4836: use RandomAccessReadView for malformed COSStreams as well

Added:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamFilterStream.java
      - copied, changed from r1877860, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamOutputStream.java
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamFilterStreamTest.java
      - copied, changed from r1877860, pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamOutputStreamTest.java
Removed:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamOutputStream.java
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamOutputStreamTest.java
Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java?rev=1877861&r1=1877860&r2=1877861&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java Sun May 17 15:43:42 2020
@@ -152,20 +152,6 @@ public class COSDocument extends COSBase
     }
 
     /**
-     * Creates a new COSStream using the current configuration for scratch files.
-     * Not for public use. Only COSParser should call this method.
-     *
-     * @param dictionary the corresponding dictionary
-     * @return the new COSStream
-     */
-    public COSStream createCOSStream(COSDictionary dictionary)
-    {
-        COSStream stream = new COSStream(scratchFile);
-        dictionary.forEach(stream::setItem);
-        return stream;
-    }
-
-    /**
      * Creates a new COSStream using the current configuration for scratch files. Not for public use. Only COSParser should
      * call this method.
      * 

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1877861&r1=1877860&r2=1877861&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sun May 17 15:43:42 2020
@@ -18,7 +18,6 @@ package org.apache.pdfbox.pdfparser;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.OutputStream;
 import java.nio.charset.StandardCharsets;
 import java.security.GeneralSecurityException;
 import java.security.KeyStore;
@@ -854,25 +853,21 @@ public class COSParser extends BaseParse
             }
         }
 
-        COSStream stream;
-        long streamPosition = source.getPosition();
+
+        long streamStartPosition = source.getPosition();
+        long streamLength;
         if (streamLengthObj != null && validateStreamLength(streamLengthObj.longValue()))
         {
-            stream = document.createCOSStream(dic,
-                    streamPosition,
-                    streamLengthObj.longValue());
+            streamLength = streamLengthObj.longValue();
             // skip stream
             source.seek(source.getPosition() + streamLengthObj.intValue());
         }
         else
         {
-            stream = document.createCOSStream(dic);
-            // get output stream to copy data to
-            try (OutputStream out = stream.createRawOutputStream())
-            {
-                readUntilEndStream(new EndstreamOutputStream(out));
-            }
+            streamLength = readUntilEndStream(new EndstreamFilterStream());
         }
+        COSStream stream = document.createCOSStream(dic, streamStartPosition, streamLength);
+
         String endStream = readString();
         if (endStream.equals("endobj") && isLenient)
         {
@@ -912,7 +907,7 @@ public class COSParser extends BaseParse
      * 
      * @throws IOException if something went wrong
      */
-    private void readUntilEndStream( final OutputStream out ) throws IOException
+    private long readUntilEndStream(final EndstreamFilterStream out) throws IOException
     {
         int bufSize;
         int charMatchCount = 0;
@@ -988,7 +983,7 @@ public class COSParser extends BaseParse
             // write buffer content until first matched char to output stream
             if ( contentBytes > 0 )
             {
-                out.write( strmBuf, 0, contentBytes );
+                out.filter(strmBuf, 0, contentBytes);
             }
             if ( charMatchCount == keyw.length ) 
             {
@@ -1003,7 +998,7 @@ public class COSParser extends BaseParse
             }            
         }
         // this writes a lonely CR or drops trailing CR LF and LF
-        out.flush();
+        return out.calculateLength();
     }
 
     private boolean validateStreamLength(long streamLength) throws IOException

Copied: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamFilterStream.java (from r1877860, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamOutputStream.java)
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamFilterStream.java?p2=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamFilterStream.java&p1=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamOutputStream.java&r1=1877860&r2=1877861&rev=1877861&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamOutputStream.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamFilterStream.java Sun May 17 15:43:42 2020
@@ -16,33 +16,21 @@
 
 package org.apache.pdfbox.pdfparser;
 
-import java.io.BufferedOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
 
 /**
- * This class is only for the readUntilEndStream method, to prevent a
- * final CR LF or LF (but not a final CR!) from being written to the output,
- * unless the beginning of the stream is assumed to be ASCII.
- * Only the 3-param write() method is implemented. This solves
- * PDFBOX-2079 and PDFBOX-2120 and avoids making readUntilEndStream() 
- * even more complex than it already is.
+ * This class is only for the readUntilEndStream method, to prevent a final CR LF or LF (but not a final CR!) from being
+ * written to the output, unless the beginning of the stream is assumed to be ASCII. This solves PDFBOX-2079 and
+ * PDFBOX-2120 and avoids making readUntilEndStream() even more complex than it already is.
  *
  * @author Tilman Hausherr
  */
-class EndstreamOutputStream extends BufferedOutputStream
+class EndstreamFilterStream
 {
-    //TODO: replace this class with a PullBackOutputStream class if there ever is one
-    
     private boolean hasCR = false;
     private boolean hasLF = false;
     private int pos = 0;
     private boolean mustFilter = true;
-
-    EndstreamOutputStream(OutputStream out)
-    {
-        super(out);
-    }
+    private long length = 0;
 
     /**
      * Write CR and/or LF that were kept, then writes len bytes from the 
@@ -52,10 +40,8 @@ class EndstreamOutputStream extends Buff
      * @param b byte array.
      * @param off offset.
      * @param len length of segment to write.
-     * @throws IOException 
      */
-    @Override
-    public synchronized void write(byte[] b, int off, int len) throws IOException
+    public void filter(byte[] b, int off, int len)
     {
         if (pos == 0 && len > 10)
         {
@@ -86,11 +72,11 @@ class EndstreamOutputStream extends Buff
                     // reset hasCR done too to avoid CR getting written in the flush
                     return;
                 }
-                super.write('\r');               
+                length++;
             }
             if (hasLF)
             {
-                super.write('\n');
+                length++;
                 hasLF = false;
             }
             // don't write CR, LF, or CR LF if at the end of the buffer
@@ -113,7 +99,7 @@ class EndstreamOutputStream extends Buff
                 }
             }
         }
-        super.write(b, off, len);
+        length += len;
         pos += len;
     }
 
@@ -121,19 +107,18 @@ class EndstreamOutputStream extends Buff
      * write out a single CR if one was kept. Don't write kept CR LF or LF, 
      * and then call the base method to flush.
      * 
-     * @throws IOException 
      */
-    @Override
-    public synchronized void flush() throws IOException
+    public long calculateLength()
     {
         // if there is only a CR and no LF, write it
         if (hasCR && !hasLF)
         {
-            super.write('\r');
+            length++;
             ++pos;
         }
         hasCR = false;
         hasLF = false;
-        super.flush();
+        return length;
     }
+
 }

Copied: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamFilterStreamTest.java (from r1877860, pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamOutputStreamTest.java)
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamFilterStreamTest.java?p2=pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamFilterStreamTest.java&p1=pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamOutputStreamTest.java&r1=1877860&r2=1877861&rev=1877861&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamOutputStreamTest.java (original)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamFilterStreamTest.java Sun May 17 15:43:42 2020
@@ -16,7 +16,6 @@
 
 package org.apache.pdfbox.pdfparser;
 
-import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
@@ -39,74 +38,64 @@ import org.junit.Test;
  *
  * @author Tilman Hausherr
  */
-public class EndstreamOutputStreamTest
+public class EndstreamFilterStreamTest
 {
     @Test
-    public void testEndstreamOutputStream() throws IOException
+    public void testEndstreamFilterStream() throws IOException
     {
-        ByteArrayOutputStream baos = new ByteArrayOutputStream();
-        EndstreamOutputStream feos = new EndstreamOutputStream(baos);
-        byte[] tab1 = {1, 2, 3, 4};
-        byte[] tab2 = {5, 6, 7, '\r', '\n'};
-        byte[] tab3 = {8, 9, '\r', '\n'};
-        feos.write(tab1, 0, tab1.length);
-        feos.write(tab2, 0, tab2.length);
-        feos.write(tab3, 0, tab3.length);
-        feos.flush();
-        byte[] expectedResult1 = { 1, 2, 3, 4, 5, 6, 7, '\r', '\n', 8, 9};
-        Assert.assertArrayEquals(expectedResult1, baos.toByteArray());
-
-        baos = new ByteArrayOutputStream();
-        feos = new EndstreamOutputStream(baos);
-        byte[] tab4 = {1, 2, 3, 4};
-        byte[] tab5 = {5, 6, 7, '\r' };
-        byte[] tab6 = {8, 9, '\n'};
-        feos.write(tab4, 0, tab4.length);
-        feos.write(tab5, 0, tab5.length);
-        feos.write(tab6, 0, tab6.length);
-        feos.flush();
-        byte[] expectedResult2 = { 1, 2, 3, 4, 5, 6, 7, '\r', 8, 9};
-        Assert.assertArrayEquals(expectedResult2, baos.toByteArray());
-        
-        baos = new ByteArrayOutputStream();
-        feos = new EndstreamOutputStream(baos);
-        byte[] tab7 = {1, 2, 3, 4, '\r'};
-        byte[] tab8 = {'\n', 5, 6, 7, '\n' };
-        byte[] tab9 = {8, 9, '\r'}; // final CR is not to be discarded
-        feos.write(tab7, 0, tab7.length);
-        feos.write(tab8, 0, tab8.length);
-        feos.write(tab9, 0, tab9.length);
-        feos.flush();
-        byte[] expectedResult3 = { 1, 2, 3, 4, '\r', '\n', 5, 6, 7, '\n', 8, 9, '\r'};
-        Assert.assertArrayEquals(expectedResult3, baos.toByteArray());
-        
-        baos = new ByteArrayOutputStream();
-        feos = new EndstreamOutputStream(baos);
-        byte[] tab10 = {1, 2, 3, 4, '\r'};
-        byte[] tab11 = {'\n', 5, 6, 7, '\r' };
-        byte[] tab12 = {8, 9, '\r'};
-        byte[] tab13 = {'\n'}; // final CR LF across buffers
-        feos.write(tab10, 0, tab10.length);
-        feos.write(tab11, 0, tab11.length);
-        feos.write(tab12, 0, tab12.length);
-        feos.write(tab13, 0, tab13.length);
-        feos.flush();
-        byte[] expectedResult4 = { 1, 2, 3, 4, '\r', '\n', 5, 6, 7, '\r', 8, 9};
-        Assert.assertArrayEquals(expectedResult4, baos.toByteArray());
-
-        baos = new ByteArrayOutputStream();
-        feos = new EndstreamOutputStream(baos);
-        byte[] tab14 = {1, 2, 3, 4, '\r'};
-        byte[] tab15 = {'\n', 5, 6, 7, '\r' };
-        byte[] tab16 = {8, 9, '\n'};
-        byte[] tab17 = {'\r'}; // final CR is not to be discarded
-        feos.write(tab14, 0, tab14.length);
-        feos.write(tab15, 0, tab15.length);
-        feos.write(tab16, 0, tab16.length);
-        feos.write(tab17, 0, tab17.length);
-        feos.flush();
-        byte[] expectedResult5 = { 1, 2, 3, 4, '\r', '\n', 5, 6, 7, '\r', 8, 9, '\n', '\r'};
-        Assert.assertArrayEquals(expectedResult5, baos.toByteArray());
+        EndstreamFilterStream feos = new EndstreamFilterStream();
+        byte[] tab1 = { 1, 2, 3, 4 };
+        byte[] tab2 = { 5, 6, 7, '\r', '\n' };
+        byte[] tab3 = { 8, 9, '\r', '\n' };
+        feos.filter(tab1, 0, tab1.length);
+        feos.filter(tab2, 0, tab2.length);
+        feos.filter(tab3, 0, tab3.length);
+        byte[] expectedResult1 = { 1, 2, 3, 4, 5, 6, 7, '\r', '\n', 8, 9 };
+        Assert.assertEquals(expectedResult1.length, feos.calculateLength());
+
+        feos = new EndstreamFilterStream();
+        byte[] tab4 = { 1, 2, 3, 4 };
+        byte[] tab5 = { 5, 6, 7, '\r' };
+        byte[] tab6 = { 8, 9, '\n' };
+        feos.filter(tab4, 0, tab4.length);
+        feos.filter(tab5, 0, tab5.length);
+        feos.filter(tab6, 0, tab6.length);
+        byte[] expectedResult2 = { 1, 2, 3, 4, 5, 6, 7, '\r', 8, 9 };
+        Assert.assertEquals(expectedResult2.length, feos.calculateLength());
+
+        feos = new EndstreamFilterStream();
+        byte[] tab7 = { 1, 2, 3, 4, '\r' };
+        byte[] tab8 = { '\n', 5, 6, 7, '\n' };
+        byte[] tab9 = { 8, 9, '\r' }; // final CR is not to be discarded
+        feos.filter(tab7, 0, tab7.length);
+        feos.filter(tab8, 0, tab8.length);
+        feos.filter(tab9, 0, tab9.length);
+        byte[] expectedResult3 = { 1, 2, 3, 4, '\r', '\n', 5, 6, 7, '\n', 8, 9, '\r' };
+        Assert.assertEquals(expectedResult3.length, feos.calculateLength());
+
+        feos = new EndstreamFilterStream();
+        byte[] tab10 = { 1, 2, 3, 4, '\r' };
+        byte[] tab11 = { '\n', 5, 6, 7, '\r' };
+        byte[] tab12 = { 8, 9, '\r' };
+        byte[] tab13 = { '\n' }; // final CR LF across buffers
+        feos.filter(tab10, 0, tab10.length);
+        feos.filter(tab11, 0, tab11.length);
+        feos.filter(tab12, 0, tab12.length);
+        feos.filter(tab13, 0, tab13.length);
+        byte[] expectedResult4 = { 1, 2, 3, 4, '\r', '\n', 5, 6, 7, '\r', 8, 9 };
+        Assert.assertEquals(expectedResult4.length, feos.calculateLength());
+
+        feos = new EndstreamFilterStream();
+        byte[] tab14 = { 1, 2, 3, 4, '\r' };
+        byte[] tab15 = { '\n', 5, 6, 7, '\r' };
+        byte[] tab16 = { 8, 9, '\n' };
+        byte[] tab17 = { '\r' }; // final CR is not to be discarded
+        feos.filter(tab14, 0, tab14.length);
+        feos.filter(tab15, 0, tab15.length);
+        feos.filter(tab16, 0, tab16.length);
+        feos.filter(tab17, 0, tab17.length);
+        byte[] expectedResult5 = { 1, 2, 3, 4, '\r', '\n', 5, 6, 7, '\r', 8, 9, '\n', '\r' };
+        Assert.assertEquals(expectedResult5.length, feos.calculateLength());
     }
 
     @Test