You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2020/05/17 15:43:43 UTC
svn commit: r1877861 - in /pdfbox/trunk/pdfbox/src:
main/java/org/apache/pdfbox/cos/ main/java/org/apache/pdfbox/pdfparser/
test/java/org/apache/pdfbox/pdfparser/
Author: lehmi
Date: Sun May 17 15:43:42 2020
New Revision: 1877861
URL: http://svn.apache.org/viewvc?rev=1877861&view=rev
Log:
PDFBOX-4836: use RandomAccessReadView for malformed COSStreams as well
Added:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamFilterStream.java
- copied, changed from r1877860, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamOutputStream.java
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamFilterStreamTest.java
- copied, changed from r1877860, pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamOutputStreamTest.java
Removed:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamOutputStream.java
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamOutputStreamTest.java
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java?rev=1877861&r1=1877860&r2=1877861&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java Sun May 17 15:43:42 2020
@@ -152,20 +152,6 @@ public class COSDocument extends COSBase
}
/**
- * Creates a new COSStream using the current configuration for scratch files.
- * Not for public use. Only COSParser should call this method.
- *
- * @param dictionary the corresponding dictionary
- * @return the new COSStream
- */
- public COSStream createCOSStream(COSDictionary dictionary)
- {
- COSStream stream = new COSStream(scratchFile);
- dictionary.forEach(stream::setItem);
- return stream;
- }
-
- /**
* Creates a new COSStream using the current configuration for scratch files. Not for public use. Only COSParser should
* call this method.
*
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1877861&r1=1877860&r2=1877861&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sun May 17 15:43:42 2020
@@ -18,7 +18,6 @@ package org.apache.pdfbox.pdfparser;
import java.io.IOException;
import java.io.InputStream;
-import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.security.GeneralSecurityException;
import java.security.KeyStore;
@@ -854,25 +853,21 @@ public class COSParser extends BaseParse
}
}
- COSStream stream;
- long streamPosition = source.getPosition();
+
+ long streamStartPosition = source.getPosition();
+ long streamLength;
if (streamLengthObj != null && validateStreamLength(streamLengthObj.longValue()))
{
- stream = document.createCOSStream(dic,
- streamPosition,
- streamLengthObj.longValue());
+ streamLength = streamLengthObj.longValue();
// skip stream
source.seek(source.getPosition() + streamLengthObj.intValue());
}
else
{
- stream = document.createCOSStream(dic);
- // get output stream to copy data to
- try (OutputStream out = stream.createRawOutputStream())
- {
- readUntilEndStream(new EndstreamOutputStream(out));
- }
+ streamLength = readUntilEndStream(new EndstreamFilterStream());
}
+ COSStream stream = document.createCOSStream(dic, streamStartPosition, streamLength);
+
String endStream = readString();
if (endStream.equals("endobj") && isLenient)
{
@@ -912,7 +907,7 @@ public class COSParser extends BaseParse
*
* @throws IOException if something went wrong
*/
- private void readUntilEndStream( final OutputStream out ) throws IOException
+ private long readUntilEndStream(final EndstreamFilterStream out) throws IOException
{
int bufSize;
int charMatchCount = 0;
@@ -988,7 +983,7 @@ public class COSParser extends BaseParse
// write buffer content until first matched char to output stream
if ( contentBytes > 0 )
{
- out.write( strmBuf, 0, contentBytes );
+ out.filter(strmBuf, 0, contentBytes);
}
if ( charMatchCount == keyw.length )
{
@@ -1003,7 +998,7 @@ public class COSParser extends BaseParse
}
}
// this writes a lonely CR or drops trailing CR LF and LF
- out.flush();
+ return out.calculateLength();
}
private boolean validateStreamLength(long streamLength) throws IOException
Copied: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamFilterStream.java (from r1877860, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamOutputStream.java)
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamFilterStream.java?p2=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamFilterStream.java&p1=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamOutputStream.java&r1=1877860&r2=1877861&rev=1877861&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamOutputStream.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamFilterStream.java Sun May 17 15:43:42 2020
@@ -16,33 +16,21 @@
package org.apache.pdfbox.pdfparser;
-import java.io.BufferedOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
/**
- * This class is only for the readUntilEndStream method, to prevent a
- * final CR LF or LF (but not a final CR!) from being written to the output,
- * unless the beginning of the stream is assumed to be ASCII.
- * Only the 3-param write() method is implemented. This solves
- * PDFBOX-2079 and PDFBOX-2120 and avoids making readUntilEndStream()
- * even more complex than it already is.
+ * This class is only for the readUntilEndStream method, to prevent a final CR LF or LF (but not a final CR!) from being
+ * written to the output, unless the beginning of the stream is assumed to be ASCII. This solves PDFBOX-2079 and
+ * PDFBOX-2120 and avoids making readUntilEndStream() even more complex than it already is.
*
* @author Tilman Hausherr
*/
-class EndstreamOutputStream extends BufferedOutputStream
+class EndstreamFilterStream
{
- //TODO: replace this class with a PullBackOutputStream class if there ever is one
-
private boolean hasCR = false;
private boolean hasLF = false;
private int pos = 0;
private boolean mustFilter = true;
-
- EndstreamOutputStream(OutputStream out)
- {
- super(out);
- }
+ private long length = 0;
/**
* Write CR and/or LF that were kept, then writes len bytes from the
@@ -52,10 +40,8 @@ class EndstreamOutputStream extends Buff
* @param b byte array.
* @param off offset.
* @param len length of segment to write.
- * @throws IOException
*/
- @Override
- public synchronized void write(byte[] b, int off, int len) throws IOException
+ public void filter(byte[] b, int off, int len)
{
if (pos == 0 && len > 10)
{
@@ -86,11 +72,11 @@ class EndstreamOutputStream extends Buff
// reset hasCR done too to avoid CR getting written in the flush
return;
}
- super.write('\r');
+ length++;
}
if (hasLF)
{
- super.write('\n');
+ length++;
hasLF = false;
}
// don't write CR, LF, or CR LF if at the end of the buffer
@@ -113,7 +99,7 @@ class EndstreamOutputStream extends Buff
}
}
}
- super.write(b, off, len);
+ length += len;
pos += len;
}
@@ -121,19 +107,18 @@ class EndstreamOutputStream extends Buff
* write out a single CR if one was kept. Don't write kept CR LF or LF,
* and then call the base method to flush.
*
- * @throws IOException
*/
- @Override
- public synchronized void flush() throws IOException
+ public long calculateLength()
{
// if there is only a CR and no LF, write it
if (hasCR && !hasLF)
{
- super.write('\r');
+ length++;
++pos;
}
hasCR = false;
hasLF = false;
- super.flush();
+ return length;
}
+
}
Copied: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamFilterStreamTest.java (from r1877860, pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamOutputStreamTest.java)
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamFilterStreamTest.java?p2=pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamFilterStreamTest.java&p1=pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamOutputStreamTest.java&r1=1877860&r2=1877861&rev=1877861&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamOutputStreamTest.java (original)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamFilterStreamTest.java Sun May 17 15:43:42 2020
@@ -16,7 +16,6 @@
package org.apache.pdfbox.pdfparser;
-import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
@@ -39,74 +38,64 @@ import org.junit.Test;
*
* @author Tilman Hausherr
*/
-public class EndstreamOutputStreamTest
+public class EndstreamFilterStreamTest
{
@Test
- public void testEndstreamOutputStream() throws IOException
+ public void testEndstreamFilterStream() throws IOException
{
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- EndstreamOutputStream feos = new EndstreamOutputStream(baos);
- byte[] tab1 = {1, 2, 3, 4};
- byte[] tab2 = {5, 6, 7, '\r', '\n'};
- byte[] tab3 = {8, 9, '\r', '\n'};
- feos.write(tab1, 0, tab1.length);
- feos.write(tab2, 0, tab2.length);
- feos.write(tab3, 0, tab3.length);
- feos.flush();
- byte[] expectedResult1 = { 1, 2, 3, 4, 5, 6, 7, '\r', '\n', 8, 9};
- Assert.assertArrayEquals(expectedResult1, baos.toByteArray());
-
- baos = new ByteArrayOutputStream();
- feos = new EndstreamOutputStream(baos);
- byte[] tab4 = {1, 2, 3, 4};
- byte[] tab5 = {5, 6, 7, '\r' };
- byte[] tab6 = {8, 9, '\n'};
- feos.write(tab4, 0, tab4.length);
- feos.write(tab5, 0, tab5.length);
- feos.write(tab6, 0, tab6.length);
- feos.flush();
- byte[] expectedResult2 = { 1, 2, 3, 4, 5, 6, 7, '\r', 8, 9};
- Assert.assertArrayEquals(expectedResult2, baos.toByteArray());
-
- baos = new ByteArrayOutputStream();
- feos = new EndstreamOutputStream(baos);
- byte[] tab7 = {1, 2, 3, 4, '\r'};
- byte[] tab8 = {'\n', 5, 6, 7, '\n' };
- byte[] tab9 = {8, 9, '\r'}; // final CR is not to be discarded
- feos.write(tab7, 0, tab7.length);
- feos.write(tab8, 0, tab8.length);
- feos.write(tab9, 0, tab9.length);
- feos.flush();
- byte[] expectedResult3 = { 1, 2, 3, 4, '\r', '\n', 5, 6, 7, '\n', 8, 9, '\r'};
- Assert.assertArrayEquals(expectedResult3, baos.toByteArray());
-
- baos = new ByteArrayOutputStream();
- feos = new EndstreamOutputStream(baos);
- byte[] tab10 = {1, 2, 3, 4, '\r'};
- byte[] tab11 = {'\n', 5, 6, 7, '\r' };
- byte[] tab12 = {8, 9, '\r'};
- byte[] tab13 = {'\n'}; // final CR LF across buffers
- feos.write(tab10, 0, tab10.length);
- feos.write(tab11, 0, tab11.length);
- feos.write(tab12, 0, tab12.length);
- feos.write(tab13, 0, tab13.length);
- feos.flush();
- byte[] expectedResult4 = { 1, 2, 3, 4, '\r', '\n', 5, 6, 7, '\r', 8, 9};
- Assert.assertArrayEquals(expectedResult4, baos.toByteArray());
-
- baos = new ByteArrayOutputStream();
- feos = new EndstreamOutputStream(baos);
- byte[] tab14 = {1, 2, 3, 4, '\r'};
- byte[] tab15 = {'\n', 5, 6, 7, '\r' };
- byte[] tab16 = {8, 9, '\n'};
- byte[] tab17 = {'\r'}; // final CR is not to be discarded
- feos.write(tab14, 0, tab14.length);
- feos.write(tab15, 0, tab15.length);
- feos.write(tab16, 0, tab16.length);
- feos.write(tab17, 0, tab17.length);
- feos.flush();
- byte[] expectedResult5 = { 1, 2, 3, 4, '\r', '\n', 5, 6, 7, '\r', 8, 9, '\n', '\r'};
- Assert.assertArrayEquals(expectedResult5, baos.toByteArray());
+ EndstreamFilterStream feos = new EndstreamFilterStream();
+ byte[] tab1 = { 1, 2, 3, 4 };
+ byte[] tab2 = { 5, 6, 7, '\r', '\n' };
+ byte[] tab3 = { 8, 9, '\r', '\n' };
+ feos.filter(tab1, 0, tab1.length);
+ feos.filter(tab2, 0, tab2.length);
+ feos.filter(tab3, 0, tab3.length);
+ byte[] expectedResult1 = { 1, 2, 3, 4, 5, 6, 7, '\r', '\n', 8, 9 };
+ Assert.assertEquals(expectedResult1.length, feos.calculateLength());
+
+ feos = new EndstreamFilterStream();
+ byte[] tab4 = { 1, 2, 3, 4 };
+ byte[] tab5 = { 5, 6, 7, '\r' };
+ byte[] tab6 = { 8, 9, '\n' };
+ feos.filter(tab4, 0, tab4.length);
+ feos.filter(tab5, 0, tab5.length);
+ feos.filter(tab6, 0, tab6.length);
+ byte[] expectedResult2 = { 1, 2, 3, 4, 5, 6, 7, '\r', 8, 9 };
+ Assert.assertEquals(expectedResult2.length, feos.calculateLength());
+
+ feos = new EndstreamFilterStream();
+ byte[] tab7 = { 1, 2, 3, 4, '\r' };
+ byte[] tab8 = { '\n', 5, 6, 7, '\n' };
+ byte[] tab9 = { 8, 9, '\r' }; // final CR is not to be discarded
+ feos.filter(tab7, 0, tab7.length);
+ feos.filter(tab8, 0, tab8.length);
+ feos.filter(tab9, 0, tab9.length);
+ byte[] expectedResult3 = { 1, 2, 3, 4, '\r', '\n', 5, 6, 7, '\n', 8, 9, '\r' };
+ Assert.assertEquals(expectedResult3.length, feos.calculateLength());
+
+ feos = new EndstreamFilterStream();
+ byte[] tab10 = { 1, 2, 3, 4, '\r' };
+ byte[] tab11 = { '\n', 5, 6, 7, '\r' };
+ byte[] tab12 = { 8, 9, '\r' };
+ byte[] tab13 = { '\n' }; // final CR LF across buffers
+ feos.filter(tab10, 0, tab10.length);
+ feos.filter(tab11, 0, tab11.length);
+ feos.filter(tab12, 0, tab12.length);
+ feos.filter(tab13, 0, tab13.length);
+ byte[] expectedResult4 = { 1, 2, 3, 4, '\r', '\n', 5, 6, 7, '\r', 8, 9 };
+ Assert.assertEquals(expectedResult4.length, feos.calculateLength());
+
+ feos = new EndstreamFilterStream();
+ byte[] tab14 = { 1, 2, 3, 4, '\r' };
+ byte[] tab15 = { '\n', 5, 6, 7, '\r' };
+ byte[] tab16 = { 8, 9, '\n' };
+ byte[] tab17 = { '\r' }; // final CR is not to be discarded
+ feos.filter(tab14, 0, tab14.length);
+ feos.filter(tab15, 0, tab15.length);
+ feos.filter(tab16, 0, tab16.length);
+ feos.filter(tab17, 0, tab17.length);
+ byte[] expectedResult5 = { 1, 2, 3, 4, '\r', '\n', 5, 6, 7, '\r', 8, 9, '\n', '\r' };
+ Assert.assertEquals(expectedResult5.length, feos.calculateLength());
}
@Test