You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2020/05/17 10:55:16 UTC
svn commit: r1877851 - in
/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox: cos/COSDocument.java
cos/COSStream.java cos/ICOSParser.java io/RandomAccessReadView.java
pdfparser/COSParser.java
Author: lehmi
Date: Sun May 17 10:55:16 2020
New Revision: 1877851
URL: http://svn.apache.org/viewvc?rev=1877851&view=rev
Log:
PDFBOX-4836: introduce RandomAccessReadView to be used to read a COSStream
Added:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessReadView.java (with props)
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/ICOSParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java?rev=1877851&r1=1877850&r2=1877851&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java Sun May 17 10:55:16 2020
@@ -166,6 +166,24 @@ public class COSDocument extends COSBase
}
/**
+ * Creates a new COSStream using the current configuration for scratch files. Not for public use. Only COSParser
+ * should call this method.
+ *
+ * @param dictionary the corresponding dictionary
+ * @param startPosition the start position within the source
+ * @param streamLength the stream length
+ * @return the new COSStream
+ */
+ public COSStream createCOSStream(COSDictionary dictionary, long startPosition,
+ long streamLength)
+ {
+ COSStream stream = new COSStream(scratchFile,
+ parser.createRandomAccessReadView(startPosition, streamLength));
+ dictionary.forEach(stream::setItem);
+ return stream;
+ }
+
+ /**
* Get the dictionary containing the linearization information if the pdf is linearized.
*
* @return the dictionary containing the linearization information
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java?rev=1877851&r1=1877850&r2=1877851&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java Sun May 17 10:55:16 2020
@@ -37,6 +37,7 @@ import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.RandomAccess;
import org.apache.pdfbox.io.RandomAccessInputStream;
import org.apache.pdfbox.io.RandomAccessOutputStream;
+import org.apache.pdfbox.io.RandomAccessReadView;
import org.apache.pdfbox.io.ScratchFile;
/**
@@ -46,9 +47,14 @@ import org.apache.pdfbox.io.ScratchFile;
*/
public class COSStream extends COSDictionary implements Closeable
{
- private RandomAccess randomAccess; // backing store, in-memory or on-disk
- private final ScratchFile scratchFile; // used as a temp buffer during decoding
- private boolean isWriting; // true if there's an open OutputStream
+ // backing store, in-memory or on-disk
+ private RandomAccess randomAccess;
+ // used as a temp buffer during decoding
+ private final ScratchFile scratchFile;
+ // true if there's an open OutputStream
+ private boolean isWriting;
+ // random access view to be read from
+ private RandomAccessReadView randomAccessReadView;
private static final Log LOG = LogFactory.getLog(COSStream.class);
@@ -77,6 +83,27 @@ public class COSStream extends COSDictio
}
/**
+ * Creates a new stream with an empty dictionary. Data is read from the given random access view. Written data is
+ * stored in the given scratch file.
+ *
+ * @param scratchFile Scratch file for writing stream data.
+ */
+ public COSStream(ScratchFile scratchFile, RandomAccessReadView randomAccessReadView)
+ {
+ this(scratchFile);
+ this.randomAccessReadView = randomAccessReadView;
+ try
+ {
+ setInt(COSName.LENGTH, (int) randomAccessReadView.length());
+ }
+ catch (IOException e)
+ {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ }
+
+ /**
* {@inheritDoc}
*/
@Override
@@ -184,8 +211,16 @@ public class COSStream extends COSDictio
{
throw new IllegalStateException("Cannot read while there is an open stream writer");
}
- ensureRandomAccessExists(true);
- return new RandomAccessInputStream(randomAccess);
+ if (randomAccess == null && randomAccessReadView != null)
+ {
+ randomAccessReadView.seek(0);
+ return new RandomAccessInputStream(randomAccessReadView);
+ }
+ else
+ {
+ ensureRandomAccessExists(true);
+ return new RandomAccessInputStream(randomAccess);
+ }
}
/**
@@ -201,13 +236,7 @@ public class COSStream extends COSDictio
public COSInputStream createInputStream(DecodeOptions options) throws IOException
{
- checkClosed();
- if (isWriting)
- {
- throw new IllegalStateException("Cannot read while there is an open stream writer");
- }
- ensureRandomAccessExists(true);
- InputStream input = new RandomAccessInputStream(randomAccess);
+ InputStream input = createRawInputStream();
return COSInputStream.create(getFilterList(), this, input, scratchFile, options);
}
@@ -241,8 +270,10 @@ public class COSStream extends COSDictio
{
setItem(COSName.FILTER, filters);
}
- IOUtils.closeQuietly(randomAccess);
- randomAccess = scratchFile.createBuffer();
+ if (randomAccess != null)
+ randomAccess.clear();
+ else
+ randomAccess = scratchFile.createBuffer();
OutputStream randomOut = new RandomAccessOutputStream(randomAccess);
OutputStream cosOut = new COSOutputStream(getFilterList(), this, randomOut, scratchFile);
isWriting = true;
@@ -277,8 +308,10 @@ public class COSStream extends COSDictio
{
throw new IllegalStateException("Cannot have more than one open stream writer.");
}
- IOUtils.closeQuietly(randomAccess);
- randomAccess = scratchFile.createBuffer();
+ if (randomAccess != null)
+ randomAccess.clear();
+ else
+ randomAccess = scratchFile.createBuffer();
OutputStream out = new RandomAccessOutputStream(randomAccess);
isWriting = true;
return new FilterOutputStream(out)
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/ICOSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/ICOSParser.java?rev=1877851&r1=1877850&r2=1877851&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/ICOSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/ICOSParser.java Sun May 17 10:55:16 2020
@@ -18,6 +18,8 @@ package org.apache.pdfbox.cos;
import java.io.IOException;
+import org.apache.pdfbox.io.RandomAccessReadView;
+
public interface ICOSParser
{
@@ -25,11 +27,18 @@ public interface ICOSParser
* Dereference the COSBase object which is referenced by the given COSObject.
*
* @param obj the COSObject which references the COSBase object to be dereferenced.
- *
* @return the referenced object
- *
* @throws IOException if something went wrong when dereferencing the COSBase object
*/
public COSBase dereferenceCOSObject(COSObject obj) throws IOException;
+ /**
+ * Create a random access read view starting at the given position with the given length.
+ *
+ * @param startPosition start position within the underlying random access read
+ * @param streamLength stream length
+ * @return the random access read view
+ */
+ public RandomAccessReadView createRandomAccessReadView(long startPosition, long streamLength);
+
}
Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessReadView.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessReadView.java?rev=1877851&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessReadView.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessReadView.java Sun May 17 10:55:16 2020
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.io;
+
+import java.io.IOException;
+
+/**
+ * This class provides a view of a part of a random access read. It clips the section starting at the given start
+ * position with the given length into a new random access read.
+ *
+ */
+public class RandomAccessReadView implements RandomAccessRead
+{
+ // the underlying random access read
+ private RandomAccessRead randomAccessRead;
+ // the start position within the underlying source
+ private final long startPosition;
+ // stream length
+ private final long streamLength;
+ // current position within the view
+ private long currentPosition = 0;
+
+ /**
+ * Constructor.
+ *
+ * @param randomAccessRead the underlying random access read
+ * @param startPosition start position within the underlying random access read
+ * @param streamLength stream length
+ */
+ public RandomAccessReadView(RandomAccessRead randomAccessRead, long startPosition,
+ long streamLength)
+ {
+ this.randomAccessRead = randomAccessRead;
+ this.startPosition = startPosition;
+ this.streamLength = streamLength;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public long getPosition() throws IOException
+ {
+ checkClosed();
+ return currentPosition;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void seek(final long newOffset) throws IOException
+ {
+ checkClosed();
+ if (newOffset < streamLength)
+ {
+ randomAccessRead.seek(startPosition + newOffset);
+ currentPosition = newOffset;
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public int read() throws IOException
+ {
+ checkClosed();
+ if (currentPosition >= streamLength)
+ {
+ return -1;
+ }
+ restorePosition();
+ int readValue = randomAccessRead.read();
+ if (readValue > -1)
+ {
+ currentPosition++;
+ }
+ return readValue;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public int read(byte[] b) throws IOException
+ {
+ checkClosed();
+ if (currentPosition >= streamLength)
+ {
+ return 0;
+ }
+ restorePosition();
+ return read(b, 0, b.length);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public int read(byte[] b, int off, int len) throws IOException
+ {
+ checkClosed();
+ restorePosition();
+ int readBytes = randomAccessRead.read(b, off, Math.min(len, available()));
+ currentPosition += readBytes;
+ return readBytes;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public int available() throws IOException
+ {
+ checkClosed();
+ return (int) (streamLength - currentPosition);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public long length() throws IOException
+ {
+ checkClosed();
+ return streamLength;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void close() throws IOException
+ {
+ checkClosed();
+ randomAccessRead = null;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public boolean isClosed()
+ {
+ return randomAccessRead == null || randomAccessRead.isClosed();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public int peek() throws IOException
+ {
+ checkClosed();
+ restorePosition();
+ return randomAccessRead.peek();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void rewind(int bytes) throws IOException
+ {
+ checkClosed();
+ restorePosition();
+ randomAccessRead.rewind(bytes);
+ currentPosition -= bytes;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public boolean isEOF() throws IOException
+ {
+ checkClosed();
+ return currentPosition >= streamLength;
+ }
+
+ /**
+ * Restore the current position within the underlying random access read.
+ *
+ * @throws IOException
+ */
+ private void restorePosition() throws IOException
+ {
+ randomAccessRead.seek(startPosition + currentPosition);
+ }
+
+ /**
+ * Ensure that the view isn't closed.
+ *
+ * @throws IOException
+ */
+ private void checkClosed() throws IOException
+ {
+ if (isClosed())
+ {
+ // the view counts as closed once its underlying random access read has been released or closed
+ throw new IOException("RandomAccessReadView already closed");
+ }
+ }
+
+}
Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessReadView.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1877851&r1=1877850&r2=1877851&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sun May 17 10:55:16 2020
@@ -45,6 +45,7 @@ import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.cos.ICOSParser;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.RandomAccessRead;
+import org.apache.pdfbox.io.RandomAccessReadView;
import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial;
@@ -573,6 +574,12 @@ public class COSParser extends BaseParse
return parsedObj;
}
+ @Override
+ public RandomAccessReadView createRandomAccessReadView(long startPosition, long streamLength)
+ {
+ return new RandomAccessReadView(source, startPosition, streamLength);
+ }
+
/**
* Parse the object for the given object number.
*
@@ -823,8 +830,6 @@ public class COSParser extends BaseParse
*/
protected COSStream parseCOSStream(COSDictionary dic) throws IOException
{
- COSStream stream = document.createCOSStream(dic);
-
// read 'stream'; this was already tested in parseObjectsDynamically()
readString();
@@ -847,14 +852,21 @@ public class COSParser extends BaseParse
}
}
- // get output stream to copy data to
- try (OutputStream out = stream.createRawOutputStream())
+ COSStream stream;
+ long streamPosition = source.getPosition();
+ if (streamLengthObj != null && validateStreamLength(streamLengthObj.longValue()))
+ {
+ stream = document.createCOSStream(dic,
+ streamPosition,
+ streamLengthObj.longValue());
+ // skip stream
+ source.seek(source.getPosition() + streamLengthObj.intValue());
+ }
+ else
{
- if (streamLengthObj != null && validateStreamLength(streamLengthObj.longValue()))
- {
- readValidStream(out, streamLengthObj);
- }
- else
+ stream = document.createCOSStream(dic);
+ // get output stream to copy data to
+ try (OutputStream out = stream.createRawOutputStream())
{
readUntilEndStream(new EndstreamOutputStream(out));
}
@@ -992,24 +1004,6 @@ public class COSParser extends BaseParse
out.flush();
}
- private void readValidStream(OutputStream out, COSNumber streamLengthObj) throws IOException
- {
- long remainBytes = streamLengthObj.longValue();
- while (remainBytes > 0)
- {
- final int chunk = (remainBytes > STREAMCOPYBUFLEN) ? STREAMCOPYBUFLEN : (int) remainBytes;
- final int readBytes = source.read(streamCopyBuf, 0, chunk);
- if (readBytes <= 0)
- {
- // shouldn't happen, the stream length has already been validated
- throw new IOException("read error at offset " + source.getPosition()
- + ": expected " + chunk + " bytes, but read() returns " + readBytes);
- }
- out.write(streamCopyBuf, 0, readBytes);
- remainBytes -= readBytes;
- }
- }
-
private boolean validateStreamLength(long streamLength) throws IOException
{
boolean streamLengthIsValid = true;