You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2020/05/16 16:32:17 UTC
svn commit: r1877823 - in
/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox: ./ cos/ io/
pdfparser/ pdmodel/interactive/digitalsignature/
Author: lehmi
Date: Sat May 16 16:32:17 2020
New Revision: 1877823
URL: http://svn.apache.org/viewvc?rev=1877823&view=rev
Log: (empty)
Added:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFile.java
- copied, changed from r1877822, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessMemoryMappedFile.java (with props)
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessReadView.java (with props)
Removed:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/Loader.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSInputStream.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/ICOSParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamOutputStream.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/SignatureOptions.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/Loader.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/Loader.java?rev=1877823&r1=1877822&r2=1877823&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/Loader.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/Loader.java Sat May 16 16:32:17 2020
@@ -26,7 +26,7 @@ import java.io.InputStream;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.io.RandomAccessBuffer;
-import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
+import org.apache.pdfbox.io.RandomAccessBufferedFile;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.io.ScratchFile;
import org.apache.pdfbox.pdfparser.FDFParser;
@@ -297,7 +297,7 @@ public class Loader
MemoryUsageSetting memUsageSetting) throws IOException
{
@SuppressWarnings({ "squid:S2095" }) // raFile not closed here, may be needed for signing
- RandomAccessBufferedFileInputStream raFile = new RandomAccessBufferedFileInputStream(file);
+ RandomAccessBufferedFile raFile = new RandomAccessBufferedFile(file);
try
{
return Loader.loadPDF(raFile, password, keyStore, alias, memUsageSetting);
@@ -308,8 +308,21 @@ public class Loader
throw ioe;
}
}
-
- public static PDDocument loadPDF(RandomAccessBufferedFileInputStream raFile, String password,
+ /**
+ * Parses a PDF.
+ *
+ * @param raFile RandomAccessRead of the file to be loaded
+ * @param password password to be used for decryption
+ * @param keyStore key store to be used for decryption when using public key security
+ * @param alias alias to be used for decryption when using public key security
+ * @param memUsageSetting defines how memory is used for buffering PDF streams
+ *
+ * @return loaded document
+ *
+ * @throws IOException in case of a file reading or parsing error
+ */
+ public static PDDocument loadPDF(RandomAccessRead raFile,
+ String password,
InputStream keyStore, String alias, MemoryUsageSetting memUsageSetting)
throws IOException
{
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java?rev=1877823&r1=1877822&r2=1877823&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java Sat May 16 16:32:17 2020
@@ -165,6 +165,14 @@ public class COSDocument extends COSBase
return stream;
}
+    /**
+     * Creates a new stream whose raw data is provided by a view requested from the parser.
+     *
+     * @param dictionary the dictionary whose entries are copied to the new stream
+     * @param startPosition the start position passed on to the parser to create the view
+     * @param streamLength the length passed on to the parser to create the view
+     *
+     * @return the new stream holding the entries of the given dictionary
+     */
+    public COSStream createCOSStream(COSDictionary dictionary, long startPosition,
+            long streamLength)
+    {
+        final COSStream viewBackedStream = new COSStream(scratchFile,
+                parser.createRandomAccessReadView(startPosition, streamLength));
+        // copy the entries after the construction, as the constructor already sets /Length
+        dictionary.forEach((key, value) -> viewBackedStream.setItem(key, value));
+        return viewBackedStream;
+    }
/**
* Get the dictionary containing the linearization information if the pdf is linearized.
*
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSInputStream.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSInputStream.java?rev=1877823&r1=1877822&r2=1877823&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSInputStream.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSInputStream.java Sat May 16 16:32:17 2020
@@ -77,7 +77,8 @@ public final class COSInputStream extend
{
// scratch file
final RandomAccess buffer = scratchFile.createBuffer();
- DecodeResult result = filters.get(i).decode(input, new RandomAccessOutputStream(buffer), parameters, i, options);
+ DecodeResult result = filters.get(i).decode(input,
+ new RandomAccessOutputStream(buffer), parameters, i, options);
results.add(result);
input = new RandomAccessInputStream(buffer)
{
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java?rev=1877823&r1=1877822&r2=1877823&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java Sat May 16 16:32:17 2020
@@ -37,6 +37,7 @@ import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.RandomAccess;
import org.apache.pdfbox.io.RandomAccessInputStream;
import org.apache.pdfbox.io.RandomAccessOutputStream;
+import org.apache.pdfbox.io.RandomAccessReadView;
import org.apache.pdfbox.io.ScratchFile;
/**
@@ -46,10 +47,15 @@ import org.apache.pdfbox.io.ScratchFile;
*/
public class COSStream extends COSDictionary implements Closeable
{
- private RandomAccess randomAccess; // backing store, in-memory or on-disk
- private final ScratchFile scratchFile; // used as a temp buffer during decoding
- private boolean isWriting; // true if there's an open OutputStream
+ // backing store, in-memory or on-disk
+ private RandomAccess randomAccess;
+ // used as a temp buffer during decoding
+ private final ScratchFile scratchFile;
+ // true if there's an open OutputStream
+ private boolean isWriting;
+ private RandomAccessReadView randomAccessReadView;
+
private static final Log LOG = LogFactory.getLog(COSStream.class);
/**
@@ -77,6 +83,26 @@ public class COSStream extends COSDictio
}
/**
+     * Creates a new stream with an empty dictionary whose raw data is read from the given view.
+     * Data written to the stream later on is stored in the given scratch file.
+     *
+     * @param scratchFile Scratch file for writing stream data.
+     * @param randomAccessReadView view providing the raw stream data
+     */
+    public COSStream(ScratchFile scratchFile, RandomAccessReadView randomAccessReadView)
+    {
+        this(scratchFile);
+        this.randomAccessReadView = randomAccessReadView;
+        try
+        {
+            // keep the long value, a cast to int would truncate the length of huge streams
+            setLong(COSName.LENGTH, randomAccessReadView.length());
+        }
+        catch (IOException e)
+        {
+            // the length entry stays unset, report it using the logger instead of stderr
+            LOG.warn("Can't determine the length of the stream data", e);
+        }
+    }
+
+ /**
* {@inheritDoc}
*/
@Override
@@ -184,8 +210,16 @@ public class COSStream extends COSDictio
{
throw new IllegalStateException("Cannot read while there is an open stream writer");
}
- ensureRandomAccessExists(true);
- return new RandomAccessInputStream(randomAccess);
+ if (randomAccess == null && randomAccessReadView != null)
+ {
+ randomAccessReadView.seek(0);
+ return new RandomAccessInputStream(randomAccessReadView);
+ }
+ else
+ {
+ ensureRandomAccessExists(true);
+ return new RandomAccessInputStream(randomAccess);
+ }
}
/**
@@ -201,13 +235,7 @@ public class COSStream extends COSDictio
public COSInputStream createInputStream(DecodeOptions options) throws IOException
{
- checkClosed();
- if (isWriting)
- {
- throw new IllegalStateException("Cannot read while there is an open stream writer");
- }
- ensureRandomAccessExists(true);
- InputStream input = new RandomAccessInputStream(randomAccess);
+ InputStream input = createRawInputStream();
return COSInputStream.create(getFilterList(), this, input, scratchFile, options);
}
@@ -241,8 +269,10 @@ public class COSStream extends COSDictio
{
setItem(COSName.FILTER, filters);
}
- IOUtils.closeQuietly(randomAccess);
- randomAccess = scratchFile.createBuffer();
+ if (randomAccess != null)
+ randomAccess.clear();
+ else
+ randomAccess = scratchFile.createBuffer();
OutputStream randomOut = new RandomAccessOutputStream(randomAccess);
OutputStream cosOut = new COSOutputStream(getFilterList(), this, randomOut, scratchFile);
isWriting = true;
@@ -277,8 +307,10 @@ public class COSStream extends COSDictio
{
throw new IllegalStateException("Cannot have more than one open stream writer.");
}
- IOUtils.closeQuietly(randomAccess);
- randomAccess = scratchFile.createBuffer();
+ if (randomAccess != null)
+ randomAccess.clear();
+ else
+ randomAccess = scratchFile.createBuffer();
OutputStream out = new RandomAccessOutputStream(randomAccess);
isWriting = true;
return new FilterOutputStream(out)
@@ -325,7 +357,7 @@ public class COSStream extends COSDictio
/**
* Returns the length of the encoded stream.
*
- * @return length in bytes
+ * @return length in bytes
*/
public long getLength()
{
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/ICOSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/ICOSParser.java?rev=1877823&r1=1877822&r2=1877823&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/ICOSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/ICOSParser.java Sat May 16 16:32:17 2020
@@ -18,6 +18,8 @@ package org.apache.pdfbox.cos;
import java.io.IOException;
+import org.apache.pdfbox.io.RandomAccessReadView;
+
public interface ICOSParser
{
@@ -32,4 +34,6 @@ public interface ICOSParser
*/
public COSBase dereferenceCOSObject(COSObject obj) throws IOException;
+ /**
+ * Creates a random access read view for the section described by the given start position
+ * and length.
+ *
+ * @param startPosition start position of the section
+ * @param streamLength length of the section
+ * @return the view on the requested section
+ */
+ public RandomAccessReadView createRandomAccessReadView(long startPosition, long streamLength);
+
}
Copied: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFile.java (from r1877822, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java)
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFile.java?p2=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFile.java&p1=pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java&r1=1877822&r2=1877823&rev=1877823&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFile.java Sat May 16 16:32:17 2020
@@ -25,17 +25,13 @@ import java.util.LinkedHashMap;
import java.util.Map;
/**
- * Provides {@link InputStream} access to portions of a file combined with
- * buffered reading of content. Start of next bytes to read can be set via seek
- * method.
+ * Provides buffered reading of content. Start of next bytes to be read can be set via seek method.
*
- * File is accessed via {@link RandomAccessFile} and is read in byte chunks
- * which are cached.
+ * File is accessed via {@link RandomAccessFile} and is read in byte chunks which are cached.
*
* @author Timo Boehme
*/
-public class RandomAccessBufferedFileInputStream
-extends InputStream implements RandomAccessRead
+public class RandomAccessBufferedFile implements RandomAccessRead
{
/**
* The prefix for the temp file being used.
@@ -83,7 +79,7 @@ extends InputStream implements RandomAcc
* @param filename the filename of the file to be read.
* @throws IOException if something went wrong while accessing the given file.
*/
- public RandomAccessBufferedFileInputStream( String filename ) throws IOException
+ public RandomAccessBufferedFile( String filename ) throws IOException
{
this(new File(filename));
}
@@ -94,7 +90,7 @@ extends InputStream implements RandomAcc
* @param file the file to be read.
* @throws IOException if something went wrong while accessing the given file.
*/
- public RandomAccessBufferedFileInputStream( File file ) throws IOException
+ public RandomAccessBufferedFile( File file ) throws IOException
{
raFile = new RandomAccessFile(file, "r");
fileLength = file.length();
@@ -108,7 +104,7 @@ extends InputStream implements RandomAcc
* @param input the input stream to be read. It will be closed by this method.
* @throws IOException if something went wrong while creating the temporary file.
*/
- public RandomAccessBufferedFileInputStream( InputStream input ) throws IOException
+ public RandomAccessBufferedFile( InputStream input ) throws IOException
{
tempFile = createTmpFile(input);
fileLength = tempFile.length();
@@ -267,32 +263,6 @@ extends InputStream implements RandomAcc
}
@Override
- public long skip( long n ) throws IOException
- {
- // test if we have to reduce skip count because of EOF
- long toSkip = n;
-
- if ( fileLength - fileOffset < toSkip )
- {
- toSkip = fileLength - fileOffset;
- }
-
- if ( ( toSkip < pageSize ) && ( ( offsetWithinPage + toSkip ) <= pageSize ) )
- {
- // we can skip within current page
- offsetWithinPage += toSkip;
- fileOffset += toSkip;
- }
- else
- {
- // seek to the page we will get after skipping
- seek( fileOffset + toSkip );
- }
-
- return toSkip;
- }
-
- @Override
public long length() throws IOException
{
return fileLength;
@@ -333,7 +303,6 @@ extends InputStream implements RandomAcc
@Override
public boolean isEOF() throws IOException
{
- int peek = peek();
- return peek == -1;
+ return peek() == -1;
}
}
Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessMemoryMappedFile.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessMemoryMappedFile.java?rev=1877823&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessMemoryMappedFile.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessMemoryMappedFile.java Sat May 16 16:32:17 2020
@@ -0,0 +1,286 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.io;
+
+import java.io.IOException;
+import java.nio.BufferUnderflowException;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
+import java.util.EnumSet;
+
+/**
+ * An implementation of the {@link RandomAccessRead} interface reading from a memory mapped file.
+ * The whole file is mapped read-only as one single buffer, files bigger than 2GB aren't supported.
+ */
+public class RandomAccessMemoryMappedFile implements RandomAccessRead
+{
+
+    // a single mapped buffer can't be bigger than 2GB
+    private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE;
+
+    // the buffer mapping the whole file, null once closed
+    private MappedByteBuffer mappedByteBuffer;
+
+    // size of the whole file
+    private final long size;
+
+    private final FileChannel fileChannel;
+
+    /**
+     * Opens the given file for reading and maps it into memory.
+     *
+     * @param filename the filename of the file to be read
+     * @throws IOException if the file can't be opened or mapped or if it is bigger than 2GB
+     */
+    public RandomAccessMemoryMappedFile(String filename) throws IOException
+    {
+        FileChannel channel = (FileChannel) Files.newByteChannel(Paths.get(filename),
+                EnumSet.of(StandardOpenOption.READ));
+        long fileSize;
+        MappedByteBuffer buffer;
+        try
+        {
+            fileSize = channel.size();
+            if (fileSize > MAX_BUFFER_SIZE)
+            {
+                throw new IOException("File is too big to be mapped into memory: " + filename
+                        + " (" + fileSize + " bytes)");
+            }
+            buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize);
+        }
+        catch (IOException | RuntimeException e)
+        {
+            // don't leak the channel if the file can't be mapped
+            try
+            {
+                channel.close();
+            }
+            catch (IOException closeException)
+            {
+                e.addSuppressed(closeException);
+            }
+            throw e;
+        }
+        fileChannel = channel;
+        size = fileSize;
+        mappedByteBuffer = buffer;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void close() throws IOException
+    {
+        fileChannel.close();
+        mappedByteBuffer = null;
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * A position beyond the end of the file is limited to the end of the file.
+     */
+    @Override
+    public void seek(long position) throws IOException
+    {
+        checkClosed();
+        if (position < 0)
+        {
+            throw new IOException("Invalid position "+position);
+        }
+        // size never exceeds Integer.MAX_VALUE, so the cast is safe after limiting the value
+        mappedByteBuffer.position((int) Math.min(position, size));
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public long getPosition() throws IOException
+    {
+        checkClosed();
+        return mappedByteBuffer.position();
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public int read() throws IOException
+    {
+        checkClosed();
+        // test for remaining bytes instead of relying on a BufferUnderflowException
+        return mappedByteBuffer.hasRemaining() ? mappedByteBuffer.get() & 0xff : -1;
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * Reads at most the remaining bytes of the file and returns 0 if the end of the file is
+     * reached.
+     */
+    @Override
+    public int read(byte[] b, int offset, int length) throws IOException
+    {
+        if (isEOF())
+        {
+            return 0;
+        }
+        // requesting more than the remaining bytes would end up in a BufferUnderflowException
+        int bytesToRead = Math.min(length, mappedByteBuffer.remaining());
+        mappedByteBuffer.get(b, offset, bytesToRead);
+        return bytesToRead;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public long length() throws IOException
+    {
+        checkClosed();
+        return size;
+    }
+
+    /**
+     * Ensure that the RandomAccessMemoryMappedFile is not closed.
+     *
+     * @throws IOException if the file is already closed
+     */
+    private void checkClosed() throws IOException
+    {
+        if (isClosed())
+        {
+            throw new IOException(getClass().getSimpleName() + " already closed");
+        }
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public boolean isClosed()
+    {
+        return !fileChannel.isOpen();
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public boolean isEOF() throws IOException
+    {
+        checkClosed();
+        // the position of the channel isn't moved by reading from the mapped buffer,
+        // so the buffer itself has to be asked
+        return !mappedByteBuffer.hasRemaining();
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public int available() throws IOException
+    {
+        return (int) Math.min(length() - getPosition(), Integer.MAX_VALUE);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public int peek() throws IOException
+    {
+        if (isEOF())
+        {
+            return -1;
+        }
+        // an absolute get doesn't move the position, so there is nothing to rewind
+        return mappedByteBuffer.get(mappedByteBuffer.position()) & 0xff;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void rewind(int bytes) throws IOException
+    {
+        seek(getPosition() - bytes);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public int read(byte[] b) throws IOException
+    {
+        return read(b, 0, b.length);
+    }
+}
Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessMemoryMappedFile.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessReadView.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessReadView.java?rev=1877823&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessReadView.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessReadView.java Sat May 16 16:32:17 2020
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.io;
+
+import java.io.IOException;
+
+/**
+ * A view on a section of a {@link RandomAccessRead}. The view keeps track of its own position
+ * and repositions the underlying source before every access, so that it doesn't rely on the
+ * position of the source being untouched between two calls. Closing the view doesn't close the
+ * underlying source.
+ */
+public class RandomAccessReadView implements RandomAccessRead
+{
+    // the underlying source, null once the view is closed
+    private RandomAccessRead randomAccessRead;
+    // the start position of the view within the underlying source
+    private final long startPosition;
+    // the length of the view
+    private final long streamLength;
+    // the current position within the view
+    private long currentPosition = 0;
+
+    /**
+     * Creates a view on the given section of the given source.
+     *
+     * @param randomAccessRead the underlying source
+     * @param startPosition the start position of the view within the source
+     * @param streamLength the length of the view
+     */
+    public RandomAccessReadView(RandomAccessRead randomAccessRead, long startPosition,
+            long streamLength)
+    {
+        this.randomAccessRead = randomAccessRead;
+        this.startPosition = startPosition;
+        this.streamLength = streamLength;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public long getPosition()
+    {
+        return currentPosition;
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * The offset is relative to the start of the view. An offset beyond the end of the view is
+     * limited to the end of the view.
+     */
+    @Override
+    public void seek(final long newOffset) throws IOException
+    {
+        if (newOffset < 0)
+        {
+            throw new IOException("Invalid position " + newOffset);
+        }
+        // never leave the section of the source covered by the view
+        currentPosition = Math.max(0, Math.min(newOffset, streamLength));
+        restorePosition();
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public int read() throws IOException
+    {
+        if (isEOF())
+        {
+            return -1;
+        }
+        restorePosition();
+        int readValue = randomAccessRead.read();
+        if (readValue > -1)
+        {
+            currentPosition++;
+        }
+        return readValue;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public int read(byte[] b) throws IOException
+    {
+        return read(b, 0, b.length);
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * Reads at most the remaining bytes of the view and returns 0 if the end of the view is
+     * reached.
+     */
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException
+    {
+        if (isEOF())
+        {
+            return 0;
+        }
+        restorePosition();
+        int readBytes = randomAccessRead.read(b, off, Math.min(len, available()));
+        // the source may signal its own end with a negative value which mustn't move the position
+        if (readBytes > 0)
+        {
+            currentPosition += readBytes;
+        }
+        return readBytes;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public int available() throws IOException
+    {
+        // limit the value before casting to avoid an overflow for views bigger than 2GB
+        return (int) Math.max(0, Math.min(streamLength - currentPosition, Integer.MAX_VALUE));
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public long length() throws IOException
+    {
+        return streamLength;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void close() throws IOException
+    {
+        randomAccessRead = null;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public boolean isClosed()
+    {
+        return randomAccessRead == null || randomAccessRead.isClosed();
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public int peek() throws IOException
+    {
+        // don't expose the byte of the source following the end of the view
+        if (isEOF())
+        {
+            return -1;
+        }
+        restorePosition();
+        return randomAccessRead.peek();
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void rewind(int bytes) throws IOException
+    {
+        // seek rejects a position before the start of the view
+        seek(currentPosition - bytes);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public boolean isEOF() throws IOException
+    {
+        return currentPosition >= streamLength;
+    }
+
+    /**
+     * Moves the underlying source to the position matching the current position of the view.
+     *
+     * @throws IOException if the view is closed or the source can't be repositioned
+     */
+    private void restorePosition() throws IOException
+    {
+        checkClosed();
+        randomAccessRead.seek(startPosition + currentPosition);
+    }
+
+    /**
+     * Ensure that the view is not closed.
+     *
+     * @throws IOException if the view or the underlying source is already closed
+     */
+    private void checkClosed() throws IOException
+    {
+        if (isClosed())
+        {
+            throw new IOException(getClass().getSimpleName() + " already closed");
+        }
+    }
+}
Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessReadView.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1877823&r1=1877822&r2=1877823&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sat May 16 16:32:17 2020
@@ -18,7 +18,6 @@ package org.apache.pdfbox.pdfparser;
import java.io.IOException;
import java.io.InputStream;
-import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.security.GeneralSecurityException;
import java.security.KeyStore;
@@ -45,6 +44,7 @@ import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.cos.ICOSParser;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.RandomAccessRead;
+import org.apache.pdfbox.io.RandomAccessReadView;
import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial;
@@ -573,6 +573,12 @@ public class COSParser extends BaseParse
return parsedObj;
}
+ /**
+ * Creates a view on the source of this parser which is limited to the given section.
+ *
+ * @param startPosition start position of the section within the source
+ * @param streamLength length of the section
+ * @return a new view on the given section of the source
+ */
+ @Override
+ public RandomAccessReadView createRandomAccessReadView(long startPosition, long streamLength)
+ {
+ return new RandomAccessReadView(source, startPosition, streamLength);
+ }
+
/**
* Parse the object for the given object number.
*
@@ -804,9 +810,6 @@ public class COSParser extends BaseParse
"Wrong type of length object: " + lengthBaseObj.getClass().getSimpleName());
}
- private static final int STREAMCOPYBUFLEN = 8192;
- private final byte[] streamCopyBuf = new byte[STREAMCOPYBUFLEN];
-
/**
* This will read a COSStream from the input stream using length attribute within dictionary. If
* length attribute is a indirect reference it is first resolved to get the stream length. This
@@ -823,8 +826,6 @@ public class COSParser extends BaseParse
*/
protected COSStream parseCOSStream(COSDictionary dic) throws IOException
{
- COSStream stream = document.createCOSStream(dic);
-
// read 'stream'; this was already tested in parseObjectsDynamically()
readString();
@@ -847,18 +848,23 @@ public class COSParser extends BaseParse
}
}
- // get output stream to copy data to
- try (OutputStream out = stream.createRawOutputStream())
+ COSStream stream = null;
+ long streamStartPosition = source.getPosition();
+ if (streamLengthObj != null && validateStreamLength(streamLengthObj.longValue()))
+ {
+ stream = document.createCOSStream(dic, streamStartPosition,
+ streamLengthObj.longValue());
+ // skip stream
+ source.seek(source.getPosition() + streamLengthObj.intValue());
+ }
+ else
{
- if (streamLengthObj != null && validateStreamLength(streamLengthObj.longValue()))
- {
- readValidStream(out, streamLengthObj);
- }
- else
- {
- readUntilEndStream(new EndstreamOutputStream(out));
- }
+ // get output stream to copy data to
+ long streamLength = readUntilEndStream(new EndstreamOutputStream());
+ stream = document.createCOSStream(dic, streamStartPosition,
+ streamLength);
}
+
String endStream = readString();
if (endStream.equals("endobj") && isLenient)
{
@@ -880,53 +886,49 @@ public class COSParser extends BaseParse
"Error reading stream, expected='endstream' actual='"
+ endStream + "' at offset " + source.getPosition());
}
-
return stream;
}
/**
- * This method will read through the current stream object until
- * we find the keyword "endstream" meaning we're at the end of this
- * object. Some pdf files, however, forget to write some endstream tags
- * and just close off objects with an "endobj" tag so we have to handle
- * this case as well.
+ * This method will read through the current stream object until we find the keyword "endstream" meaning we're at the
+ * end of this object. Some pdf files, however, forget to write some endstream tags and just close off objects with an
+ * "endobj" tag so we have to handle this case as well.
*
- * This method is optimized using buffered IO and reduced number of
- * byte compare operations.
+ * This method is optimized using buffered IO and reduced number of byte compare operations.
*
- * @param out stream we write out to.
+ * @param out stream we write out to.
*
* @throws IOException if something went wrong
*/
- private void readUntilEndStream( final OutputStream out ) throws IOException
+ private long readUntilEndStream(final EndstreamOutputStream out) throws IOException
{
int bufSize;
int charMatchCount = 0;
byte[] keyw = ENDSTREAM;
-
+
// last character position of shortest keyword ('endobj')
final int quickTestOffset = 5;
-
+
// read next chunk into buffer; already matched chars are added to beginning of buffer
- while ( ( bufSize = source.read( strmBuf, charMatchCount, STRMBUFLEN - charMatchCount ) ) > 0 )
+ while ((bufSize = source.read(strmBuf, charMatchCount, STRMBUFLEN - charMatchCount)) > 0)
{
bufSize += charMatchCount;
-
+
int bIdx = charMatchCount;
int quickTestIdx;
-
+
// iterate over buffer, trying to find keyword match
- for ( int maxQuicktestIdx = bufSize - quickTestOffset; bIdx < bufSize; bIdx++ )
+ for (int maxQuicktestIdx = bufSize - quickTestOffset; bIdx < bufSize; bIdx++)
{
// reduce compare operations by first test last character we would have to
// match if current one matches; if it is not a character from keywords
- // we can move behind the test character; this shortcut is inspired by the
+ // we can move behind the test character; this shortcut is inspired by the
// Boyer-Moore string search algorithm and can reduce parsing time by approx. 20%
quickTestIdx = bIdx + quickTestOffset;
if (charMatchCount == 0 && quickTestIdx < maxQuicktestIdx)
- {
+ {
final byte ch = strmBuf[quickTestIdx];
- if ( ( ch > 't' ) || ( ch < 'a' ) )
+ if ((ch > 't') || (ch < 'a'))
{
// last character we would have to match if current character would match
// is not a character from keywords -> jump behind and start over
@@ -934,80 +936,64 @@ public class COSParser extends BaseParse
continue;
}
}
-
+
// could be negative - but we only compare to ASCII
final byte ch = strmBuf[bIdx];
-
- if ( ch == keyw[ charMatchCount ] )
+
+ if (ch == keyw[charMatchCount])
{
- if ( ++charMatchCount == keyw.length )
+ if (++charMatchCount == keyw.length)
{
// match found
bIdx++;
break;
}
- }
- else
+ }
+ else
{
- if ( ( charMatchCount == 3 ) && ( ch == ENDOBJ[ charMatchCount ] ) )
+ if ((charMatchCount == 3) && (ch == ENDOBJ[charMatchCount]))
{
// maybe ENDSTREAM is missing but we could have ENDOBJ
keyw = ENDOBJ;
charMatchCount++;
- }
- else
+ }
+ else
{
- // no match; incrementing match start by 1 would be dumb since we already know
- // matched chars depending on current char read we may already have beginning
- // of a new match: 'e': first char matched; 'n': if we are at match position
- // idx 7 we already read 'e' thus 2 chars matched for each other char we have
+ // no match; incrementing match start by 1 would be dumb since we already know
+ // matched chars depending on current char read we may already have beginning
+ // of a new match: 'e': first char matched; 'n': if we are at match position
+ // idx 7 we already read 'e' thus 2 chars matched for each other char we have
// to start matching first keyword char beginning with next read position
- charMatchCount = ( ch == E ) ? 1 : ( ( ch == N ) && ( charMatchCount == 7 ) ) ? 2 : 0;
+ charMatchCount = (ch == E) ? 1
+ : ((ch == N) && (charMatchCount == 7)) ? 2 : 0;
// search again for 'endstream'
keyw = ENDSTREAM;
}
- }
+ }
}
-
- int contentBytes = Math.max( 0, bIdx - charMatchCount );
-
+
+ int contentBytes = Math.max(0, bIdx - charMatchCount);
+
// write buffer content until first matched char to output stream
- if ( contentBytes > 0 )
+ if (contentBytes > 0)
{
- out.write( strmBuf, 0, contentBytes );
+ out.write(strmBuf, 0, contentBytes);
}
- if ( charMatchCount == keyw.length )
+ if (charMatchCount == keyw.length)
{
// keyword matched; unread matched keyword (endstream/endobj) and following buffered content
- source.rewind( bufSize - contentBytes );
+ source.rewind(bufSize - contentBytes);
break;
- }
- else
+ }
+ else
{
// copy matched chars at start of buffer
- System.arraycopy( keyw, 0, strmBuf, 0, charMatchCount );
- }
+ System.arraycopy(keyw, 0, strmBuf, 0, charMatchCount);
+ }
}
// this writes a lonely CR or drops trailing CR LF and LF
out.flush();
- }
-
- private void readValidStream(OutputStream out, COSNumber streamLengthObj) throws IOException
- {
- long remainBytes = streamLengthObj.longValue();
- while (remainBytes > 0)
- {
- final int chunk = (remainBytes > STREAMCOPYBUFLEN) ? STREAMCOPYBUFLEN : (int) remainBytes;
- final int readBytes = source.read(streamCopyBuf, 0, chunk);
- if (readBytes <= 0)
- {
- // shouldn't happen, the stream length has already been validated
- throw new IOException("read error at offset " + source.getPosition()
- + ": expected " + chunk + " bytes, but read() returns " + readBytes);
- }
- out.write(streamCopyBuf, 0, readBytes);
- remainBytes -= readBytes;
- }
+ return out.getLength();
}
private boolean validateStreamLength(long streamLength) throws IOException
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamOutputStream.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamOutputStream.java?rev=1877823&r1=1877822&r2=1877823&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamOutputStream.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamOutputStream.java Sat May 16 16:32:17 2020
@@ -16,7 +16,7 @@
package org.apache.pdfbox.pdfparser;
-import java.io.BufferedOutputStream;
+import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;
@@ -30,20 +30,23 @@ import java.io.OutputStream;
*
* @author Tilman Hausherr
*/
-class EndstreamOutputStream extends BufferedOutputStream
+class EndstreamOutputStream extends FilterOutputStream
{
- //TODO: replace this class with a PullBackOutputStream class if there ever is one
-
private boolean hasCR = false;
private boolean hasLF = false;
private int pos = 0;
private boolean mustFilter = true;
+ private long length = 0;
EndstreamOutputStream(OutputStream out)
{
super(out);
}
+ public EndstreamOutputStream()
+ {
+ super(null);
+ }
/**
* Write CR and/or LF that were kept, then writes len bytes from the
* specified byte array starting at offset off to this output stream,
@@ -86,11 +89,11 @@ class EndstreamOutputStream extends Buff
// reset hasCR done too to avoid CR getting written in the flush
return;
}
- super.write('\r');
+ length++;
}
if (hasLF)
{
- super.write('\n');
+ length++;
hasLF = false;
}
// don't write CR, LF, or CR LF if at the end of the buffer
@@ -113,7 +116,7 @@ class EndstreamOutputStream extends Buff
}
}
}
- super.write(b, off, len);
+ length += len;
pos += len;
}
@@ -129,11 +132,15 @@ class EndstreamOutputStream extends Buff
// if there is only a CR and no LF, write it
if (hasCR && !hasLF)
{
- super.write('\r');
+ length++;
++pos;
}
hasCR = false;
hasLF = false;
- super.flush();
+ }
+
+ public long getLength()
+ {
+ return length;
}
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/SignatureOptions.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/SignatureOptions.java?rev=1877823&r1=1877822&r2=1877823&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/SignatureOptions.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/SignatureOptions.java Sat May 16 16:32:17 2020
@@ -23,7 +23,7 @@ import java.io.InputStream;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.io.RandomAccessBuffer;
-import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
+import org.apache.pdfbox.io.RandomAccessBufferedFile;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.visible.PDVisibleSigProperties;
@@ -79,7 +79,7 @@ public class SignatureOptions implements
*/
public void setVisualSignature(File file) throws IOException
{
- initFromRandomAccessRead(new RandomAccessBufferedFileInputStream(file));
+ initFromRandomAccessRead(new RandomAccessBufferedFile(file));
}
/**