You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2020/05/17 10:55:16 UTC

svn commit: r1877851 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox: cos/COSDocument.java cos/COSStream.java cos/ICOSParser.java io/RandomAccessReadView.java pdfparser/COSParser.java

Author: lehmi
Date: Sun May 17 10:55:16 2020
New Revision: 1877851

URL: http://svn.apache.org/viewvc?rev=1877851&view=rev
Log:
PDFBOX-4836: introduce RandomAccessReadView to be used to read a COSStream 

Added:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessReadView.java   (with props)
Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/ICOSParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java?rev=1877851&r1=1877850&r2=1877851&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java Sun May 17 10:55:16 2020
@@ -166,6 +166,24 @@ public class COSDocument extends COSBase
     }
 
     /**
+     * Builds a COSStream whose data is read from a section of the parsed source. The current configuration for
+     * scratch files is used for the new stream. Internal API: only COSParser is supposed to call this method.
+     * 
+     * @param dictionary the dictionary whose entries are copied to the new stream
+     * @param startPosition the start position of the stream data within the source
+     * @param streamLength the length of the stream data
+     * @return the new COSStream
+     */
+    public COSStream createCOSStream(COSDictionary dictionary, long startPosition,
+            long streamLength)
+    {
+        COSStream cosStream = new COSStream(scratchFile,
+                parser.createRandomAccessReadView(startPosition, streamLength));
+        dictionary.forEach((key, value) -> cosStream.setItem(key, value));
+        return cosStream;
+    }
+
+    /**
      * Get the dictionary containing the linearization information if the pdf is linearized.
      * 
      * @return the dictionary containing the linearization information

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java?rev=1877851&r1=1877850&r2=1877851&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java Sun May 17 10:55:16 2020
@@ -37,6 +37,7 @@ import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.io.RandomAccess;
 import org.apache.pdfbox.io.RandomAccessInputStream;
 import org.apache.pdfbox.io.RandomAccessOutputStream;
+import org.apache.pdfbox.io.RandomAccessReadView;
 import org.apache.pdfbox.io.ScratchFile;
 
 /**
@@ -46,9 +47,14 @@ import org.apache.pdfbox.io.ScratchFile;
  */
 public class COSStream extends COSDictionary implements Closeable
 {
-    private RandomAccess randomAccess;      // backing store, in-memory or on-disk
-    private final ScratchFile scratchFile;  // used as a temp buffer during decoding
-    private boolean isWriting;              // true if there's an open OutputStream
+    // backing store, in-memory or on-disk
+    private RandomAccess randomAccess;
+    // used as a temp buffer during decoding
+    private final ScratchFile scratchFile;
+    // true if there's an open OutputStream
+    private boolean isWriting;
+    // random access view to be read from
+    private RandomAccessReadView randomAccessReadView;
     
     private static final Log LOG = LogFactory.getLog(COSStream.class);
     
@@ -77,6 +83,27 @@ public class COSStream extends COSDictio
     }
 
     /**
+     * Creates a new stream with an empty dictionary. Data is read from the given random access view. Written data is
+     * stored in the given scratch file. The length of the view is used as initial value of the /Length entry.
+     *
+     * @param scratchFile Scratch file for writing stream data.
+     * @param randomAccessReadView view of the source the stream data is read from.
+     */
+    public COSStream(ScratchFile scratchFile, RandomAccessReadView randomAccessReadView)
+    {
+        this(scratchFile);
+        this.randomAccessReadView = randomAccessReadView;
+        try
+        {
+            setLong(COSName.LENGTH, randomAccessReadView.length());
+        }
+        catch (IOException e)
+        {
+            LOG.error("Length of the random access view is unavailable, /Length has not been set", e);
+        }
+    }
+
+    /**
      * {@inheritDoc}
      */
     @Override
@@ -184,8 +211,16 @@ public class COSStream extends COSDictio
         {
             throw new IllegalStateException("Cannot read while there is an open stream writer");
         }
-        ensureRandomAccessExists(true);
-        return new RandomAccessInputStream(randomAccess);
+        if (randomAccess == null && randomAccessReadView != null)
+        {
+            randomAccessReadView.seek(0);
+            return new RandomAccessInputStream(randomAccessReadView);
+        }
+        else
+        {
+            ensureRandomAccessExists(true);
+            return new RandomAccessInputStream(randomAccess);
+        }
     }
 
     /**
@@ -201,13 +236,7 @@ public class COSStream extends COSDictio
 
     public COSInputStream createInputStream(DecodeOptions options) throws IOException
     {
-        checkClosed();
-        if (isWriting)
-        {
-            throw new IllegalStateException("Cannot read while there is an open stream writer");
-        }
-        ensureRandomAccessExists(true);
-        InputStream input = new RandomAccessInputStream(randomAccess);
+        InputStream input = createRawInputStream();
         return COSInputStream.create(getFilterList(), this, input, scratchFile, options);
     }
 
@@ -241,8 +270,10 @@ public class COSStream extends COSDictio
         {
             setItem(COSName.FILTER, filters);
         }
-        IOUtils.closeQuietly(randomAccess);
-        randomAccess = scratchFile.createBuffer();
+        if (randomAccess != null)
+            randomAccess.clear();
+        else
+            randomAccess = scratchFile.createBuffer();
         OutputStream randomOut = new RandomAccessOutputStream(randomAccess);
         OutputStream cosOut = new COSOutputStream(getFilterList(), this, randomOut, scratchFile);
         isWriting = true;
@@ -277,8 +308,10 @@ public class COSStream extends COSDictio
         {
             throw new IllegalStateException("Cannot have more than one open stream writer.");
         }
-        IOUtils.closeQuietly(randomAccess);
-        randomAccess = scratchFile.createBuffer();
+        if (randomAccess != null)
+            randomAccess.clear();
+        else
+            randomAccess = scratchFile.createBuffer();
         OutputStream out = new RandomAccessOutputStream(randomAccess);
         isWriting = true;
         return new FilterOutputStream(out)

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/ICOSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/ICOSParser.java?rev=1877851&r1=1877850&r2=1877851&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/ICOSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/ICOSParser.java Sun May 17 10:55:16 2020
@@ -18,6 +18,8 @@ package org.apache.pdfbox.cos;
 
 import java.io.IOException;
 
+import org.apache.pdfbox.io.RandomAccessReadView;
+
 public interface ICOSParser
 {
 
@@ -25,11 +27,18 @@ public interface ICOSParser
      * Dereference the COSBase object which is referenced by the given COSObject.
      * 
      * @param obj the COSObject which references the COSBase object to be dereferenced.
-     * 
      * @return the referenced object
-     * 
      * @throws IOException if something went wrong when dereferencing the COSBase object
      */
     public COSBase dereferenceCOSObject(COSObject obj) throws IOException;
 
+    /**
+     * Create a random access read view starting at the given position with the given length.
+     * 
+     * @param startPosition start position within the underlying random access read
+     * @param streamLength stream length, i.e. the number of bytes covered by the view
+     * @return the random access read view
+     */
+    public RandomAccessReadView createRandomAccessReadView(long startPosition, long streamLength);
+
 }

Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessReadView.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessReadView.java?rev=1877851&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessReadView.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessReadView.java Sun May 17 10:55:16 2020
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.io;
+
+import java.io.IOException;
+
+/**
+ * This class provides a view of a part of a random access read. It clips the section starting at the given start
+ * position with the given length into a new random access read. All positions exposed by the view are relative to
+ * the start of that section. The view tracks its own position and repositions the underlying source before each
+ * access, so reads never deliver bytes from outside of the section.
+ */
+public class RandomAccessReadView implements RandomAccessRead
+{
+    // the underlying random access read, set to null when the view is closed
+    private RandomAccessRead randomAccessRead;
+    // the start position within the underlying source
+    private final long startPosition;
+    // stream length
+    private final long streamLength;
+    // current position relative to the start of the view
+    private long currentPosition = 0;
+
+    /**
+     * Constructor.
+     *
+     * @param randomAccessRead the underlying random access read
+     * @param startPosition start position within the underlying random access read
+     * @param streamLength stream length
+     */
+    public RandomAccessReadView(RandomAccessRead randomAccessRead, long startPosition,
+            long streamLength)
+    {
+        this.randomAccessRead = randomAccessRead;
+        this.startPosition = startPosition;
+        this.streamLength = streamLength;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public long getPosition() throws IOException
+    {
+        checkClosed();
+        return currentPosition;
+    }
+
+    /**
+     * Moves the view to the given position. A position beyond the end of the view is clamped to the end, so that
+     * subsequent reads report EOF.
+     *
+     * @param newOffset the new position relative to the start of the view
+     * @throws IOException if the view is closed, the position is negative or the underlying seek fails
+     */
+    @Override
+    public void seek(final long newOffset) throws IOException
+    {
+        checkClosed();
+        if (newOffset < 0)
+        {
+            // a negative offset would address data in front of the view
+            throw new IOException("Invalid position " + newOffset);
+        }
+        long targetPosition = Math.min(newOffset, streamLength);
+        randomAccessRead.seek(startPosition + targetPosition);
+        currentPosition = targetPosition;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public int read() throws IOException
+    {
+        checkClosed();
+        if (currentPosition >= streamLength)
+        {
+            return -1;
+        }
+        restorePosition();
+        int readValue = randomAccessRead.read();
+        if (readValue > -1)
+        {
+            currentPosition++;
+        }
+        return readValue;
+    }
+
+    /**
+     * Reads up to {@code b.length} bytes from the view into the given buffer.
+     *
+     * @param b the buffer the data is read into
+     * @return the number of bytes read, or -1 if the end of the view has been reached
+     * @throws IOException if the view is closed or the underlying read fails
+     */
+    @Override
+    public int read(byte[] b) throws IOException
+    {
+        return read(b, 0, b.length);
+    }
+
+    /**
+     * Reads up to {@code len} bytes from the view into the given buffer. The request is limited to the bytes
+     * remaining in the view.
+     *
+     * @param b the buffer the data is read into
+     * @param off the index within the buffer of the first byte to be stored
+     * @param len the maximum number of bytes to read
+     * @return the number of bytes read, 0 if {@code len} is 0, or -1 if the end of the view has been reached
+     * @throws IOException if the view is closed or the underlying read fails
+     */
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException
+    {
+        checkClosed();
+        if (len == 0)
+        {
+            return 0;
+        }
+        if (currentPosition >= streamLength)
+        {
+            return -1;
+        }
+        restorePosition();
+        int readBytes = randomAccessRead.read(b, off, Math.min(len, available()));
+        if (readBytes > 0)
+        {
+            // never move backwards if the underlying source signals EOF with -1
+            currentPosition += readBytes;
+        }
+        return readBytes;
+    }
+
+    /**
+     * Returns the number of bytes remaining in the view, limited to {@link Integer#MAX_VALUE}.
+     *
+     * @return the number of bytes which can still be read from the view
+     * @throws IOException if the view is closed
+     */
+    @Override
+    public int available() throws IOException
+    {
+        checkClosed();
+        // clamp before narrowing, a plain cast would truncate a remainder larger than Integer.MAX_VALUE
+        return (int) Math.min(streamLength - currentPosition, Integer.MAX_VALUE);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public long length() throws IOException
+    {
+        checkClosed();
+        return streamLength;
+    }
+
+    /**
+     * Closes the view by dropping the reference to the underlying random access read. The underlying source itself
+     * is not closed. Closing an already closed view has no effect.
+     */
+    @Override
+    public void close() throws IOException
+    {
+        randomAccessRead = null;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public boolean isClosed()
+    {
+        return randomAccessRead == null || randomAccessRead.isClosed();
+    }
+
+    /**
+     * Returns the next byte without advancing the position.
+     *
+     * @return the next byte, or -1 if the end of the view has been reached
+     * @throws IOException if the view is closed or the underlying peek fails
+     */
+    @Override
+    public int peek() throws IOException
+    {
+        checkClosed();
+        if (currentPosition >= streamLength)
+        {
+            // the byte following the view belongs to the source, not to the view
+            return -1;
+        }
+        restorePosition();
+        return randomAccessRead.peek();
+    }
+
+    /**
+     * Moves the position of the view backwards by the given number of bytes.
+     *
+     * @param bytes the number of bytes to step back
+     * @throws IOException if the view is closed or the new position would be in front of the view
+     */
+    @Override
+    public void rewind(int bytes) throws IOException
+    {
+        seek(currentPosition - bytes);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public boolean isEOF() throws IOException
+    {
+        checkClosed();
+        return currentPosition >= streamLength;
+    }
+
+    /**
+     * Restore the current position within the underlying random access read.
+     *
+     * @throws IOException if the underlying source can't be positioned
+     */
+    private void restorePosition() throws IOException
+    {
+        randomAccessRead.seek(startPosition + currentPosition);
+    }
+
+    /**
+     * Ensure that the view isn't closed.
+     *
+     * @throws IOException if the view or the underlying random access read is closed
+     */
+    private void checkClosed() throws IOException
+    {
+        if (isClosed())
+        {
+            throw new IOException("RandomAccessReadView already closed");
+        }
+    }
+
+}

Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessReadView.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1877851&r1=1877850&r2=1877851&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sun May 17 10:55:16 2020
@@ -45,6 +45,7 @@ import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.cos.ICOSParser;
 import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.io.RandomAccessRead;
+import org.apache.pdfbox.io.RandomAccessReadView;
 import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
 import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
 import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial;
@@ -573,6 +574,12 @@ public class COSParser extends BaseParse
         return parsedObj;
     }
 
+    @Override
+    public RandomAccessReadView createRandomAccessReadView(long startPosition, long streamLength)
+    {
+        return new RandomAccessReadView(source, startPosition, streamLength); // view on the parser's own source
+    }
+
     /**
      * Parse the object for the given object number.  
      * 
@@ -823,8 +830,6 @@ public class COSParser extends BaseParse
      */
     protected COSStream parseCOSStream(COSDictionary dic) throws IOException
     {
-        COSStream stream = document.createCOSStream(dic);
-       
         // read 'stream'; this was already tested in parseObjectsDynamically()
         readString(); 
         
@@ -847,14 +852,21 @@ public class COSParser extends BaseParse
             }
         }
 
-        // get output stream to copy data to
-        try (OutputStream out = stream.createRawOutputStream())
+        COSStream stream;
+        long streamPosition = source.getPosition();
+        if (streamLengthObj != null && validateStreamLength(streamLengthObj.longValue()))
+        {
+            stream = document.createCOSStream(dic,
+                    streamPosition,
+                    streamLengthObj.longValue());
+            // skip stream
+            source.seek(source.getPosition() + streamLengthObj.intValue());
+        }
+        else
         {
-            if (streamLengthObj != null && validateStreamLength(streamLengthObj.longValue()))
-            {
-                readValidStream(out, streamLengthObj);
-            }
-            else
+            stream = document.createCOSStream(dic);
+            // get output stream to copy data to
+            try (OutputStream out = stream.createRawOutputStream())
             {
                 readUntilEndStream(new EndstreamOutputStream(out));
             }
@@ -992,24 +1004,6 @@ public class COSParser extends BaseParse
         out.flush();
     }
 
-    private void readValidStream(OutputStream out, COSNumber streamLengthObj) throws IOException
-    {
-        long remainBytes = streamLengthObj.longValue();
-        while (remainBytes > 0)
-        {
-            final int chunk = (remainBytes > STREAMCOPYBUFLEN) ? STREAMCOPYBUFLEN : (int) remainBytes;
-            final int readBytes = source.read(streamCopyBuf, 0, chunk);
-            if (readBytes <= 0)
-            {
-                // shouldn't happen, the stream length has already been validated
-                throw new IOException("read error at offset " + source.getPosition()
-                        + ": expected " + chunk + " bytes, but read() returns " + readBytes);
-            }
-            out.write(streamCopyBuf, 0, readBytes);
-            remainBytes -= readBytes;
-        }
-    }
-
     private boolean validateStreamLength(long streamLength) throws IOException
     {
         boolean streamLengthIsValid = true;