You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2012/07/15 15:36:06 UTC

svn commit: r1361698 - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/io/RandomAccessBuffer.java test/java/org/apache/pdfbox/io/TestRandomAccessBuffer.java

Author: lehmi
Date: Sun Jul 15 13:36:06 2012
New Revision: 1361698

URL: http://svn.apache.org/viewvc?rev=1361698&view=rev
Log:
PDFBOX-1358: grow the buffer linearly in fixed-size chunks instead of doubling it (2^n) to reduce memory consumption

Added:
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/io/TestRandomAccessBuffer.java
Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBuffer.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBuffer.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBuffer.java?rev=1361698&r1=1361697&r2=1361698&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBuffer.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBuffer.java Sun Jul 15 13:36:06 2012
@@ -17,29 +17,46 @@
 package org.apache.pdfbox.io;
 
 import java.io.IOException;
+import java.util.ArrayList;
 
 /**
- * An interface to allow PDF files to be stored completely in memory.
+ * An implementation of the RandomAccess interface that stores a PDF in memory.
+ * The data is stored in 16 KB chunks organized in an ArrayList.
  *
- * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
- * @version $Revision: 1.2 $
  */
 public class RandomAccessBuffer implements RandomAccess
 {
-
-    private byte[] buffer;
+    // chunk size is 16kb
+    private static final int BUFFER_SIZE = 16384;
+    // list containing all chunks
+    private ArrayList<byte[]> bufferList = null;
+    // current chunk
+    private byte[] currentBuffer;
+    // current pointer to the whole buffer
     private long pointer;
+    // current pointer for the current chunk
+    private long currentBufferPointer;
+    // size of the whole buffer
     private long size;
+    // current chunk list index
+    private int bufferListIndex;
+    // maximum chunk list index
+    private int bufferListMaxIndex;
 
     /**
      * Default constructor.
      */
     public RandomAccessBuffer()
     {
-        // starting with a 16kb buffer
-        buffer = new byte[16384];
+        // starting with one chunk
+        bufferList = new ArrayList<byte[]>();
+        currentBuffer = new byte[BUFFER_SIZE];
+        bufferList.add(currentBuffer);
         pointer = 0;
+        currentBufferPointer = 0;
         size = 0;
+        bufferListIndex = 0;
+        bufferListMaxIndex = 0;
     }
 
     /**
@@ -47,9 +64,12 @@ public class RandomAccessBuffer implemen
      */
     public void close() throws IOException
     {
-        buffer = null;
+        currentBuffer = null;
+        bufferList.clear();
         pointer = 0;
+        currentBufferPointer = 0;
         size = 0;
+        bufferListIndex = 0;
     }
 
     /**
@@ -57,7 +77,11 @@ public class RandomAccessBuffer implemen
      */
     public void seek(long position) throws IOException
     {
-        this.pointer = position;
+        pointer = position;
+        // calculate the chunk list index
+        bufferListIndex = (int)(position / BUFFER_SIZE);
+        currentBufferPointer = position % BUFFER_SIZE;
+        currentBuffer = bufferList.get(bufferListIndex);
     }
 
     /**
@@ -69,7 +93,20 @@ public class RandomAccessBuffer implemen
         {
             return -1;
         }
-        return buffer[(int)pointer++] & 0xff;
+        if (currentBufferPointer >= BUFFER_SIZE)
+        {
+            if (bufferListIndex >= bufferListMaxIndex)
+            {
+                return -1;
+            }
+            else
+            {
+                currentBuffer = bufferList.get(++bufferListIndex);
+                currentBufferPointer = 0;
+            }
+        }
+        pointer++;
+        return currentBuffer[(int)currentBufferPointer++] & 0xff;
     }
 
     /**
@@ -82,7 +119,35 @@ public class RandomAccessBuffer implemen
             return 0;
         }
         int maxLength = (int) Math.min(length, this.size-pointer);
-        System.arraycopy(buffer, (int) pointer, b, offset, maxLength);
+        long remainingBytes = BUFFER_SIZE - currentBufferPointer;
+        if (maxLength >= remainingBytes)
+        {
+            // copy the first bytes from the current buffer
+            System.arraycopy(currentBuffer, (int)currentBufferPointer, b, offset, (int)remainingBytes);
+            int newOffset = offset + (int)remainingBytes;
+            long remainingBytes2Read = length - remainingBytes;
+            // determine how many full buffers are needed to read the remaining bytes
+            int numberOfArrays = (int)remainingBytes2Read / BUFFER_SIZE;
+            for (int i=0;i<numberOfArrays;i++) 
+            {
+                nextBuffer();
+                System.arraycopy(currentBuffer, 0, b, newOffset, BUFFER_SIZE);
+                newOffset += BUFFER_SIZE;
+            }
+            remainingBytes2Read = remainingBytes2Read % BUFFER_SIZE;
+            // are there still some bytes to be read?
+            if (remainingBytes2Read > 0)
+            {
+                nextBuffer();
+                System.arraycopy(currentBuffer, 0, b, newOffset, (int)remainingBytes2Read);
+                currentBufferPointer += remainingBytes2Read;
+            }
+        }
+        else
+        {
+            System.arraycopy(currentBuffer, (int)currentBufferPointer, b, offset, maxLength);
+            currentBufferPointer += maxLength;
+        }
         pointer += maxLength;
         return maxLength;
     }
@@ -100,15 +165,16 @@ public class RandomAccessBuffer implemen
      */
     public void write(int b) throws IOException
     {
-        if (pointer >= buffer.length)
+        if (currentBufferPointer >= BUFFER_SIZE) 
         {
-            if (pointer >= Integer.MAX_VALUE) 
+            if (pointer + BUFFER_SIZE >= Integer.MAX_VALUE) 
             {
                 throw new IOException("RandomAccessBuffer overflow");
             }
-            buffer = expandBuffer(buffer, (int)Math.min(2L * buffer.length, Integer.MAX_VALUE));
+            expandBuffer();
         }
-        buffer[(int)pointer++] = (byte)b;
+        currentBuffer[(int)currentBufferPointer++] = (byte)b;
+        pointer++;
         if (pointer > this.size)
         {
             this.size = pointer;
@@ -120,17 +186,43 @@ public class RandomAccessBuffer implemen
      */
     public void write(byte[] b, int offset, int length) throws IOException
     {
-        long newSize = pointer+length;
-        if (newSize >= buffer.length)
+        long newSize = pointer + length;
+        long remainingBytes = BUFFER_SIZE - currentBufferPointer;
+        if (length >= remainingBytes)
         {
             if (newSize > Integer.MAX_VALUE) 
             {
                 throw new IOException("RandomAccessBuffer overflow");
             }
-            newSize = Math.min(Math.max(2L * buffer.length, newSize), Integer.MAX_VALUE);
-            buffer = expandBuffer(buffer, (int)newSize);
+            // copy the first bytes to the current buffer
+            System.arraycopy(b, offset, currentBuffer, (int)currentBufferPointer, (int)remainingBytes);
+            int newOffset = offset + (int)remainingBytes;
+            long remainingBytes2Write = length - remainingBytes;
+            // determine how many buffers are needed for the remaining bytes
+            int numberOfNewArrays = (int)remainingBytes2Write / BUFFER_SIZE;
+            for (int i=0;i<numberOfNewArrays;i++) 
+            {
+                expandBuffer();
+                System.arraycopy(b, newOffset, currentBuffer, (int)currentBufferPointer, BUFFER_SIZE);
+                newOffset += BUFFER_SIZE;
+            }
+            // are there still some bytes to be written?
+            remainingBytes2Write -= numberOfNewArrays * BUFFER_SIZE;
+            if (remainingBytes2Write >= 0)
+            {
+                expandBuffer();
+                if (remainingBytes2Write > 0)
+                {
+                    System.arraycopy(b, newOffset, currentBuffer, (int)currentBufferPointer, (int)remainingBytes2Write);
+                }
+                currentBufferPointer = remainingBytes2Write;
+            }
+        }
+        else
+        {
+            System.arraycopy(b, offset, currentBuffer, (int)currentBufferPointer, length);
+            currentBufferPointer += length;
         }
-        System.arraycopy(b, offset, buffer, (int)pointer, length);
         pointer += length;
         if (pointer > this.size)
         {
@@ -139,17 +231,32 @@ public class RandomAccessBuffer implemen
     }
 
     /**
-     * expand the given buffer to the new size.
-     * 
-     * @param buffer the given buffer
-     * @param newSize the new size
-     * @return the expanded buffer
-     * 
-     */
-    private byte[] expandBuffer(byte[] buffer, int newSize) 
-    {
-        byte[] expandedBuffer = new byte[newSize];
-        System.arraycopy(buffer, 0, expandedBuffer, 0, buffer.length);
-        return expandedBuffer;
+     * Advance to the next buffer chunk, creating and appending a new one only if no further chunk exists, and adjust the chunk pointer and indices.
+     */
+    private void expandBuffer() 
+    {
+        if (bufferListMaxIndex > bufferListIndex)
+        {
+            // there is already an existing chunk
+            nextBuffer();
+        }
+        else
+        {
+            // create a new chunk and add it to the buffer
+            currentBuffer = new byte[BUFFER_SIZE];
+            bufferList.add(currentBuffer);
+            currentBufferPointer = 0;
+            bufferListMaxIndex++;
+            bufferListIndex++;
+        }
+    }
+
+    /**
+     * switch to the next buffer chunk and reset the buffer pointer.
+     */
+    private void nextBuffer() 
+    {
+        currentBufferPointer = 0;
+        currentBuffer = bufferList.get(++bufferListIndex);
     }
 }

Added: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/io/TestRandomAccessBuffer.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/io/TestRandomAccessBuffer.java?rev=1361698&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/io/TestRandomAccessBuffer.java (added)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/io/TestRandomAccessBuffer.java Sun Jul 15 13:36:06 2012
@@ -0,0 +1,213 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pdfbox.io;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+/**
+ * This is a unit test for {@link RandomAccessBuffer}.
+ * 
+ */
+public class TestRandomAccessBuffer extends TestCase 
+{
+
+    private static final int BUFFER_SIZE = 16384;
+    
+    /**
+     * This will test the {@link RandomAccessBuffer#read()} 
+     * and {@link RandomAccessBuffer#write(int)} method.
+     * 
+     * @throws IOException is thrown if something went wrong.
+     */
+    public void testSimpleReadWrite() throws IOException
+    {
+        // create a buffer filled with 10 figures from 0 to 9
+        RandomAccessBuffer buffer = new RandomAccessBuffer();
+        for ( int i=0;i < 10;i++ )
+        {
+            buffer.write(i);
+        }
+        // jump back to the beginning of the buffer
+        buffer.seek(0);
+        // sum up all figures, the result should be 45
+        int result = 0;
+        for ( int i=0;i < 10;i++ )
+        {
+            result += buffer.read();
+        }
+        assertEquals(45, result);
+    }
+
+    /**
+     * This will test the {@link RandomAccessBuffer#read(byte[], int, int)} 
+     * and {@link RandomAccessBuffer#write(byte[], int, int)} method.
+     * 
+     * @throws IOException is thrown if something went wrong.
+     */
+    public void testSimpleArrayReadWrite() throws IOException
+    {
+        // create an array filled with 10 figures from 0 to 9
+        byte[] byteArray = new byte[10];
+        for ( byte i=0;i < 10;i++ )
+        {
+            byteArray[i] = i;
+        }
+        // create an empty buffer and write the array to it
+        RandomAccessBuffer buffer = new RandomAccessBuffer();
+        buffer.write(byteArray, 0, byteArray.length);
+        // jump back to the beginning of the buffer
+        buffer.seek(0);
+        // read the buffer byte after byte and sum up all figures, 
+        // the result should be 45
+        int result = 0;
+        for ( int i=0;i < 10;i++ )
+        {
+            result += buffer.read();
+        }
+        assertEquals(45, result);
+        // jump back to the beginning of the buffer
+        buffer.seek(0);
+        // read the buffer to an array and sum up all figures, 
+        // the result should be 45
+        buffer.read(byteArray, 0, byteArray.length);
+        result = 0;
+        for ( int i=0;i < 10;i++ )
+        {
+            result += byteArray[i];
+        }
+        assertEquals(45, result);
+    }
+
+    /**
+     * This will test the {@link RandomAccessBuffer#read(byte[], int, int)}
+     * and {@link RandomAccessBuffer#write(byte[], int, int)} methods using
+     * enough data to span more than one chunk.
+     * 
+     * @throws IOException is thrown if something went wrong.
+     */
+    public void testArrayReadWrite() throws IOException
+    {
+        // create an array filled with 16384 * "0", 16384 * "1" and 100 * "2"
+        byte[] byteArray = new byte[ 2 * BUFFER_SIZE + 100];
+        for (int i = BUFFER_SIZE;i < 2 * BUFFER_SIZE; i++)
+        {
+            byteArray[i] = 1;
+        }
+        for (int i = 2 * BUFFER_SIZE; i < 2 * BUFFER_SIZE + 100; i++)
+        {
+            byteArray[i] = 2;
+        }
+        // write the array to a buffer 
+        RandomAccessBuffer buffer = new RandomAccessBuffer();
+        buffer.write(byteArray, 0, byteArray.length);
+        // jump to the beginning
+        buffer.seek(0);
+        // the first byte should be "0"
+        assertEquals(0, buffer.read());
+        
+        // jump to the last byte of the first chunk, it should be "0"
+        buffer.seek(BUFFER_SIZE - 1);
+        assertEquals(0, buffer.read());
+        
+        // jump to the first byte of the second chunk, it should be "1"
+        buffer.seek(BUFFER_SIZE);
+        assertEquals(1, buffer.read());
+        
+        // jump to 5 bytes before the end of the first chunk
+        buffer.seek(BUFFER_SIZE - 5);
+        // read the last 5 bytes from the first and the first 5 bytes 
+        // from the second chunk and sum them up. The result should be "5"
+        byteArray = new byte[10];
+        buffer.read(byteArray,0, byteArray.length);
+        int result = 0;
+        for ( int i=0;i < 10;i++ )
+        {
+            result += byteArray[i];
+        }
+        assertEquals(5, result);
+
+        // jump to 5 bytes before the end of the second chunk
+        buffer.seek(2 * BUFFER_SIZE - 5);
+        // read the last 5 bytes from the second and the first 5 bytes 
+        // from the third chunk and sum them up. The result should be "15"
+        byteArray = new byte[10];
+        buffer.read(byteArray,0, byteArray.length);
+        result = 0;
+        for ( int i=0;i < 10;i++ )
+        {
+            result += byteArray[i];
+        }
+        assertEquals(15, result);
+    }
+
+    /**
+     * This will test if overwriting works.
+     * 
+     * @throws IOException is thrown if something went wrong.
+     */
+    public void testOverwrite() throws IOException
+    {
+        // create a buffer filled with 16384 * "0" and 100 * "1" 
+        byte[] byteArray = new byte[ BUFFER_SIZE + 100];
+        RandomAccessBuffer buffer = new RandomAccessBuffer();
+        for (int i = BUFFER_SIZE;i < BUFFER_SIZE + 100; i++)
+        {
+            byteArray[i] = 1;
+        }
+        buffer.write(byteArray, 0, byteArray.length);
+        
+        // jump to 5 bytes before the end of the first chunk
+        buffer.seek(BUFFER_SIZE - 5);
+        // read the last 5 bytes from the first and the first 5 bytes 
+        // from the second chunk and sum them up. The result should be "5"
+        byteArray = new byte[10];
+        buffer.read(byteArray,0, byteArray.length);
+        int result = 0;
+        for ( int i=0;i < 10;i++ )
+        {
+            result += byteArray[i];
+        }
+        assertEquals(5, result);
+        
+        // jump to 5 bytes before the end of the first chunk
+        buffer.seek(BUFFER_SIZE - 5);
+        // write 5 "2" and 5 "3" to the buffer
+        for ( int i=0;i < 5;i++ )
+        {
+            buffer.write(2);
+        }
+        for ( int i=0;i < 5;i++ )
+        {
+            buffer.write(3);
+        }
+        // jump to 5 bytes before the end of the first chunk
+        buffer.seek(BUFFER_SIZE - 5);
+        // read the last 5 bytes from the first and the first 5 bytes 
+        // from the second chunk and sum them up. The result should be "25"
+        byteArray = new byte[10];
+        buffer.read(byteArray,0, byteArray.length);
+        result = 0;
+        for ( int i=0;i < 10;i++ )
+        {
+            result += byteArray[i];
+        }
+        assertEquals(25, result);
+    }
+}