You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by tb...@apache.org on 2015/07/16 11:11:24 UTC

svn commit: r1691342 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io: ScratchFile.java ScratchFileBuffer.java

Author: tboehme
Date: Thu Jul 16 09:11:24 2015
New Revision: 1691342

URL: http://svn.apache.org/r1691342
Log:
PDFBOX-2882: replace scratch file handling with optimized memory+file paging implementation

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java?rev=1691342&r1=1691341&r2=1691342&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java Thu Jul 16 09:11:24 2015
@@ -19,79 +19,313 @@ package org.apache.pdfbox.io;
 import java.io.Closeable;
 import java.io.File;
 import java.io.IOException;
+import java.util.BitSet;
+import java.util.concurrent.atomic.AtomicBoolean;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 
 /**
- * A temporary file which can hold multiple buffers of temporary data. A new temporary file is created for each new
- * {@link ScratchFile} instance, and is deleted when the {@link ScratchFile} is closed.
- * <p>
- * Multiple buffers can be creating by calling the {@link #createBuffer()} method.
- * <p>
- * The file is split into pages, each page containing a pointer to the previous and next pages. This allows for
- * multiple, separate streams in the same file.
- *
- * @author Jesse Long
+ * Implements a memory page handling mechanism as base for creating (multiple)
+ * {@link RandomAccess} buffers each having its set of pages (implemented by
+ * {@link ScratchFileBuffer}). A buffer is created calling {@link #createBuffer()}.
+ * 
+ * <p>Pages can be stored in main memory or in a temporary file. A mixed mode
+ * is supported storing a certain amount of pages in memory and only the
+ * additional ones in temporary file (defined by maximum main memory to
+ * be used).</p>
+ * 
+ * <p>Pages can be marked as 'free' in order to re-use them. For in-memory pages
+ * this will release the used memory while for pages in temporary file this
+ * simply marks the area as free to re-use.</p>
+ * 
+ * <p>If a temporary file was created (done with the first page to be stored
+ * in temporary file) it is deleted when {@link ScratchFile#close()} is called.</p>
+ * 
+ * <p>Using this class for {@link RandomAccess} buffers allows for a direct control
+ * on the maximum memory usage and allows processing large files for which we
+ * otherwise would get an {@link OutOfMemoryError} in case of using {@link RandomAccessBuffer}.</p>
+ * 
+ * <p>This base class for providing pages is thread safe (the buffer implementations are not).</p>
  */
 public class ScratchFile implements Closeable
 {
     private static final Log LOG = LogFactory.getLog(ScratchFile.class);
-    private File file;
-    private java.io.RandomAccessFile raf;
 
+    /** number of pages by which we enlarge the scratch file (reduce I/O-operations) */
+    private static final int ENLARGE_PAGE_COUNT = 16;
+    private static final int PAGE_SIZE = 4096;
+    
+    private final File scratchFileDirectory;
+    private volatile File file;
+    private volatile java.io.RandomAccessFile raf;
+    private volatile int pageCount = 0;
+    private final BitSet freePages = new BitSet();
+    /** number of free pages; only to be accessed under synchronization on {@link #freePages} */
+    private int freePageCount = 0;
+    private final byte[][] inMemoryPages;
+    private final int inMemoryMaxPageCount;
+
+    private final AtomicBoolean isClosed = new AtomicBoolean( false );
+    
     /**
-     * Creates a new scratch file. If a {code scratchFileDirectory} is supplied, then the scratch file is created in
-     * that directory.
+     * Initializes page handler. If a <code>scratchFileDirectory</code> is supplied,
+     * then the scratch file will be created in that directory.
+     * 
+     * <p>All pages will be stored in the scratch file.</p>
      * 
-     * @param scratchFileDirectory The directory in which to create the scratch file, or {code null} if the scratch
-     * should be created in the default temporary directory.
-     * @throws IOException If there was a problem creating a temporary file.
+     * @param scratchFileDirectory The directory in which to create the scratch file
+     *                             or <code>null</code> to create it in the default temporary directory.
+     * 
+     * @throws IOException If a scratch file directory was given but is not an existing directory.
      */
     public ScratchFile(File scratchFileDirectory) throws IOException
     {
-        file = File.createTempFile("PDFBox", ".tmp", scratchFileDirectory);
-        try
+        this(scratchFileDirectory, 0);
+    }
+    
+    /**
+     * Initializes page handler. If a <code>scratchFileDirectory</code> is supplied,
+     * then the scratch file will be created in that directory.
+     * 
+     * <p>Depending on the size of allowed memory usage a number of pages (maxInMemoryByteSize/{@link #PAGE_SIZE})
+     * will be stored in-memory and only additional pages will be written to/read from scratch file.</p>
+     * 
+     * @param scratchFileDirectory The directory in which to create the scratch file
+     *                             or <code>null</code> to create it in the default temporary directory.
+     * @param maxInMemoryByteSize maximum in-memory bytes to use for pages which don't have to be
+     *                            handled by scratch file
+     * 
+     * @throws IOException If a scratch file directory was given but is not an existing directory.
+     */
+    public ScratchFile(File scratchFileDirectory, long maxInMemoryByteSize) throws IOException
+    {
+        this.scratchFileDirectory = scratchFileDirectory;
+
+        if ((this.scratchFileDirectory != null) && (!this.scratchFileDirectory.isDirectory()))
         {
-            raf = new java.io.RandomAccessFile(file, "rw");
+            throw new IOException("Scratch file directory does not exist: " + this.scratchFileDirectory);
         }
-        catch (IOException e)
+        
+        inMemoryMaxPageCount = (int) Math.min(Integer.MAX_VALUE, Math.max(0, maxInMemoryByteSize) / PAGE_SIZE);
+        inMemoryPages = new byte[inMemoryMaxPageCount][];
+        
+        freePages.set(0, inMemoryMaxPageCount);
+        freePageCount = inMemoryMaxPageCount;
+    }
+
+    /**
+     * Will create scratch file if it does not exist already.
+     * 
+     * @throws IOException if {@link #close()} was called or creating scratch file failed
+     */
+    private final void ensureFileExists() throws IOException {
+        
+        if ( raf != null ) {
+            return;
+        }
+        
+        synchronized (isClosed)
         {
-            if (!file.delete())
+            checkClosed();
+            
+            file = File.createTempFile("PDFBox", ".tmp", scratchFileDirectory);
+            try
             {
-                LOG.warn("Error deleting scratch file: " + file.getAbsolutePath());
+                raf = new java.io.RandomAccessFile(file, "rw");
+            }
+            catch (IOException e)
+            {
+                if (!file.delete())
+                {
+                    LOG.warn("Error deleting scratch file: " + file.getAbsolutePath());
+                }
+                throw e;
             }
-            throw e;
         }
     }
-
+    
     /**
-     * Returns the underlying {@link java.io.RandomAccessFile}.
+     * Returns a new free page, either from free page pool
+     * or by enlarging scratch file (may be created).
      * 
-     * @return The underlying {@link java.io.RandomAccessFile}.
+     * @return index of new page
      */
-    java.io.RandomAccessFile getRandomAccessFile()
+    int getNewPage() throws IOException
     {
-        return raf;
+        synchronized (freePages)
+        {
+            
+            if (freePageCount <= 0)
+            {
+                enlarge();
+            }
+            
+            int idx = freePages.nextSetBit( 0 );
+            if (idx < 0)
+            {
+                throw new IOException("Expected free page but did not found one.");
+            }
+            freePages.clear(idx);
+            freePageCount--;
+            
+            if (idx >= pageCount)
+            {
+                pageCount = idx + 1;
+            }
+            
+            return idx;
+        }
     }
 
     /**
-     * Checks if this scratch file has already been closed. If the file has been closed, an {@link IOException} is
-     * thrown.
+     * Enlarges the scratch file by a number of pages defined by
+     * {@link #ENLARGE_PAGE_COUNT}. This will create the scratch
+     * file via {@link #ensureFileExists()} if it does not exist already.
+     * 
+     * <p>Only to be called under synchronization on {@link #freePages}.</p>
+     */
+    private final void enlarge() throws IOException
+    {
+        ensureFileExists();
+        
+        // handle corner case when close is called by another thread
+        java.io.RandomAccessFile localRAF = raf;
+        
+        checkClosed();
+        
+        synchronized ( localRAF )
+        {
+            long fileLen = localRAF.length();
+            long expectedFileLen = ((long)pageCount - inMemoryMaxPageCount) * PAGE_SIZE;
+            
+            if (expectedFileLen != fileLen)
+            {
+                throw new IOException("Expected scratch file size of " + expectedFileLen + " but found " + fileLen);
+            }
+                
+            fileLen += ENLARGE_PAGE_COUNT * PAGE_SIZE;
+
+            localRAF.setLength(fileLen);
+
+            freePages.set(pageCount, pageCount + ENLARGE_PAGE_COUNT);
+            freePageCount += ENLARGE_PAGE_COUNT;
+        }
+    }
+    
+    /**
+     * Returns byte size of a page.
+     * 
+     * @return byte size of a page
+     */
+    int getPageSize()
+    {
+        return PAGE_SIZE;
+    }
+    
+    /**
+     * Reads the page with specified index.
      * 
-     * @throws IOException If the file has already been closed.
+     * @param pageIdx index of page to read
+     * 
+     * @return byte array of size {@link #PAGE_SIZE} filled with page data read from file 
+     * 
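+     * @throws IOException if this page handler is closed, the page index is out of range or reading from the scratch file fails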
+     */
+    byte[] readPage(int pageIdx) throws IOException
+    {
+        checkClosed();
+        
+        if ((pageIdx < 0) || (pageIdx >= pageCount))
+        {
+            throw new IOException("Page index out of range: " + pageIdx + ". Max value: " + (pageCount - 1) );
+        }
+        
+        if (pageIdx < inMemoryMaxPageCount)
+        {
+            return inMemoryPages[pageIdx];
+        }
+        
+        // handle corner case when close is called by another thread
+        java.io.RandomAccessFile localRAF = raf;
+        
+        checkClosed();
+        
+        synchronized ( localRAF )
+        {
+            byte[] page = new byte[PAGE_SIZE];
+            localRAF.seek(((long)pageIdx - inMemoryMaxPageCount) * PAGE_SIZE);
+            localRAF.readFully(page);
+            
+            return page;
+        }
+    }
+    
+    /**
+     * Writes updated page. Page is either kept in-memory if pageIdx &lt; {@link #inMemoryMaxPageCount}
+     * or is written to scratch file.
+     * 
+     * <p>Provided page byte array must not be re-used for other pages since we
+     * store it as is in case of in-memory handling.</p>
+     * 
+     * @param pageIdx index of page to write
+     * @param page page to write (length has to be {@value #PAGE_SIZE})
+     * 
+     * @throws IOException in case page index is out of range or page has wrong length
+     *                     or writing to file failed
+     */
+    void writePage(int pageIdx, byte[] page) throws IOException
+    {
+        checkClosed();
+        
+        if ((pageIdx<0) || (pageIdx>=pageCount))
+        {
+            throw new IOException("Page index out of range: " + pageIdx + ". Max value: " + (pageCount - 1) );
+        }
+        
+        if (page.length != PAGE_SIZE)
+        {
+            throw new IOException("Wrong page size to write: " + page.length + ". Expected: " + PAGE_SIZE );
+        }
+        
+        if (pageIdx < inMemoryMaxPageCount)
+        {
+            inMemoryPages[pageIdx] = page;
+        }
+        else
+        {
+            // handle corner case when close is called by another thread
+            java.io.RandomAccessFile localRAF = raf;
+            
+            checkClosed();
+            
+            synchronized ( localRAF )
+            {
+                localRAF.seek(((long)pageIdx - inMemoryMaxPageCount) * PAGE_SIZE);
+                localRAF.write(page);
+            }
+        }
+    }
+    
+    /**
+     * Checks if this page handler has already been closed. If so,
+     * an {@link IOException} is thrown.
+     * 
+     * @throws IOException If {@link #close()} has already been called.
      */
     void checkClosed() throws IOException
     {
-        if (raf == null)
+        if (isClosed.get())
         {
             throw new IOException("Scratch file already closed");
         }
     }
 
     /**
-     * Creates a new buffer in the scratch file.
+     * Creates a new buffer using this page handler.
      * 
      * @return A new buffer.
+     * 
      * @throws IOException If an error occurred.
      */
     public RandomAccess createBuffer() throws IOException
@@ -100,29 +334,75 @@ public class ScratchFile implements Clos
     }
 
     /**
-     * Closes and deletes the temporary file. No further interaction with the scratch file or associated buffers can
-     * happen after this method is called.
+     * Allows a buffer which is cleared/closed to release its pages to be re-used.
+     * 
+     * @param pageIndexes array containing the indexes of the pages to release
+     * @param count number of page indexes contained in provided array 
+     */
+    void markPagesAsFree(int[] pageIndexes, int off, int count) {
+        synchronized (freePages)
+        {
+            for (int aIdx = off; aIdx < count; aIdx++)
+            {
+                int pageIdx = pageIndexes[aIdx];
+                if ((pageIdx>=0) && (pageIdx<pageCount) && (!freePages.get(pageIdx)))
+                {
+                    freePages.set(pageIdx);
+                    freePageCount++;
+                    if (pageIdx < inMemoryMaxPageCount)
+                    {
+                        inMemoryPages[pageIdx] = null;
+                    }
+                }
+                    
+            }
+        }
+    }
+    
+    /**
+     * Closes and deletes the temporary file. No further interaction with
+     * the scratch file or associated buffers can happen after this method is called.
+     * It also releases in-memory pages.
      * 
      * @throws IOException If there was a problem closing or deleting the temporary file.
      */
     @Override
     public void close() throws IOException
     {
-        if (raf != null)
+        if (isClosed.compareAndSet(false, true))
         {
-            raf.close();
-            raf = null;
-        }
-
-        if (file != null)
-        {
-            if (file.delete())
+            synchronized (isClosed)
             {
-                file = null;
-            }
-            else
-            {
-                throw new IOException("Error deleting scratch file: " + file.getAbsolutePath());
+                java.io.RandomAccessFile localRAF = raf;
+                
+                if (localRAF != null)
+                {
+                    raf = null;
+                    
+                    synchronized ( localRAF )
+                    {
+                        localRAF.close();
+                    }
+                }
+                
+                if (file != null)
+                {
+                    if (file.delete())
+                    {
+                        file = null;
+                    }
+                    else
+                    {
+                        throw new IOException("Error deleting scratch file: " + file.getAbsolutePath());
+                    }
+                }
+
+                freePages.clear();
+                
+                for (int pIdx = 0; pIdx < inMemoryMaxPageCount; pIdx++)
+                {
+                    inMemoryPages[pIdx] = null;
+                }
             }
         }
     }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java?rev=1691342&r1=1691341&r2=1691342&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java Thu Jul 16 09:11:24 2015
@@ -18,139 +18,174 @@ package org.apache.pdfbox.io;
 
 import java.io.EOFException;
 import java.io.IOException;
-import java.io.RandomAccessFile;
 
 /**
- * A {@link RandomAccess} implemented as a doubly linked list over multiple pages in a {@link java.io.RandomAccessFile}.
- * <p>
- * Each page is {@link #PAGE_SIZE} bytes, with the first 8 bytes being a pointer to page index (
- * {@code pageOffset / PAGE_SIZE}) of the previous page in the buffer, and the last 8 bytes being a pointer to the page
- * index of the next page in the buffer.
- * 
- * @author Jesse Long
+ * Implementation of {@link RandomAccess} as sequence of multiple fixed size pages handled
+ * by {@link ScratchFile}.
  */
 class ScratchFileBuffer implements RandomAccess
 {
+    private final int pageSize;
     /**
-     * The size of each page.
+     * The underlying page handler.
      */
-    private static final int PAGE_SIZE = 4096;
+    private ScratchFile pageHandler;
     /**
-     * The underlying scratch file.
-     */
-    private ScratchFile scratchFile;
-    /**
-     * The random access file of the scratch file.
+     * The number of bytes of content in this buffer.
      */
-    private RandomAccessFile raFile;
+    private long size = 0;
     /**
-     * The first page in this buffer.
+     * Index of current page in {@link #pageIndexes} (the nth page within this buffer).
      */
-    private final long firstPage;
+    private int currentPagePositionInPageIndexes;
     /**
-     * The number of bytes of content in this buffer.
+     * The offset of the current page within this buffer.
      */
-    private long length = 0;
+    private long currentPageOffset;
     /**
-     * The index of the page in which the current position of this buffer is in.
+     * The current page data.
      */
-    private long currentPage;
+    private byte[] currentPage;
     /**
-     * The current position of the buffer as an offset in the current page.
+     * The current position (for next read/write) of the buffer as an offset in the current page.
      */
     private int positionInPage;
-    /**
-     * The current position in the space of the whole buffer.
+    /** 
+     * <code>true</code> if current page was changed by a write method
      */
-    private long positionInBuffer;
+    private boolean currentPageContentChanged = false;
 
+    /** ordered list of this buffer's pages, each stored as the index under which the page handler ({@link ScratchFile}) knows the page */
+    private int[] pageIndexes = new int[16];
+    /** number of pages held by this buffer */
+    private int pageCount = 0;
+    
     /**
-     * Creates a new buffer in the provided {@link ScratchFile}.
+     * Creates a new buffer using pages handled by provided {@link ScratchFile}.
+     * 
+     * @param pageHandler The {@link ScratchFile} managing the pages to be used by this buffer.
      * 
-     * @param scratchFile The {@link ScratchFile} in which to create the new buffer.
-     * @throws IOException If there was an error writing to the file.
+     * @throws IOException If getting first page failed.
      */
-    ScratchFileBuffer(ScratchFile scratchFile) throws IOException
+    ScratchFileBuffer(ScratchFile pageHandler) throws IOException
     {
-        scratchFile.checkClosed();
-
-        this.scratchFile = scratchFile;
-
-        raFile = scratchFile.getRandomAccessFile();
+        pageHandler.checkClosed();
 
-        /*
-         * We must allocate a new first page for each new buffer, in case multiple buffers are created at the same time,
-         * and use the same space.
-         */
-        firstPage = createNewPage();
-
-        /*
-         * Mark the first page back pointer to -1 to indicate start of buffer.
-         */
-        raFile.seek(firstPage * PAGE_SIZE);
-        raFile.writeLong(-1L);
-
-        /*
-         * Reset variables to beginning of empty buffer.
-         */
-        clear();
+        this.pageHandler = pageHandler;
+        
+        pageSize = this.pageHandler.getPageSize();
+        
+        addPage();
     }
 
     /**
-     * Checks if this buffer, or the underlying {@link ScratchFile} have been closed, throwing {@link IOException} if
-     * so.
+     * Checks if this buffer, or the underlying {@link ScratchFile} have been closed,
+     * throwing {@link IOException} if so.
      * 
      * @throws IOException If either this buffer, or the underlying {@link ScratchFile} have been closed.
      */
     private void checkClosed() throws IOException
     {
-        if (scratchFile == null)
+        if (pageHandler == null)
         {
-            throw new IOException("Scratch file buffer already closed");
+            throw new IOException("Buffer already closed");
         }
-        scratchFile.checkClosed();
+        pageHandler.checkClosed();
     }
 
     /**
+     * Adds a new page and positions all pointers to start of new page.
+     * 
+     * @throws IOException if requesting a new page fails
+     */
+    private void addPage() throws IOException
+    {
+        if (pageCount+1 >= pageIndexes.length)
+        {
+            int newSize = pageIndexes.length*2;
+            // check overflow
+            if (newSize<pageIndexes.length)
+            {
+                if (pageIndexes.length == Integer.MAX_VALUE)
+                {
+                    throw new IOException("Maximum buffer size reached.");
+                }
+                newSize = Integer.MAX_VALUE;
+            }
+            int[] newPageIndexes = new int[newSize];
+            System.arraycopy(pageIndexes, 0, newPageIndexes, 0, pageCount);
+            pageIndexes = newPageIndexes;
+        }
+        
+        int newPageIdx = pageHandler.getNewPage();
+        
+        pageIndexes[pageCount] = newPageIdx;
+        currentPagePositionInPageIndexes = pageCount;
+        currentPageOffset = ((long)pageCount) * pageSize; 
+        pageCount++;
+        currentPage = new byte[pageSize];
+        positionInPage = 0;
+    }
+    
+    /**
      * {@inheritDoc}
      */
     @Override
     public long length() throws IOException
     {
-        checkClosed();
-        return length;
+        return size;
     }
 
     /**
-     * Allocates a new page, and links the current and the new page.
+     * Ensures the current page has at least one byte left
+     * ({@link #positionInPage} &lt; {@link #pageSize}).
+     * 
+     * <p>If this is not the case we go to next page (writing
+     * current one if changed). If current buffer has no more
+     * pages we add a new one.</p>
+     * 
+     * @param addNewPageIfNeeded if <code>true</code> it is allowed to add a new page in case
+     *                           we are currently at end of last buffer page
      * 
-     * @throws IOException If there was an error writing to the file.
+     * @return <code>true</code> if we were successful positioning pointer before end of page;
+     *         we might return <code>false</code> if it is not allowed to add another page
+     *         and current pointer points at end of last page
+     * 
+     * @throws IOException if writing the current page, reading the next page or adding a new page fails
      */
-    private void growToNewPage() throws IOException
+    private final boolean ensureAvailableBytesInPage(boolean addNewPageIfNeeded) throws IOException
     {
-        long newPage = createNewPage();
-
-        /*
-         * We should only grow to a new page when previous pages are full. If not, links won't work.
-         */
-        if (positionInPage != PAGE_SIZE - 8)
+        if (positionInPage >= pageSize)
         {
-            throw new IOException("Corruption detected in scratch file");
+            // page full
+            if (currentPageContentChanged)
+            {
+                // write page
+                pageHandler.writePage(pageIndexes[currentPagePositionInPageIndexes], currentPage);
+                currentPageContentChanged = false;
+            }
+            // get new page
+            if (currentPagePositionInPageIndexes+1 < pageCount)
+            {
+                // we already have more pages assigned (there was a backward seek before)
+                currentPage = pageHandler.readPage(pageIndexes[++currentPagePositionInPageIndexes]);
+                currentPageOffset = ((long)currentPagePositionInPageIndexes) * pageSize;
+                positionInPage = 0;
+            }
+            else if (addNewPageIfNeeded)
+            {
+                // need new page
+                addPage();
+            }
+            else
+            {
+                // we are at last page and are not allowed to add new page
+                return false;
+            }
         }
-        seekToCurrentPositionInFile();
-        raFile.writeLong(newPage);
-        
-        long previousPage = currentPage;
-        currentPage = newPage;
-        positionInPage = 0;
-        /*
-         * write back link to previous page.
-         */
-        seekToCurrentPositionInFile();
-        raFile.writeLong(previousPage);
-        positionInPage = 8;
+        return true;
     }
-
+    
     /**
      * {@inheritDoc}
      */
@@ -158,19 +193,15 @@ class ScratchFileBuffer implements Rando
     public void write(int b) throws IOException
     {
         checkClosed();
-        seekToCurrentPositionInFile();
-        if (positionInPage == PAGE_SIZE - 8)
-        {
-            growToNewPage();
-        }
-
-        raFile.write(b);
-
-        positionInPage++;
-        positionInBuffer++;
-        if (positionInBuffer > length)
+        
+        ensureAvailableBytesInPage(true);
+        
+        currentPage[positionInPage++] = (byte) b;
+        currentPageContentChanged = true;
+        
+        if(currentPageOffset + positionInPage > size)
         {
-            length = positionInBuffer;
+            size = currentPageOffset + positionInPage;
         }
     }
 
@@ -191,29 +222,27 @@ class ScratchFileBuffer implements Rando
     {
         checkClosed();
 
-        seekToCurrentPositionInFile();
-
-        while (len > 0)
+        int remain = len;
+        int bOff   = off;
+        
+        while (remain > 0)
         {
-            if (positionInPage == PAGE_SIZE - 8)
-            {
-                growToNewPage();
-            }
-
-            int availableSpaceInCurrentPage = (PAGE_SIZE - 8) - positionInPage;
+            ensureAvailableBytesInPage(true);
 
-            int bytesToWrite = Math.min(len, availableSpaceInCurrentPage);
-
-            raFile.write(b, off, bytesToWrite);
-
-            off += bytesToWrite;
-            len -= bytesToWrite;
+            int bytesToWrite = Math.min(remain, pageSize-positionInPage);
+            
+            System.arraycopy(b, bOff, currentPage, positionInPage, bytesToWrite);
+            
             positionInPage += bytesToWrite;
-            positionInBuffer += bytesToWrite;
-            if (positionInBuffer > length)
-            {
-                length = positionInBuffer;
-            }
+            currentPageContentChanged = true;
+            
+            bOff   += bytesToWrite;
+            remain -= bytesToWrite;
+        }
+        
+        if(currentPageOffset + positionInPage > size)
+        {
+            size = currentPageOffset + positionInPage;
         }
     }
 
@@ -224,10 +253,21 @@ class ScratchFileBuffer implements Rando
     public final void clear() throws IOException
     {
         checkClosed();
-        length = 0;
-        currentPage = firstPage;
-        positionInBuffer = 0;
-        positionInPage = 8;
+        
+        // keep only the first page, discard all other pages
+        pageHandler.markPagesAsFree(pageIndexes, 1, pageCount - 1);
+        pageCount = 1;
+        
+        // change to first page if we are not already there
+        if (currentPagePositionInPageIndexes > 0)
+        {
+            currentPage = pageHandler.readPage(pageIndexes[0]);
+            currentPagePositionInPageIndexes = 0;
+            currentPageOffset = 0;
+        }
+        positionInPage = 0;
+        size = 0;
+        currentPageContentChanged = false;
     }
 
     /**
@@ -237,7 +277,7 @@ class ScratchFileBuffer implements Rando
     public long getPosition() throws IOException
     {
         checkClosed();
-        return positionInBuffer;
+        return currentPageOffset + positionInPage;
     }
 
     /**
@@ -249,57 +289,40 @@ class ScratchFileBuffer implements Rando
         checkClosed();
 
         /*
-         * Can't seek past end of file. If you want to change implementation, seek to end of file, write zero bytes for
-         * remaining seek distance.
+         * for now we won't allow to seek past end of buffer; this can be changed by adding new pages as needed
          */
-        if (seekToPosition > length)
+        if (seekToPosition > size)
         {
             throw new EOFException();
         }
-
-        if (seekToPosition < positionInBuffer)
+        
+        if (seekToPosition < 0)
         {
-            if (currentPage != firstPage && seekToPosition < (positionInBuffer / 2))
-            {
-                /*
-                 * If we are seeking backwards, and the seek to position is closer to the beginning of the buffer than
-                 * our current position, just go to the start of the buffer and seek forward from there. Recurse exactly
-                 * once.
-                 */
-                currentPage = firstPage;
-                positionInPage = 8;
-                positionInBuffer = 0;
-                seek(seekToPosition);
-            }
-            else
-            {
-                while (positionInBuffer - seekToPosition > positionInPage - 8)
-                {
-                    raFile.seek(currentPage * PAGE_SIZE);
-                    long previousPage = raFile.readLong();
-                    currentPage = previousPage;
-                    positionInBuffer -= (positionInPage - 8);
-                    positionInPage = PAGE_SIZE - 8;
-                }
-
-                positionInPage -= (positionInBuffer - seekToPosition);
-                positionInBuffer = seekToPosition;
-            }
+            throw new IOException("Negative seek offset: " + seekToPosition);
+        }
+        
+        if ((seekToPosition >= currentPageOffset) && (seekToPosition <= currentPageOffset + pageSize))
+        {
+            // within same page
+            positionInPage = (int) (seekToPosition - currentPageOffset);
         }
         else
         {
-            while (seekToPosition - positionInBuffer > (PAGE_SIZE - 8) - positionInPage)
+            // have to go to another page
+            
+            // check if current page needs to be written to file
+            if (currentPageContentChanged)
             {
-                // seek to 8 bytes from end of current page, to read next page pointer.
-                raFile.seek(((currentPage + 1) * PAGE_SIZE) - 8);
-                long nextPage = raFile.readLong();
-                positionInBuffer += (PAGE_SIZE - 8) - positionInPage;
-                currentPage = nextPage;
-                positionInPage = 8;
+                pageHandler.writePage(pageIndexes[currentPagePositionInPageIndexes], currentPage);
+                currentPageContentChanged = false;
             }
-
-            positionInPage += seekToPosition - positionInBuffer;
-            positionInBuffer = seekToPosition;
+            
+            int newPagePosition = (int) (seekToPosition / pageSize);
+            
+            currentPage = pageHandler.readPage(pageIndexes[newPagePosition]);
+            currentPagePositionInPageIndexes = newPagePosition;
+            currentPageOffset = ((long)currentPagePositionInPageIndexes) * pageSize;
+            positionInPage = (int) (seekToPosition - currentPageOffset);
         }
     }
 
@@ -309,7 +332,7 @@ class ScratchFileBuffer implements Rando
     @Override
     public boolean isClosed()
     {
-        return scratchFile == null;
+        return pageHandler == null; // close() sets pageHandler to null, so a null handler means this buffer is closed
     }
 
     /**
@@ -332,7 +355,7 @@ class ScratchFileBuffer implements Rando
     @Override
     public void rewind(int bytes) throws IOException
     {
-        seek(positionInBuffer - bytes);
+        seek(currentPageOffset + positionInPage - bytes); // current absolute position (page start offset + offset inside the page) moved back by 'bytes'
     }
 
     /**
@@ -364,7 +387,7 @@ class ScratchFileBuffer implements Rando
     public boolean isEOF() throws IOException
     {
         checkClosed();
-        return positionInBuffer >= length;
+        return currentPageOffset + positionInPage >= size; // EOF once the absolute position (page start offset + offset inside the page) has reached size
     }
 
     /**
@@ -374,7 +397,7 @@ class ScratchFileBuffer implements Rando
     public int available() throws IOException
     {
         checkClosed();
-        return (int) Math.min(length - positionInBuffer, Integer.MAX_VALUE);
+        return (int) Math.min(size - (currentPageOffset + positionInPage), Integer.MAX_VALUE); // bytes left between the absolute position and size, capped at Integer.MAX_VALUE before the int cast
     }
 
     /**
@@ -385,29 +408,18 @@ class ScratchFileBuffer implements Rando
     {
         checkClosed();
 
-        if (positionInBuffer >= length)
+        if (currentPageOffset + positionInPage >= size)
         {
             return -1;
         }
 
-        seekToCurrentPositionInFile();
-
-        if (positionInPage == PAGE_SIZE - 8)
-        {
-            currentPage = raFile.readLong();
-            positionInPage = 8;
-            seekToCurrentPositionInFile();
-        }
-
-        int retv = raFile.read();
-
-        if (retv >= 0)
+        if (! ensureAvailableBytesInPage(false))
         {
-            positionInPage++;
-            positionInBuffer++;
+            // should not happen, we checked it before
+            throw new IOException("Unexpectedly no bytes available for read in buffer.");
         }
-
-        return retv;
+        
+        return currentPage[positionInPage++] & 0xff;
     }
 
     /**
@@ -427,40 +439,32 @@ class ScratchFileBuffer implements Rando
     {
         checkClosed();
 
-        if (positionInBuffer >= length)
+        if (currentPageOffset + positionInPage >= size)
         {
             return -1;
         }
 
-        len = (int) Math.min(len, length - positionInBuffer);
-
-        seekToCurrentPositionInFile();
+        int remain = (int) Math.min(len, size - (currentPageOffset + positionInPage));
 
         int totalBytesRead = 0;
+        int bOff           = off;
 
-        while (len > 0)
+        while (remain > 0)
         {
-            if (positionInPage == PAGE_SIZE - 8)
+            if (! ensureAvailableBytesInPage(false))
             {
-                currentPage = raFile.readLong();
-                positionInPage = 8;
-                seekToCurrentPositionInFile();
+                // should not happen, we checked it before
+                throw new IOException("Unexpectedly no bytes available for read in buffer.");
             }
+            
+            int readBytes = Math.min(remain, pageSize - positionInPage);
 
-            int availableInThisPage = (PAGE_SIZE - 8) - positionInPage;
+            System.arraycopy(currentPage, positionInPage, b, bOff, readBytes);
 
-            int rdbytes = raFile.read(b, off, Math.min(len, availableInThisPage));
-
-            if (rdbytes < 0)
-            {
-                throw new IOException("EOF reached before end of scratch file stream");
-            }
-
-            positionInPage += rdbytes;
-            totalBytesRead += rdbytes;
-            positionInBuffer += rdbytes;
-            off += rdbytes;
-            len -= rdbytes;
+            positionInPage += readBytes;
+            totalBytesRead += readBytes;
+            bOff += readBytes;
+            remain -= readBytes;
         }
 
         return totalBytesRead;
@@ -472,43 +476,17 @@ class ScratchFileBuffer implements Rando
     @Override
     public void close() throws IOException
     {
-        scratchFile = null;
-        raFile = null;
-    }
-
-    /**
-     * Positions the underlying {@link java.io.RandomAccessFile} to the correct position for use by this buffer.
-     * 
-     * @throws IOException If there was a problem seeking in the {@link java.io.RandomAccessFile}.
-     */
-    private void seekToCurrentPositionInFile() throws IOException
-    {
-        long positionInFile = (currentPage * PAGE_SIZE) + positionInPage;
-        if (raFile.getFilePointer() != positionInFile)
-        {
-            raFile.seek(positionInFile);
-        }
-    }
+        if (pageHandler != null) { // a null handler means close() already ran; repeated calls then do nothing
 
-    /**
-     * Allocates a new page in the temporary file by growing the file, returning the page index of the new page.
-     * 
-     * @return The index of the new page.
-     * @throws IOException If there was an error growing the file.
-     */
-    private long createNewPage() throws IOException
-    {
-        long fileLen = raFile.length();
-
-        fileLen += PAGE_SIZE;
-
-        if (fileLen % PAGE_SIZE > 0)
-        {
-            fileLen += PAGE_SIZE - (fileLen % PAGE_SIZE);
+            pageHandler.markPagesAsFree(pageIndexes, 0, pageCount); // NOTE(review): presumably releases this buffer's first pageCount pages for reuse - confirm in the page handler
+            pageHandler = null; // isClosed() tests this field
+            
+            pageIndexes = null; // drop references to the page index table and the cached page data
+            currentPage = null;
+            currentPageOffset = 0; // reset position bookkeeping
+            currentPagePositionInPageIndexes = -1;
+            positionInPage = 0;
+            size = 0;
         }
-
-        raFile.setLength(fileLen);
-
-        return (fileLen / PAGE_SIZE) - 1;
     }
 }