You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by tb...@apache.org on 2015/07/16 11:11:24 UTC
svn commit: r1691342 - in
/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io: ScratchFile.java
ScratchFileBuffer.java
Author: tboehme
Date: Thu Jul 16 09:11:24 2015
New Revision: 1691342
URL: http://svn.apache.org/r1691342
Log:
PDFBOX-2882: replace scratch file handling with optimized memory+file paging implementation
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java?rev=1691342&r1=1691341&r2=1691342&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java Thu Jul 16 09:11:24 2015
@@ -19,79 +19,313 @@ package org.apache.pdfbox.io;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
+import java.util.BitSet;
+import java.util.concurrent.atomic.AtomicBoolean;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
- * A temporary file which can hold multiple buffers of temporary data. A new temporary file is created for each new
- * {@link ScratchFile} instance, and is deleted when the {@link ScratchFile} is closed.
- * <p>
- * Multiple buffers can be creating by calling the {@link #createBuffer()} method.
- * <p>
- * The file is split into pages, each page containing a pointer to the previous and next pages. This allows for
- * multiple, separate streams in the same file.
- *
- * @author Jesse Long
+ * Implements a memory page handling mechanism as base for creating (multiple)
+ * {@link RandomAccess} buffers each having its set of pages (implemented by
+ * {@link ScratchFileBuffer}). A buffer is created calling {@link #createBuffer()}.
+ *
+ * <p>Pages can be stored in main memory or in a temporary file. A mixed mode
+ * is supported storing a certain amount of pages in memory and only the
+ * additional ones in temporary file (defined by maximum main memory to
+ * be used).</p>
+ *
+ * <p>Pages can be marked as 'free' in order to re-use them. For in-memory pages
+ * this will release the used memory while for pages in temporary file this
+ * simply marks the area as free to re-use.</p>
+ *
+ * <p>If a temporary file was created (done with the first page to be stored
+ * in temporary file) it is deleted when {@link ScratchFile#close()} is called.</p>
+ *
+ * <p>Using this class for {@link RandomAccess} buffers allows for a direct control
+ * on the maximum memory usage and allows processing large files for which we
+ * otherwise would get an {@link OutOfMemoryError} in case of using {@link RandomAccessBuffer}.</p>
+ *
+ * <p>This base class for providing pages is thread safe (the buffer implementations are not).</p>
*/
public class ScratchFile implements Closeable
{
private static final Log LOG = LogFactory.getLog(ScratchFile.class);
- private File file;
- private java.io.RandomAccessFile raf;
+ /** number of pages by which we enlarge the scratch file (reduce I/O-operations) */
+ private static final int ENLARGE_PAGE_COUNT = 16;
+ private static final int PAGE_SIZE = 4096;
+
+ private final File scratchFileDirectory;
+ private volatile File file;
+ private volatile java.io.RandomAccessFile raf;
+ private volatile int pageCount = 0;
+ private final BitSet freePages = new BitSet();
+ /** number of free pages; only to be accessed under synchronization on {@link #freePages} */
+ private int freePageCount = 0;
+ private final byte[][] inMemoryPages;
+ private final int inMemoryMaxPageCount;
+
+ private final AtomicBoolean isClosed = new AtomicBoolean( false );
+
/**
- * Creates a new scratch file. If a {code scratchFileDirectory} is supplied, then the scratch file is created in
- * that directory.
+ * Initializes page handler. If a <code>scratchFileDirectory</code> is supplied,
+ * then the scratch file will be created in that directory.
+ *
+ * <p>All pages will be stored in the scratch file.</p>
*
- * @param scratchFileDirectory The directory in which to create the scratch file, or {code null} if the scratch
- * should be created in the default temporary directory.
- * @throws IOException If there was a problem creating a temporary file.
+ * @param scratchFileDirectory The directory in which to create the scratch file
+ * or <code>null</code> to create it in the default temporary directory.
+ *
+ * @throws IOException If a scratch file directory was given but does not exist.
*/
public ScratchFile(File scratchFileDirectory) throws IOException
{
- file = File.createTempFile("PDFBox", ".tmp", scratchFileDirectory);
- try
+ this(scratchFileDirectory, 0);
+ }
+
+ /**
+ * Initializes page handler. If a <code>scratchFileDirectory</code> is supplied,
+ * then the scratch file will be created in that directory.
+ *
+ * <p>Depending on the size of allowed memory usage a number of pages (maxInMemoryByteSize/{@link #PAGE_SIZE})
+ * will be stored in-memory and only additional pages will be written to/read from scratch file.</p>
+ *
+ * @param scratchFileDirectory The directory in which to create the scratch file
+ * or <code>null</code> to create it in the default temporary directory.
+ * @param maxInMemoryByteSize maximum in-memory bytes to use for pages which don't have to be
+ * handled by scratch file
+ *
+ * @throws IOException If a scratch file directory was given but does not exist.
+ */
+ public ScratchFile(File scratchFileDirectory, long maxInMemoryByteSize) throws IOException
+ {
+ this.scratchFileDirectory = scratchFileDirectory;
+
+ if ((this.scratchFileDirectory != null) && (!this.scratchFileDirectory.isDirectory()))
{
- raf = new java.io.RandomAccessFile(file, "rw");
+ throw new IOException("Scratch file directory does not exist: " + this.scratchFileDirectory);
}
- catch (IOException e)
+
+ inMemoryMaxPageCount = (int) Math.min(Integer.MAX_VALUE, Math.max(0, maxInMemoryByteSize) / PAGE_SIZE);
+ inMemoryPages = new byte[inMemoryMaxPageCount][];
+
+ freePages.set(0, inMemoryMaxPageCount);
+ freePageCount = inMemoryMaxPageCount;
+ }
+
+ /**
+ * Will create scratch file if it does not exist already.
+ *
+ * @throws IOException if {@link #close()} was called or creating scratch file failed
+ */
+ private final void ensureFileExists() throws IOException {
+
+ if ( raf != null ) {
+ return;
+ }
+
+ synchronized (isClosed)
{
- if (!file.delete())
+ checkClosed();
+
+ file = File.createTempFile("PDFBox", ".tmp", scratchFileDirectory);
+ try
{
- LOG.warn("Error deleting scratch file: " + file.getAbsolutePath());
+ raf = new java.io.RandomAccessFile(file, "rw");
+ }
+ catch (IOException e)
+ {
+ if (!file.delete())
+ {
+ LOG.warn("Error deleting scratch file: " + file.getAbsolutePath());
+ }
+ throw e;
}
- throw e;
}
}
-
+
/**
- * Returns the underlying {@link java.io.RandomAccessFile}.
+ * Returns a new free page, either from free page pool
+ * or by enlarging scratch file (may be created).
*
- * @return The underlying {@link java.io.RandomAccessFile}.
+ * @return index of new page
*/
- java.io.RandomAccessFile getRandomAccessFile()
+ int getNewPage() throws IOException
{
- return raf;
+ synchronized (freePages)
+ {
+
+ if (freePageCount <= 0)
+ {
+ enlarge();
+ }
+
+ int idx = freePages.nextSetBit( 0 );
+ if (idx < 0)
+ {
+ throw new IOException("Expected free page but did not found one.");
+ }
+ freePages.clear(idx);
+ freePageCount--;
+
+ if (idx >= pageCount)
+ {
+ pageCount = idx + 1;
+ }
+
+ return idx;
+ }
}
/**
- * Checks if this scratch file has already been closed. If the file has been closed, an {@link IOException} is
- * thrown.
+ * Enlarges the scratch file by a number of pages defined by
+ * {@link #ENLARGE_PAGE_COUNT}. This will create the scratch
+ * file via {@link #ensureFileExists()} if it does not exist already.
+ *
+ * <p>Only to be called under synchronization on {@link #freePages}.</p>
+ */
+ private final void enlarge() throws IOException
+ {
+ ensureFileExists();
+
+ // handle corner case when close is called by another thread
+ java.io.RandomAccessFile localRAF = raf;
+
+ checkClosed();
+
+ synchronized ( localRAF )
+ {
+ long fileLen = localRAF.length();
+ long expectedFileLen = ((long)pageCount - inMemoryMaxPageCount) * PAGE_SIZE;
+
+ if (expectedFileLen != fileLen)
+ {
+ throw new IOException("Expected scratch file size of " + expectedFileLen + " but found " + fileLen);
+ }
+
+ fileLen += ENLARGE_PAGE_COUNT * PAGE_SIZE;
+
+ localRAF.setLength(fileLen);
+
+ freePages.set(pageCount, pageCount + ENLARGE_PAGE_COUNT);
+ freePageCount += ENLARGE_PAGE_COUNT;
+ }
+ }
+
+ /**
+ * Returns byte size of a page.
+ *
+ * @return byte size of a page
+ */
+ int getPageSize()
+ {
+ return PAGE_SIZE;
+ }
+
+ /**
+ * Reads the page with specified index.
*
- * @throws IOException If the file has already been closed.
+ * @param pageIdx index of page to read
+ *
+ * @return page data: the stored array for an in-memory page, otherwise a new array of size {@link #PAGE_SIZE} read from the scratch file
+ *
+ * @throws IOException
+ */
+ byte[] readPage(int pageIdx) throws IOException
+ {
+ checkClosed();
+
+ if ((pageIdx < 0) || (pageIdx >= pageCount))
+ {
+ throw new IOException("Page index out of range: " + pageIdx + ". Max value: " + (pageCount - 1) );
+ }
+
+ if (pageIdx < inMemoryMaxPageCount)
+ {
+ return inMemoryPages[pageIdx];
+ }
+
+ // handle corner case when close is called by another thread
+ java.io.RandomAccessFile localRAF = raf;
+
+ checkClosed();
+
+ synchronized ( localRAF )
+ {
+ byte[] page = new byte[PAGE_SIZE];
+ localRAF.seek(((long)pageIdx - inMemoryMaxPageCount) * PAGE_SIZE);
+ localRAF.readFully(page);
+
+ return page;
+ }
+ }
+
+ /**
+ * Writes updated page. Page is either kept in-memory if pageIdx < {@link #inMemoryMaxPageCount}
+ * or is written to scratch file.
+ *
+ * <p>Provided page byte array must not be re-used for other pages since we
+ * store it as is in case of in-memory handling.</p>
+ *
+ * @param pageIdx index of page to write
+ * @param page page to write (length has to be {@value #PAGE_SIZE})
+ *
+ * @throws IOException in case page index is out of range or page has wrong length
+ * or writing to file failed
+ */
+ void writePage(int pageIdx, byte[] page) throws IOException
+ {
+ checkClosed();
+
+ if ((pageIdx<0) || (pageIdx>=pageCount))
+ {
+ throw new IOException("Page index out of range: " + pageIdx + ". Max value: " + (pageCount - 1) );
+ }
+
+ if (page.length != PAGE_SIZE)
+ {
+ throw new IOException("Wrong page size to write: " + page.length + ". Expected: " + PAGE_SIZE );
+ }
+
+ if (pageIdx < inMemoryMaxPageCount)
+ {
+ inMemoryPages[pageIdx] = page;
+ }
+ else
+ {
+ // handle corner case when close is called by another thread
+ java.io.RandomAccessFile localRAF = raf;
+
+ checkClosed();
+
+ synchronized ( localRAF )
+ {
+ localRAF.seek(((long)pageIdx - inMemoryMaxPageCount) * PAGE_SIZE);
+ localRAF.write(page);
+ }
+ }
+ }
+
+ /**
+ * Checks if this page handler has already been closed. If so,
+ * an {@link IOException} is thrown.
+ *
+ * @throws IOException If {@link #close()} has already been called.
*/
void checkClosed() throws IOException
{
- if (raf == null)
+ if (isClosed.get())
{
throw new IOException("Scratch file already closed");
}
}
/**
- * Creates a new buffer in the scratch file.
+ * Creates a new buffer using this page handler.
*
* @return A new buffer.
+ *
* @throws IOException If an error occurred.
*/
public RandomAccess createBuffer() throws IOException
@@ -100,29 +334,75 @@ public class ScratchFile implements Clos
}
/**
- * Closes and deletes the temporary file. No further interaction with the scratch file or associated buffers can
- * happen after this method is called.
+ * Allows a buffer which is cleared/closed to release its pages to be re-used.
+ *
+ * @param pageIndexes array containing the indexes of the pages to release
+ * @param count number of page indexes contained in provided array
+ */
+ void markPagesAsFree(int[] pageIndexes, int off, int count) {
+ synchronized (freePages)
+ {
+ for (int aIdx = off; aIdx < count; aIdx++)
+ {
+ int pageIdx = pageIndexes[aIdx];
+ if ((pageIdx>=0) && (pageIdx<pageCount) && (!freePages.get(pageIdx)))
+ {
+ freePages.set(pageIdx);
+ freePageCount++;
+ if (pageIdx < inMemoryMaxPageCount)
+ {
+ inMemoryPages[pageIdx] = null;
+ }
+ }
+
+ }
+ }
+ }
+
+ /**
+ * Closes and deletes the temporary file. No further interaction with
+ * the scratch file or associated buffers can happen after this method is called.
+ * It also releases in-memory pages.
*
* @throws IOException If there was a problem closing or deleting the temporary file.
*/
@Override
public void close() throws IOException
{
- if (raf != null)
+ if (isClosed.compareAndSet(false, true))
{
- raf.close();
- raf = null;
- }
-
- if (file != null)
- {
- if (file.delete())
+ synchronized (isClosed)
{
- file = null;
- }
- else
- {
- throw new IOException("Error deleting scratch file: " + file.getAbsolutePath());
+ java.io.RandomAccessFile localRAF = raf;
+
+ if (localRAF != null)
+ {
+ raf = null;
+
+ synchronized ( localRAF )
+ {
+ localRAF.close();
+ }
+ }
+
+ if (file != null)
+ {
+ if (file.delete())
+ {
+ file = null;
+ }
+ else
+ {
+ throw new IOException("Error deleting scratch file: " + file.getAbsolutePath());
+ }
+ }
+
+ freePages.clear();
+
+ for (int pIdx = 0; pIdx < inMemoryMaxPageCount; pIdx++)
+ {
+ inMemoryPages[pIdx] = null;
+ }
}
}
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java?rev=1691342&r1=1691341&r2=1691342&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java Thu Jul 16 09:11:24 2015
@@ -18,139 +18,174 @@ package org.apache.pdfbox.io;
import java.io.EOFException;
import java.io.IOException;
-import java.io.RandomAccessFile;
/**
- * A {@link RandomAccess} implemented as a doubly linked list over multiple pages in a {@link java.io.RandomAccessFile}.
- * <p>
- * Each page is {@link #PAGE_SIZE} bytes, with the first 8 bytes being a pointer to page index (
- * {@code pageOffset / PAGE_SIZE}) of the previous page in the buffer, and the last 8 bytes being a pointer to the page
- * index of the next page in the buffer.
- *
- * @author Jesse Long
+ * Implementation of {@link RandomAccess} as sequence of multiple fixed size pages handled
+ * by {@link ScratchFile}.
*/
class ScratchFileBuffer implements RandomAccess
{
+ private final int pageSize;
/**
- * The size of each page.
+ * The underlying page handler.
*/
- private static final int PAGE_SIZE = 4096;
+ private ScratchFile pageHandler;
/**
- * The underlying scratch file.
- */
- private ScratchFile scratchFile;
- /**
- * The random access file of the scratch file.
+ * The number of bytes of content in this buffer.
*/
- private RandomAccessFile raFile;
+ private long size = 0;
/**
- * The first page in this buffer.
+ * Index of current page in {@link #pageIndexes} (the nth page within this buffer).
*/
- private final long firstPage;
+ private int currentPagePositionInPageIndexes;
/**
- * The number of bytes of content in this buffer.
+ * The offset of the current page within this buffer.
*/
- private long length = 0;
+ private long currentPageOffset;
/**
- * The index of the page in which the current position of this buffer is in.
+ * The current page data.
*/
- private long currentPage;
+ private byte[] currentPage;
/**
- * The current position of the buffer as an offset in the current page.
+ * The current position (for next read/write) of the buffer as an offset in the current page.
*/
private int positionInPage;
- /**
- * The current position in the space of the whole buffer.
+ /**
+ * <code>true</code> if current page was changed by a write method
*/
- private long positionInBuffer;
+ private boolean currentPageContentChanged = false;
+ /** ordered list of this buffer's pages; each entry is the index under which the page is known to the page handler ({@link ScratchFile}) */
+ private int[] pageIndexes = new int[16];
+ /** number of pages held by this buffer */
+ private int pageCount = 0;
+
/**
- * Creates a new buffer in the provided {@link ScratchFile}.
+ * Creates a new buffer using pages handled by provided {@link ScratchFile}.
+ *
+ * @param pageHandler The {@link ScratchFile} managing the pages to be used by this buffer.
*
- * @param scratchFile The {@link ScratchFile} in which to create the new buffer.
- * @throws IOException If there was an error writing to the file.
+ * @throws IOException If getting first page failed.
*/
- ScratchFileBuffer(ScratchFile scratchFile) throws IOException
+ ScratchFileBuffer(ScratchFile pageHandler) throws IOException
{
- scratchFile.checkClosed();
-
- this.scratchFile = scratchFile;
-
- raFile = scratchFile.getRandomAccessFile();
+ pageHandler.checkClosed();
- /*
- * We must allocate a new first page for each new buffer, in case multiple buffers are created at the same time,
- * and use the same space.
- */
- firstPage = createNewPage();
-
- /*
- * Mark the first page back pointer to -1 to indicate start of buffer.
- */
- raFile.seek(firstPage * PAGE_SIZE);
- raFile.writeLong(-1L);
-
- /*
- * Reset variables to beginning of empty buffer.
- */
- clear();
+ this.pageHandler = pageHandler;
+
+ pageSize = this.pageHandler.getPageSize();
+
+ addPage();
}
/**
- * Checks if this buffer, or the underlying {@link ScratchFile} have been closed, throwing {@link IOException} if
- * so.
+ * Checks if this buffer, or the underlying {@link ScratchFile} have been closed,
+ * throwing {@link IOException} if so.
*
* @throws IOException If either this buffer, or the underlying {@link ScratchFile} have been closed.
*/
private void checkClosed() throws IOException
{
- if (scratchFile == null)
+ if (pageHandler == null)
{
- throw new IOException("Scratch file buffer already closed");
+ throw new IOException("Buffer already closed");
}
- scratchFile.checkClosed();
+ pageHandler.checkClosed();
}
/**
+ * Adds a new page and positions all pointers to start of new page.
+ *
+ * @throws IOException if requesting a new page fails
+ */
+ private void addPage() throws IOException
+ {
+ if (pageCount+1 >= pageIndexes.length)
+ {
+ int newSize = pageIndexes.length*2;
+ // check overflow
+ if (newSize<pageIndexes.length)
+ {
+ if (pageIndexes.length == Integer.MAX_VALUE)
+ {
+ throw new IOException("Maximum buffer size reached.");
+ }
+ newSize = Integer.MAX_VALUE;
+ }
+ int[] newPageIndexes = new int[newSize];
+ System.arraycopy(pageIndexes, 0, newPageIndexes, 0, pageCount);
+ pageIndexes = newPageIndexes;
+ }
+
+ int newPageIdx = pageHandler.getNewPage();
+
+ pageIndexes[pageCount] = newPageIdx;
+ currentPagePositionInPageIndexes = pageCount;
+ currentPageOffset = ((long)pageCount) * pageSize;
+ pageCount++;
+ currentPage = new byte[pageSize];
+ positionInPage = 0;
+ }
+
+ /**
* {@inheritDoc}
*/
@Override
public long length() throws IOException
{
- checkClosed();
- return length;
+ return size;
}
/**
- * Allocates a new page, and links the current and the new page.
+ * Ensures the current page has at least one byte left
+ * ({@link #positionInPage} is less than {@link #pageSize}).
+ *
+ * <p>If this is not the case we go to next page (writing
+ * current one if changed). If current buffer has no more
+ * pages we add a new one.</p>
+ *
+ * @param addNewPageIfNeeded if <code>true</code> it is allowed to add a new page in case
+ * we are currently at end of last buffer page
*
- * @throws IOException If there was an error writing to the file.
+ * @return <code>true</code> if we were successful positioning pointer before end of page;
+ * we might return <code>false</code> if it is not allowed to add another page
+ * and current pointer points at end of last page
+ *
+ * @throws IOException
*/
- private void growToNewPage() throws IOException
+ private final boolean ensureAvailableBytesInPage(boolean addNewPageIfNeeded) throws IOException
{
- long newPage = createNewPage();
-
- /*
- * We should only grow to a new page when previous pages are full. If not, links won't work.
- */
- if (positionInPage != PAGE_SIZE - 8)
+ if (positionInPage >= pageSize)
{
- throw new IOException("Corruption detected in scratch file");
+ // page full
+ if (currentPageContentChanged)
+ {
+ // write page
+ pageHandler.writePage(pageIndexes[currentPagePositionInPageIndexes], currentPage);
+ currentPageContentChanged = false;
+ }
+ // get new page
+ if (currentPagePositionInPageIndexes+1 < pageCount)
+ {
+ // we already have more pages assigned (there was a backward seek before)
+ currentPage = pageHandler.readPage(pageIndexes[++currentPagePositionInPageIndexes]);
+ currentPageOffset = ((long)currentPagePositionInPageIndexes) * pageSize;
+ positionInPage = 0;
+ }
+ else if (addNewPageIfNeeded)
+ {
+ // need new page
+ addPage();
+ }
+ else
+ {
+ // we are at last page and are not allowed to add new page
+ return false;
+ }
}
- seekToCurrentPositionInFile();
- raFile.writeLong(newPage);
-
- long previousPage = currentPage;
- currentPage = newPage;
- positionInPage = 0;
- /*
- * write back link to previous page.
- */
- seekToCurrentPositionInFile();
- raFile.writeLong(previousPage);
- positionInPage = 8;
+ return true;
}
-
+
/**
* {@inheritDoc}
*/
@@ -158,19 +193,15 @@ class ScratchFileBuffer implements Rando
public void write(int b) throws IOException
{
checkClosed();
- seekToCurrentPositionInFile();
- if (positionInPage == PAGE_SIZE - 8)
- {
- growToNewPage();
- }
-
- raFile.write(b);
-
- positionInPage++;
- positionInBuffer++;
- if (positionInBuffer > length)
+
+ ensureAvailableBytesInPage(true);
+
+ currentPage[positionInPage++] = (byte) b;
+ currentPageContentChanged = true;
+
+ if(currentPageOffset + positionInPage > size)
{
- length = positionInBuffer;
+ size = currentPageOffset + positionInPage;
}
}
@@ -191,29 +222,27 @@ class ScratchFileBuffer implements Rando
{
checkClosed();
- seekToCurrentPositionInFile();
-
- while (len > 0)
+ int remain = len;
+ int bOff = off;
+
+ while (remain > 0)
{
- if (positionInPage == PAGE_SIZE - 8)
- {
- growToNewPage();
- }
-
- int availableSpaceInCurrentPage = (PAGE_SIZE - 8) - positionInPage;
+ ensureAvailableBytesInPage(true);
- int bytesToWrite = Math.min(len, availableSpaceInCurrentPage);
-
- raFile.write(b, off, bytesToWrite);
-
- off += bytesToWrite;
- len -= bytesToWrite;
+ int bytesToWrite = Math.min(remain, pageSize-positionInPage);
+
+ System.arraycopy(b, bOff, currentPage, positionInPage, bytesToWrite);
+
positionInPage += bytesToWrite;
- positionInBuffer += bytesToWrite;
- if (positionInBuffer > length)
- {
- length = positionInBuffer;
- }
+ currentPageContentChanged = true;
+
+ bOff += bytesToWrite;
+ remain -= bytesToWrite;
+ }
+
+ if(currentPageOffset + positionInPage > size)
+ {
+ size = currentPageOffset + positionInPage;
}
}
@@ -224,10 +253,21 @@ class ScratchFileBuffer implements Rando
public final void clear() throws IOException
{
checkClosed();
- length = 0;
- currentPage = firstPage;
- positionInBuffer = 0;
- positionInPage = 8;
+
+ // keep only the first page, discard all other pages
+ pageHandler.markPagesAsFree(pageIndexes, 1, pageCount - 1);
+ pageCount = 1;
+
+ // change to first page if we are not already there
+ if (currentPagePositionInPageIndexes > 0)
+ {
+ currentPage = pageHandler.readPage(pageIndexes[0]);
+ currentPagePositionInPageIndexes = 0;
+ currentPageOffset = 0;
+ }
+ positionInPage = 0;
+ size = 0;
+ currentPageContentChanged = false;
}
/**
@@ -237,7 +277,7 @@ class ScratchFileBuffer implements Rando
public long getPosition() throws IOException
{
checkClosed();
- return positionInBuffer;
+ return currentPageOffset + positionInPage;
}
/**
@@ -249,57 +289,40 @@ class ScratchFileBuffer implements Rando
checkClosed();
/*
- * Can't seek past end of file. If you want to change implementation, seek to end of file, write zero bytes for
- * remaining seek distance.
+ * for now seeking past the end of the buffer is not allowed; this can be changed by adding new pages as needed
*/
- if (seekToPosition > length)
+ if (seekToPosition > size)
{
throw new EOFException();
}
-
- if (seekToPosition < positionInBuffer)
+
+ if (seekToPosition < 0)
{
- if (currentPage != firstPage && seekToPosition < (positionInBuffer / 2))
- {
- /*
- * If we are seeking backwards, and the seek to position is closer to the beginning of the buffer than
- * our current position, just go to the start of the buffer and seek forward from there. Recurse exactly
- * once.
- */
- currentPage = firstPage;
- positionInPage = 8;
- positionInBuffer = 0;
- seek(seekToPosition);
- }
- else
- {
- while (positionInBuffer - seekToPosition > positionInPage - 8)
- {
- raFile.seek(currentPage * PAGE_SIZE);
- long previousPage = raFile.readLong();
- currentPage = previousPage;
- positionInBuffer -= (positionInPage - 8);
- positionInPage = PAGE_SIZE - 8;
- }
-
- positionInPage -= (positionInBuffer - seekToPosition);
- positionInBuffer = seekToPosition;
- }
+ throw new IOException("Negative seek offset: " + seekToPosition);
+ }
+
+ if ((seekToPosition >= currentPageOffset) && (seekToPosition <= currentPageOffset + pageSize))
+ {
+ // within same page
+ positionInPage = (int) (seekToPosition - currentPageOffset);
}
else
{
- while (seekToPosition - positionInBuffer > (PAGE_SIZE - 8) - positionInPage)
+ // have to go to another page
+
+ // check if current page needs to be written to file
+ if (currentPageContentChanged)
{
- // seek to 8 bytes from end of current page, to read next page pointer.
- raFile.seek(((currentPage + 1) * PAGE_SIZE) - 8);
- long nextPage = raFile.readLong();
- positionInBuffer += (PAGE_SIZE - 8) - positionInPage;
- currentPage = nextPage;
- positionInPage = 8;
+ pageHandler.writePage(pageIndexes[currentPagePositionInPageIndexes], currentPage);
+ currentPageContentChanged = false;
}
-
- positionInPage += seekToPosition - positionInBuffer;
- positionInBuffer = seekToPosition;
+
+ int newPagePosition = (int) (seekToPosition / pageSize);
+
+ currentPage = pageHandler.readPage(pageIndexes[newPagePosition]);
+ currentPagePositionInPageIndexes = newPagePosition;
+ currentPageOffset = ((long)currentPagePositionInPageIndexes) * pageSize;
+ positionInPage = (int) (seekToPosition - currentPageOffset);
}
}
@@ -309,7 +332,7 @@ class ScratchFileBuffer implements Rando
@Override
public boolean isClosed()
{
- return scratchFile == null;
+ return pageHandler == null;
}
/**
@@ -332,7 +355,7 @@ class ScratchFileBuffer implements Rando
@Override
public void rewind(int bytes) throws IOException
{
- seek(positionInBuffer - bytes);
+ seek(currentPageOffset + positionInPage - bytes);
}
/**
@@ -364,7 +387,7 @@ class ScratchFileBuffer implements Rando
public boolean isEOF() throws IOException
{
checkClosed();
- return positionInBuffer >= length;
+ return currentPageOffset + positionInPage >= size;
}
/**
@@ -374,7 +397,7 @@ class ScratchFileBuffer implements Rando
public int available() throws IOException
{
checkClosed();
- return (int) Math.min(length - positionInBuffer, Integer.MAX_VALUE);
+ return (int) Math.min(size - (currentPageOffset + positionInPage), Integer.MAX_VALUE);
}
/**
@@ -385,29 +408,18 @@ class ScratchFileBuffer implements Rando
{
checkClosed();
- if (positionInBuffer >= length)
+ if (currentPageOffset + positionInPage >= size)
{
return -1;
}
- seekToCurrentPositionInFile();
-
- if (positionInPage == PAGE_SIZE - 8)
- {
- currentPage = raFile.readLong();
- positionInPage = 8;
- seekToCurrentPositionInFile();
- }
-
- int retv = raFile.read();
-
- if (retv >= 0)
+ if (! ensureAvailableBytesInPage(false))
{
- positionInPage++;
- positionInBuffer++;
+ // should not happen, we checked it before
+ throw new IOException("Unexpectedly no bytes available for read in buffer.");
}
-
- return retv;
+
+ return currentPage[positionInPage++] & 0xff;
}
/**
@@ -427,40 +439,32 @@ class ScratchFileBuffer implements Rando
{
checkClosed();
- if (positionInBuffer >= length)
+ if (currentPageOffset + positionInPage >= size)
{
return -1;
}
- len = (int) Math.min(len, length - positionInBuffer);
-
- seekToCurrentPositionInFile();
+ int remain = (int) Math.min(len, size - (currentPageOffset + positionInPage));
int totalBytesRead = 0;
+ int bOff = off;
- while (len > 0)
+ while (remain > 0)
{
- if (positionInPage == PAGE_SIZE - 8)
+ if (! ensureAvailableBytesInPage(false))
{
- currentPage = raFile.readLong();
- positionInPage = 8;
- seekToCurrentPositionInFile();
+ // should not happen, we checked it before
+ throw new IOException("Unexpectedly no bytes available for read in buffer.");
}
+
+ int readBytes = Math.min(remain, pageSize - positionInPage);
- int availableInThisPage = (PAGE_SIZE - 8) - positionInPage;
+ System.arraycopy(currentPage, positionInPage, b, bOff, readBytes);
- int rdbytes = raFile.read(b, off, Math.min(len, availableInThisPage));
-
- if (rdbytes < 0)
- {
- throw new IOException("EOF reached before end of scratch file stream");
- }
-
- positionInPage += rdbytes;
- totalBytesRead += rdbytes;
- positionInBuffer += rdbytes;
- off += rdbytes;
- len -= rdbytes;
+ positionInPage += readBytes;
+ totalBytesRead += readBytes;
+ bOff += readBytes;
+ remain -= readBytes;
}
return totalBytesRead;
@@ -472,43 +476,17 @@ class ScratchFileBuffer implements Rando
@Override
public void close() throws IOException
{
- scratchFile = null;
- raFile = null;
- }
-
- /**
- * Positions the underlying {@link java.io.RandomAccessFile} to the correct position for use by this buffer.
- *
- * @throws IOException If there was a problem seeking in the {@link java.io.RandomAccessFile}.
- */
- private void seekToCurrentPositionInFile() throws IOException
- {
- long positionInFile = (currentPage * PAGE_SIZE) + positionInPage;
- if (raFile.getFilePointer() != positionInFile)
- {
- raFile.seek(positionInFile);
- }
- }
+ if (pageHandler != null) {
- /**
- * Allocates a new page in the temporary file by growing the file, returning the page index of the new page.
- *
- * @return The index of the new page.
- * @throws IOException If there was an error growing the file.
- */
- private long createNewPage() throws IOException
- {
- long fileLen = raFile.length();
-
- fileLen += PAGE_SIZE;
-
- if (fileLen % PAGE_SIZE > 0)
- {
- fileLen += PAGE_SIZE - (fileLen % PAGE_SIZE);
+ pageHandler.markPagesAsFree(pageIndexes, 0, pageCount);
+ pageHandler = null;
+
+ pageIndexes = null;
+ currentPage = null;
+ currentPageOffset = 0;
+ currentPagePositionInPageIndexes = -1;
+ positionInPage = 0;
+ size = 0;
}
-
- raFile.setLength(fileLen);
-
- return (fileLen / PAGE_SIZE) - 1;
}
}