You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by tb...@apache.org on 2012/07/29 23:58:11 UTC
svn commit: r1366964 - in
/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io:
RandomAccessBuffer.java RandomAccessBufferedFileInputStream.java
RandomAccessFile.java RandomAccessRead.java
Author: tboehme
Date: Sun Jul 29 21:58:11 2012
New Revision: 1366964
URL: http://svn.apache.org/viewvc?rev=1366964&view=rev
Log:
PDFBOX-1369: add getPosition() method to RandomAccessRead interface
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBuffer.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessFile.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessRead.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBuffer.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBuffer.java?rev=1366964&r1=1366963&r2=1366964&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBuffer.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBuffer.java Sun Jul 29 21:58:11 2012
@@ -87,6 +87,13 @@ public class RandomAccessBuffer implemen
/**
* {@inheritDoc}
*/
+ public long getPosition() throws IOException {
+ return pointer;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
public int read() throws IOException
{
if (pointer >= this.size)
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java?rev=1366964&r1=1366963&r2=1366964&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java Sun Jul 29 21:58:11 2012
@@ -22,205 +22,233 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.util.LinkedHashMap;
+import java.util.Map;
/**
* Provides {@link InputStream} access to portions of a file combined with
- * buffered reading of content. Start of next bytes to read can be set via seek method.
+ * buffered reading of content. Start of next bytes to read can be set via seek
+ * method.
*
- * File is accessed via {@link RandomAccessFile} and is read in byte chunks which are
- * cached.
+ * File is accessed via {@link RandomAccessFile} and is read in byte chunks
+ * which are cached.
*
* @author Timo Boehme (timo.boehme at ontochem com)
*/
-public class RandomAccessBufferedFileInputStream extends InputStream implements RandomAccessRead
+public class RandomAccessBufferedFileInputStream
+extends InputStream implements RandomAccessRead
{
- private int pageSizeShift = 12;
- private int pageSize = 1 << pageSizeShift;
- private long pageOffsetMask = -1L << pageSizeShift;
- private int maxCachedPages = 1000;
-
- private byte[] lastRemovedCachePage = null;
-
- /** Create a LRU page cache. */
- private final LinkedHashMap<Long,byte[]> pageCache = new LinkedHashMap<Long, byte[]>( maxCachedPages, 0.75f, true )
- {
- private static final long serialVersionUID = -6302488539257741101L;
-
- @Override
- protected boolean removeEldestEntry( java.util.Map.Entry<Long, byte[]> _eldest )
- {
- final boolean doRemove = size() > maxCachedPages;
- if ( doRemove )
- lastRemovedCachePage = _eldest.getValue();
- return doRemove;
- }
- };
-
- private long curPageOffset = -1;
- private byte[] curPage = new byte[ pageSize ];
- private int offsetWithinPage = 0;
-
- private final RandomAccessFile raFile;
- private final long fileLength;
- private long fileOffset = 0;
-
- // ------------------------------------------------------------------------
- /** Create input stream instance for given file. */
- public RandomAccessBufferedFileInputStream( File _file )
- throws FileNotFoundException, IOException
- {
- raFile = new RandomAccessFile( _file, "r" );
- fileLength = _file.length();
-
- seek( 0 );
- }
-
- // ------------------------------------------------------------------------
- /** Returns offset in file at which next byte would be read. */
- public long getFilePointer()
- {
- return fileOffset;
- }
-
- // ------------------------------------------------------------------------
- /** Seeks to new position. If new position is outside of current page
- * the new page is either taken from cache or read from file and added to cache. */
- public void seek( final long newOffset ) throws IOException
- {
- final long newPageOffset = newOffset & pageOffsetMask;
- if ( newPageOffset != curPageOffset )
- {
- byte[] newPage = pageCache.get( newPageOffset );
- if ( newPage == null )
- {
- raFile.seek( newPageOffset );
- newPage = readPage();
- pageCache.put( newPageOffset, newPage );
- }
- curPageOffset = newPageOffset;
- curPage = newPage;
- }
-
- offsetWithinPage = (int) (newOffset - curPageOffset);
- fileOffset = newOffset;
- }
-
- // ------------------------------------------------------------------------
- /** Reads a page with data from current file position. If we have a previously
- * removed page from cache the buffer of this page is reused. Otherwise a new
- * byte buffer is created. */
- private final byte[] readPage() throws IOException
- {
- byte[] page;
-
- if ( lastRemovedCachePage != null )
- {
- page = lastRemovedCachePage;
- lastRemovedCachePage = null;
- } else
- page = new byte[ pageSize ];
-
- int readBytes = 0;
- while ( readBytes < pageSize )
- {
- int curBytesRead = raFile.read( page, readBytes, pageSize - readBytes );
- if ( curBytesRead < 0 )
- // EOF
- break;
- readBytes += curBytesRead;
- }
-
- return page;
- }
-
- // ------------------------------------------------------------------------
- @Override
- public int read() throws IOException
- {
- if ( fileOffset >= fileLength )
- {
- return -1;
- }
-
- if ( offsetWithinPage == pageSize )
- {
- seek( fileOffset );
- }
-
- fileOffset++;
- return curPage[ offsetWithinPage++ ] & 0xff;
- }
-
- // ------------------------------------------------------------------------
- @Override
- public int read( byte[] b, int off, int len ) throws IOException
- {
- if ( fileOffset >= fileLength )
- {
- return -1;
- }
-
- if ( offsetWithinPage == pageSize )
- {
- seek( fileOffset );
- }
-
- int commonLen = Math.min( pageSize - offsetWithinPage, len );
- if ( ( fileLength - fileOffset ) < pageSize )
- commonLen = Math.min( commonLen, (int) ( fileLength - fileOffset ) );
-
- System.arraycopy( curPage, offsetWithinPage, b, off, commonLen );
-
- offsetWithinPage += commonLen;
- fileOffset += commonLen;
-
- return commonLen;
- }
-
- // ------------------------------------------------------------------------
- @Override
- public int available() throws IOException
- {
- return (int) Math.min( fileLength - fileOffset, Integer.MAX_VALUE );
- }
-
- // ------------------------------------------------------------------------
- @Override
- public long skip( long n ) throws IOException
- {
- // test if we have to reduce skip count because of EOF
- long toSkip = n;
-
- if ( fileLength - fileOffset < toSkip )
- toSkip = fileLength - fileOffset;
-
- if ( ( toSkip < pageSize ) && ( ( offsetWithinPage + toSkip ) <= pageSize ) )
- {
- // we can skip within current page
- offsetWithinPage += toSkip;
- fileOffset += toSkip;
- }
- else
- {
- // seek to the page we will get after skipping
- seek( fileOffset + toSkip );
- }
-
- return toSkip;
- }
-
- // ------------------------------------------------------------------------
- public long length() throws IOException
- {
- return fileLength;
- }
-
- // ------------------------------------------------------------------------
- @Override
- public void close() throws IOException
- {
- raFile.close();
- pageCache.clear();
- }
+ private int pageSizeShift = 12;
+ private int pageSize = 1 << pageSizeShift;
+ private long pageOffsetMask = -1L << pageSizeShift;
+ private int maxCachedPages = 1000;
+
+ private byte[] lastRemovedCachePage = null;
+
+ /** Create an LRU page cache. */
+ private final LinkedHashMap<Long, byte[]> pageCache =
+ new LinkedHashMap<Long, byte[]>( maxCachedPages, 0.75f, true )
+ {
+ private static final long serialVersionUID = -6302488539257741101L;
+
+ @Override
+ protected boolean removeEldestEntry( Map.Entry<Long, byte[]> _eldest )
+ {
+ final boolean doRemove = size() > maxCachedPages;
+ if (doRemove)
+ {
+ lastRemovedCachePage = _eldest.getValue();
+ }
+ return doRemove;
+ }
+ };
+
+ private long curPageOffset = -1;
+ private byte[] curPage = new byte[pageSize];
+ private int offsetWithinPage = 0;
+
+ private final RandomAccessFile raFile;
+ private final long fileLength;
+ private long fileOffset = 0;
+
+ // ------------------------------------------------------------------------
+ /** Create input stream instance for given file. */
+ public RandomAccessBufferedFileInputStream( File _file )
+ throws FileNotFoundException, IOException
+ {
+ raFile = new RandomAccessFile(_file, "r");
+ fileLength = _file.length();
+
+ seek(0);
+ }
+
+ // ------------------------------------------------------------------------
+ /**
+ * Returns offset in file at which next byte would be read.
+ *
+ * @deprecated use {@link #getPosition()} instead
+ */
+ public long getFilePointer()
+ {
+ return fileOffset;
+ }
+
+ // ------------------------------------------------------------------------
+ /** Returns offset in file at which next byte would be read. */
+ public long getPosition()
+ {
+ return fileOffset;
+ }
+
+ // ------------------------------------------------------------------------
+ /**
+ * Seeks to new position. If new position is outside of current page the new
+ * page is either taken from cache or read from file and added to cache.
+ */
+ public void seek( final long newOffset ) throws IOException
+ {
+ final long newPageOffset = newOffset & pageOffsetMask;
+ if ( newPageOffset != curPageOffset )
+ {
+ byte[] newPage = pageCache.get( newPageOffset );
+ if ( newPage == null )
+ {
+ raFile.seek( newPageOffset );
+ newPage = readPage();
+ pageCache.put( newPageOffset, newPage );
+ }
+ curPageOffset = newPageOffset;
+ curPage = newPage;
+ }
+
+ offsetWithinPage = (int) ( newOffset - curPageOffset );
+ fileOffset = newOffset;
+ }
+
+ // ------------------------------------------------------------------------
+ /**
+ * Reads a page with data from current file position. If we have a
+ * previously removed page from cache the buffer of this page is reused.
+ * Otherwise a new byte buffer is created.
+ */
+ private final byte[] readPage() throws IOException
+ {
+ byte[] page;
+
+ if ( lastRemovedCachePage != null )
+ {
+ page = lastRemovedCachePage;
+ lastRemovedCachePage = null;
+ }
+ else
+ {
+ page = new byte[pageSize];
+ }
+
+ int readBytes = 0;
+ while ( readBytes < pageSize )
+ {
+ int curBytesRead = raFile.read( page, readBytes, pageSize - readBytes);
+ if (curBytesRead < 0)
+ {
+ // EOF
+ break;
+ }
+ readBytes += curBytesRead;
+ }
+
+ return page;
+ }
+
+ // ------------------------------------------------------------------------
+ @Override
+ public int read() throws IOException
+ {
+ if ( fileOffset >= fileLength )
+ {
+ return -1;
+ }
+
+ if ( offsetWithinPage == pageSize )
+ {
+ seek( fileOffset );
+ }
+
+ fileOffset++;
+ return curPage[offsetWithinPage++] & 0xff;
+ }
+
+ // ------------------------------------------------------------------------
+ @Override
+ public int read( byte[] b, int off, int len ) throws IOException
+ {
+ if ( fileOffset >= fileLength )
+ {
+ return -1;
+ }
+
+ if ( offsetWithinPage == pageSize )
+ {
+ seek( fileOffset );
+ }
+
+ int commonLen = Math.min( pageSize - offsetWithinPage, len );
+ if ( ( fileLength - fileOffset ) < pageSize )
+ commonLen = Math.min( commonLen, (int) ( fileLength - fileOffset ) );
+
+ System.arraycopy( curPage, offsetWithinPage, b, off, commonLen );
+
+ offsetWithinPage += commonLen;
+ fileOffset += commonLen;
+
+ return commonLen;
+ }
+
+ // ------------------------------------------------------------------------
+ @Override
+ public int available() throws IOException
+ {
+ return (int) Math.min( fileLength - fileOffset, Integer.MAX_VALUE );
+ }
+
+ // ------------------------------------------------------------------------
+ @Override
+ public long skip( long n ) throws IOException
+ {
+ // test if we have to reduce skip count because of EOF
+ long toSkip = n;
+
+ if ( fileLength - fileOffset < toSkip )
+ {
+ toSkip = fileLength - fileOffset;
+ }
+
+ if ( ( toSkip < pageSize ) && ( ( offsetWithinPage + toSkip ) <= pageSize ) )
+ {
+ // we can skip within current page
+ offsetWithinPage += toSkip;
+ fileOffset += toSkip;
+ }
+ else
+ {
+ // seek to the page we will get after skipping
+ seek( fileOffset + toSkip );
+ }
+
+ return toSkip;
+ }
+
+ // ------------------------------------------------------------------------
+ public long length() throws IOException
+ {
+ return fileLength;
+ }
+
+ // ------------------------------------------------------------------------
+ @Override
+ public void close() throws IOException
+ {
+ raFile.close();
+ pageCache.clear();
+ }
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessFile.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessFile.java?rev=1366964&r1=1366963&r2=1366964&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessFile.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessFile.java Sun Jul 29 21:58:11 2012
@@ -62,6 +62,13 @@ public class RandomAccessFile implements
/**
* {@inheritDoc}
*/
+ public long getPosition() throws IOException {
+ return ras.getFilePointer();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
public int read() throws IOException
{
return ras.read();
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessRead.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessRead.java?rev=1366964&r1=1366963&r2=1366964&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessRead.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessRead.java Sun Jul 29 21:58:11 2012
@@ -25,6 +25,16 @@ public interface RandomAccessRead extend
{
/**
+ * Returns offset of next byte to be returned by a read method.
+ *
+ * @return offset of the next byte which will be returned by the next {@link #read()}
+ * (if no more bytes are left, it returns a value {@code >=} the length of the source)
+ *
+ * @throws IOException if an I/O error occurs while determining the position
+ */
+ public long getPosition() throws IOException;
+
+ /**
* Seek to a position in the data.
*
* @param position The position to seek to.