You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by tb...@apache.org on 2012/07/29 23:58:11 UTC

svn commit: r1366964 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io: RandomAccessBuffer.java RandomAccessBufferedFileInputStream.java RandomAccessFile.java RandomAccessRead.java

Author: tboehme
Date: Sun Jul 29 21:58:11 2012
New Revision: 1366964

URL: http://svn.apache.org/viewvc?rev=1366964&view=rev
Log:
PDFBOX-1369: add getPosition() method to RandomAccessRead interface

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBuffer.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessFile.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessRead.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBuffer.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBuffer.java?rev=1366964&r1=1366963&r2=1366964&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBuffer.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBuffer.java Sun Jul 29 21:58:11 2012
@@ -87,6 +87,13 @@ public class RandomAccessBuffer implemen
     /**
      * {@inheritDoc}
      */
+    public long getPosition() throws IOException {
+        return pointer;
+    }
+    
+    /**
+     * {@inheritDoc}
+     */
     public int read() throws IOException
     {
         if (pointer >= this.size)

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java?rev=1366964&r1=1366963&r2=1366964&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java Sun Jul 29 21:58:11 2012
@@ -22,205 +22,233 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.RandomAccessFile;
 import java.util.LinkedHashMap;
+import java.util.Map;
 
 /**
  * Provides {@link InputStream} access to portions of a file combined with
- * buffered reading of content. Start of next bytes to read can be set via seek method.
+ * buffered reading of content. Start of next bytes to read can be set via seek
+ * method.
  * 
- * File is accessed via {@link RandomAccessFile} and is read in byte chunks which are
- * cached.
+ * File is accessed via {@link RandomAccessFile} and is read in byte chunks
+ * which are cached.
  * 
  * @author Timo Boehme (timo.boehme at ontochem com)
  */
-public class RandomAccessBufferedFileInputStream extends InputStream implements RandomAccessRead
+public class RandomAccessBufferedFileInputStream
+extends InputStream implements RandomAccessRead
 {
 
-		private int  pageSizeShift  = 12;
-		private int  pageSize       = 1 << pageSizeShift;
-		private long pageOffsetMask = -1L << pageSizeShift;
-		private int  maxCachedPages = 1000;
-	
-		private byte[] lastRemovedCachePage = null;
-	
-		/** Create a LRU page cache. */
-		private final LinkedHashMap<Long,byte[]> pageCache = new LinkedHashMap<Long, byte[]>( maxCachedPages, 0.75f, true ) 
-		{
-				private static final long serialVersionUID = -6302488539257741101L;
-
-				@Override
-				protected boolean removeEldestEntry( java.util.Map.Entry<Long, byte[]> _eldest )
-				{
-						final boolean doRemove = size() > maxCachedPages;
-						if ( doRemove )
-							lastRemovedCachePage = _eldest.getValue();
-						return doRemove;
-				}
-		};
-	
-		private long   curPageOffset    = -1;
-		private byte[] curPage          = new byte[ pageSize ];
-		private int    offsetWithinPage = 0;
-	
-		private final RandomAccessFile raFile;
-		private final long             fileLength;
-		private long                   fileOffset = 0;
-	
-		// ------------------------------------------------------------------------
-		/** Create input stream instance for given file. */
-		public RandomAccessBufferedFileInputStream( File _file )
-		throws FileNotFoundException, IOException
-		{
-				raFile     = new RandomAccessFile( _file, "r" );
-				fileLength = _file.length();
-		
-				seek( 0 );
-		}
-	
-		// ------------------------------------------------------------------------
-		/** Returns offset in file at which next byte would be read. */
-		public long getFilePointer()
-		{
-				return fileOffset;
-		}
-	
-		// ------------------------------------------------------------------------
-		/** Seeks to new position. If new position is outside of current page
-		 *  the new page is either taken from cache or read from file and added to cache. */
-		public void seek( final long newOffset ) throws IOException
-		{
-				final long newPageOffset = newOffset & pageOffsetMask;
-				if ( newPageOffset != curPageOffset )
-				{
-						byte[] newPage = pageCache.get( newPageOffset );
-						if ( newPage == null )
-						{
-								raFile.seek( newPageOffset );
-								newPage = readPage();
-								pageCache.put( newPageOffset, newPage );
-						}
-						curPageOffset = newPageOffset;
-						curPage       = newPage;
-				}
-		
-				offsetWithinPage = (int) (newOffset - curPageOffset);
-				fileOffset       = newOffset;
-		}
-	
-		// ------------------------------------------------------------------------
-		/** Reads a page with data from current file position. If we have a previously
-		 *  removed page from cache the buffer of this page is reused. Otherwise a new
-		 *  byte buffer is created. */
-		private final byte[] readPage() throws IOException
-		{
-				byte[] page;
-		
-				if ( lastRemovedCachePage != null )
-				{
-						page = lastRemovedCachePage;
-						lastRemovedCachePage = null;
-				} else
-					page = new byte[ pageSize ];
-		
-				int readBytes = 0;
-				while ( readBytes < pageSize )
-				{
-						int curBytesRead = raFile.read( page, readBytes, pageSize - readBytes );
-						if ( curBytesRead < 0 )
-								// EOF
-								break;
-						readBytes += curBytesRead;
-				}
-		
-				return page;
-		}
-	
-	// ------------------------------------------------------------------------
-	@Override
-	public int read() throws IOException
-	{
-		if ( fileOffset >= fileLength )
-		{
-				return -1;
-		}
-		
-		if ( offsetWithinPage == pageSize )
-		{
-				seek( fileOffset );
-		}
-
-		fileOffset++;
-		return curPage[ offsetWithinPage++ ] & 0xff;
-	}
-
-	// ------------------------------------------------------------------------
-	@Override
-	public int read( byte[] b, int off, int len ) throws IOException
-	{	
-			if ( fileOffset >= fileLength )
-			{
-					return -1;
-			}
-			
-			if ( offsetWithinPage == pageSize ) 
-			{
-					seek( fileOffset );
-			}
-	
-			int commonLen = Math.min( pageSize - offsetWithinPage, len );
-			if ( ( fileLength - fileOffset ) < pageSize )
-					commonLen = Math.min( commonLen, (int) ( fileLength - fileOffset ) );
-			
-			System.arraycopy( curPage, offsetWithinPage, b, off, commonLen );
-			
-			offsetWithinPage += commonLen;
-			fileOffset       += commonLen;
-			
-			return commonLen;
-	}
-	
-	// ------------------------------------------------------------------------
-	@Override
-	public int available() throws IOException
-	{
-			return (int) Math.min( fileLength - fileOffset, Integer.MAX_VALUE );
-	}
-	
-	// ------------------------------------------------------------------------
-	@Override
-	public long skip( long n ) throws IOException
-	{	
-			// test if we have to reduce skip count because of EOF
-			long toSkip = n;
-			
-			if ( fileLength - fileOffset < toSkip )
-					toSkip = fileLength - fileOffset;
-			
-			if ( ( toSkip < pageSize ) && ( ( offsetWithinPage + toSkip ) <= pageSize ) )
-			{
-					// we can skip within current page
-					offsetWithinPage += toSkip;
-				  fileOffset       += toSkip;
-			}
-			else
-			{
-					// seek to the page we will get after skipping
-					seek( fileOffset + toSkip );
-			}
-			
-			return toSkip;
-	}
-	
-	// ------------------------------------------------------------------------
-	public long length() throws IOException 
-	{
-			return fileLength;
-	}
-	
-	// ------------------------------------------------------------------------
-	@Override
-	public void close() throws IOException
-	{
-			raFile.close();
-			pageCache.clear();
-	}
+    private int pageSizeShift = 12;
+    private int pageSize = 1 << pageSizeShift;
+    private long pageOffsetMask = -1L << pageSizeShift;
+    private int maxCachedPages = 1000;
+
+    private byte[] lastRemovedCachePage = null;
+
+    /** An LRU page cache holding at most maxCachedPages pages. */
+    private final LinkedHashMap<Long, byte[]> pageCache =
+        new LinkedHashMap<Long, byte[]>( maxCachedPages, 0.75f, true )
+    {
+        private static final long serialVersionUID = -6302488539257741101L;
+
+        @Override
+        protected boolean removeEldestEntry( Map.Entry<Long, byte[]> _eldest )
+        {
+            final boolean doRemove = size() > maxCachedPages;
+            if (doRemove)
+            {
+                lastRemovedCachePage = _eldest.getValue();
+            }
+            return doRemove;
+        }
+    };
+
+    private long curPageOffset = -1;
+    private byte[] curPage = new byte[pageSize];
+    private int offsetWithinPage = 0;
+
+    private final RandomAccessFile raFile;
+    private final long fileLength;
+    private long fileOffset = 0;
+
+    // ------------------------------------------------------------------------
+    /** Create input stream instance for given file. */
+    public RandomAccessBufferedFileInputStream( File _file )
+    throws FileNotFoundException, IOException
+    {
+        raFile = new RandomAccessFile(_file, "r");
+        fileLength = _file.length();
+
+        seek(0);
+    }
+
+    // ------------------------------------------------------------------------
+    /**
+     *  Returns offset in file at which next byte would be read.
+     *  
+     *  @deprecated  use {@link #getPosition()} instead
+     */
+    public long getFilePointer()
+    {
+        return fileOffset;
+    }
+
+    // ------------------------------------------------------------------------
+    /** Returns offset in file at which next byte would be read. */
+    public long getPosition()
+    {
+        return fileOffset;
+    }
+
+    // ------------------------------------------------------------------------
+    /**
+     * Seeks to new position. If new position is outside of current page the new
+     * page is either taken from cache or read from file and added to cache.
+     */
+    public void seek( final long newOffset ) throws IOException
+    {
+        final long newPageOffset = newOffset & pageOffsetMask;
+        if ( newPageOffset != curPageOffset )
+        {
+            byte[] newPage = pageCache.get( newPageOffset );
+            if ( newPage == null )
+            {
+                raFile.seek( newPageOffset );
+                newPage = readPage();
+                pageCache.put( newPageOffset, newPage );
+            }
+            curPageOffset = newPageOffset;
+            curPage = newPage;
+        }
+
+        offsetWithinPage = (int) ( newOffset - curPageOffset );
+        fileOffset = newOffset;
+    }
+
+    // ------------------------------------------------------------------------
+    /**
+     * Reads a page with data from current file position. If we have a
+     * previously removed page from cache the buffer of this page is reused.
+     * Otherwise a new byte buffer is created.
+     */
+    private final byte[] readPage() throws IOException
+    {
+        byte[] page;
+
+        if ( lastRemovedCachePage != null )
+        {
+            page = lastRemovedCachePage;
+            lastRemovedCachePage = null;
+        }
+        else
+        {
+            page = new byte[pageSize];
+        }
+
+        int readBytes = 0;
+        while ( readBytes < pageSize )
+        {
+            int curBytesRead = raFile.read( page, readBytes, pageSize - readBytes);
+            if (curBytesRead < 0)
+            {
+                // EOF
+                break;
+            }
+            readBytes += curBytesRead;
+        }
+
+        return page;
+    }
+
+    // ------------------------------------------------------------------------
+    @Override
+    public int read() throws IOException
+    {
+        if ( fileOffset >= fileLength )
+        {
+            return -1;
+        }
+
+        if ( offsetWithinPage == pageSize )
+        {
+            seek( fileOffset );
+        }
+
+        fileOffset++;
+        return curPage[offsetWithinPage++] & 0xff;
+    }
+
+    // ------------------------------------------------------------------------
+    @Override
+    public int read( byte[] b, int off, int len ) throws IOException
+    {
+        if ( fileOffset >= fileLength )
+        {
+            return -1;
+        }
+
+        if ( offsetWithinPage == pageSize )
+        {
+            seek( fileOffset );
+        }
+
+        int commonLen = Math.min( pageSize - offsetWithinPage, len );
+        if ( ( fileLength - fileOffset ) < pageSize )
+            commonLen = Math.min( commonLen, (int) ( fileLength - fileOffset ) );
+
+        System.arraycopy( curPage, offsetWithinPage, b, off, commonLen );
+
+        offsetWithinPage += commonLen;
+        fileOffset += commonLen;
+
+        return commonLen;
+    }
+
+    // ------------------------------------------------------------------------
+    @Override
+    public int available() throws IOException
+    {
+        return (int) Math.min( fileLength - fileOffset, Integer.MAX_VALUE );
+    }
+
+    // ------------------------------------------------------------------------
+    @Override
+    public long skip( long n ) throws IOException
+    {
+        // test if we have to reduce skip count because of EOF
+        long toSkip = n;
+
+        if ( fileLength - fileOffset < toSkip )
+        {
+            toSkip = fileLength - fileOffset;
+        }
+
+        if ( ( toSkip < pageSize ) && ( ( offsetWithinPage + toSkip ) <= pageSize ) )
+        {
+            // we can skip within current page
+            offsetWithinPage += toSkip;
+            fileOffset += toSkip;
+        }
+        else
+        {
+            // seek to the page we will get after skipping
+            seek( fileOffset + toSkip );
+        }
+
+        return toSkip;
+    }
+
+    // ------------------------------------------------------------------------
+    public long length() throws IOException
+    {
+        return fileLength;
+    }
+
+    // ------------------------------------------------------------------------
+    @Override
+    public void close() throws IOException
+    {
+        raFile.close();
+        pageCache.clear();
+    }
 }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessFile.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessFile.java?rev=1366964&r1=1366963&r2=1366964&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessFile.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessFile.java Sun Jul 29 21:58:11 2012
@@ -62,6 +62,13 @@ public class RandomAccessFile implements
     /**
      * {@inheritDoc}
      */
+    public long getPosition() throws IOException {
+        return ras.getFilePointer();
+    }
+    
+    /**
+     * {@inheritDoc}
+     */
     public int read() throws IOException
     {
         return ras.read();

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessRead.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessRead.java?rev=1366964&r1=1366963&r2=1366964&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessRead.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessRead.java Sun Jul 29 21:58:11 2012
@@ -25,6 +25,16 @@ public interface RandomAccessRead extend
 {
 
     /**
+     * Returns the offset of the next byte to be returned by a read method.
+     * 
+     * @return the offset of the next byte that the next {@link #read()} will return
+     *         (if no bytes are left, the value is {@code >=} the length of the source)
+     *         
+     * @throws IOException if the current position cannot be determined
+     */
+    public long getPosition() throws IOException;
+    
+    /**
      * Seek to a position in the data.
      *
      * @param position The position to seek to.