You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2012/09/19 22:18:58 UTC

svn commit: r1387736 - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java test/java/org/apache/pdfbox/pdfparser/TestNonSequentialPDFParser.java

Author: leleueri
Date: Wed Sep 19 20:18:57 2012
New Revision: 1387736

URL: http://svn.apache.org/viewvc?rev=1387736&view=rev
Log:
[https://issues.apache.org/jira/browse/PDFBOX-1387] NonSequentialParser with InputStream + adds two methods on the XRefResolver in order to allow the validation of linearized PDF in Preflight.

Added:
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestNonSequentialPDFParser.java   (with props)
Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java?rev=1387736&r1=1387735&r2=1387736&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java Wed Sep 19 20:18:57 2012
@@ -20,6 +20,7 @@ package org.apache.pdfbox.pdfparser;
 import java.io.ByteArrayInputStream;
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
@@ -31,10 +32,10 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Map.Entry;
 import java.util.Queue;
 import java.util.Set;
 import java.util.TreeMap;
-import java.util.Map.Entry;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -49,6 +50,7 @@ import org.apache.pdfbox.cos.COSObject;
 import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.cos.COSString;
 import org.apache.pdfbox.exceptions.CryptographyException;
+import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.io.PushBackInputStream;
 import org.apache.pdfbox.io.RandomAccess;
 import org.apache.pdfbox.io.RandomAccessBuffer;
@@ -78,7 +80,7 @@ import org.apache.pdfbox.persistence.uti
  */
 public class NonSequentialPDFParser extends PDFParser
 {
-
+	
     public static final String SYSPROP_PARSEMINIMAL = 
         "org.apache.pdfbox.pdfparser.nonSequentialPDFParser.parseMinimal";
     public static final String SYSPROP_EOFLOOKUPRANGE = 
@@ -86,15 +88,15 @@ public class NonSequentialPDFParser exte
         
     private static final InputStream EMPTY_INPUT_STREAM = new ByteArrayInputStream( new byte[0] );
     
-    private static final int    DEFAULT_TRAIL_BYTECOUNT = 2048;
-    private static final char[] EOF_MARKER              = new char[] { '%','%','E','O','F' };
-    private static final char[] STARTXREF_MARKER        = new char[] { 's','t','a','r','t','x','r','e','f' };
-    private static final char[] OBJ_MARKER              = new char[] { 'o','b','j' };
-    
+  	protected static final int    DEFAULT_TRAIL_BYTECOUNT = 2048;
+  	protected static final char[] EOF_MARKER              = new char[] { '%','%','E','O','F' };
+  	protected static final char[] STARTXREF_MARKER        = new char[] { 's','t','a','r','t','x','r','e','f' };
+  	protected static final char[] OBJ_MARKER              = new char[] { 'o','b','j' };
+
     private final File pdfFile;
     private final RandomAccessBufferedFileInputStream raStream;
     
-    private SecurityHandler securityHandler = null;
+    protected SecurityHandler securityHandler = null;
     
     private String keyStoreFilename = null;
     private String alias            = null;
@@ -113,87 +115,128 @@ public class NonSequentialPDFParser exte
     private boolean allPagesParsed   = false;
         
     private static final Log LOG = LogFactory.getLog( NonSequentialPDFParser.class );
-    
-    // ------------------------------------------------------------------------
-    /** 
-     * Constructs parser for given file using memory buffer. 
-     * 
-     * @param filename the filename of the pdf to be parsed
-     * 
-     * @throws IOException If something went wrong.
-     */
-    public NonSequentialPDFParser( String filename ) throws IOException
-    {
-        this( new File( filename ), null );
-    }
-    
-    /** 
-     * Constructs parser for given file using given buffer for temporary storage. 
-     * 
-     * @param file the pdf to be parsed
-     * @param raBuf the buffer to be used for parsing
-     *  
-     * @throws IOException If something went wrong.
-     */
-    /** 
-     * Constructs parser for given file using given buffer for temporary storage. 
-     * 
-     * @param file the pdf to be parsed
-     * @param raBuf the buffer to be used for parsing
-     *  
-     * @throws IOException If something went wrong.
-     */
-    public NonSequentialPDFParser( File file, RandomAccess raBuf ) throws IOException
-    {
-        this(file, raBuf, "");
-    }
-    
-    /** 
-     * Constructs parser for given file using given buffer for temporary storage. 
-     * 
-     * @param file the pdf to be parsed
-     * @param raBuf the buffer to be used for parsing
-     *  
-     * @throws IOException If something went wrong.
-     */
-    /** 
-     * Constructs parser for given file using given buffer for temporary storage. 
-     * 
-     * @param file the pdf to be parsed
-     * @param raBuf the buffer to be used for parsing
-     * @param decryptionPassword password to be used for decryption
-     *  
-     * @throws IOException If something went wrong.
-     */
-    public NonSequentialPDFParser( File file, RandomAccess raBuf, String decryptionPassword ) throws IOException
-    {
-        super( EMPTY_INPUT_STREAM, null, false );
-            
-        String eofLookupRangeStr = System.getProperty( SYSPROP_EOFLOOKUPRANGE );
-        if ( eofLookupRangeStr != null )
-        {
-            try
-            {
-                setEOFLookupRange( Integer.parseInt( eofLookupRangeStr ) );
-            } 
-            catch ( NumberFormatException nfe )
-            {
-                LOG.warn( "System property " + SYSPROP_EOFLOOKUPRANGE + 
-                        " does not contain an integer value, but: '" + eofLookupRangeStr + "'" );
-            }
-        }
-    
-        pdfFile = file;
-        raStream = new RandomAccessBufferedFileInputStream( pdfFile );
-    
-        setDocument( ( raBuf == null ) ? new COSDocument( new RandomAccessBuffer(), false ) :
-                                         new COSDocument( raBuf, false ) );
-    
-        pdfSource = new PushBackInputStream( raStream, 4096 );
-        
-        password = decryptionPassword;
-    }
-        
+
+  	/**
+  	 * <code>true</code> if the NonSequentialPDFParser is initialized by an InputStream. In this case
+  	 * a temporary file is created; at the end of the {@linkplain #parse()} method, the temporary file will
+  	 * be deleted.
+  	 */
+  	private boolean isTmpPDFFile = false;
+  	
+  	public static final String TMP_FILE_PREFIX = "tmpPDF";
+  	
+  	// ------------------------------------------------------------------------
+  	/** 
+  	 * Constructs parser for given file using memory buffer. 
+  	 * 
+  	 * @param filename the filename of the pdf to be parsed
+  	 * 
+  	 * @throws IOException If something went wrong.
+  	 */
+  	public NonSequentialPDFParser( String filename ) throws IOException
+  	{
+  		this( new File( filename ), null );
+  	}
+
+  	/** 
+  	 * Constructs parser for given file using given buffer for temporary storage. 
+  	 * 
+  	 * @param file the pdf to be parsed
+  	 * @param raBuf the buffer to be used for parsing
+  	 *  
+  	 * @throws IOException If something went wrong.
+  	 */
+  	/** 
+  	 * Constructs parser for given file using given buffer for temporary storage. 
+  	 * 
+  	 * @param file the pdf to be parsed
+  	 * @param raBuf the buffer to be used for parsing
+  	 *  
+  	 * @throws IOException If something went wrong.
+  	 */
+  	public NonSequentialPDFParser( File file, RandomAccess raBuf ) throws IOException
+  	{
+  		this(file, raBuf, "");
+  	}
+
+  	/** 
+  	 * Constructs parser for given file using given buffer for temporary storage. 
+  	 * 
+  	 * @param file the pdf to be parsed
+  	 * @param raBuf the buffer to be used for parsing
+  	 *  
+  	 * @throws IOException If something went wrong.
+  	 */
+  	/** 
+  	 * Constructs parser for given file using given buffer for temporary storage. 
+  	 * 
+  	 * @param file the pdf to be parsed
+  	 * @param raBuf the buffer to be used for parsing
+  	 * @param decryptionPassword password to be used for decryption
+  	 *  
+  	 * @throws IOException If something went wrong.
+  	 */
+  	public NonSequentialPDFParser( File file, RandomAccess raBuf, String decryptionPassword ) throws IOException
+  	{
+  		super( EMPTY_INPUT_STREAM, null, false );
+  		pdfFile = file;
+  		raStream = new RandomAccessBufferedFileInputStream( pdfFile );
+  		init(file, raBuf, decryptionPassword);
+  	}
+
+  	private void init(File file, RandomAccess raBuf, String decryptionPassword) throws IOException {
+  		String eofLookupRangeStr = System.getProperty( SYSPROP_EOFLOOKUPRANGE );
+  		if ( eofLookupRangeStr != null )
+  		{
+  			try
+  			{
+  				setEOFLookupRange( Integer.parseInt( eofLookupRangeStr ) );
+  			} 
+  			catch ( NumberFormatException nfe )
+  			{
+  				LOG.warn( "System property " + SYSPROP_EOFLOOKUPRANGE + 
+  						" does not contain an integer value, but: '" + eofLookupRangeStr + "'" );
+  			}
+  		}
+
+  		setDocument( ( raBuf == null ) ? new COSDocument( new RandomAccessBuffer(), false ) :	new COSDocument( raBuf, false ) );
+
+  		pdfSource = new PushBackInputStream( raStream, 4096 );
+
+  		password = decryptionPassword;
+  	}
+
+  	public NonSequentialPDFParser(InputStream input) throws IOException
+  	{
+  		super( EMPTY_INPUT_STREAM, null, false );
+  		pdfFile = createTmpFile(input);
+  		raStream = new RandomAccessBufferedFileInputStream( pdfFile );
+  		init(pdfFile, null, ""); 
+  	}
+
+  	/**
+  	 * Create a temporary file with the input stream.
+  	 * If the creation succeeds, {@linkplain #isTmpPDFFile} is set to true.
+  	 * This temporary file will be deleted at the end of the parse method.
+  	 * @param input
+  	 * @return
+  	 * @throws IOException
+  	 */
+  	private File createTmpFile(InputStream input) throws IOException {
+  		File tmpFile = null;
+  		FileOutputStream fos = null;
+  		try {
+  			tmpFile = File.createTempFile(TMP_FILE_PREFIX, ".pdf");
+  			fos = new FileOutputStream(tmpFile);
+  			IOUtils.copy(input, fos);
+  			isTmpPDFFile = true;
+  			return tmpFile;
+  		} finally {
+  			IOUtils.closeQuietly(input);
+  			IOUtils.closeQuietly(fos);
+  		}
+  	}
+
     // ------------------------------------------------------------------------
     /** 
      *  Sets how many trailing bytes of PDF file are searched for
@@ -228,7 +271,7 @@ public class NonSequentialPDFParser exte
      * 
      * @throws IOException
      */
-    private void initialParse() throws IOException
+    protected void initialParse() throws IOException
     {
         final long startxrefOff = getStartxrefOffset();
             
@@ -275,45 +318,44 @@ public class NonSequentialPDFParser exte
         COSBase trailerEncryptItem = document.getTrailer().getItem( COSName.ENCRYPT );
         if ( trailerEncryptItem != null ) 
         {
-            if ( trailerEncryptItem instanceof COSObject )
-            {
-                COSObject trailerEncryptObj = (COSObject) trailerEncryptItem;
-                parseObjectDynamically( trailerEncryptObj, true );
-            }
-            
-            try
-            {
-                PDEncryptionDictionary encParameters = new PDEncryptionDictionary( document.getEncryptionDictionary() );
-                      
-                DecryptionMaterial decryptionMaterial = null;
-                if( keyStoreFilename != null )
-                {
-                    KeyStore ks = KeyStore.getInstance( "PKCS12" );
-                    ks.load( new FileInputStream( keyStoreFilename ), password.toCharArray() );
-                    
-                    decryptionMaterial = new PublicKeyDecryptionMaterial( ks, alias, password );
-                }
-                else
-                {
-                    decryptionMaterial = new StandardDecryptionMaterial( password );
-                }
-                      
-                securityHandler = SecurityHandlersManager.getInstance().getSecurityHandler( encParameters.getFilter() );
-                securityHandler.prepareForDecryption( encParameters, document.getDocumentID(), decryptionMaterial );
-                      
-                AccessPermission permission = securityHandler.getCurrentAccessPermission();
-                if ( ! permission.canExtractContent() )
-                {
-                    LOG.warn( "PDF file '" + pdfFile.getPath() + "' does not allow extracting content." );
-                }
-                  
-            }
-            catch ( Exception e )
-            {
-                throw new IOException( "Error (" + e.getClass().getSimpleName() + 
-                        ") while creating security handler for decryption: " +
-                                                   e.getMessage() /*, e // TODO: remove remark with Java 1.6 */);
-            }
+    			if ( trailerEncryptItem instanceof COSObject )
+    			{
+    				COSObject trailerEncryptObj = (COSObject) trailerEncryptItem;
+    				parseObjectDynamically( trailerEncryptObj, true );
+    			}
+    			try
+    	    {
+    	        PDEncryptionDictionary encParameters = new PDEncryptionDictionary( document.getEncryptionDictionary() );
+    	              
+    	        DecryptionMaterial decryptionMaterial = null;
+    	        if( keyStoreFilename != null )
+    	        {
+    	            KeyStore ks = KeyStore.getInstance( "PKCS12" );
+    	            ks.load( new FileInputStream( keyStoreFilename ), password.toCharArray() );
+    	            
+    	            decryptionMaterial = new PublicKeyDecryptionMaterial( ks, alias, password );
+    	        }
+    	        else
+    	        {
+    	            decryptionMaterial = new StandardDecryptionMaterial( password );
+    	        }
+    	              
+    	        securityHandler = SecurityHandlersManager.getInstance().getSecurityHandler( encParameters.getFilter() );
+    	        securityHandler.prepareForDecryption( encParameters, document.getDocumentID(), decryptionMaterial );
+    	              
+    	        AccessPermission permission = securityHandler.getCurrentAccessPermission();
+    	        if ( ! permission.canExtractContent() )
+    	        {
+    	            LOG.warn( "PDF file '" + pdfFile.getPath() + "' does not allow extracting content." );
+    	        }
+    	          
+    	    }
+    	    catch ( Exception e )
+    	    {
+    	        throw new IOException( "Error (" + e.getClass().getSimpleName() + 
+    	                ") while creating security handler for decryption: " +
+    	                                           e.getMessage() /*, e // TODO: remove remark with Java 1.6 */);
+    	    }
         }
     
         // ---- parse catalog or root object
@@ -341,6 +383,7 @@ public class NonSequentialPDFParser exte
             }
         }
         initialParseDone = true;
+        
     }
     
     // ------------------------------------------------------------------------
@@ -370,7 +413,7 @@ public class NonSequentialPDFParser exte
     }
 
     /** Sets {@link #pdfSource} to start next parsing at given file offset. */
-    private final void setPdfSource( long fileOffset ) throws IOException
+    protected final void setPdfSource( long fileOffset ) throws IOException
     {
         
         pdfSource.seek( fileOffset );
@@ -386,7 +429,7 @@ public class NonSequentialPDFParser exte
     }
 
     /** Enable handling of alternative pdfSource implementation. */
-    private final void releasePdfSourceInputStream() throws IOException
+    protected final void releasePdfSourceInputStream() throws IOException
     {
         //        if ( pdfSource != null )
         //            pdfSource.close();
@@ -404,7 +447,7 @@ public class NonSequentialPDFParser exte
     /** Looks for and parses startxref. We first look for last '%%EOF' marker
      *  (within last {@link #DEFAULT_TRAIL_BYTECOUNT} bytes (or range set via
      *  {@link #setEOFLookupRange(int)}) and go back to find <code>startxref</code>. */
-    private final long getStartxrefOffset() throws IOException
+    protected final long getStartxrefOffset() throws IOException
     {
         byte[] buf; 
         long   skipBytes;
@@ -475,7 +518,7 @@ public class NonSequentialPDFParser exte
      *  
      *  @return  start offset of pattern within buffer or <code>-1</code> if pattern could not be found 
      */
-    private final int lastIndexOf( final char[] pattern, final byte[] buf, final int endOff )
+    protected int lastIndexOf( final char[] pattern, final byte[] buf, final int endOff )
     {
         final int lastPatternChOff = pattern.length - 1;
         
@@ -510,7 +553,7 @@ public class NonSequentialPDFParser exte
      * 
      * @throws IOException if pattern could not be read
      */
-    private final void readPattern( final char[] pattern ) throws IOException
+    protected final void readPattern( final char[] pattern ) throws IOException
     {
         skipSpaces();
         
@@ -596,7 +639,9 @@ public class NonSequentialPDFParser exte
             } 
             catch ( IOException ioe ) 
             {}
-                    
+
+      			deleteTempFile();
+
             if ( exceptionOccurred && ( document != null ) )
             {
                 try 
@@ -609,6 +654,23 @@ public class NonSequentialPDFParser exte
         }
     }   
 
+  	protected File getPdfFile() {
+  		return this.pdfFile;
+  	}
+
+  	/**
+  	 * Remove the temporary file.
+  	 * A temporary file is created if this class is instantiated with an InputStream
+  	 */
+  	protected void deleteTempFile() {
+  		if (isTmpPDFFile) {
+  			try {
+  				if (!pdfFile.delete()) LOG.warn("Temporary file '" + pdfFile.getName() + "' can't be deleted");
+  			} catch (SecurityException e) {
+  				LOG.warn("Temporary file '" + pdfFile.getName() + "' can't be deleted", e);
+  			}
+  		}
+  	}
     // ------------------------------------------------------------------------
     /** 
      * Returns security handler of the document or <code>null</code> if document
@@ -638,7 +700,6 @@ public class NonSequentialPDFParser exte
         PDDocument pdDocument = super.getPDDocument();
         if ( securityHandler != null )
             pdDocument.setSecurityHandler( securityHandler );
-        
         return pdDocument;
     }
 
@@ -949,7 +1010,7 @@ public class NonSequentialPDFParser exte
      * 
      * @throws IOException If an IO error occurs.
      */
-    private COSBase parseObjectDynamically( COSObject obj, boolean requireExistingNotCompressedObj )
+    protected final COSBase parseObjectDynamically( COSObject obj, boolean requireExistingNotCompressedObj )
     throws IOException
     {
         return parseObjectDynamically( obj.getObjectNumber().intValue(),
@@ -974,7 +1035,7 @@ public class NonSequentialPDFParser exte
      * 
      * @throws IOException If an IO error occurs.
      */
-    private COSBase parseObjectDynamically( int objNr, int objGenNr, boolean requireExistingNotCompressedObj )
+    protected COSBase parseObjectDynamically( int objNr, int objGenNr, boolean requireExistingNotCompressedObj )
     throws IOException
     {
         // ---- create object key and get object (container) from pool
@@ -986,7 +1047,7 @@ public class NonSequentialPDFParser exte
             // not previously parsed
             // ---- read offset or object stream object number from xref table
             Long offsetOrObjstmObNr = xrefTrailerResolver.getXrefTable().get( objKey );
-            
+
             // sanity test to circumvent loops with broken documents
             if ( requireExistingNotCompressedObj &&
                     ( ( offsetOrObjstmObNr == null ) || ( offsetOrObjstmObNr <= 0 ) ) )
@@ -1145,7 +1206,7 @@ public class NonSequentialPDFParser exte
     
     // ------------------------------------------------------------------------
     /** Decrypts given COSString. */
-    private final void decrypt( COSString str, long objNr, long objGenNr )
+    protected final void decrypt( COSString str, long objNr, long objGenNr )
     throws IOException
     {
         try 

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java?rev=1387736&r1=1387735&r2=1387736&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java Wed Sep 19 20:18:57 2012
@@ -24,6 +24,8 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.Map.Entry;
+import java.util.SortedSet;
+import java.util.TreeSet;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -59,7 +61,7 @@ public class XrefTrailerResolver
      */
     private class XrefTrailerObj
     {
-        private COSDictionary trailer = null;
+        protected COSDictionary trailer = null;
         private final Map<COSObjectKey, Long> xrefTable = new HashMap<COSObjectKey, Long>();
         
         /**
@@ -77,6 +79,22 @@ public class XrefTrailerResolver
     /** Log instance. */
     private static final Log LOG = LogFactory.getLog( XrefTrailerResolver.class );
 
+    public final COSDictionary getFirstTrailer() {
+    	if (bytePosToXrefMap.isEmpty()) return null;
+    	
+    	Set<Long> offsets = bytePosToXrefMap.keySet();
+    	SortedSet<Long> sortedOffset = new TreeSet<Long>(offsets);
+    	return bytePosToXrefMap.get(sortedOffset.first()).trailer;
+    }
+    
+    public final COSDictionary getLastTrailer() {
+    	if (bytePosToXrefMap.isEmpty()) return null;
+    	
+    	Set<Long> offsets = bytePosToXrefMap.keySet();
+    	SortedSet<Long> sortedOffset = new TreeSet<Long>(offsets);
+    	return bytePosToXrefMap.get(sortedOffset.last()).trailer;
+    }
+    
     /**
      * Signals that a new XRef object (table or stream) starts.
      * @param startBytePos the offset to start at

Added: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestNonSequentialPDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestNonSequentialPDFParser.java?rev=1387736&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestNonSequentialPDFParser.java (added)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestNonSequentialPDFParser.java Wed Sep 19 20:18:57 2012
@@ -0,0 +1,97 @@
+/*****************************************************************************
+ * 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * 
+ ****************************************************************************/
+
+package org.apache.pdfbox.pdfparser;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FilenameFilter;
+import java.io.IOException;
+
+import org.apache.pdfbox.io.RandomAccessBuffer;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestNonSequentialPDFParser {
+
+	private static final String PATH_OF_PDF = "src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf";
+	private static File tmpDirectory = new File(System.getProperty("java.io.tmpdir"));
+
+	private int numberOfTmpFiles = 0;
+
+	/**
+	 * Record the number of tmp files present before the test
+	 * @throws Exception
+	 */
+	@Before
+	public void setUp() throws Exception {
+		numberOfTmpFiles = getNumberOfTempFile();
+	}
+
+	/**
+	 * Count the number of temporary files 
+	 * @return
+	 */
+	private int getNumberOfTempFile() {
+		int result = 0;
+		File[] tmpPdfs = tmpDirectory.listFiles(new FilenameFilter() {
+			public boolean accept(File dir, String name) {
+				return name.startsWith(NonSequentialPDFParser.TMP_FILE_PREFIX) && name.endsWith("pdf");
+			}
+		});
+
+		if (tmpPdfs != null) {
+			result = tmpPdfs.length;
+		}
+
+		return result;
+	}
+
+	@Test
+	public void testNonSequentialPDFParserString() throws Exception {
+		NonSequentialPDFParser nsp = new NonSequentialPDFParser(PATH_OF_PDF);
+		executeParserTest(nsp);
+	}
+
+	@Test
+	public void testNonSequentialPDFParserFileRandomAccess() throws IOException {
+		NonSequentialPDFParser nsp = new NonSequentialPDFParser(new File(PATH_OF_PDF), new RandomAccessBuffer());
+		executeParserTest(nsp);
+	}
+
+	@Test
+	public void testNonSequentialPDFParserInputStream() throws IOException {
+		NonSequentialPDFParser nsp = new NonSequentialPDFParser(new FileInputStream(PATH_OF_PDF));
+		executeParserTest(nsp);
+	}
+
+	
+	private void executeParserTest(NonSequentialPDFParser nsp) throws IOException {
+	  nsp.parse();
+		assertNotNull(nsp.getDocument());
+		// the number of tmp files must be unchanged after parsing
+		assertEquals(numberOfTmpFiles, getNumberOfTempFile());
+  }
+
+}

Propchange: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestNonSequentialPDFParser.java
------------------------------------------------------------------------------
    svn:eol-style = native