You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2011/06/23 19:07:30 UTC
svn commit: r1138995 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox: cos/COSDocument.java cos/COSName.java pdfparser/PDFParser.java pdfparser/PDFXrefStreamParser.java pdfparser/XrefTrailerResolver.java

Author: lehmi
Date: Thu Jun 23 17:07:30 2011
New Revision: 1138995

URL: http://svn.apache.org/viewvc?rev=1138995&view=rev
Log:
PDFBOX-1016: added specification-conforming xref/trailer parsing as proposed by Timo Boehme, incl. some small changes/improvements

Added:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java
Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java?rev=1138995&r1=1138994&r2=1138995&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java Thu Jun 23 17:07:30 2011
@@ -30,7 +30,6 @@ import org.apache.pdfbox.io.RandomAccess
 import org.apache.pdfbox.io.RandomAccessBuffer;
 import org.apache.pdfbox.io.RandomAccessFile;
 import org.apache.pdfbox.pdfparser.PDFObjectStreamParser;
-import org.apache.pdfbox.pdfparser.PDFXrefStreamParser;
 import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface;
 import org.apache.pdfbox.persistence.util.COSObjectKey;
 
@@ -147,7 +146,7 @@ public class COSDocument extends COSBase
      *
      * @param scratchDir The directory to store a scratch file.
      *
-     *  @throws IOException If there is an error creating the tmp file.
+     * @throws IOException If there is an error creating the tmp file.
      */
     public COSDocument(File scratchDir) throws IOException {
         this(scratchDir, false);
@@ -347,14 +346,14 @@ public class COSDocument extends COSBase
         COSObject documentCatalog = getCatalog();
         if (documentCatalog != null)
         {
-          COSDictionary acroForm = (COSDictionary)documentCatalog.getDictionaryObject(COSName.getPDFName("AcroForm"));
+          COSDictionary acroForm = (COSDictionary)documentCatalog.getDictionaryObject(COSName.ACRO_FORM);
           if (acroForm !=null)
           {
-            COSArray fields = (COSArray)acroForm.getDictionaryObject("Fields");
+            COSArray fields = (COSArray)acroForm.getDictionaryObject(COSName.FIELDS);
             for ( Object object : fields )
             {
               COSObject dict = (COSObject)object;
-              if(dict.getItem(COSName.getPDFName("FT")).equals(COSName.getPDFName("Sig")))
+              if(dict.getItem(COSName.FT).equals(COSName.SIG))
               {
                 COSBase dictionaryObject = dict.getDictionaryObject(COSName.V);
                 
@@ -525,7 +524,7 @@ public class COSDocument extends COSBase
      */
     public void dereferenceObjectStreams() throws IOException
     {
-        for( COSObject objStream : getObjectsByType( "ObjStm" ) )
+        for( COSObject objStream : getObjectsByType( COSName.OBJ_STM ) )
         {
             COSStream stream = (COSStream)objStream.getObject();
             PDFObjectStreamParser parser =
@@ -585,14 +584,13 @@ public class COSDocument extends COSBase
     }
 
     /**
-     * Used to populate the XRef HashMap. Will add an Xreftable entry
-     * that maps ObjectKeys to byte offsets in the file.
-     * @param objKey The objkey, with id and gen numbers
-     * @param offset The byte offset in this file
+     * Populates the XRef HashMap with the given values.
+     * Each entry maps an ObjectKey to its byte offset in the file.
+     * @param xrefTable  xref table entries to be added
      */
-    public void setXRef(COSObjectKey objKey, int offset)
+    public void addXRefTable( Map<COSObjectKey, Integer> xrefTable )
     {
-        xrefTable.put(objKey, offset);
+        this.xrefTable.putAll( xrefTable );
     }
 
     /**
@@ -606,27 +604,6 @@ public class COSDocument extends COSBase
     }
 
     /**
-     * This method will search the list of objects for types of XRef and
-     * uses the parsed data to populate the trailer information as well as
-     * the xref Map.
-     *
-     * @throws IOException if there is an error parsing the stream
-     */
-    public void parseXrefStreams() throws IOException
-    {
-        COSDictionary trailerDict = new COSDictionary();
-        for( COSObject xrefStream : getObjectsByType( "XRef" ) )
-        {
-            COSStream stream = (COSStream)xrefStream.getObject();
-            trailerDict.addAll(stream);
-            PDFXrefStreamParser parser =
-                new PDFXrefStreamParser(stream, this, forceParsing);
-            parser.parse();
-        }
-        setTrailer( trailerDict );
-    }
-    
-    /**
      * This method set the startxref value of the document. This will only 
      * be needed for incremental updates.
      * 

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java?rev=1138995&r1=1138994&r2=1138995&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java Thu Jun 23 17:07:30 2011
@@ -769,6 +769,11 @@ public final class COSName extends COSBa
      */
     public static final COSName OBJ = new COSName("Obj");
 
+    /**
+     * A common COSName value.
+     */
+    public static final COSName OBJ_STM = new COSName( "ObjStm" );
+
     /** the COSName for the content group tag. */
     public static final COSName OC = new COSName("OC");
     /** the COSName for an optional content group. */
@@ -1128,6 +1133,10 @@ public final class COSName extends COSBa
     /** "XObject" */
     public static final COSName XOBJECT = new COSName( "XObject" );
     /**
+     * A common COSName value.
+     */
+    public static final COSName XREF = new COSName( "XRef" );
+    /**
      * The prefix to a PDF name.
      */
     public static final byte[] NAME_PREFIX = new byte[] { 47  }; // The / character

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1138995&r1=1138994&r2=1138995&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Thu Jun 23 17:07:30 2011
@@ -30,7 +30,9 @@ import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSDocument;
 import org.apache.pdfbox.cos.COSInteger;
+import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.exceptions.WrappedIOException;
 import org.apache.pdfbox.io.RandomAccess;
 import org.apache.pdfbox.pdmodel.PDDocument;
@@ -59,7 +61,11 @@ public class PDFParser extends BaseParse
      * A list of duplicate objects found when Parsing the PDF
      * File. 
      */
-    private List conflictList = new ArrayList();
+    private List<ConflictObj> conflictList = new ArrayList<ConflictObj>();
+    
+    /** Collects all Xref/trailer objects and resolves them into a single
+     *  object using the startxref reference */
+    private XrefTrailerResolver xrefTrailerResolver = new XrefTrailerResolver();
    
     /**
      * Temp file directory.
@@ -167,59 +173,55 @@ public class PDFParser extends BaseParse
             skipToNextObj();
 
             boolean wasLastParsedObjectEOF = false;
-            try
+            while(true)
             {
-                while(true)
+                if(pdfSource.isEOF())
+                {
+                    break;
+                }
+                try
                 {
-                    if(pdfSource.isEOF())
+                    wasLastParsedObjectEOF = parseObject();
+                }
+                catch(IOException e)
+                {
+                    /*
+                     * PDF files may have random data after the EOF marker. Ignore errors if
+                     * last object processed is EOF. 
+                     */
+                    if( wasLastParsedObjectEOF ) 
                     {
                         break;
                     }
-                    try
+                    if(isContinueOnError(e))
                     {
-                        wasLastParsedObjectEOF = parseObject();
+                        /*
+                         * Warning is sent to the PDFBox.log and to the Console that
+                         * we skipped over an object
+                         */
+                        log.warn("Parsing Error, Skipping Object", e);
+                        skipToNextObj();
                     }
-                    catch(IOException e)
-                    {
-                        if(isContinueOnError(e))
-                        {
-                            /*
-                             * Warning is sent to the PDFBox.log and to the Console that
-                             * we skipped over an object
-                             */
-                            log.warn("Parsing Error, Skipping Object", e);
-                            skipToNextObj();
-                        }
-                        else
-                        { 
-                            throw e;
-                        }
+                    else
+                    { 
+                        throw e;
                     }
-                    skipSpaces();
-                }
-                //Test if we saw a trailer section. If not, look for an XRef Stream (Cross-Reference Stream) 
-                //to populate the trailer and xref information. For PDF 1.5 and above 
-                if( document.getTrailer() == null )
-                {
-                    document.parseXrefStreams();
                 }
-                if( !document.isEncrypted() )
-                {
-                    document.dereferenceObjectStreams();
-                }
-                ConflictObj.resolveConflicts(document, conflictList);     
+                skipSpaces();
             }
-            catch( IOException e )
+            
+            // set xref to start with 
+            xrefTrailerResolver.setStartxref( document.getStartXref() );
+            
+            // get resolved xref table + trailer
+            document.setTrailer( xrefTrailerResolver.getTrailer() );
+            document.addXRefTable( xrefTrailerResolver.getXrefTable() );
+            
+            if( !document.isEncrypted() )
             {
-                /*
-                 * PDF files may have random data after the EOF marker. Ignore errors if
-                 * last object processed is EOF. 
-                 */
-                if( !wasLastParsedObjectEOF )
-                {
-                    throw e;
-                }
+                document.dereferenceObjectStreams();
             }
+            ConflictObj.resolveConflicts(document, conflictList);     
         }
         catch( Throwable t )
         {
@@ -447,7 +449,7 @@ public class PDFParser extends BaseParse
         //xref table. Note: The contents of the Xref table are currently ignored
         else if( peekedChar == 'x') 
         {
-            parseXrefTable();
+            parseXrefTable( currentObjByteOffset );
         }
         // Note: startxref can occur in either a trailer section or by itself 
         else if (peekedChar == 't' || peekedChar == 's') 
@@ -548,6 +550,15 @@ public class PDFParser extends BaseParse
                 if( pb instanceof COSDictionary )
                 {
                     pb = parseCOSStream( (COSDictionary)pb, getDocument().getScratchFile() );
+                    
+                    // test for XRef type
+                    final COSStream strmObj = (COSStream) pb;
+                    final COSName objectType = (COSName)strmObj.getItem( COSName.TYPE );
+                    if( objectType != null && objectType.equals( COSName.XREF ) )
+                    {
+                        // XRef stream
+                    	parseXrefStream( strmObj, currentObjByteOffset );
+                    }
                 }
                 else
                 {
@@ -657,11 +668,11 @@ public class PDFParser extends BaseParse
     /**
      * This will parse the xref table from the stream and add it to the state
      * The XrefTable contents are ignored.
-     *            
+     * @param startByteOffset the offset to start at           
      * @return false on parsing error 
      * @throws IOException If an IO error occurs.
      */
-    private boolean parseXrefTable() throws IOException
+    private boolean parseXrefTable( int startByteOffset ) throws IOException
     {
         if(pdfSource.peek() != 'x')
         {
@@ -672,6 +683,10 @@ public class PDFParser extends BaseParse
         {
             return false;
         }
+        
+        // signal start of new XRef
+        xrefTrailerResolver.nextXrefObj( startByteOffset );
+        
         /*
          * Xref tables can have multiple sections. 
          * Each starts with a starting object id and a count.
@@ -708,7 +723,7 @@ public class PDFParser extends BaseParse
                         int currOffset = Integer.parseInt(splitString[0]);
                         int currGenID = Integer.parseInt(splitString[1]);
                         COSObjectKey objKey = new COSObjectKey(currObjID, currGenID);
-                        document.setXRef(objKey, currOffset);
+                        xrefTrailerResolver.setXRef(objKey, currOffset);
                     }
                     catch(NumberFormatException e)
                     {
@@ -771,20 +786,29 @@ public class PDFParser extends BaseParse
         skipSpaces();
 
         COSDictionary parsedTrailer = parseCOSDictionary();
-        COSDictionary docTrailer = document.getTrailer();
-        if( docTrailer == null )
-        {
-            document.setTrailer( parsedTrailer );
-        }
-        else
-        {
-            docTrailer.addAll( parsedTrailer );
-        }
+        xrefTrailerResolver.setTrailer( parsedTrailer );
+        
         skipSpaces();
         return true;
     }
     
     /**
+     * Fills the XrefTrailerResolver with the data of the given stream.
+     * The stream must be of type XRef.
+     * @param stream the stream to be read
+     * @param objByteOffset the offset to start at
+     * @throws IOException if there is an error parsing the stream
+     */
+    public void parseXrefStream( COSStream stream, int objByteOffset ) throws IOException
+    {
+        xrefTrailerResolver.nextXrefObj( objByteOffset );
+    	xrefTrailerResolver.setTrailer( stream );
+    	PDFXrefStreamParser parser =
+            new PDFXrefStreamParser( stream, document, forceParsing, xrefTrailerResolver );
+        parser.parse();
+    }
+    
+    /**
      * Used to resolve conflicts when a PDF Document has multiple objects with
      * the same id number. Ideally, we could use the Xref table when parsing
      * the document to be able to determine which of the objects with the same ID
@@ -820,12 +844,12 @@ public class PDFParser extends BaseParse
          * table. 
          * @throws IOException
          */
-        private static void resolveConflicts(COSDocument document, List conflictList) throws IOException
+        private static void resolveConflicts(COSDocument document, List<ConflictObj> conflictList) throws IOException
         {
-            Iterator conflicts = conflictList.iterator();
+            Iterator<ConflictObj> conflicts = conflictList.iterator();
             while(conflicts.hasNext())
             {
-                ConflictObj o = (ConflictObj)conflicts.next();
+                ConflictObj o = conflicts.next();
                 Integer offset = new Integer(o.offset);
                 if(document.getXrefTable().containsValue(offset))
                 {

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java?rev=1138995&r1=1138994&r2=1138995&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java Thu Jun 23 17:07:30 2011
@@ -22,6 +22,7 @@ import java.util.ArrayList;
 import java.util.Iterator;
 
 import org.apache.pdfbox.cos.COSArray;
+import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSDocument;
 import org.apache.pdfbox.cos.COSInteger;
 import org.apache.pdfbox.cos.COSName;
@@ -38,36 +39,28 @@ import org.apache.pdfbox.persistence.uti
 public class PDFXrefStreamParser extends BaseParser 
 {
     private COSStream stream;
+    private XrefTrailerResolver xrefTrailerResolver;
 
     /**
      * Constructor.
      *
-     * @since Apache PDFBox 1.3.0
+     * @since 1.3.0
      * @param strm The stream to parse.
      * @param doc The document for the current parsing.
      * @param forceParcing flag to skip malformed or otherwise unparseable
      *                     input where possible
+	 * @param xrefTrailerResolver resolver to read the xref/trailer information
+	 * 
      * @throws IOException If there is an error initializing the stream.
      */
     public PDFXrefStreamParser(
-            COSStream strm, COSDocument doc, boolean forceParsing)
+            COSStream strm, COSDocument doc, boolean forceParsing,
+            XrefTrailerResolver xrefTrailerResolver )
             throws IOException {
         super(strm.getUnfilteredStream(), forceParsing);
         setDocument(doc);
         stream = strm;
-    }
-
-    /**
-     * Constructor.
-     *
-     * @param strm The stream to parse.
-     * @param doc The document for the current parsing.
-     *
-     * @throws IOException If there is an error initializing the stream.
-     */
-    public PDFXrefStreamParser(COSStream strm, COSDocument doc)
-            throws IOException {
-        this(strm, doc, false);
+        this.xrefTrailerResolver = xrefTrailerResolver;
     }
 
     /**
@@ -90,12 +83,12 @@ public class PDFXrefStreamParser extends
                 indexArray.add(stream.getDictionaryObject(COSName.SIZE));
             }
             
-            ArrayList objNums = new ArrayList();
+            ArrayList<Integer> objNums = new ArrayList<Integer>();
             
             /*
              * Populates objNums with all object numbers available
              */
-            Iterator indexIter = indexArray.iterator();
+            Iterator<COSBase> indexIter = indexArray.iterator();
             while(indexIter.hasNext())
             {
                 int objID = ((COSInteger)indexIter.next()).intValue();
@@ -105,7 +98,7 @@ public class PDFXrefStreamParser extends
                     objNums.add(new Integer(objID + i));
                 }
             }
-            Iterator objIter = objNums.iterator();
+            Iterator<Integer> objIter = objNums.iterator();
             /*
              * Calculating the size of the line in bytes
              */
@@ -152,7 +145,7 @@ public class PDFXrefStreamParser extends
                             genNum += (currLine[i + w0 + w1] & 0x00ff) << ((w2 - i - 1) * 8);
                         }
                         COSObjectKey objKey = new COSObjectKey(objID.intValue(), genNum);
-                        document.setXRef(objKey, offset);
+                        xrefTrailerResolver.setXRef(objKey, offset);
                         break;
                     case 2:
                         /*

Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java?rev=1138995&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java Thu Jun 23 17:07:30 2011
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdfparser;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.persistence.util.COSObjectKey;
+
+/** 
+ * This class collects all XRef/trailer objects and, after all objects are
+ * read, creates the correct xref/trailer information using startxref
+ * and 'Prev' information (unused XRef/trailer objects are discarded).
+ *
+ * In case of a missing startxref or a wrong startxref pointer, all
+ * XRef/trailer objects are used to create the xref table / trailer dictionary
+ * in the order they occur.
+ *  
+ * For each new xref object/XRef stream, method {@link #nextXrefObj(int)}
+ * must be called with its start byte position. All following calls to
+ * {@link #setXRef(COSObjectKey, int)} or {@link #setTrailer(COSDictionary)}
+ * will add the data for this byte position.
+ *  
+ * After all objects are parsed the startxref position must be provided
+ * using {@link #setStartxref(int)}. This is used to build the chain of
+ * active xref/trailer objects used for creating document trailer and xref table.
+ *  
+ * @author Timo Böhme (timo.boehme at ontochem.com)
+ */
+public class XrefTrailerResolver
+{
+
+    /**
+     * A class which represents an xref/trailer object
+     * 
+     */
+  	class XrefTrailerObj
+  	{
+  	    private COSDictionary trailer = null;
+  	    private final Map<COSObjectKey, Integer> xrefTable = new HashMap<COSObjectKey, Integer>();
+  	}
+  	
+  	private final Map<Integer, XrefTrailerObj> bytePosToXrefMap = new HashMap<Integer, XrefTrailerObj>();
+  	private XrefTrailerObj curXrefTrailerObj   = null;
+  	private XrefTrailerObj resolvedXrefTrailer = null;
+  	
+    /** Log instance. */
+    private static final Log log = LogFactory.getLog( XrefTrailerResolver.class );
+    
+  	/** 
+  	 * Signals that a new XRef object (table or stream) starts. 
+  	 * @param startBytePos the offset to start at
+  	 * 
+  	 */
+  	public void nextXrefObj( final int startBytePos )
+  	{
+  	    bytePosToXrefMap.put( startBytePos, curXrefTrailerObj = new XrefTrailerObj() ); 
+  	}
+  	
+    /**
+     * Populate XRef HashMap of current XRef object.
+     * Will add an Xreftable entry that maps ObjectKeys to byte offsets in the file.
+     * @param objKey The objkey, with id and gen numbers
+     * @param offset The byte offset in this file
+     */
+    public void setXRef( COSObjectKey objKey, int offset )
+    {
+        if ( curXrefTrailerObj == null ) 
+    	{
+            // should not happen...
+      	  	log.warn( "Cannot add XRef entry for '" + objKey.getNumber() + "' because XRef start was not signalled." );
+      	  	return;
+    	}
+        curXrefTrailerObj.xrefTable.put( objKey, offset );
+    }
+    
+    /**
+     * Adds trailer information for current XRef object.
+     *
+     * @param trailer the current document trailer dictionary
+     */
+    public void setTrailer( COSDictionary trailer )
+    {
+        if ( curXrefTrailerObj == null ) 
+        {
+            // should not happen...
+      	  	log.warn( "Cannot add trailer because XRef start was not signalled." );
+      	  	return;
+        }
+        curXrefTrailerObj.trailer = trailer;
+    }
+    
+    /** 
+     * Sets the byte position of the first XRef
+     * (has to be called after the very last startxref was read).
+     * This is used to resolve the chain of active XRef/trailer objects.
+     * 
+     * In case startxref position is not found we output a
+     * warning and use all XRef/trailer objects combined
+     * in byte position order.
+     * Thus for incomplete PDF documents with missing
+     * startxref one could call this method with parameter value -1.
+     */
+    public void setStartxref( int startxrefBytePos )
+    {
+        if ( resolvedXrefTrailer != null ) 
+    	{
+            log.warn( "Method must be called only ones with last startxref value." );
+    	  	return;
+    	}
+    	  
+    	resolvedXrefTrailer = new XrefTrailerObj();
+    	resolvedXrefTrailer.trailer = new COSDictionary();
+    	  
+    	XrefTrailerObj curObj = bytePosToXrefMap.get( startxrefBytePos );
+  	  	List<Integer>  xrefSeqBytePos = new ArrayList<Integer>();
+    	  
+  	  	if ( curObj == null )
+  	  	{
+  	  	    // no XRef at given position
+      	  	log.warn( "Did not found XRef object at specified startxref position " + startxrefBytePos );
+      	  	
+      	  	// use all objects in byte position order (last entries overwrite previous ones)
+      	  	xrefSeqBytePos.addAll( bytePosToXrefMap.keySet() );
+      	  	Collections.sort( xrefSeqBytePos );
+  	  	}
+  	  	else
+  	  	{
+  	  	    // found starting Xref object
+  	  	    // add this and follow chain defined by 'Prev' keys
+  	  	    xrefSeqBytePos.add( startxrefBytePos );
+  	  	    while ( curObj.trailer != null )
+  	  	    {
+  	  	        int prevBytePos = curObj.trailer.getInt( COSName.PREV, -1 );
+  	  	        if ( prevBytePos == -1 )
+  	  	        {
+  	  	            break;
+  	  	        }
+  	  	        
+  	  	        curObj = bytePosToXrefMap.get( prevBytePos );
+  	  	        if ( curObj == null ) 
+  	  	        {
+  	  	            log.warn( "Did not found XRef object pointed to by 'Prev' key at position " + prevBytePos );
+  	  	            break;
+  	  	        }
+  	  	        xrefSeqBytePos.add( prevBytePos );
+  	  	        
+  	  	        // sanity check to prevent infinite loops
+  	  	        if ( xrefSeqBytePos.size() >= bytePosToXrefMap.size() )
+  	  	        {
+  	  	            break;
+  	  	        }
+  	  	    }  
+  	  	    // have to reverse order so that later XRefs will overwrite previous ones
+  	  	    Collections.reverse( xrefSeqBytePos );
+  	  	}
+    	  
+    	  // merge used and sorted XRef/trailer
+  	  	for ( Integer bPos : xrefSeqBytePos ) 
+  	  	{
+  	  	    curObj = bytePosToXrefMap.get( bPos );
+  	  	    if ( curObj.trailer != null )
+  	  	    {
+  	  	        resolvedXrefTrailer.trailer.addAll( curObj.trailer );
+  	  	    }
+  	  	    resolvedXrefTrailer.xrefTable.putAll( curObj.xrefTable );
+  	  	}
+    	  
+    }
+    
+    /** 
+     * Gets the resolved trailer. Might return <code>null</code> in case
+     * {@link #setStartxref(int)} was not called before. 
+     * 
+     */
+    public COSDictionary getTrailer()
+    {
+        return ( resolvedXrefTrailer == null ) ? null : resolvedXrefTrailer.trailer;
+    }
+    
+    /** 
+     * Gets the resolved xref table. Might return <code>null</code> in case
+     *  {@link #setStartxref(int)} was not called before. 
+     *
+     */
+    public Map<COSObjectKey, Integer> getXrefTable()
+    {
+        return ( resolvedXrefTrailer == null ) ? null : resolvedXrefTrailer.xrefTable;
+    }
+}