You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2014/01/24 19:57:48 UTC

svn commit: r1561111 - in /pdfbox/trunk: pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ preflight/src/main/java/org/apache/pdfbox/preflight/parser/

Author: lehmi
Date: Fri Jan 24 18:57:47 2014
New Revision: 1561111

URL: http://svn.apache.org/r1561111
Log:
PDFBOX-1822: determine the correct type of the trailer as proposed by Timo Boehme

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java
    pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java?rev=1561111&r1=1561110&r2=1561111&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java Fri Jan 24 18:57:47 2014
@@ -56,6 +56,7 @@ import org.apache.pdfbox.io.PushBackInpu
 import org.apache.pdfbox.io.RandomAccess;
 import org.apache.pdfbox.io.RandomAccessBuffer;
 import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
+import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
@@ -353,7 +354,6 @@ public class NonSequentialPDFParser exte
             // -- parse xref
             if (pdfSource.peek() == X)
             {
-                document.setIsXRefStream(false);
                 // xref table and trailer
                 // use existing parser to parse xref table
                 parseXrefTable(prev);
@@ -377,7 +377,6 @@ public class NonSequentialPDFParser exte
             }
             else
             {
-                document.setIsXRefStream(true);
                 // parse xref stream
                 prev = parseXrefObjStream(prev);
                 if (isLenient && prev > -1)
@@ -398,7 +397,7 @@ public class NonSequentialPDFParser exte
         xrefTrailerResolver.setStartxref(startXrefOffset);
         COSDictionary trailer = xrefTrailerResolver.getTrailer();
         document.setTrailer(trailer);
-
+        document.setIsXRefStream(XRefType.STREAM == xrefTrailerResolver.getXrefType());
         // check the offsets of all referenced objects
         if (isLenient) {
             checkXrefOffsets();

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1561111&r1=1561110&r2=1561111&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Fri Jan 24 18:57:47 2014
@@ -36,6 +36,7 @@ import org.apache.pdfbox.cos.COSObject;
 import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.exceptions.WrappedIOException;
 import org.apache.pdfbox.io.RandomAccess;
+import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.fdf.FDFDocument;
 import org.apache.pdfbox.persistence.util.COSObjectKey;
@@ -237,6 +238,7 @@ public class PDFParser extends BaseParse
 
             // get resolved xref table + trailer
             document.setTrailer( xrefTrailerResolver.getTrailer() );
+            document.setIsXRefStream(XRefType.STREAM == xrefTrailerResolver.getXrefType());
             document.addXRefTable( xrefTrailerResolver.getXrefTable() );
 
             if( !document.isEncrypted() )
@@ -738,7 +740,7 @@ public class PDFParser extends BaseParse
         }
 
         // signal start of new XRef
-        xrefTrailerResolver.nextXrefObj( startByteOffset );
+        xrefTrailerResolver.nextXrefObj( startByteOffset, XRefType.TABLE );
 
         /*
          * Xref tables can have multiple sections.
@@ -880,7 +882,7 @@ public class PDFParser extends BaseParse
      */
     public void parseXrefStream( COSStream stream, long objByteOffset ) throws IOException
     {
-        xrefTrailerResolver.nextXrefObj( objByteOffset );
+        xrefTrailerResolver.nextXrefObj( objByteOffset, XRefType.STREAM );
         xrefTrailerResolver.setTrailer( stream );
         PDFXrefStreamParser parser =
             new PDFXrefStreamParser( stream, document, forceParsing, xrefTrailerResolver );

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java?rev=1561111&r1=1561110&r2=1561111&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java Fri Jan 24 18:57:47 2014
@@ -31,7 +31,6 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.persistence.util.COSObjectKey;
 
 /**
@@ -63,6 +62,9 @@ public class XrefTrailerResolver
     private class XrefTrailerObj
     {
         protected COSDictionary trailer = null;
+
+        private XRefType xrefType;
+
         private final Map<COSObjectKey, Long> xrefTable = new HashMap<COSObjectKey, Long>();
         
         /**
@@ -77,18 +79,33 @@ public class XrefTrailerResolver
          */
         public void clearResources()
         {
-        	if (trailer != null)
-        	{
-        		trailer.clear();
-        		trailer = null;
-        	}
-        	if (xrefTable != null)
-        	{
-        		xrefTable.clear();
-        	}
+            if (trailer != null)
+            {
+                trailer.clear();
+                trailer = null;
+            }
+            if (xrefTable != null)
+            {
+                xrefTable.clear();
+            }
         }
     }
 
+    /** 
+     * The XRefType of a trailer.
+     */
+    public enum XRefType
+    {
+        /**
+         * XRef table type.
+         */
+        TABLE, 
+        /**
+         * XRef stream type.
+         */
+        STREAM;
+    }
+    
     private final Map<Long, XrefTrailerObj> bytePosToXrefMap = new HashMap<Long, XrefTrailerObj>();
     private XrefTrailerObj curXrefTrailerObj   = null;
     private XrefTrailerObj resolvedXrefTrailer = null;
@@ -96,33 +113,60 @@ public class XrefTrailerResolver
     /** Log instance. */
     private static final Log LOG = LogFactory.getLog( XrefTrailerResolver.class );
 
-    public final COSDictionary getFirstTrailer() {
-    	if (bytePosToXrefMap.isEmpty()) return null;
-    	
-    	Set<Long> offsets = bytePosToXrefMap.keySet();
-    	SortedSet<Long> sortedOffset = new TreeSet<Long>(offsets);
-    	return bytePosToXrefMap.get(sortedOffset.first()).trailer;
+    /**
+     * Returns the first trailer if at least one exists.
+     * 
+     * @return the first trailer or null
+     */
+    public final COSDictionary getFirstTrailer() 
+    {
+        if (bytePosToXrefMap.isEmpty())
+        {
+            return null;
+        }
+        Set<Long> offsets = bytePosToXrefMap.keySet();
+        SortedSet<Long> sortedOffset = new TreeSet<Long>(offsets);
+        return bytePosToXrefMap.get(sortedOffset.first()).trailer;
     }
     
-    public final COSDictionary getLastTrailer() {
-    	if (bytePosToXrefMap.isEmpty()) return null;
-    	
-    	Set<Long> offsets = bytePosToXrefMap.keySet();
-    	SortedSet<Long> sortedOffset = new TreeSet<Long>(offsets);
-    	return bytePosToXrefMap.get(sortedOffset.last()).trailer;
+    /**
+     * Returns the last trailer if at least one exists.
+     * 
+     * @return the last trailer or null
+     */
+    public final COSDictionary getLastTrailer() 
+    {
+        if (bytePosToXrefMap.isEmpty()) 
+        {
+            return null;
+        }
+        Set<Long> offsets = bytePosToXrefMap.keySet();
+        SortedSet<Long> sortedOffset = new TreeSet<Long>(offsets);
+        return bytePosToXrefMap.get(sortedOffset.last()).trailer;
     }
     
     /**
      * Signals that a new XRef object (table or stream) starts.
      * @param startBytePos the offset to start at
-     *
+     * @param type the type of the Xref object
      */
-    public void nextXrefObj( final long startBytePos )
+    public void nextXrefObj( final long startBytePos, XRefType type )
     {
         bytePosToXrefMap.put( startBytePos, curXrefTrailerObj = new XrefTrailerObj() );
+        curXrefTrailerObj.xrefType = type;
     }
 
     /**
+     * Returns the XRefType of the resolved trailer.
+     * 
+     * @return the XRefType or null.
+     */
+    public XRefType getXrefType()
+    { 
+        return ( resolvedXrefTrailer == null ) ? null : resolvedXrefTrailer.xrefType; 
+    } 
+    
+    /**
      * Populate XRef HashMap of current XRef object.
      * Will add an Xreftable entry that maps ObjectKeys to byte offsets in the file.
      * @param objKey The objkey, with id and gen numbers
@@ -306,20 +350,20 @@ public class XrefTrailerResolver
      */
     public void clearResources()
     {
-    	if (curXrefTrailerObj != null)
-    	{
-    		curXrefTrailerObj.clearResources();
-    		curXrefTrailerObj = null;
-    	}
-    	if (resolvedXrefTrailer != null)
-    	{
-    		resolvedXrefTrailer.clearResources();
-    		resolvedXrefTrailer = null;
-    	}
-    	if (bytePosToXrefMap != null)
-    	{
-    		bytePosToXrefMap.clear();
-    	}
+        if (curXrefTrailerObj != null)
+        {
+            curXrefTrailerObj.clearResources();
+            curXrefTrailerObj = null;
+        }
+        if (resolvedXrefTrailer != null)
+        {
+            resolvedXrefTrailer.clearResources();
+            resolvedXrefTrailer = null;
+        }
+        if (bytePosToXrefMap != null)
+        {
+            bytePosToXrefMap.clear();
+        }
     }
 
 }

Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java?rev=1561111&r1=1561110&r2=1561111&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java Fri Jan 24 18:57:47 2014
@@ -76,6 +76,7 @@ import org.apache.pdfbox.pdfparser.BaseP
 import org.apache.pdfbox.pdfparser.NonSequentialPDFParser;
 import org.apache.pdfbox.pdfparser.PDFObjectStreamParser;
 import org.apache.pdfbox.pdfparser.PDFParser;
+import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.persistence.util.COSObjectKey;
 import org.apache.pdfbox.preflight.Format;
@@ -346,7 +347,7 @@ public class PreflightParser extends Non
         }
 
         // signal start of new XRef
-        xrefTrailerResolver.nextXrefObj(startByteOffset);
+        xrefTrailerResolver.nextXrefObj(startByteOffset,XRefType.TABLE);
 
         /*
          * Xref tables can have multiple sections. Each starts with a starting object id and a count.