You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2015/02/23 20:00:52 UTC

svn commit: r1661747 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser: COSParser.java PDFParser.java

Author: lehmi
Date: Mon Feb 23 19:00:52 2015
New Revision: 1661747

URL: http://svn.apache.org/r1661747
Log:
PDFBOX-2527: rebuild trailer instead of brute force search for startxref

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1661747&r1=1661746&r2=1661747&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Mon Feb 23 19:00:52 2015
@@ -386,28 +386,20 @@ public class COSParser extends BaseParse
         }
         // find last startxref preceding EOF marker
         bufOff = lastIndexOf(STARTXREF, buf, bufOff);
-        long startXRefOffset = -1;
+        long startXRefOffset = skipBytes + bufOff;
 
         if (bufOff < 0)
         {
             if (isLenient) 
             {
-            	// brute force search for startxref
-                startXRefOffset = bfSearchForStartXref();
-                if (startXRefOffset > -1)
-                {
-                    LOG.debug("Fixed offset for startxref " + startXRefOffset);
-                }
+            	LOG.debug("Can't find offset for startxref");
+            	return -1;
             }
             else
             {
                 throw new IOException("Missing 'startxref' marker.");
             }
         }
-        else
-        {
-        	startXRefOffset = skipBytes + bufOff;
-        }
         return startXRefOffset;
     }
     
@@ -1518,6 +1510,67 @@ public class COSParser extends BaseParse
             pdfSource.seek(originOffset);
         }
     }
+    
+    /**
+     * Rebuild the trailer dictionary if startxref can't be found.
+     *  
+     * @return the rebuilt trailer dictionary
+     * 
+     * @throws IOException if something went wrong
+     */
+    protected final COSDictionary rebuildTrailer() throws IOException
+    {
+    	COSDictionary trailer = null;
+    	bfSearchForObjects();
+    	if (bfSearchCOSObjectKeyOffsets != null)
+    	{
+            xrefTrailerResolver.nextXrefObj( 0, XRefType.TABLE );
+            for (COSObjectKey objectKey : bfSearchCOSObjectKeyOffsets.keySet())
+            {
+                xrefTrailerResolver.setXRef(objectKey, bfSearchCOSObjectKeyOffsets.get(objectKey));
+            }
+            xrefTrailerResolver.setStartxref(0);
+    		trailer = xrefTrailerResolver.getTrailer();
+    		getDocument().setTrailer(trailer);
+    		for(COSObjectKey key : bfSearchCOSObjectKeyOffsets.keySet())
+    		{
+    			Long offset = bfSearchCOSObjectKeyOffsets.get(key);
+    			pdfSource.seek(offset);
+    	        readObjectNumber();
+    	        readGenerationNumber();
+    	        readExpectedString(OBJ_MARKER, true);
+    			COSDictionary dictionary = null;
+    			try
+    			{
+    				dictionary = parseCOSDictionary();
+	    			if (dictionary != null)
+	    			{
+	    				if (COSName.CATALOG.equals(dictionary.getCOSName(COSName.TYPE)))
+	    				{
+	    					trailer.setItem(COSName.ROOT, document.getObjectFromPool(key));
+	    				}
+	    				else if (dictionary.containsKey(COSName.TITLE)
+	    						|| dictionary.containsKey(COSName.AUTHOR)
+	    						|| dictionary.containsKey(COSName.SUBJECT)
+	    						|| dictionary.containsKey(COSName.KEYWORDS)
+	    						|| dictionary.containsKey(COSName.CREATOR)
+	    						|| dictionary.containsKey(COSName.PRODUCER)
+	    						|| dictionary.containsKey(COSName.CREATION_DATE))
+	    				{
+	    					trailer.setItem(COSName.INFO, document.getObjectFromPool(key));
+	    				}
+	    				// TODO find/assign Encrypt entry
+	    			}
+    			}
+    			catch(IOException exception)
+    			{
+    				LOG.error("Skipped invalid dictionary for object "+key);
+    			}
+    		}
+    	}
+    	return trailer;
+    }
+    
     /**
      * This will parse the startxref section from the stream.
      * The startxref value is ignored.
@@ -1539,31 +1592,6 @@ public class COSParser extends BaseParse
     }
 
     /**
-     * Brute force search for startxref.
-     * 
-     * @return the offset of startxref  
-     * 
-     * @throws IOException if something went wrong
-     */
-    private long bfSearchForStartXref() throws IOException
-    {
-    	long newOffset = -1;
-    	long originOffset = pdfSource.getOffset();
-        pdfSource.seek(MINIMUM_SEARCH_OFFSET);
-        while (!pdfSource.isEOF())
-        {
-            if (isString(STARTXREF))
-            {
-            	newOffset = pdfSource.getOffset(); 
-            	break;
-            }
-            pdfSource.read();
-        }
-        pdfSource.seek(originOffset);
-        return newOffset;
-    }
-
-    /**
      * This will parse the trailer from the stream and add it to the state.
      *
      * @return false on parsing error

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1661747&r1=1661746&r2=1661747&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Mon Feb 23 19:00:52 2015
@@ -315,7 +315,7 @@ public class PDFParser extends COSParser
         }
         else if (isLenient())
         {
-            trailer = searchXref(0);
+        	trailer = rebuildTrailer();
         }
         // prepare decryption if necessary
         prepareDecryption();