You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2015/02/22 16:12:42 UTC

svn commit: r1661485 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser: COSParser.java PDFParser.java

Author: lehmi
Date: Sun Feb 22 15:12:41 2015
New Revision: 1661485

URL: http://svn.apache.org/r1661485
Log:
PDFBOX-2527: added a brute force search for startxref

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1661485&r1=1661484&r2=1661485&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sun Feb 22 15:12:41 2015
@@ -99,20 +99,10 @@ public class COSParser extends BaseParse
      */
     protected static final char[] EOF_MARKER = new char[] { '%', '%', 'E', 'O', 'F' };
     /**
-     * StartXRef-marker.
-     */
-    protected static final char[] STARTXREF_MARKER = new char[] { 's', 't', 'a', 'r', 't', 'x',
-            'r', 'e', 'f' };
-    /**
      * obj-marker.
      */
     protected static final char[] OBJ_MARKER = new char[] { 'o', 'b', 'j' };
 
-    /**
-     * trailer-marker.
-     */
-    private static final char[] TRAILER_MARKER = new char[] { 't', 'r', 'a', 'i', 'l', 'e', 'r' };
-    
     private long trailerOffset;
     
     /**
@@ -224,7 +214,7 @@ public class COSParser extends BaseParse
     protected COSDictionary parseXref(long startXRefOffset) throws IOException
     {
         pdfSource.seek(startXRefOffset);
-        long startXrefOffset = parseStartXref();
+        long startXrefOffset = Math.max(0, parseStartXref());
         // check the startxref offset
         long fixedOffset = checkXRefOffset(startXrefOffset);
         if (fixedOffset > -1)
@@ -271,7 +261,7 @@ public class COSParser extends BaseParse
                 {
                     int streamOffset = trailer.getInt(COSName.XREF_STM);
                     // check the xref stream reference
-                    fixedOffset = checkXRefStreamOffset(streamOffset);
+                    fixedOffset = checkXRefStreamOffset(streamOffset, false);
                     if (fixedOffset > -1 && fixedOffset != streamOffset)
                     {
                         streamOffset = (int)fixedOffset;
@@ -395,25 +385,30 @@ public class COSParser extends BaseParse
             }
         }
         // find last startxref preceding EOF marker
-        bufOff = lastIndexOf(STARTXREF_MARKER, buf, bufOff);
+        bufOff = lastIndexOf(STARTXREF, buf, bufOff);
+        long startXRefOffset = -1;
 
         if (bufOff < 0)
         {
             if (isLenient) 
             {
-                trailerOffset = lastIndexOf(TRAILER_MARKER, buf, buf.length);
-                if (trailerOffset > 0)
+            	// brute force search for startxref
+                startXRefOffset = bfSearchForStartXref();
+                if (startXRefOffset > -1)
                 {
-                    trailerOffset += skipBytes;
+                    LOG.debug("Fixed offset for startxref " + startXRefOffset);
                 }
-                return -1;
             }
             else
             {
                 throw new IOException("Missing 'startxref' marker.");
             }
         }
-        return skipBytes + bufOff;
+        else
+        {
+        	startXRefOffset = skipBytes + bufOff;
+        }
+        return startXRefOffset;
     }
     
     /**
@@ -1076,10 +1071,13 @@ public class COSParser extends BaseParse
         {
             return startXRefOffset;
         }
-        long fixedOffset = checkXRefStreamOffset(startXRefOffset);
-        if (fixedOffset > -1)
+        if (startXRefOffset > 0)
         {
-        	return fixedOffset;
+	        long fixedOffset = checkXRefStreamOffset(startXRefOffset, true);
+	        if (fixedOffset > -1)
+	        {
+	        	return fixedOffset;
+	        }
         }
         // try to find a fixed offset
         return calculateXRefFixedOffset(startXRefOffset, false);
@@ -1092,10 +1090,10 @@ public class COSParser extends BaseParse
      * @return the revised offset
      * @throws IOException
      */
-    private long checkXRefStreamOffset(long startXRefOffset) throws IOException
+    private long checkXRefStreamOffset(long startXRefOffset, boolean checkOnly) throws IOException
     {
         // repair mode isn't available in non-lenient mode
-        if (!isLenient)
+        if (!isLenient || startXRefOffset == 0)
         {
             return startXRefOffset;
         }
@@ -1127,7 +1125,7 @@ public class COSParser extends BaseParse
             }
         }
         // try to find a fixed offset
-        return calculateXRefFixedOffset(startXRefOffset, true);
+        return checkOnly ? -1 : calculateXRefFixedOffset(startXRefOffset, true);
     }
     /**
      * Try to find a fixed offset for the given xref table/stream.
@@ -1541,6 +1539,31 @@ public class COSParser extends BaseParse
     }
 
     /**
+     * Brute force search for the first startxref keyword at or after MINIMUM_SEARCH_OFFSET.
+     * The source position in effect on entry is restored before returning.
+     * @return the offset of startxref, or -1 if the end of the source was reached without a match
+     * 
+     * @throws IOException if something went wrong
+     */
+    private long bfSearchForStartXref() throws IOException
+    {
+    	long newOffset = -1;
+    	long originOffset = pdfSource.getOffset();
+        pdfSource.seek(MINIMUM_SEARCH_OFFSET);
+        while (!pdfSource.isEOF())
+        {
+            if (isString(STARTXREF))
+            {
+            	newOffset = pdfSource.getOffset(); 
+            	break;
+            }
+            pdfSource.read(); // no match at this position: advance and try again
+        }
+        pdfSource.seek(originOffset); // put the source back where the caller left it
+        return newOffset;
+    }
+
+    /**
      * This will parse the trailer from the stream and add it to the state.
      *
      * @return false on parsing error

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1661485&r1=1661484&r2=1661485&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Sun Feb 22 15:12:41 2015
@@ -309,7 +309,7 @@ public class PDFParser extends COSParser
         COSDictionary trailer = null;
         // parse startxref
         long startXRefOffset = getStartxrefOffset();
-        if (startXRefOffset > 0)
+        if (startXRefOffset > -1)
         {
             trailer = parseXref(startXRefOffset);
         }