You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2017/11/05 13:09:09 UTC

svn commit: r1814354 - /pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java

Author: lehmi
Date: Sun Nov  5 13:09:09 2017
New Revision: 1814354

URL: http://svn.apache.org/viewvc?rev=1814354&view=rev
Log:
PDFBOX-3956: optimized endobj detection and brute force search for objects

Modified:
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1814354&r1=1814353&r2=1814354&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sun Nov  5 13:09:09 2017
@@ -1467,15 +1467,17 @@ public class COSParser extends BaseParse
             long lastObjectId = Long.MIN_VALUE;
             int lastGenID = Integer.MIN_VALUE;
             long lastObjOffset = Long.MIN_VALUE;
-            char[] objString = " obj".toCharArray();
-            char[] endobjString = "endo".toCharArray();
+            char[] endobjString = "ndo".toCharArray();
+            char[] endobjRemainingString = "bj".toCharArray();
             boolean endOfObjFound = false;
             do
             {
                 source.seek(currentOffset);
-                if (isString(objString))
+                int nextChar = source.read();
+                currentOffset++;
+                if (nextChar == ' ' && isString(OBJ_MARKER))
                 {
-                    long tempOffset = currentOffset - 1;
+                    long tempOffset = currentOffset - 2;
                     source.seek(tempOffset);
                     int genID = source.peek();
                     // is the next char a digit?
@@ -1510,7 +1512,7 @@ public class COSParser extends BaseParse
                                 lastObjectId = objectId;
                                 lastGenID = genID;
                                 lastObjOffset = tempOffset + 1;
-                                currentOffset += objString.length - 1;
+                                currentOffset += OBJ_MARKER.length - 1;
                                 endOfObjFound = false;
                             }
                         }
@@ -1519,12 +1521,22 @@ public class COSParser extends BaseParse
                 // check for "endo" as abbreviation for "endobj", as the pdf may be cut off
                 // in the middle of the keyword, see PDFBOX-3936.
                 // We could possibly implement a more intelligent algorithm if necessary
-                else if (isString(endobjString))
+                else if (nextChar == 'e' && isString(endobjString))
                 {
-                    endOfObjFound = true;
-                    currentOffset += endobjString.length - 1;
+                    currentOffset += endobjString.length;
+                    source.seek(currentOffset);
+                    if (source.isEOF())
+                    {
+                        endOfObjFound = true;
+                        continue;
+                    }
+                    if (isString(endobjRemainingString))
+                    {
+                        currentOffset += endobjRemainingString.length;
+                        endOfObjFound = true;
+                        continue;
+                    }
                 }
-                currentOffset++;
             }
             while (currentOffset < lastEOFMarker && !source.isEOF());
             if ((lastEOFMarker < Long.MAX_VALUE || endOfObjFound) && lastObjOffset > 0)