You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2017/11/05 13:23:18 UTC

svn commit: r1814355 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java

Author: lehmi
Date: Sun Nov  5 13:23:18 2017
New Revision: 1814355

URL: http://svn.apache.org/viewvc?rev=1814355&view=rev
Log:
PDFBOX-3956: optimized endobj detection and brute force search for objects

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1814355&r1=1814354&r2=1814355&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sun Nov  5 13:23:18 2017
@@ -1468,15 +1468,17 @@ public class COSParser extends BaseParse
         long lastObjectId = Long.MIN_VALUE;
         int lastGenID = Integer.MIN_VALUE;
         long lastObjOffset = Long.MIN_VALUE;
-        char[] objString = " obj".toCharArray();
-        char[] endobjString = "endo".toCharArray();
+        char[] endobjString = "ndo".toCharArray();
+        char[] endobjRemainingString = "bj".toCharArray();
         boolean endOfObjFound = false;
         do
         {
             source.seek(currentOffset);
-            if (isString(objString))
+            int nextChar = source.read();
+            currentOffset++;
+            if (nextChar == ' ' && isString(OBJ_MARKER))
             {
-                long tempOffset = currentOffset - 1;
+                long tempOffset = currentOffset - 2;
                 source.seek(tempOffset);
                 int genID = source.peek();
                 // is the next char a digit?
@@ -1510,7 +1512,7 @@ public class COSParser extends BaseParse
                             lastObjectId = objectId;
                             lastGenID = genID;
                             lastObjOffset = tempOffset + 1;
-                            currentOffset += objString.length - 1;
+                            currentOffset += OBJ_MARKER.length - 1;
                             endOfObjFound = false;
                         }
                     }
@@ -1519,12 +1521,22 @@ public class COSParser extends BaseParse
             // check for "endo" as abbreviation for "endobj", as the pdf may be cut off
             // in the middle of the keyword, see PDFBOX-3936.
             // We could possibly implement a more intelligent algorithm if necessary
-            else if (isString(endobjString))
+            else if (nextChar == 'e' && isString(endobjString))
             {
-                endOfObjFound = true;
-                currentOffset += endobjString.length - 1;
+                currentOffset += endobjString.length;
+                source.seek(currentOffset);
+                if (source.isEOF())
+                {
+                    endOfObjFound = true;
+                    continue;
+                }
+                if (isString(endobjRemainingString))
+                {
+                    currentOffset += endobjRemainingString.length;
+                    endOfObjFound = true;
+                    continue;
+                }
             }
-            currentOffset++;
         } while (currentOffset < lastEOFMarker && !source.isEOF());
         if ((lastEOFMarker < Long.MAX_VALUE || endOfObjFound) && lastObjOffset > 0)
         {