You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2017/11/05 13:09:09 UTC
svn commit: r1814354 -
/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Author: lehmi
Date: Sun Nov 5 13:09:09 2017
New Revision: 1814354
URL: http://svn.apache.org/viewvc?rev=1814354&view=rev
Log:
PDFBOX-3956: optimized endobj detection and brute force search for objects
Modified:
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1814354&r1=1814353&r2=1814354&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sun Nov 5 13:09:09 2017
@@ -1467,15 +1467,17 @@ public class COSParser extends BaseParse
long lastObjectId = Long.MIN_VALUE;
int lastGenID = Integer.MIN_VALUE;
long lastObjOffset = Long.MIN_VALUE;
- char[] objString = " obj".toCharArray();
- char[] endobjString = "endo".toCharArray();
+ char[] endobjString = "ndo".toCharArray();
+ char[] endobjRemainingString = "bj".toCharArray();
boolean endOfObjFound = false;
do
{
source.seek(currentOffset);
- if (isString(objString))
+ int nextChar = source.read();
+ currentOffset++;
+ if (nextChar == ' ' && isString(OBJ_MARKER))
{
- long tempOffset = currentOffset - 1;
+ long tempOffset = currentOffset - 2;
source.seek(tempOffset);
int genID = source.peek();
// is the next char a digit?
@@ -1510,7 +1512,7 @@ public class COSParser extends BaseParse
lastObjectId = objectId;
lastGenID = genID;
lastObjOffset = tempOffset + 1;
- currentOffset += objString.length - 1;
+ currentOffset += OBJ_MARKER.length - 1;
endOfObjFound = false;
}
}
@@ -1519,12 +1521,22 @@ public class COSParser extends BaseParse
// check for "endo" as abbreviation for "endobj", as the pdf may be cut off
// in the middle of the keyword, see PDFBOX-3936.
// We could possibly implement a more intelligent algorithm if necessary
- else if (isString(endobjString))
+ else if (nextChar == 'e' && isString(endobjString))
{
- endOfObjFound = true;
- currentOffset += endobjString.length - 1;
+ currentOffset += endobjString.length;
+ source.seek(currentOffset);
+ if (source.isEOF())
+ {
+ endOfObjFound = true;
+ continue;
+ }
+ if (isString(endobjRemainingString))
+ {
+ currentOffset += endobjRemainingString.length;
+ endOfObjFound = true;
+ continue;
+ }
}
- currentOffset++;
}
while (currentOffset < lastEOFMarker && !source.isEOF());
if ((lastEOFMarker < Long.MAX_VALUE || endOfObjFound) && lastObjOffset > 0)