You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2017/10/01 10:52:21 UTC
svn commit: r1810261 -
/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Author: lehmi
Date: Sun Oct 1 10:52:20 2017
New Revision: 1810261
URL: http://svn.apache.org/viewvc?rev=1810261&view=rev
Log:
PDFBOX-3936: improve end-of-object detection if the keyword "endobj" is cut off in the middle
Modified:
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1810261&r1=1810260&r2=1810261&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sun Oct 1 10:52:20 2017
@@ -1414,8 +1414,8 @@ public class COSParser extends BaseParse
int lastGenID = Integer.MIN_VALUE;
long lastObjOffset = Long.MIN_VALUE;
char[] objString = " obj".toCharArray();
- char[] endobjString = "endobj".toCharArray();
- boolean endobjFound = false;
+ char[] endobjString = "endo".toCharArray();
+ boolean endOfObjFound = false;
do
{
source.seek(currentOffset);
@@ -1457,20 +1457,23 @@ public class COSParser extends BaseParse
lastGenID = genID;
lastObjOffset = tempOffset + 1;
currentOffset += objString.length - 1;
- endobjFound = false;
+ endOfObjFound = false;
}
}
}
}
+ // check for "endo" as abbreviation for "endobj", as the pdf may be cut off
+ // in the middle of the keyword, see PDFBOX-3936.
+ // We could possibly implement a more intelligent algorithm if necessary
else if (isString(endobjString))
{
- endobjFound = true;
+ endOfObjFound = true;
currentOffset += endobjString.length - 1;
}
currentOffset++;
}
while (currentOffset < lastEOFMarker && !source.isEOF());
- if ((lastEOFMarker < Long.MAX_VALUE || endobjFound) && lastObjOffset > 0)
+ if ((lastEOFMarker < Long.MAX_VALUE || endOfObjFound) && lastObjOffset > 0)
{
// if the pdf wasn't cut off in the middle or if the last object ends with a "endobj" marker
// the last object id has to be added here so that it can't get lost as there isn't any subsequent