You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2015/02/22 16:12:42 UTC
svn commit: r1661485 - in
/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser:
COSParser.java PDFParser.java
Author: lehmi
Date: Sun Feb 22 15:12:41 2015
New Revision: 1661485
URL: http://svn.apache.org/r1661485
Log:
PDFBOX-2527: added a brute force search for startxref
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1661485&r1=1661484&r2=1661485&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sun Feb 22 15:12:41 2015
@@ -99,20 +99,10 @@ public class COSParser extends BaseParse
*/
protected static final char[] EOF_MARKER = new char[] { '%', '%', 'E', 'O', 'F' };
/**
- * StartXRef-marker.
- */
- protected static final char[] STARTXREF_MARKER = new char[] { 's', 't', 'a', 'r', 't', 'x',
- 'r', 'e', 'f' };
- /**
* obj-marker.
*/
protected static final char[] OBJ_MARKER = new char[] { 'o', 'b', 'j' };
- /**
- * trailer-marker.
- */
- private static final char[] TRAILER_MARKER = new char[] { 't', 'r', 'a', 'i', 'l', 'e', 'r' };
-
private long trailerOffset;
/**
@@ -224,7 +214,7 @@ public class COSParser extends BaseParse
protected COSDictionary parseXref(long startXRefOffset) throws IOException
{
pdfSource.seek(startXRefOffset);
- long startXrefOffset = parseStartXref();
+ long startXrefOffset = Math.max(0, parseStartXref());
// check the startxref offset
long fixedOffset = checkXRefOffset(startXrefOffset);
if (fixedOffset > -1)
@@ -271,7 +261,7 @@ public class COSParser extends BaseParse
{
int streamOffset = trailer.getInt(COSName.XREF_STM);
// check the xref stream reference
- fixedOffset = checkXRefStreamOffset(streamOffset);
+ fixedOffset = checkXRefStreamOffset(streamOffset, false);
if (fixedOffset > -1 && fixedOffset != streamOffset)
{
streamOffset = (int)fixedOffset;
@@ -395,25 +385,30 @@ public class COSParser extends BaseParse
}
}
// find last startxref preceding EOF marker
- bufOff = lastIndexOf(STARTXREF_MARKER, buf, bufOff);
+ bufOff = lastIndexOf(STARTXREF, buf, bufOff);
+ long startXRefOffset = -1;
if (bufOff < 0)
{
if (isLenient)
{
- trailerOffset = lastIndexOf(TRAILER_MARKER, buf, buf.length);
- if (trailerOffset > 0)
+ // brute force search for startxref
+ startXRefOffset = bfSearchForStartXref();
+ if (startXRefOffset > -1)
{
- trailerOffset += skipBytes;
+ LOG.debug("Fixed offset for startxref " + startXRefOffset);
}
- return -1;
}
else
{
throw new IOException("Missing 'startxref' marker.");
}
}
- return skipBytes + bufOff;
+ else
+ {
+ startXRefOffset = skipBytes + bufOff;
+ }
+ return startXRefOffset;
}
/**
@@ -1076,10 +1071,13 @@ public class COSParser extends BaseParse
{
return startXRefOffset;
}
- long fixedOffset = checkXRefStreamOffset(startXRefOffset);
- if (fixedOffset > -1)
+ if (startXRefOffset > 0)
{
- return fixedOffset;
+ long fixedOffset = checkXRefStreamOffset(startXRefOffset, true);
+ if (fixedOffset > -1)
+ {
+ return fixedOffset;
+ }
}
// try to find a fixed offset
return calculateXRefFixedOffset(startXRefOffset, false);
@@ -1092,10 +1090,10 @@ public class COSParser extends BaseParse
* @return the revised offset
* @throws IOException
*/
- private long checkXRefStreamOffset(long startXRefOffset) throws IOException
+ private long checkXRefStreamOffset(long startXRefOffset, boolean checkOnly) throws IOException
{
// repair mode isn't available in non-lenient mode
- if (!isLenient)
+ if (!isLenient || startXRefOffset == 0)
{
return startXRefOffset;
}
@@ -1127,7 +1125,7 @@ public class COSParser extends BaseParse
}
}
// try to find a fixed offset
- return calculateXRefFixedOffset(startXRefOffset, true);
+ return checkOnly ? -1 : calculateXRefFixedOffset(startXRefOffset, true);
}
/**
* Try to find a fixed offset for the given xref table/stream.
@@ -1541,6 +1539,31 @@ public class COSParser extends BaseParse
}
/**
+ * Brute force search for the first startxref marker, starting at MINIMUM_SEARCH_OFFSET.
+ *
+ * @return the offset of startxref, or -1 if no marker was found
+ *
+ * @throws IOException if something went wrong
+ */
+ private long bfSearchForStartXref() throws IOException
+ {
+ long newOffset = -1;
+ long originOffset = pdfSource.getOffset();
+ pdfSource.seek(MINIMUM_SEARCH_OFFSET);
+ while (!pdfSource.isEOF())
+ {
+ if (isString(STARTXREF))
+ {
+ newOffset = pdfSource.getOffset();
+ break;
+ }
+ pdfSource.read();
+ }
+ pdfSource.seek(originOffset);
+ return newOffset;
+ }
+
+ /**
* This will parse the trailer from the stream and add it to the state.
*
* @return false on parsing error
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1661485&r1=1661484&r2=1661485&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Sun Feb 22 15:12:41 2015
@@ -309,7 +309,7 @@ public class PDFParser extends COSParser
COSDictionary trailer = null;
// parse startxref
long startXRefOffset = getStartxrefOffset();
- if (startXRefOffset > 0)
+ if (startXRefOffset > -1)
{
trailer = parseXref(startXRefOffset);
}