You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2015/02/23 20:00:52 UTC
svn commit: r1661747 - in
/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser:
COSParser.java PDFParser.java
Author: lehmi
Date: Mon Feb 23 19:00:52 2015
New Revision: 1661747
URL: http://svn.apache.org/r1661747
Log:
PDFBOX-2527: rebuild trailer instead of brute force search for startxref
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1661747&r1=1661746&r2=1661747&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Mon Feb 23 19:00:52 2015
@@ -386,28 +386,20 @@ public class COSParser extends BaseParse
}
// find last startxref preceding EOF marker
bufOff = lastIndexOf(STARTXREF, buf, bufOff);
- long startXRefOffset = -1;
+ long startXRefOffset = skipBytes + bufOff;
if (bufOff < 0)
{
if (isLenient)
{
- // brute force search for startxref
- startXRefOffset = bfSearchForStartXref();
- if (startXRefOffset > -1)
- {
- LOG.debug("Fixed offset for startxref " + startXRefOffset);
- }
+ LOG.debug("Can't find offset for startxref");
+ return -1;
}
else
{
throw new IOException("Missing 'startxref' marker.");
}
}
- else
- {
- startXRefOffset = skipBytes + bufOff;
- }
return startXRefOffset;
}
@@ -1518,6 +1510,67 @@ public class COSParser extends BaseParse
pdfSource.seek(originOffset);
}
}
+
+ /**
+ * Rebuild the trailer dictionary if startxref can't be found. The offsets collected by the brute force
+ * search become an xref table; Root and Info are picked by inspecting each object's dictionary, and
+ * objects whose dictionary can't be parsed are logged (including the cause) and skipped.
+ * @return the rebuilt trailer dictionary, or null if no object offsets are available after the search
+ * @throws IOException if something went wrong
+ */
+ protected final COSDictionary rebuildTrailer() throws IOException
+ {
+ COSDictionary trailer = null;
+ bfSearchForObjects();
+ if (bfSearchCOSObjectKeyOffsets != null)
+ {
+ xrefTrailerResolver.nextXrefObj( 0, XRefType.TABLE );
+ for (COSObjectKey objectKey : bfSearchCOSObjectKeyOffsets.keySet())
+ {
+ xrefTrailerResolver.setXRef(objectKey, bfSearchCOSObjectKeyOffsets.get(objectKey));
+ }
+ xrefTrailerResolver.setStartxref(0);
+ trailer = xrefTrailerResolver.getTrailer();
+ getDocument().setTrailer(trailer);
+ for(COSObjectKey key : bfSearchCOSObjectKeyOffsets.keySet())
+ {
+ Long offset = bfSearchCOSObjectKeyOffsets.get(key);
+ pdfSource.seek(offset);
+ readObjectNumber();
+ readGenerationNumber();
+ readExpectedString(OBJ_MARKER, true);
+ COSDictionary dictionary = null;
+ try
+ {
+ dictionary = parseCOSDictionary();
+ if (dictionary != null)
+ {
+ if (COSName.CATALOG.equals(dictionary.getCOSName(COSName.TYPE)))
+ {
+ trailer.setItem(COSName.ROOT, document.getObjectFromPool(key));
+ }
+ else if (dictionary.containsKey(COSName.TITLE)
+ || dictionary.containsKey(COSName.AUTHOR)
+ || dictionary.containsKey(COSName.SUBJECT)
+ || dictionary.containsKey(COSName.KEYWORDS)
+ || dictionary.containsKey(COSName.CREATOR)
+ || dictionary.containsKey(COSName.PRODUCER)
+ || dictionary.containsKey(COSName.CREATION_DATE))
+ {
+ trailer.setItem(COSName.INFO, document.getObjectFromPool(key));
+ }
+ // TODO find/assign Encrypt entry
+ }
+ }
+ catch(IOException exception)
+ {
+ LOG.error("Skipped invalid dictionary for object "+key, exception);
+ }
+ }
+ }
+ return trailer;
+ }
+
/**
* This will parse the startxref section from the stream.
* The startxref value is ignored.
@@ -1539,31 +1592,6 @@ public class COSParser extends BaseParse
}
/**
- * Brute force search for startxref.
- *
- * @return the offset of startxref
- *
- * @throws IOException if something went wrong
- */
- private long bfSearchForStartXref() throws IOException
- {
- long newOffset = -1;
- long originOffset = pdfSource.getOffset();
- pdfSource.seek(MINIMUM_SEARCH_OFFSET);
- while (!pdfSource.isEOF())
- {
- if (isString(STARTXREF))
- {
- newOffset = pdfSource.getOffset();
- break;
- }
- pdfSource.read();
- }
- pdfSource.seek(originOffset);
- return newOffset;
- }
-
- /**
* This will parse the trailer from the stream and add it to the state.
*
* @return false on parsing error
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1661747&r1=1661746&r2=1661747&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Mon Feb 23 19:00:52 2015
@@ -315,7 +315,7 @@ public class PDFParser extends COSParser
}
else if (isLenient())
{
- trailer = searchXref(0);
+ trailer = rebuildTrailer();
}
// prepare decryption if necessary
prepareDecryption();