You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2017/10/22 17:32:20 UTC
svn commit: r1812937 -
/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Author: lehmi
Date: Sun Oct 22 17:32:20 2017
New Revision: 1812937
URL: http://svn.apache.org/viewvc?rev=1812937&view=rev
Log:
PDFBOX-3957: search for valid trailer entries when rebuilding the trailer
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1812937&r1=1812936&r2=1812937&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sun Oct 22 17:32:20 2017
@@ -113,6 +113,11 @@ public class COSParser extends BaseParse
protected static final char[] OBJ_MARKER = new char[] { 'o', 'b', 'j' };
/**
+ * trailer-marker, the keyword the brute force trailer search (bfSearchForTrailer) scans for.
+ */
+ private static final char[] TRAILER_MARKER = new char[] { 't', 'r', 'a', 'i', 'l', 'e', 'r' };
+
+ /**
* ObjStream-marker.
*/
private static final char[] OBJ_STREAM = new char[] { '/', 'O', 'b', 'j', 'S', 't', 'm' };
@@ -1609,7 +1614,76 @@ public class COSParser extends BaseParse
}
return newValue;
}
-
+
+ /**
+ * Brute force search for all trailer markers.
+ *
+ * @return one {root key, info key} pair per trailer found, a key which can't be determined is null
+ * @throws IOException if something went wrong
+ */
+ private List<COSObjectKey[]> bfSearchForTrailer() throws IOException
+ {
+     List<COSObjectKey[]> trailerDicts = new ArrayList<COSObjectKey[]>();
+     long originOffset = source.getPosition();
+     source.seek(MINIMUM_SEARCH_OFFSET);
+     while (!source.isEOF())
+     {
+         // search for trailer marker
+         if (isString(TRAILER_MARKER))
+         {
+             source.seek(source.getPosition() + TRAILER_MARKER.length);
+             try
+             {
+                 skipSpaces();
+                 COSDictionary trailerDict = parseCOSDictionary();
+                 // index 0 takes the key of the root object, index 1 the key of the info object
+                 COSObjectKey[] trailerKeys = new COSObjectKey[2];
+                 COSBase rootObj = trailerDict.getItem(COSName.ROOT);
+                 if (rootObj instanceof COSObject)
+                 {
+                     long objNumber = ((COSObject) rootObj).getObjectNumber();
+                     int genNumber = ((COSObject) rootObj).getGenerationNumber();
+                     trailerKeys[0] = new COSObjectKey(objNumber, genNumber);
+                 }
+                 COSBase infoObj = trailerDict.getItem(COSName.INFO);
+                 // same guard as for the root: an unchecked cast fails for a direct info entry
+                 if (infoObj instanceof COSObject)
+                 {
+                     long objNumber = ((COSObject) infoObj).getObjectNumber();
+                     int genNumber = ((COSObject) infoObj).getGenerationNumber();
+                     trailerKeys[1] = new COSObjectKey(objNumber, genNumber);
+                 }
+                 if (trailerKeys[0] != null || trailerKeys[1] != null)
+                 {
+                     trailerDicts.add(trailerKeys);
+                 }
+             }
+             catch (IOException exception)
+             {
+                 // nothing parseable behind this marker, resume the scan at the current position
+                 continue;
+             }
+         }
+         source.read();
+     }
+     source.seek(originOffset);
+     // eliminate later entries which repeat the first one; walking backwards keeps the
+     // remaining indices valid while removing
+     for (int i = trailerDicts.size() - 1; i > 0; i--)
+     {
+         COSObjectKey[] first = trailerDicts.get(0);
+         COSObjectKey[] other = trailerDicts.get(i);
+         // a trailer may contribute only one of the two keys, so either slot can be null
+         boolean sameRoot = first[0] == null ? other[0] == null : first[0].equals(other[0]);
+         boolean sameInfo = first[1] == null ? other[1] == null : first[1].equals(other[1]);
+         if (sameRoot && sameInfo)
+         {
+             trailerDicts.remove(i);
+         }
+     }
+     return trailerDicts;
+ }
+
/**
* Brute force search for the last EOF marker.
*
@@ -1957,75 +2031,96 @@ public class COSParser extends BaseParse
xrefTrailerResolver.setStartxref(0);
trailer = xrefTrailerResolver.getTrailer();
getDocument().setTrailer(trailer);
- // search for the different parts of the trailer dictionary
- for (Entry<COSObjectKey, Long> entry : bfCOSObjectKeyOffsets.entrySet())
+ List<COSObjectKey[]> trailerObjects = bfSearchForTrailer();
+ if (trailerObjects.size() == 1)
{
- Long offset = entry.getValue();
- COSDictionary dictionary = null;
- // handle compressed objects
- if (offset < 0)
+ COSObjectKey[] trailerObj = trailerObjects.get(0);
+ COSObjectKey rootKey = trailerObj[0];
+ Long rootOffset = rootKey != null ? bfSearchCOSObjectKeyOffsets.get(rootKey) : null;
+ COSObjectKey infoKey = trailerObj[1];
+ Long infoOffset = infoKey != null ? bfSearchCOSObjectKeyOffsets.get(infoKey) : null;
+ if (rootKey != null && rootOffset != null)
{
- COSObject compressedObject = document.getObjectFromPool(entry.getKey());
- if (compressedObject.getObject() == null)
- {
- parseObjectStream((int) -offset);
- }
- COSBase baseObject = compressedObject.getObject();
- if (baseObject instanceof COSDictionary)
+ COSDictionary rootDict = retrieveCOSDictionary(rootKey, rootOffset);
+ if (rootDict != null && isCatalog(rootDict))
{
- dictionary = (COSDictionary) baseObject;
+ trailer.setItem(COSName.ROOT, document.getObjectFromPool(rootKey));
}
- else
+ }
+ if (infoKey != null && infoOffset != null)
+ {
+ COSDictionary infoDict = retrieveCOSDictionary(infoKey, infoOffset);
+ if (infoDict != null && isInfo(infoDict))
{
- continue;
+ trailer.setItem(COSName.INFO, document.getObjectFromPool(infoKey));
}
}
- else
+ }
+ else
+ {
+ // search for the different parts of the trailer dictionary
+ for (Entry<COSObjectKey, Long> entry : bfSearchCOSObjectKeyOffsets.entrySet())
{
- source.seek(offset);
- readObjectNumber();
- readGenerationNumber();
- readExpectedString(OBJ_MARKER, true);
- if (source.peek() != '<')
+ COSDictionary dictionary = retrieveCOSDictionary(entry.getKey(), entry.getValue());
+ if (dictionary == null)
{
continue;
}
- try
+ // document catalog
+ if (isCatalog(dictionary))
{
- dictionary = parseCOSDictionary();
+ trailer.setItem(COSName.ROOT, document.getObjectFromPool(entry.getKey()));
}
- catch (IOException exception)
+ // info dictionary
+ else if (isInfo(dictionary))
{
- LOG.debug("Skipped object " + entry.getKey()
- + ", either it's corrupt or not a dictionary");
- continue;
+ trailer.setItem(COSName.INFO, document.getObjectFromPool(entry.getKey()));
}
+ // encryption dictionary, if existing, is lost
+ // We can't run "Algorithm 2" from PDF specification because of missing ID
}
- // document catalog
- if (isCatalog(dictionary))
+ }
+ trailerWasRebuild = true;
+ return trailer;
+ }
+
+ private COSDictionary retrieveCOSDictionary(COSObjectKey key, Long offset) throws IOException
+ { // resolves the object behind key/offset and returns it if it is a dictionary, null otherwise
+ COSDictionary dictionary = null; // only assigned once a dictionary was actually obtained
+ // handle compressed objects, they are recognized by a negative offset
+ if (offset < 0)
+ {
+ COSObject compressedObject = document.getObjectFromPool(key);
+ if (compressedObject.getObject() == null) // pool entry carries no object yet
{
- trailer.setItem(COSName.ROOT, document.getObjectFromPool(entry.getKey()));
+ parseObjectStream((int) -offset); // presumably -offset is the number of the object stream - TODO confirm
}
- // info dictionary
- else if (!dictionary.containsKey(COSName.PARENT)
- && !dictionary.containsKey(COSName.A)
- && !dictionary.containsKey(COSName.DEST)
- && (dictionary.containsKey(COSName.MOD_DATE)
- || dictionary.containsKey(COSName.TITLE)
- || dictionary.containsKey(COSName.AUTHOR)
- || dictionary.containsKey(COSName.SUBJECT)
- || dictionary.containsKey(COSName.KEYWORDS)
- || dictionary.containsKey(COSName.CREATOR)
- || dictionary.containsKey(COSName.PRODUCER)
- || dictionary.containsKey(COSName.CREATION_DATE)))
+ COSBase baseObject = compressedObject.getObject(); // read again, parsing the stream may have filled it
+ if (baseObject instanceof COSDictionary)
{
- trailer.setItem(COSName.INFO, document.getObjectFromPool(entry.getKey()));
+ dictionary = (COSDictionary) baseObject;
}
- // encryption dictionary, if existing, is lost
- // We can't run "Algorithm 2" from PDF specification because of missing ID
}
- trailerWasRebuild = true;
- return trailer;
+ else
+ {
+ source.seek(offset); // jump to the object, the next three reads consume its "number generation obj" header
+ readObjectNumber();
+ readGenerationNumber();
+ readExpectedString(OBJ_MARKER, true);
+ if (source.peek() != '<') // anything not starting with '<' is not treated as a dictionary
+ {
+ return null;
+ }
+ try
+ {
+ dictionary = parseCOSDictionary();
+ }
+ catch (IOException exception) // best effort: the failure is only logged and null is returned
+ {
+ LOG.debug("Skipped object " + key + ", either it's corrupt or not a dictionary");
+ }
+ }
+ return dictionary;
}
/**
@@ -2091,7 +2186,7 @@ public class COSParser extends BaseParse
* Tell if the dictionary is a PDF catalog. Override this for an FDF catalog.
*
* @param dictionary
- * @return
+ * @return true if the given dictionary is a root dictionary
*/
protected boolean isCatalog(COSDictionary dictionary)
{
@@ -2099,8 +2194,33 @@ public class COSParser extends BaseParse
}
/**
- * This will parse the startxref section from the stream.
- * The startxref value is ignored.
+ * Tell if the dictionary is an info dictionary.
+ *
+ * @param dictionary
+ * @return true if the given dictionary is an info dictionary
+ */
+ private boolean isInfo(COSDictionary dictionary)
+ {
+     // a dictionary carrying one of these keys is never accepted
+     boolean excluded = dictionary.containsKey(COSName.PARENT)
+             || dictionary.containsKey(COSName.A) || dictionary.containsKey(COSName.DEST);
+     if (excluded)
+     {
+         return false;
+     }
+     // otherwise a single one of the following keys is sufficient
+     return dictionary.containsKey(COSName.MOD_DATE)
+             || dictionary.containsKey(COSName.TITLE)
+             || dictionary.containsKey(COSName.AUTHOR)
+             || dictionary.containsKey(COSName.SUBJECT)
+             || dictionary.containsKey(COSName.KEYWORDS)
+             || dictionary.containsKey(COSName.CREATOR)
+             || dictionary.containsKey(COSName.PRODUCER)
+             || dictionary.containsKey(COSName.CREATION_DATE);
+ }
+
+ /**
+ * This will parse the startxref section from the stream. The startxref value is ignored.
*
* @return the startxref value or -1 on parsing error
* @throws IOException If an IO error occurs.