You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2017/10/08 11:37:50 UTC
svn commit: r1811474 - in
/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser:
COSParser.java PDFParser.java
Author: lehmi
Date: Sun Oct 8 11:37:50 2017
New Revision: 1811474
URL: http://svn.apache.org/viewvc?rev=1811474&view=rev
Log:
PDFBOX-3950: added a checker for not dereferenced entries within the pages dictionaries, null-objects are removed
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1811474&r1=1811473&r2=1811474&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sun Oct 8 11:37:50 2017
@@ -130,6 +130,8 @@ public class COSParser extends BaseParse
private boolean isLenient = true;
protected boolean initialParseDone = false;
+
+ private boolean trailerWasRebuild = false;
/**
* Contains all found objects of a brute force search.
*/
@@ -1993,14 +1995,74 @@ public class COSParser extends BaseParse
// encryption dictionary, if existing, is lost
// We can't run "Algorithm 2" from PDF specification because of missing ID
}
+ trailerWasRebuild = true;
return trailer;
}
/**
+ * Check if all entries of the pages dictionary are present. Those which can't be dereferenced are removed.
+ *
+ * @param root the root dictionary of the pdf
+ */
+ protected void checkPages(COSDictionary root)
+ {
+     // nothing to do unless the trailer was rebuilt and a root dictionary is available
+     if (!trailerWasRebuild || root == null)
+     {
+         return;
+     }
+     // instanceof is false for null, so a separate null check isn't needed
+     COSBase pagesBase = root.getDictionaryObject(COSName.PAGES);
+     if (pagesBase instanceof COSDictionary)
+     {
+         // the returned page count isn't needed at this level
+         checkPagesDictionary((COSDictionary) pagesBase);
+     }
+ }
+
+ /**
+  * Walk the kids array of a pages dictionary, remove all kids which can't be dereferenced and rewrite the
+  * count entry with the number of pages found. Nested pages dictionaries are processed recursively.
+  *
+  * @param pagesDict the pages dictionary to be checked
+  * @return the number of page dictionaries found below the given pages dictionary
+  */
+ private int checkPagesDictionary(COSDictionary pagesDict)
+ {
+     int numberOfPages = 0;
+     // check for kids
+     COSBase kids = pagesDict.getDictionaryObject(COSName.KIDS);
+     if (kids instanceof COSArray)
+     {
+         COSArray kidsArray = (COSArray) kids;
+         // NOTE(review): assumes toList() returns a copy, so removing from kidsArray
+         // while iterating kidsList is safe - confirm against COSArray
+         List<? extends COSBase> kidsList = kidsArray.toList();
+         for (COSBase kid : kidsList)
+         {
+             // a kid which isn't an indirect reference is inspected as is instead of
+             // failing with a ClassCastException
+             COSBase kidBaseobject = kid instanceof COSObject ? ((COSObject) kid).getObject() : kid;
+             // object wasn't dereferenced (missing or null object) -> remove it
+             if (kidBaseobject == null || kidBaseobject.equals(COSNull.NULL))
+             {
+                 LOG.warn("Removed null object " + kid + " from pages dictionary");
+                 kidsArray.remove(kid);
+             }
+             else if (kidBaseobject instanceof COSDictionary)
+             {
+                 COSDictionary kidDictionary = (COSDictionary) kidBaseobject;
+                 COSName type = kidDictionary.getCOSName(COSName.TYPE);
+                 if (COSName.PAGES.equals(type))
+                 {
+                     // process nested pages dictionaries
+                     // NOTE(review): a cyclic kids reference would recurse endlessly;
+                     // consider tracking already visited dictionaries
+                     numberOfPages += checkPagesDictionary(kidDictionary);
+                 }
+                 else if (COSName.PAGE.equals(type))
+                 {
+                     // count pages
+                     numberOfPages++;
+                 }
+             }
+         }
+     }
+     // fix counter, it is written even if there is no kids array at all
+     pagesDict.setInt(COSName.COUNT, numberOfPages);
+     return numberOfPages;
+ }
+
+ /**
* Tell if the dictionary is a PDF catalog. Override this for an FDF catalog.
*
* @param dictionary
- * @return
+ * @return
*/
protected boolean isCatalog(COSDictionary dictionary)
{
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1811474&r1=1811473&r2=1811474&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Sun Oct 8 11:37:50 2017
@@ -209,7 +209,8 @@ public class PDFParser extends COSParser
{
parseDictObjects((COSDictionary) infoBase, (COSName[]) null);
}
-
+ // check pages dictionaries
+ checkPages(root);
document.setDecrypted();
initialParseDone = true;
}