You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2017/10/07 17:15:48 UTC
svn commit: r1811443 -
/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
Author: lehmi
Date: Sat Oct 7 17:15:48 2017
New Revision: 1811443
URL: http://svn.apache.org/viewvc?rev=1811443&view=rev
Log:
PDFBOX-3950: added a checker for not dereferenced entries within the pages dictionaries, null-objects are removed
Modified:
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1811443&r1=1811442&r2=1811443&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Sat Oct 7 17:15:48 2017
@@ -19,9 +19,11 @@ package org.apache.pdfbox.pdfparser;
import java.io.IOException;
import java.io.InputStream;
import java.security.KeyStore;
+import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSDocument;
@@ -244,10 +246,60 @@ public class PDFParser extends COSParser
parseDictObjects((COSDictionary) infoBase, (COSName[]) null);
}
+ if (rebuildTrailer && root != null)
+ {
+ // check if all page objects are dereferenced
+ COSBase pages = root.getDictionaryObject(COSName.PAGES);
+ if (pages != null && pages instanceof COSDictionary)
+ {
+ checkPages((COSDictionary) pages);
+ }
+ }
document.setDecrypted();
initialParseDone = true;
}
+    /**
+     * Checks the /Kids array of the given pages dictionary, removes entries that resolve to
+     * nothing (null or COSNull), descends into nested pages dictionaries and finally rewrites
+     * the /Count entry with the number of page dictionaries that were actually found.
+     *
+     * @param pagesDict the pages dictionary to be checked, must not be null
+     * @return the number of page dictionaries found below the given pages dictionary
+     */
+    private int checkPages(COSDictionary pagesDict)
+    {
+        int numberOfPages = 0;
+        // check for kids
+        COSBase kids = pagesDict.getDictionaryObject(COSName.KIDS);
+        // instanceof is false for null, a separate null check isn't needed
+        if (kids instanceof COSArray)
+        {
+            COSArray kidsArray = (COSArray) kids;
+            // NOTE(review): entries are removed from kidsArray while iterating kidsList,
+            // this presumably relies on toList() returning a copy - confirm
+            List<? extends COSBase> kidsList = kidsArray.toList();
+            for (COSBase kid : kidsList)
+            {
+                // usually a kid is an indirect reference, but a damaged file may hold a
+                // direct object instead; don't cast blindly, use the direct object as it is
+                COSBase kidBaseobject = kid instanceof COSObject ? ((COSObject) kid).getObject()
+                        : kid;
+                // object wasn't dereferenced -> remove it
+                // the referenced object may be missing altogether, so guard against null
+                // before calling equals
+                if (kidBaseobject == null || kidBaseobject.equals(COSNull.NULL))
+                {
+                    LOG.warn("Removed null object " + kid + " from pages dictionary");
+                    kidsArray.remove(kid);
+                }
+                else if (kidBaseobject instanceof COSDictionary)
+                {
+                    COSDictionary kidDictionary = (COSDictionary) kidBaseobject;
+                    COSName type = kidDictionary.getCOSName(COSName.TYPE);
+                    if (COSName.PAGES.equals(type))
+                    {
+                        // process nested pages dictionaries
+                        // NOTE(review): there is no guard against a pages dictionary which
+                        // is (directly or indirectly) its own kid, such a file would
+                        // recurse without bound
+                        numberOfPages += checkPages(kidDictionary);
+                    }
+                    else if (COSName.PAGE.equals(type))
+                    {
+                        // count pages
+                        numberOfPages++;
+                    }
+                    // dictionaries of any other (or no) type are kept but not counted
+                }
+            }
+        }
+        // fix counter, it is written even if there isn't any usable kids array
+        pagesDict.setInt(COSName.COUNT, numberOfPages);
+        return numberOfPages;
+    }
+
/**
* This will parse the stream and populate the COSDocument object. This will close
* the keystore stream when it is done parsing.