You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2022/09/15 06:33:40 UTC

svn commit: r1904086 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox: cos/COSDictionary.java pdmodel/PDDocument.java

Author: lehmi
Date: Thu Sep 15 06:33:39 2022
New Revision: 1904086

URL: http://svn.apache.org/viewvc?rev=1904086&view=rev
Log:
PDFBOX-5489: get highest xref object number of an imported page to avoid mixed up object numbers when saving the resulting file(s)

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java?rev=1904086&r1=1904085&r2=1904086&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java Thu Sep 15 06:33:39 2022
@@ -1419,5 +1419,49 @@ public class COSDictionary extends COSBa
     {
         return updateState;
     }
-    
+
+    /**
+     * Collects the keys of all indirect objects referenced by this dictionary and by the dictionaries nested in or
+     * referenced from it. It is used to avoid mixed up object numbers when importing an existing page to another pdf.
+     * Values that are neither a dictionary nor an object reference are skipped and not descended into.
+     * 
+     * Expert use only. You might run into an endless recursion if choosing a wrong starting point.
+     * 
+     * @param indirectObjects the list of already found keys, new keys are appended to it; nothing is done if null.
+     */
+    public void getIndirectObjectKeys(List<COSObjectKey> indirectObjects)
+    {
+        // no list given or own key already recorded: stop here to avoid endless recursions
+        if (indirectObjects == null || (getKey() != null && indirectObjects.contains(getKey())))
+        {
+            return;
+        }
+        for (COSBase cosBase : items.values())
+        {
+            COSDictionary dictionary = null;
+            if (cosBase instanceof COSObject)
+            {
+                // record a not yet seen key first, then dereference, so a cyclic reference is not followed twice
+                if (cosBase.getKey() != null && !indirectObjects.contains(cosBase.getKey()))
+                {
+                    indirectObjects.add(cosBase.getKey());
+                    COSBase referencedObject = ((COSObject) cosBase).getObject();
+                    if (referencedObject instanceof COSDictionary)
+                    {
+                        dictionary = (COSDictionary) referencedObject;
+                    }
+                }
+            }
+            else if (cosBase instanceof COSDictionary)
+            {
+                dictionary = (COSDictionary) cosBase;
+            }
+            if (dictionary != null)
+            {
+                // descend; NOTE(review): a no-op if dictionary.getKey() is the key recorded above - confirm
+                dictionary.getIndirectObjectKeys(indirectObjects);
+            }
+        }
+    }
+
 }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java?rev=1904086&r1=1904085&r2=1904086&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java Thu Sep 15 06:33:39 2022
@@ -44,6 +44,7 @@ import org.apache.pdfbox.cos.COSDocument
 import org.apache.pdfbox.cos.COSInteger;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.cos.COSObjectKey;
 import org.apache.pdfbox.cos.COSUpdateInfo;
 import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.io.MemoryUsageSetting;
@@ -687,6 +688,7 @@ public class PDDocument implements Close
         PDStream dest = new PDStream(this, page.getContents(), COSName.FLATE_DECODE);
         importedPage.setContents(dest);
         addPage(importedPage);
+        setHighestImportedObjectNumber(importedPage);
         importedPage.setCropBox(new PDRectangle(page.getCropBox().getCOSArray()));
         importedPage.setMediaBox(new PDRectangle(page.getMediaBox().getCOSArray()));
         importedPage.setRotation(page.getRotation());
@@ -699,6 +701,22 @@ public class PDDocument implements Close
     }
 
     /**
+     * Determine the highest object number from the imported page to avoid mixed up numbers when saving the new pdf.
+     * The document's highest xref object number is only ever raised; it is kept if no indirect object is found.
+     * @param importedPage the imported page.
+     */
+    private void setHighestImportedObjectNumber(PDPage importedPage)
+    {
+        List<COSObjectKey> indirectObjectKeys = new ArrayList<>();
+        importedPage.getCOSObject().getIndirectObjectKeys(indirectObjectKeys);
+        long highestXRefObjectNumber = getDocument().getHighestXRefObjectNumber();
+        long highestImportedNumber = indirectObjectKeys.stream().mapToLong(COSObjectKey::getNumber)
+                    .max().orElse(highestXRefObjectNumber); // empty key list: keep the current number
+        getDocument().setHighestXRefObjectNumber(
+                Math.max(highestXRefObjectNumber, highestImportedNumber));
+    }
+
+    /**
      * This will get the low level document.
      * 
      * @return The document that this layer sits on top of.