You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2021/11/10 17:11:08 UTC

svn commit: r1894920 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/Splitter.java

Author: tilman
Date: Wed Nov 10 17:11:08 2021
New Revision: 1894920

URL: http://svn.apache.org/viewvc?rev=1894920&view=rev
Log:
PDFBOX-5317: copy the simple elements of /Info into a new dictionary instead of reusing the source document's /Info dictionary, as suggested by Oliver Schmidtmer

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/Splitter.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/Splitter.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/Splitter.java?rev=1894920&r1=1894919&r2=1894920&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/Splitter.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/Splitter.java Wed Nov 10 17:11:08 2021
@@ -20,8 +20,14 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.io.MemoryUsageSetting;
 import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDDocumentInformation;
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.pdmodel.interactive.action.PDAction;
 import org.apache.pdfbox.pdmodel.interactive.action.PDActionGoTo;
@@ -38,6 +44,8 @@ import org.apache.pdfbox.pdmodel.interac
  */
 public class Splitter
 {
+    private static final Log LOG = LogFactory.getLog(Splitter.class);
+
     private PDDocument sourceDocument;
     private PDDocument currentDestinationDocument;
 
@@ -212,7 +220,35 @@ public class Splitter
         PDDocument document = memoryUsageSetting == null ?
                                 new PDDocument() : new PDDocument(memoryUsageSetting);
         document.getDocument().setVersion(getSourceDocument().getVersion());
-        document.setDocumentInformation(getSourceDocument().getDocumentInformation());
+        PDDocumentInformation sourceDocumentInformation = getSourceDocument().getDocumentInformation();
+        if (sourceDocumentInformation != null)
+        {
+            // PDFBOX-5317: Image Capture Plus files where /Root and /Info share the same dictionary
+            // Only copy simple elements to avoid huge files
+            COSDictionary sourceDocumentInformationDictionary = sourceDocumentInformation.getCOSObject();
+            COSDictionary destDocumentInformationDictionary = new COSDictionary();
+            for (COSName key : sourceDocumentInformationDictionary.keySet())
+            {
+                COSBase value = sourceDocumentInformationDictionary.getDictionaryObject(key);
+                if (value instanceof COSDictionary)
+                {
+                    LOG.warn("Nested entry for key '" + key.getName()
+                            + "' skipped in document information dictionary");
+                    if (sourceDocument.getDocumentCatalog().getCOSObject() ==
+                            sourceDocument.getDocumentInformation().getCOSObject())
+                    {
+                        LOG.warn("/Root and /Info share the same dictionary");
+                    }
+                    continue;
+                }
+                if (COSName.TYPE.equals(key))
+                {
+                    continue; // there is no /Type in the document information dictionary
+                }
+                destDocumentInformationDictionary.setItem(key, value);
+            }
+            document.setDocumentInformation(new PDDocumentInformation(destDocumentInformationDictionary));
+        }
         document.getDocumentCatalog().setViewerPreferences(
                 getSourceDocument().getDocumentCatalog().getViewerPreferences());
         return document;