You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2020/11/28 13:01:08 UTC

svn commit: r1883896 - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/pdfwriter/ main/java/org/apache/pdfbox/pdfwriter/compress/ main/java/org/apache/pdfbox/pdmodel/ test/java/org/apache/pdfbox/pdfwriter/ test/resources/input/compression/

Author: lehmi
Date: Sat Nov 28 13:01:07 2020
New Revision: 1883896

URL: http://svn.apache.org/viewvc?rev=1883896&view=rev
Log:
PDFBOX-4952: add support for compressed object streams as proposed by Christian Appl

Added:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSObjectPool.java   (with props)
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterCompressionPool.java   (with props)
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterObjectStream.java   (with props)
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/CompressParameters.java   (with props)
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/TraversedCOSElement.java   (with props)
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSDocumentCompressionTest.java   (with props)
    pdfbox/trunk/pdfbox/src/test/resources/input/compression/
    pdfbox/trunk/pdfbox/src/test/resources/input/compression/acroform.pdf   (with props)
    pdfbox/trunk/pdfbox/src/test/resources/input/compression/attachment.pdf   (with props)
    pdfbox/trunk/pdfbox/src/test/resources/input/compression/unencrypted.pdf   (with props)
Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java?rev=1883896&r1=1883895&r2=1883896&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java Sat Nov 28 13:01:07 2020
@@ -63,7 +63,11 @@ import org.apache.pdfbox.io.RandomAccess
 import org.apache.pdfbox.pdfparser.PDFXRefStream;
 import org.apache.pdfbox.pdfparser.xref.FreeXReference;
 import org.apache.pdfbox.pdfparser.xref.NormalXReference;
+import org.apache.pdfbox.pdfparser.xref.ObjectStreamXReference;
 import org.apache.pdfbox.pdfparser.xref.XReferenceEntry;
+import org.apache.pdfbox.pdfwriter.compress.COSWriterCompressionPool;
+import org.apache.pdfbox.pdfwriter.compress.COSWriterObjectStream;
+import org.apache.pdfbox.pdfwriter.compress.CompressParameters;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.encryption.ProtectionPolicy;
 import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
@@ -223,6 +227,8 @@ public class COSWriter implements ICOSVi
     private SignatureInterface signatureInterface;
     private byte[] incrementPart;
     private COSArray byteRangeArray;
+    private CompressParameters compressParameters = null;
+    private boolean blockAddingObject = false;
 
     /**
      * COSWriter constructor.
@@ -232,17 +238,29 @@ public class COSWriter implements ICOSVi
      */
     public COSWriter(OutputStream outputStream)
     {
+        this(outputStream, (CompressParameters) null);
+    }
+
+    /**
+     * COSWriter constructor.
+     *
+     * @param outputStream The output stream to write the PDF. It will be closed when this object is closed.
+     * @param compressParameters The configuration for the document's compression.
+     */
+    public COSWriter(OutputStream outputStream, CompressParameters compressParameters)
+    {
         setOutput(outputStream);
         setStandardOutput(new COSStandardOutputStream(output));
+        this.compressParameters = compressParameters;
     }
 
     /**
      * COSWriter constructor for incremental updates. There must be a path of objects that have
-     * {@link COSUpdateInfo#isNeedToBeUpdated()} set, starting from the document catalog. For
-     * signatures this is taken care by PDFBox itself.
+     * {@link COSUpdateInfo#isNeedToBeUpdated()} set, starting from the document catalog. For signatures this is taken
+     * care of by PDFBox itself.
      *
-     * @param outputStream output stream where the new PDF data will be written. It will be closed
-     * when this object is closed.
+     * @param outputStream output stream where the new PDF data will be written. It will be closed when this object is
+     * closed.
      * @param inputData random access read containing source PDF data
      *
      * @throws IOException if something went wrong
@@ -291,6 +309,16 @@ public class COSWriter implements ICOSVi
         this.objectsToWrite.addAll(objectsToWrite);
     }
 
+    /**
+     * Returns true, if the resulting document shall be compressed.
+     *
+     * @return True, if the resulting document shall be compressed.
+     */
+    public boolean isCompress()
+    {
+        return compressParameters != null;
+    }
+
     private void prepareIncrement(PDDocument doc)
     {
         if (doc != null)
@@ -485,9 +513,90 @@ public class COSWriter implements ICOSVi
         doWriteObjects();
     }
 
+    /**
+     * This will write the compressed body of the document.
+     *
+     * @param document The document to write the body for.
+     * @throws IOException If there is an error writing the data.
+     */
+    private void doWriteBodyCompressed(COSDocument document) throws IOException
+    {
+        COSDictionary trailer = document.getTrailer();
+        COSDictionary root = trailer.getCOSDictionary(COSName.ROOT);
+        COSDictionary encrypt = trailer.getCOSDictionary(COSName.ENCRYPT);
+        blockAddingObject = true;
+        willEncrypt = encrypt != null;
+        if (root != null)
+        {
+            COSWriterCompressionPool compressionPool = new COSWriterCompressionPool(pdDocument,
+                    compressParameters);
+            // Append object stream entries to document.
+            for (COSObjectKey key : compressionPool.getObjectStreamObjects())
+            {
+                COSBase object = compressionPool.getObject(key);
+                writtenObjects.add(object);
+                objectKeys.put(object, key);
+                keyObject.put(key, object);
+            }
+            // Append top level objects to document.
+            for (COSObjectKey key : compressionPool.getTopLevelObjects())
+            {
+                COSBase object = compressionPool.getObject(key);
+                writtenObjects.add(object);
+                objectKeys.put(object, key);
+                keyObject.put(key, object);
+            }
+            for (COSObjectKey key : compressionPool.getTopLevelObjects())
+            {
+                currentObjectKey = key;
+                number = this.currentObjectKey.getNumber();
+                doWriteObject(key, keyObject.get(key));
+            }
+            // Append object streams to document.
+            long highestXRefObjectNumber = compressionPool.getHighestXRefObjectNumber();
+            for (COSWriterObjectStream finalizedObjectStream : compressionPool
+                    .createObjectStreams())
+            {
+                highestXRefObjectNumber++;
+                // Create new COSObject for object stream.
+                COSStream stream = finalizedObjectStream.update();
+                // Determine key for object stream.
+                COSObjectKey objectStreamKey = new COSObjectKey(highestXRefObjectNumber, 0);
+                COSObject objectStream = new COSObject(stream, objectStreamKey);
+                // Add object stream entries to the xref stream.
+                int i = 0;
+                for (COSObjectKey key : finalizedObjectStream.getPreparedKeys())
+                {
+                    COSBase object = compressionPool.getObject(key);
+                    addXRefEntry(new ObjectStreamXReference(i, key, object, objectStreamKey));
+                    i++;
+                }
+                // Include object stream in document.
+                currentObjectKey = objectStreamKey;
+                number = objectStreamKey.getNumber();
+                doWriteObject(objectStreamKey, objectStream);
+            }
+            willEncrypt = false;
+            if (encrypt != null)
+            {
+                highestXRefObjectNumber++;
+                COSObjectKey encryptKey = new COSObjectKey(highestXRefObjectNumber, 0);
+                setNumber(encryptKey.getNumber());
+                currentObjectKey = encryptKey;
+                number = currentObjectKey.getNumber();
+                writtenObjects.add(encrypt);
+                keyObject.put(encryptKey, encrypt);
+                objectKeys.put(encrypt, encryptKey);
+                doWriteObject(encryptKey, encrypt);
+            }
+        }
+        // Always lift the block again, even if the trailer had no root dictionary to write.
+        this.blockAddingObject = false;
+    }
+
     private void doWriteObjects() throws IOException
     {
-        while( objectsToWrite.size() > 0 )
+        while (!objectsToWrite.isEmpty())
         {
             COSBase nextObject = objectsToWrite.removeFirst();
             objectsToWriteSet.remove(nextObject);
@@ -497,6 +606,10 @@ public class COSWriter implements ICOSVi
 
     private void addObjectToWrite( COSBase object )
     {
+        if (blockAddingObject)
+        {
+            return;
+        }
         COSBase actual = object;
         if( actual instanceof COSObject )
         {
@@ -532,9 +645,43 @@ public class COSWriter implements ICOSVi
     }
 
     /**
+     * This will write a COS object for a predefined key.
+     *
+     * @param key The key of the object to write.
+     * @param obj The object to write.
+     *
+     * @throws IOException if the output cannot be written
+     */
+    public void doWriteObject(COSObjectKey key, COSBase obj) throws IOException
+    {
+        // add a x ref entry
+        addXRefEntry(new NormalXReference(getStandardOutput().getPos(), key, obj));
+        // write the object
+        getStandardOutput()
+                .write(String.valueOf(key.getNumber()).getBytes(StandardCharsets.ISO_8859_1));
+        getStandardOutput().write(SPACE);
+        getStandardOutput()
+                .write(String.valueOf(key.getGeneration()).getBytes(StandardCharsets.ISO_8859_1));
+        getStandardOutput().write(SPACE);
+        getStandardOutput().write(OBJ);
+        getStandardOutput().writeEOL();
+        // null test added to please Sonar
+        // TODO: shouldn't all public methods be guarded against passing null. Passing null to most methods will
+        // fail with an NPE
+        if (obj != null)
+        {
+            obj.accept(this);
+        }
+        getStandardOutput().writeEOL();
+        getStandardOutput().write(ENDOBJ);
+        getStandardOutput().writeEOL();
+    }
+
+    /**
      * Convenience method, so that we get false for types that can't be updated.
+     * 
      * @param base
-     * @return 
+     * @return
      */
     private boolean isNeedToBeUpdated(COSBase base)
     {
@@ -557,24 +704,7 @@ public class COSWriter implements ICOSVi
             writtenObjects.add( obj );
             // find the physical reference
             currentObjectKey = getObjectKey( obj );
-            // add a x ref entry
-            addXRefEntry(new NormalXReference(getStandardOutput().getPos(), currentObjectKey, obj));
-            // write the object
-            getStandardOutput().write(String.valueOf(currentObjectKey.getNumber()).getBytes(StandardCharsets.ISO_8859_1));
-            getStandardOutput().write(SPACE);
-            getStandardOutput().write(String.valueOf(currentObjectKey.getGeneration()).getBytes(StandardCharsets.ISO_8859_1));
-            getStandardOutput().write(SPACE);
-            getStandardOutput().write(OBJ);
-            getStandardOutput().writeEOL();
-            // null test added to please Sonar
-            // TODO: shouldn't all public methods be guarded against passing null. Passing null to most methods will
-            // fail with an NPE
-            if (obj != null) {
-                obj.accept( this );
-            }
-            getStandardOutput().writeEOL();
-            getStandardOutput().write(ENDOBJ);
-            getStandardOutput().writeEOL();
+            doWriteObject(currentObjectKey, obj);
     }
 
     /**
@@ -586,6 +716,11 @@ public class COSWriter implements ICOSVi
      */
     protected void doWriteHeader(COSDocument doc) throws IOException
     {
+        if (isCompress())
+        {
+            doc.setVersion(
+                    Math.max(doc.getVersion(), COSWriterCompressionPool.MINIMUM_SUPPORTED_VERSION));
+        }
         String headerString;
         if (fdfDocument != null)
         {
@@ -923,7 +1058,7 @@ public class COSWriter implements ICOSVi
             }
         }
         // If no new entry is found, we need to write out the last result
-        if (xRefEntriesList.size() > 0)
+        if (!xRefEntriesList.isEmpty())
         {
             list.add(last - count + 1);
             list.add(count);
@@ -1159,7 +1294,14 @@ public class COSWriter implements ICOSVi
             getStandardOutput().writeCRLF();
         }
 
-        doWriteBody(doc);
+        if (isCompress())
+        {
+            doWriteBodyCompressed(doc);
+        }
+        else
+        {
+            doWriteBody(doc);
+        }
 
         // get the previous trailer
         COSDictionary trailer = doc.getTrailer();

Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSObjectPool.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSObjectPool.java?rev=1883896&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSObjectPool.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSObjectPool.java Sat Nov 28 13:01:07 2020
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdfwriter.compress;
+
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.cos.COSObjectKey;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * An instance of this class maps {@link COSBase} instances to {@link COSObjectKey}s and allows for a bidirectional
+ * lookup.
+ * 
+ * @author Christian Appl
+ */
+public class COSObjectPool
+{
+    private final Map<COSObjectKey, COSBase> keyPool = new HashMap<>();
+    private final Map<COSBase, COSObjectKey> objectPool = new HashMap<>();
+
+    private long highestXRefObjectNumber = 0;
+
+    /**
+     * Creates a map of {@link COSBase} instances to {@link COSObjectKey}s, allowing bidirectional lookups. This
+     * constructor can be used for pre-initialized structures to start the assignment of new object numbers starting
+     * from the hereby given offset.
+     *
+     * @param highestXRefObjectNumber The highest known object number.
+     */
+    public COSObjectPool(long highestXRefObjectNumber)
+    {
+        this.highestXRefObjectNumber = Math.max(this.highestXRefObjectNumber,
+                highestXRefObjectNumber);
+    }
+
+    /**
+     * Update the key and object maps.
+     *
+     * @param key The key, that shall be added.
+     * @param object The object, that shall be added.
+     * @return The actual key the object has been added for, or null if the object is null or already registered.
+     */
+    public COSObjectKey put(COSObjectKey key, COSBase object)
+    {
+        if (object == null || contains(object))
+        {
+            return null;
+        }
+        COSObjectKey actualKey = key;
+        if (actualKey == null || contains(actualKey))
+        {
+            highestXRefObjectNumber++;
+            actualKey = new COSObjectKey(highestXRefObjectNumber, 0);
+        }
+        else
+        {
+            highestXRefObjectNumber = Math.max(key.getNumber(), highestXRefObjectNumber);
+        }
+        keyPool.put(actualKey, object);
+        objectPool.put(object, actualKey);
+        return actualKey;
+    }
+
+    /**
+     * Returns the {@link COSObjectKey} for a given registered {@link COSBase}. Returns null if such an object is not
+     * registered.
+     *
+     * @param object The {@link COSBase} a {@link COSObjectKey} shall be determined for.
+     * @return key The {@link COSObjectKey}, that matches the registered {@link COSBase}, or null if such an object is
+     * not registered.
+     */
+    public COSObjectKey getKey(COSBase object)
+    {
+        COSObjectKey key = null;
+        if (object instanceof COSObject)
+        {
+            key = objectPool.get(((COSObject) object).getObject());
+        }
+        if (key == null)
+        {
+            return objectPool.get(object);
+        }
+        return key;
+    }
+
+    /**
+     * Returns true, if a {@link COSBase} is registered for the given {@link COSObjectKey}.
+     *
+     * @param key The {@link COSObjectKey} that shall be checked for a registered {@link COSBase}.
+     * @return True, if a {@link COSBase} is registered for the given {@link COSObjectKey}.
+     */
+    public boolean contains(COSObjectKey key)
+    {
+        return keyPool.containsKey(key);
+    }
+
+    /**
+     * Returns the {@link COSBase}, that is registered for the given {@link COSObjectKey}, or null if no object is
+     * registered for that key.
+     *
+     * @param key The {@link COSObjectKey} a registered {@link COSBase} shall be found for.
+     * @return The {@link COSBase}, that is registered for the given {@link COSObjectKey}, or null if no object is
+     * registered for that key.
+     */
+    public COSBase getObject(COSObjectKey key)
+    {
+        return keyPool.get(key);
+    }
+
+    /**
+     * Returns true, if the given {@link COSBase} is a registered object of this pool.
+     *
+     * @param object The {@link COSBase} that shall be checked.
+     * @return True, if such a {@link COSBase} is registered in this pool.
+     */
+    public boolean contains(COSBase object)
+    {
+        return (object instanceof COSObject
+                && objectPool.containsKey(((COSObject) object).getObject()))
+                || objectPool.containsKey(object);
+    }
+
+    /**
+     * Returns the highest known object number (see: {@link COSObjectKey} for further information), that is currently
+     * registered in this pool.
+     *
+     * @return The highest known object number (see: {@link COSObjectKey} for further information), that is currently
+     * registered in this pool.
+     */
+    public long getHighestXRefObjectNumber()
+    {
+        return highestXRefObjectNumber;
+    }
+}

Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSObjectPool.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterCompressionPool.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterCompressionPool.java?rev=1883896&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterCompressionPool.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterCompressionPool.java Sat Nov 28 13:01:07 2020
@@ -0,0 +1,307 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdfwriter.compress;
+
+import org.apache.pdfbox.cos.*;
+import org.apache.pdfbox.pdfparser.PDFXRefStream;
+import org.apache.pdfbox.pdmodel.PDDocument;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * An instance of this class compresses the contents of a given {@link PDDocument}.
+ *
+ * @author Christian Appl
+ */
+public class COSWriterCompressionPool
+{
+
+    public static final float MINIMUM_SUPPORTED_VERSION = 1.6f;
+
+    private final PDDocument document;
+    private final CompressParameters parameters;
+
+    private final COSObjectPool objectPool;
+    private final COSObjectPool originalPool;
+
+    // A list containing all objects, that shall be directly appended to the document's top level container.
+    private final List<COSObjectKey> topLevelObjects = new ArrayList<>();
+    // A list containing all objects, that may be appended to an object stream.
+    private final List<COSObjectKey> objectStreamObjects = new ArrayList<>();
+
+    /**
+     * <p>
+     * Constructs an object that can be used to compress the contents of a given {@link PDDocument}. It provides the
+     * means to:
+     * </p>
+     * <ul>
+     * <li>Compress the COSStructure of the document, by streaming {@link COSBase}s to compressed
+     * {@link COSWriterObjectStream}s</li>
+     * </ul>
+     *
+     * @param document The document, that shall be compressed.
+     * @param parameters The configuration of the compression operations, that shall be applied.
+     * @throws IOException Shall be thrown if a compression operation failed.
+     */
+    public COSWriterCompressionPool(PDDocument document, CompressParameters parameters)
+            throws IOException
+    {
+        this.document = document;
+        this.parameters = parameters != null ? parameters : new CompressParameters();
+        objectPool = new COSObjectPool(document.getDocument().getHighestXRefObjectNumber());
+        originalPool = new COSObjectPool(document.getDocument().getHighestXRefObjectNumber());
+
+        // Initialize object pool.
+        COSDocument cosDocument = document.getDocument();
+
+        COSDictionary trailer = cosDocument.getTrailer();
+        addStructure(
+                new TraversedCOSElement(new COSObject(trailer.getCOSDictionary(COSName.ROOT))));
+        addStructure(
+                new TraversedCOSElement(new COSObject(trailer.getCOSDictionary(COSName.INFO))));
+
+        Collections.sort(objectStreamObjects);
+        Collections.sort(topLevelObjects);
+    }
+
+    /**
+     * Adds the given {@link COSBase} to this pool, using the given {@link COSObjectKey} as its referenceable ID. This
+     * method shall determine an appropriate key, for yet unregistered objects, to register them. Depending on the type
+     * of object, it shall either be appended as-is or shall be appended to a compressed {@link COSWriterObjectStream}.
+     *
+     * @param key The {@link COSObjectKey} that shall be used as the {@link COSBase}'s ID, if possible.
+     * @param element The {@link COSBase}, that shall be registered in this pool.
+     */
+    private COSBase addObjectToPool(COSObjectKey key, TraversedCOSElement element)
+    {
+        // Drop hollow objects.
+        COSBase base = element.getCurrentObject();
+        base = base instanceof COSObject ? ((COSObject) base).getObject() : base;
+        if (base == null || objectPool.contains(base))
+        {
+            return base;
+        }
+
+        // Check whether the object can not be appended to an object stream.
+        // An objectStream shall only contain generation 0 objects.
+        // It shall never contain the encryption dictionary.
+        // It shall never contain the document's root dictionary. (relevant for document encryption)
+        // It shall never contain other streams.
+        if ((key != null && key.getGeneration() != 0)
+                || (document.getEncryption() != null
+                        && base == document.getEncryption().getCOSObject())
+                || base == this.document.getDocument().getTrailer().getCOSDictionary(COSName.ROOT)
+                || base instanceof COSStream)
+        {
+            originalPool.put(key, base);
+            COSObjectKey actualKey = objectPool.put(key, base);
+            if (actualKey == null)
+            {
+                return base;
+            }
+            topLevelObjects.add(actualKey);
+            return base;
+        }
+
+        // Determine the object key.
+        COSObjectKey actualKey = objectPool.put(key, base);
+        if (actualKey == null)
+        {
+            return base;
+        }
+
+        // Append it to an object stream.
+        this.objectStreamObjects.add(actualKey);
+        return base;
+    }
+
+    /**
+     * Attempts to find yet unregistered streams and dictionaries in the given structure.
+     *
+     * @param traversedObject The element to process; it also tracks all objects already traversed, to avoid cycles.
+     * @throws IOException Shall be thrown, if compressing the object failed.
+     */
+    private COSBase addStructure(TraversedCOSElement traversedObject) throws IOException
+    {
+        COSBase current = traversedObject.getCurrentObject();
+        COSBase base = current;
+        COSBase retVal = current;
+
+        if (current instanceof COSStream
+                || (current instanceof COSDictionary && !current.isDirect()))
+        {
+            base = addObjectToPool(base.getKey(), traversedObject);
+            retVal = base;
+        }
+        else if (current instanceof COSObject)
+        {
+            base = ((COSObject) current).getObject();
+            if (base instanceof COSDictionary)
+            {
+                base = addObjectToPool(current.getKey(), traversedObject);
+            }
+            else
+            {
+                retVal = base;
+            }
+        }
+
+        if (base instanceof COSArray)
+        {
+            COSArray array = (COSArray) base;
+            for (int i = 0; i < array.size(); i++)
+            {
+                COSBase value = array.get(i);
+                if ((value instanceof COSDictionary || value instanceof COSObject
+                        || value instanceof COSArray)
+                        && !traversedObject.getAllTraversedObjects().contains(value))
+                {
+                    COSBase writtenValue = addStructure(
+                            traversedObject.appendTraversedElement(value));
+                    if ((value instanceof COSStream || value instanceof COSObject)
+                            && !value.equals(writtenValue))
+                    {
+                        array.set(i, writtenValue);
+                    }
+                }
+            }
+        }
+        else if (base instanceof COSDictionary)
+        {
+            COSDictionary dictionary = (COSDictionary) base;
+            for (COSName name : dictionary.keySet())
+            {
+                COSBase value = dictionary.getItem(name);
+                if ((value instanceof COSDictionary || value instanceof COSObject
+                        || value instanceof COSArray)
+                        && !traversedObject.getAllTraversedObjects().contains(value))
+                {
+                    COSBase writtenValue = addStructure(
+                            traversedObject.appendTraversedElement(value));
+                    if ((value instanceof COSStream || value instanceof COSObject)
+                            && !value.equals(writtenValue))
+                    {
+                        dictionary.setItem(name, writtenValue);
+                    }
+                }
+            }
+        }
+
+        return retVal;
+    }
+
+    /**
+     * Returns all {@link COSBase}s, that must be added to the document's top level container. Those objects are not
+     * valid to be added to an object stream.
+     *
+     * @return A list of all top level {@link COSBase}s.
+     */
+    public List<COSObjectKey> getTopLevelObjects()
+    {
+        return topLevelObjects;
+    }
+
+    /**
+     * Returns all {@link COSBase}s that can be appended to an object stream. This list is only provided to enable
+     * reflections. Contained objects should indeed be added to a compressed document via an object stream, as can be
+     * created via calling: {@link COSWriterCompressionPool#createObjectStreams()}
+     *
+     * @return A list of all {@link COSBase}s, that can be added to an object stream.
+     */
+    public List<COSObjectKey> getObjectStreamObjects()
+    {
+        return objectStreamObjects;
+    }
+
+    /**
+     * Returns true, if the given {@link COSBase} is a registered object of this compression pool.
+     *
+     * @param object The object, that shall be checked.
+     * @return True, if the given {@link COSBase} is a registered object of this compression pool.
+     */
+    public boolean contains(COSBase object)
+    {
+        return objectPool.contains(object) || originalPool.contains(object);
+    }
+
+    /**
+     * Returns the {@link COSObjectKey}, that is registered for the given {@link COSBase} in this compression pool.
+     *
+     * @param object The {@link COSBase} a {@link COSObjectKey} is registered for in this compression pool.
+     * @return The {@link COSObjectKey}, that is registered for the given {@link COSBase} in this compression pool, if
+     * such an object is contained.
+     */
+    public COSObjectKey getKey(COSBase object)
+    {
+        COSObjectKey key = objectPool.getKey(object);
+        if (key == null)
+        {
+            key = originalPool.getKey(object);
+        }
+        return key;
+    }
+
+    /**
+     * Returns the {@link COSBase}, that is registered for the given {@link COSObjectKey} in this compression pool.
+     *
+     * @param key The {@link COSObjectKey} a {@link COSBase} is registered for in this compression pool.
+     * @return The {@link COSBase}, that is registered for the given {@link COSObjectKey} in this compression pool, if
+     * such an object is contained.
+     */
+    public COSBase getObject(COSObjectKey key)
+    {
+        return objectPool.getObject(key);
+    }
+
+    /**
+     * Returns the highest object number, that is registered in this compression pool.
+     *
+     * @return The highest object number, that is registered in this compression pool.
+     */
+    public long getHighestXRefObjectNumber()
+    {
+        return objectPool.getHighestXRefObjectNumber();
+    }
+
+    /**
+     * Creates {@link COSWriterObjectStream}s for all currently registered objects of this pool, that have been marked
+     * as fit for being compressed in this manner. Such object streams may be added to a PDF document and shall be
+     * declared in a document's {@link PDFXRefStream} accordingly. The objects contained in such a stream must not be
+     * added to the document separately.
+     *
+     * @return The created {@link COSWriterObjectStream}s for all currently registered compressible objects.
+     */
+    public List<COSWriterObjectStream> createObjectStreams()
+    {
+        List<COSWriterObjectStream> objectStreams = new ArrayList<>();
+        COSWriterObjectStream objectStream = null;
+        for (int i = 0; i < objectStreamObjects.size(); i++)
+        {
+            COSObjectKey key = objectStreamObjects.get(i);
+            if (objectStream == null || (i % parameters.getObjectStreamSize()) == 0)
+            {
+                objectStream = new COSWriterObjectStream(this);
+                objectStreams.add(objectStream);
+            }
+            objectStream.prepareStreamObject(key, objectPool.getObject(key));
+        }
+        return objectStreams;
+    }
+}

Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterCompressionPool.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterObjectStream.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterObjectStream.java?rev=1883896&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterObjectStream.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterObjectStream.java Sat Nov 28 13:01:07 2020
@@ -0,0 +1,438 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdfwriter.compress;
+
+import org.apache.pdfbox.contentstream.operator.Operator;
+import org.apache.pdfbox.contentstream.operator.OperatorName;
+import org.apache.pdfbox.cos.*;
+import org.apache.pdfbox.pdfparser.PDFXRefStream;
+import org.apache.pdfbox.pdfwriter.COSWriter;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * An instance of this class represents an object stream, that compresses a number of {@link COSObject}s in a stream. It
+ * may be added to the top level container of a written PDF document in place of the compressed objects. The document's
+ * {@link PDFXRefStream} must be adapted accordingly.
+ *
+ * @author Christian Appl
+ */
+public class COSWriterObjectStream extends COSStream
+{
+
+    private final COSWriterCompressionPool compressionPool;
+    private final List<COSObjectKey> preparedKeys = new ArrayList<>();
+    private final List<COSBase> preparedObjects = new ArrayList<>();
+
+    /**
+     * Creates an object stream for compressible objects from the given {@link COSWriterCompressionPool}. The objects
+     * must first be prepared for this object stream, by adding them via calling
+     * {@link COSWriterObjectStream#prepareStreamObject(COSObjectKey, COSBase)} and will be written to this
+     * {@link COSStream}, when {@link COSWriterObjectStream#update()} is called.
+     *
+     * @param compressionPool The compression pool an object stream shall be created for.
+     */
+    public COSWriterObjectStream(COSWriterCompressionPool compressionPool)
+    {
+        // Kept to look up keys of nested objects, which are written as indirect references (see writeObject).
+        this.compressionPool = compressionPool;
+        // Tag the stream dictionary with the object stream type name.
+        setItem(COSName.TYPE, COSName.OBJ_STM);
+    }
+
+    /**
+     * Returns the number of objects, that have been written to this object stream. ({@link COSName#N})
+     *
+     * @return The number of objects, that have been written to this object stream.
+     */
+    public int getObjectCount()
+    {
+        // Read from the N entry of this stream's dictionary; 0 is passed as the fallback value.
+        return getInt(COSName.N, 0);
+    }
+
+    /**
+     * Sets the number of objects, that have been written to this object stream. ({@link COSName#N})
+     *
+     * @param size The number of objects, that have been written to this object stream.
+     */
+    public void setObjectCount(int size)
+    {
+        // Stored as the N entry of this stream's dictionary.
+        setInt(COSName.N, size);
+    }
+
+    /**
+     * Returns the byte offset of the first object contained in this object stream. ({@link COSName#FIRST})
+     *
+     * @return The byte offset of the first object contained in this object stream.
+     */
+    public int getFirstEntryOffset()
+    {
+        // Read from the FIRST entry of this stream's dictionary; 0 is passed as the fallback value.
+        return getInt(COSName.FIRST, 0);
+    }
+
+    /**
+     * Sets the byte offset of the first object contained in this object stream. ({@link COSName#FIRST})
+     *
+     * @param firstEntryOffset The byte offset of the first object contained in this object stream.
+     */
+    public void setFirstEntryOffset(int firstEntryOffset)
+    {
+        // Stored as the FIRST entry of this stream's dictionary.
+        setInt(COSName.FIRST, firstEntryOffset);
+    }
+
+    /**
+     * Prepares the given {@link COSObject} to be written to this object stream, using the given {@link COSObjectKey} as
+     * its ID for indirect references.
+     *
+     * @param key The {@link COSObjectKey}, that shall be used for indirect references to the {@link COSObject}.
+     * @param object The {@link COSObject}, that shall be written to this object stream.
+     */
+    public void prepareStreamObject(COSObjectKey key, COSBase object)
+    {
+        // Incomplete pairs are ignored, so both prepared lists always stay index-aligned.
+        if (key == null || object == null)
+        {
+            return;
+        }
+        preparedKeys.add(key);
+        COSBase resolved = object;
+        if (object instanceof COSObject)
+        {
+            // Store the object behind the indirect wrapper rather than the wrapper itself.
+            resolved = ((COSObject) object).getObject();
+        }
+        preparedObjects.add(resolved);
+    }
+
+    /**
+     * Returns all {@link COSObjectKey}s, that shall be added to the object stream, when
+     * {@link COSWriterObjectStream#update()} is called.
+     *
+     * @return All {@link COSObjectKey}s, that shall be added to the object stream.
+     */
+    public List<COSObjectKey> getPreparedKeys()
+    {
+        // Hands out the internal list itself; no defensive copy is made.
+        return preparedKeys;
+    }
+
+    /**
+     * Returns all {@link COSObject}s, that shall be added to the object stream, when
+     * {@link COSWriterObjectStream#update()} is called.
+     *
+     * @return All {@link COSObject}s, that shall be added to the object stream.
+     */
+    public List<COSBase> getPreparedObjects()
+    {
+        // Hands out the internal list itself; no defensive copy is made.
+        return preparedObjects;
+    }
+
+    /**
+     * Updates the underlying {@link COSStream} by writing all prepared {@link COSObject}s to this object stream.
+     *
+     * @return The underlying {@link COSStream} dictionary of this object stream.
+     * @throws IOException Shall be thrown, if writing the object stream failed.
+     */
+    public COSStream update() throws IOException
+    {
+        final int objectCount = preparedKeys.size();
+        setObjectCount(objectCount);
+
+        // Step 1: serialize each prepared object into its own buffer and remember its object number.
+        final List<Long> numbers = new ArrayList<>();
+        final List<byte[]> serialized = new ArrayList<>();
+        for (int index = 0; index < objectCount; index++)
+        {
+            numbers.add(preparedKeys.get(index).getNumber());
+            try (ByteArrayOutputStream buffer = new ByteArrayOutputStream())
+            {
+                writeObject(buffer, preparedObjects.get(index), true);
+                serialized.add(buffer.toByteArray());
+            }
+        }
+
+        // Step 2: build the "number offset" pairs. The offsets accumulate the lengths of the serialized
+        // objects and therefore start at 0 for the first object.
+        final byte[] offsetTable;
+        try (ByteArrayOutputStream buffer = new ByteArrayOutputStream())
+        {
+            long offset = 0;
+            int index = 0;
+            for (Long number : numbers)
+            {
+                buffer.write(String.valueOf(number).getBytes(StandardCharsets.ISO_8859_1));
+                buffer.write(COSWriter.SPACE);
+                buffer.write(String.valueOf(offset).getBytes(StandardCharsets.ISO_8859_1));
+                buffer.write(COSWriter.SPACE);
+                offset += serialized.get(index).length;
+                index++;
+            }
+            offsetTable = buffer.toByteArray();
+        }
+
+        // Step 3: write the offset table followed by the object data through a Flate encoding stream.
+        try (OutputStream compressed = createOutputStream(COSName.FLATE_DECODE))
+        {
+            compressed.write(offsetTable);
+            // The object data begins directly behind the offset table.
+            setFirstEntryOffset(offsetTable.length);
+            for (int index = 0; index < serialized.size(); index++)
+            {
+                compressed.write(serialized.get(index));
+            }
+        }
+        return this;
+    }
+
+    /**
+     * This method prepares and writes COS data to the object stream by selecting appropriate specialized methods for
+     * the content.
+     *
+     * @param output The stream, that shall be written to.
+     * @param object The content, that shall be written.
+     * @param topLevel True, if the currently written object is a top level entry of this object stream.
+     * @throws IOException Shall be thrown, when an exception occurred for the write operation.
+     */
+    private void writeObject(OutputStream output, Object object, boolean topLevel)
+            throws IOException
+    {
+        if (object == null)
+        {
+            // Every call has to emit exactly one token. Writing nothing would leave a top level entry without
+            // data at its offset and a dictionary key without a value, so the null keyword is written instead.
+            writeCOSNull(output);
+            return;
+        }
+        if (object instanceof Operator)
+        {
+            writeOperator(output, (Operator) object);
+            return;
+        }
+        if (!(object instanceof COSBase))
+        {
+            throw new IOException("Error: Unknown type in object stream:" + object);
+        }
+        // Look through an indirect wrapper at the object it refers to.
+        COSBase base = object instanceof COSObject ? ((COSObject) object).getObject()
+                : (COSBase) object;
+        if (base == null)
+        {
+            // The wrapper does not resolve to an object: keep its place occupied (array positions, dictionary
+            // key/value pairing) by writing the null keyword instead of silently dropping the value.
+            writeCOSNull(output);
+            return;
+        }
+        // Nested objects, that are known to the pool, are referenced; top level entries are always written inline.
+        if (!topLevel && this.compressionPool.contains(base))
+        {
+            COSObjectKey key = this.compressionPool.getKey(base);
+            if (key == null)
+            {
+                throw new IOException(
+                        "Error: Adding unknown object reference to object stream:" + object);
+            }
+            writeObjectReference(output, key);
+        }
+        else if (base instanceof COSString)
+        {
+            writeCOSString(output, (COSString) base);
+        }
+        else if (base instanceof COSFloat)
+        {
+            writeCOSFloat(output, (COSFloat) base);
+        }
+        else if (base instanceof COSInteger)
+        {
+            writeCOSInteger(output, (COSInteger) base);
+        }
+        else if (base instanceof COSBoolean)
+        {
+            writeCOSBoolean(output, (COSBoolean) base);
+        }
+        else if (base instanceof COSName)
+        {
+            writeCOSName(output, (COSName) base);
+        }
+        else if (base instanceof COSArray)
+        {
+            writeCOSArray(output, (COSArray) base);
+        }
+        else if (base instanceof COSDictionary)
+        {
+            writeCOSDictionary(output, (COSDictionary) base);
+        }
+        else if (base instanceof COSNull)
+        {
+            writeCOSNull(output);
+        }
+        else
+        {
+            throw new IOException("Error: Unknown type in object stream:" + object);
+        }
+    }
+
+    /**
+     * Write the given {@link COSString} to the given stream.
+     *
+     * @param output The stream, that shall be written to.
+     * @param cosString The content, that shall be written.
+     */
+    private void writeCOSString(OutputStream output, COSString cosString) throws IOException
+    {
+        COSWriter.writeString(cosString, output);
+        // Trailing separator, so the next token written to the stream cannot run into this one.
+        output.write(COSWriter.SPACE);
+    }
+
+    /**
+     * Write the given {@link COSFloat} to the given stream.
+     *
+     * @param output The stream, that shall be written to.
+     * @param cosFloat The content, that shall be written.
+     */
+    private void writeCOSFloat(OutputStream output, COSFloat cosFloat) throws IOException
+    {
+        cosFloat.writePDF(output);
+        // Trailing separator, so the next token written to the stream cannot run into this one.
+        output.write(COSWriter.SPACE);
+    }
+
+    /**
+     * Write the given {@link COSInteger} to the given stream.
+     *
+     * @param output The stream, that shall be written to.
+     * @param cosInteger The content, that shall be written.
+     */
+    private void writeCOSInteger(OutputStream output, COSInteger cosInteger) throws IOException
+    {
+        cosInteger.writePDF(output);
+        // Trailing separator, so the next token written to the stream cannot run into this one.
+        output.write(COSWriter.SPACE);
+    }
+
+    /**
+     * Write the given {@link COSBoolean} to the given stream.
+     *
+     * @param output The stream, that shall be written to.
+     * @param cosBoolean The content, that shall be written.
+     */
+    private void writeCOSBoolean(OutputStream output, COSBoolean cosBoolean) throws IOException
+    {
+        cosBoolean.writePDF(output);
+        // Trailing separator, so the next token written to the stream cannot run into this one.
+        output.write(COSWriter.SPACE);
+    }
+
+    /**
+     * Write the given {@link COSName} to the given stream.
+     *
+     * @param output The stream, that shall be written to.
+     * @param cosName The content, that shall be written.
+     */
+    private void writeCOSName(OutputStream output, COSName cosName) throws IOException
+    {
+        cosName.writePDF(output);
+        // Trailing separator, so the next token written to the stream cannot run into this one.
+        output.write(COSWriter.SPACE);
+    }
+
+    /**
+     * Write the given {@link COSArray} to the given stream.
+     *
+     * @param output The stream, that shall be written to.
+     * @param cosArray The content, that shall be written.
+     */
+    private void writeCOSArray(OutputStream output, COSArray cosArray) throws IOException
+    {
+        output.write(COSWriter.ARRAY_OPEN);
+        for (COSBase element : cosArray.toList())
+        {
+            if (element != null)
+            {
+                // Array elements are never top level entries of the object stream.
+                writeObject(output, element, false);
+                continue;
+            }
+            // An empty slot is written as an explicit null.
+            writeCOSNull(output);
+        }
+        output.write(COSWriter.ARRAY_CLOSE);
+        output.write(COSWriter.SPACE);
+    }
+
+    /**
+     * Write the given {@link COSDictionary} to the given stream.
+     *
+     * @param output The stream, that shall be written to.
+     * @param cosDictionary The content, that shall be written.
+     */
+    private void writeCOSDictionary(OutputStream output, COSDictionary cosDictionary)
+            throws IOException
+    {
+        output.write(COSWriter.DICT_OPEN);
+        for (Map.Entry<COSName, COSBase> entry : cosDictionary.entrySet())
+        {
+            final COSBase value = entry.getValue();
+            if (value == null)
+            {
+                // Entries without a value are left out completely, key included.
+                continue;
+            }
+            writeObject(output, entry.getKey(), false);
+            writeObject(output, value, false);
+        }
+        output.write(COSWriter.DICT_CLOSE);
+        output.write(COSWriter.SPACE);
+    }
+
+    /**
+     * Write the given {@link COSObjectKey} to the given stream.
+     *
+     * @param output The stream, that shall be written to.
+     * @param indirectReference The content, that shall be written.
+     */
+    private void writeObjectReference(OutputStream output, COSObjectKey indirectReference)
+            throws IOException
+    {
+        // Object number, generation number and the reference keyword, each followed by one separator.
+        final byte[] number = String.valueOf(indirectReference.getNumber())
+                .getBytes(StandardCharsets.ISO_8859_1);
+        final byte[] generation = String.valueOf(indirectReference.getGeneration())
+                .getBytes(StandardCharsets.ISO_8859_1);
+        output.write(number);
+        output.write(COSWriter.SPACE);
+        output.write(generation);
+        output.write(COSWriter.SPACE);
+        output.write(COSWriter.REFERENCE);
+        output.write(COSWriter.SPACE);
+    }
+
+    /**
+     * Write {@link COSNull} to the given stream.
+     *
+     * @param output The stream, that shall be written to.
+     */
+    private void writeCOSNull(OutputStream output) throws IOException
+    {
+        output.write("null".getBytes(StandardCharsets.ISO_8859_1));
+        // Trailing separator, so the next token written to the stream cannot run into this one.
+        output.write(COSWriter.SPACE);
+    }
+
+    /**
+     * Write the given {@link Operator} to the given stream.
+     *
+     * @param output The stream, that shall be written to.
+     * @param operator The content, that shall be written.
+     */
+    private void writeOperator(OutputStream output, Operator operator) throws IOException
+    {
+        if (operator.getName().equals(OperatorName.BEGIN_INLINE_IMAGE))
+        {
+            output.write(OperatorName.BEGIN_INLINE_IMAGE.getBytes(StandardCharsets.ISO_8859_1));
+            output.write(COSWriter.SPACE);
+            COSDictionary dic = operator.getImageParameters();
+            for (COSName key : dic.keySet())
+            {
+                Object value = dic.getDictionaryObject(key);
+                key.writePDF(output);
+                output.write(COSWriter.SPACE);
+                // The value writers called by writeObject append their own trailing separator.
+                writeObject(output, value, false);
+            }
+            output.write(
+                    OperatorName.BEGIN_INLINE_IMAGE_DATA.getBytes(StandardCharsets.ISO_8859_1));
+            // The image data has to be separated from the data operator by a white-space character, otherwise
+            // the first data byte would be read as part of the operator.
+            output.write(COSWriter.SPACE);
+            output.write(operator.getImageData());
+            // Same for the end operator: without a separator it would merge with the last data byte.
+            output.write(COSWriter.SPACE);
+            output.write(OperatorName.END_INLINE_IMAGE.getBytes(StandardCharsets.ISO_8859_1));
+            output.write(COSWriter.SPACE);
+        }
+        else
+        {
+            output.write(operator.getName().getBytes(StandardCharsets.ISO_8859_1));
+            // Terminate the operator like every other token, so a following token cannot run into its name.
+            output.write(COSWriter.SPACE);
+        }
+    }
+}

Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterObjectStream.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/CompressParameters.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/CompressParameters.java?rev=1883896&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/CompressParameters.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/CompressParameters.java Sat Nov 28 13:01:07 2020
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdfwriter.compress;
+
+/**
+ * An instance of this class centralizes and provides the configuration for a PDF compression.
+ * 
+ * @author Christian Appl
+ */
+public class CompressParameters
+{
+
+    /** The number of objects per object stream, that is used unless a positive value has been configured. */
+    public static final int DEFAULT_OBJECT_STREAM_SIZE = 200;
+
+    private int objectStreamSize = DEFAULT_OBJECT_STREAM_SIZE;
+
+    /**
+     * Configures how many objects a single compressed object stream may contain. Values of zero or below are
+     * replaced by {@link #DEFAULT_OBJECT_STREAM_SIZE}. Higher object stream sizes may cause PDF readers to slow
+     * down during the rendering of PDF documents, therefore a reasonable value should be selected.
+     *
+     * @param objectStreamSize The number of objects, that can be contained in compressed object streams.
+     * @return The current instance, to allow method chaining.
+     */
+    public CompressParameters setObjectStreamSize(int objectStreamSize)
+    {
+        if (objectStreamSize > 0)
+        {
+            this.objectStreamSize = objectStreamSize;
+        }
+        else
+        {
+            // Not a usable size: fall back to the default.
+            this.objectStreamSize = DEFAULT_OBJECT_STREAM_SIZE;
+        }
+        return this;
+    }
+
+    /**
+     * Returns how many objects a single compressed object stream may contain. This is either the configured
+     * positive value or {@link #DEFAULT_OBJECT_STREAM_SIZE}.
+     *
+     * @return The number of objects, that can be contained in compressed object streams.
+     */
+    public int getObjectStreamSize()
+    {
+        return this.objectStreamSize;
+    }
+
+}

Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/CompressParameters.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/TraversedCOSElement.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/TraversedCOSElement.java?rev=1883896&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/TraversedCOSElement.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/TraversedCOSElement.java Sat Nov 28 13:01:07 2020
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdfwriter.compress;
+
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.cos.COSStream;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * An instance of this class represents a traversed element of a COS tree. It allows determining the position of a
+ * {@link COSBase} in a hierarchical COS structure and provides the means to further traverse and evaluate its
+ * descendants.
+ *
+ * @author Christian Appl
+ */
+public class TraversedCOSElement
+{
+
+    private final TraversedCOSElement parent;
+    private final COSBase currentObject;
+    private final List<TraversedCOSElement> traversedChildren = new ArrayList<>();
+    private boolean partOfStreamDictionary = false;
+    private final List<COSBase> allObjects;
+
+    /**
+     * Construct a fresh entrypoint for the traversal of a hierarchical COS structure, beginning with the given
+     * {@link COSBase}.
+     *
+     * @param currentObject The initial {@link COSBase}, with which the structure traversal shall begin.
+     */
+    public TraversedCOSElement(COSBase currentObject)
+    {
+        // A root node: it has no parent and starts with an empty list of traversed objects.
+        this(new ArrayList<>(), null, currentObject);
+    }
+
+    /**
+     * Construct a traversal node for the traversal of a hierarchical COS structure, located at the given
+     * {@link COSBase}, preceded by this given list of ancestors and contained in the given parent structure.
+     *
+     * @param allObjects The list of nodes, that have been traversed to reach the current object.
+     * @param parent The parent node, that does contain this node.
+     * @param currentObject The initial {@link COSBase}, with which the structure traversal shall begin.
+     */
+    private TraversedCOSElement(List<COSBase> allObjects, TraversedCOSElement parent,
+            COSBase currentObject)
+    {
+        // The list is stored by reference, it is not copied.
+        this.allObjects = allObjects;
+        this.currentObject = currentObject;
+        this.parent = parent;
+    }
+
+    /**
+     * Construct a new traversal node for the given element and append it as a child to the current node.
+     *
+     * @param element The element, that shall be traversed.
+     * @return The resulting traversal node, that has been created.
+     */
+    public TraversedCOSElement appendTraversedElement(COSBase element)
+    {
+        if (element == null)
+        {
+            // Nothing to descend into: the current node is returned unchanged.
+            return this;
+        }
+        allObjects.add(element);
+        final TraversedCOSElement child = new TraversedCOSElement(allObjects, this, element);
+        // A child is part of a stream dictionary, if this node already is, or if this node is the stream itself.
+        boolean insideStream = isPartOfStreamDictionary();
+        if (!insideStream)
+        {
+            insideStream = getCurrentBaseObject() instanceof COSStream;
+        }
+        child.setPartOfStreamDictionary(insideStream);
+        this.traversedChildren.add(child);
+        return child;
+    }
+
+    /**
+     * Returns the current {@link COSBase} of this traversal node.
+     *
+     * @return The current {@link COSBase} of this traversal node.
+     */
+    public COSBase getCurrentObject()
+    {
+        // Returned as stored; a COSObject wrapper is not resolved here (see getCurrentBaseObject()).
+        return currentObject;
+    }
+
+    /**
+     * Returns the actual current {@link COSBase} of this traversal node. Meaning: If the current traversal node
+     * contains a reference to a {@link COSObject}, its actual base object will be returned instead.
+     *
+     * @return The actual current {@link COSBase} of this traversal node.
+     */
+    public COSBase getCurrentBaseObject()
+    {
+        if (currentObject instanceof COSObject)
+        {
+            // Indirect wrapper: hand out the object it refers to.
+            return ((COSObject) currentObject).getObject();
+        }
+        return currentObject;
+    }
+
+    /**
+     * Returns the parent node of the current traversal node.
+     *
+     * @return The parent node of the current traversal node.
+     */
+    public TraversedCOSElement getParent()
+    {
+        // null for a node created via the public constructor.
+        return this.parent;
+    }
+
+    /**
+     * Returns all known traversable/traversed children contained by the current traversal node.
+     *
+     * @return All known traversable/traversed children contained by the current traversal node.
+     */
+    public List<TraversedCOSElement> getTraversedChildren()
+    {
+        // Hands out the internal list itself; no defensive copy is made.
+        return traversedChildren;
+    }
+
+    /**
+     * Returns the chain of traversal nodes, that leads from the root of the traversal down to this node.
+     *
+     * @return A list of traversal nodes, beginning with the root node and ending with this node.
+     */
+    public List<TraversedCOSElement> getTraversedElements()
+    {
+        List<TraversedCOSElement> path;
+        if (this.parent != null)
+        {
+            // Let the ancestors contribute their part of the path first.
+            path = this.parent.getTraversedElements();
+        }
+        else
+        {
+            path = new ArrayList<>();
+        }
+        path.add(this);
+        return path;
+    }
+
+    /**
+     * Returns true, if the given {@link COSBase} is equal to the object wrapped by this traversal node.
+     *
+     * @param object The object, that shall be compared.
+     * @return True, if the given {@link COSBase} is equal to the object wrapped by this traversal node.
+     */
+    public boolean equals(COSBase object)
+    {
+        // NOTE(review): this overloads, but does not override Object#equals(Object). The comparison is by
+        // identity against the stored object; a COSObject wrapper is not resolved.
+        return this.currentObject == object;
+    }
+
+    /**
+     * Searches all known traversed child nodes of the current traversal node for the given {@link COSBase}.
+     *
+     * @param object The {@link COSBase}, that shall be found.
+     * @return The traversal node representing the searched {@link COSBase} or null, if such a node can not be found.
+     */
+    public TraversedCOSElement findAtCurrentPosition(COSBase object)
+    {
+        // Only the direct children are inspected; the first match wins.
+        for (int index = 0; index < traversedChildren.size(); index++)
+        {
+            final TraversedCOSElement candidate = traversedChildren.get(index);
+            // Resolves to equals(COSBase), which compares the wrapped object by identity.
+            if (candidate.equals(object))
+            {
+                return candidate;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Returns a list of all objects, that have been traversed in the created traversal tree.
+     *
+     * @return A list of all objects, that have been traversed in the created traversal tree.
+     */
+    public List<COSBase> getAllTraversedObjects()
+    {
+        // Hands out the list that appendTraversedElement() passes on to every child it creates; not a copy.
+        return allObjects;
+    }
+
+    /**
+     * Returns true, if the given traversal node has been marked as a part of a {@link COSStream}.
+     *
+     * @return True, if the given traversal node has been marked as a part of a {@link COSStream}
+     */
+    public boolean isPartOfStreamDictionary()
+    {
+        // false until set; appendTraversedElement() sets it for every child it creates.
+        return partOfStreamDictionary;
+    }
+
+    /**
+     * Set to true, if the given traversal node shall be marked as a part of a {@link COSStream}.
+     *
+     * @param partOfStreamDictionary True, if the given traversal node shall be marked as a part of a {@link COSStream}
+     */
+    public void setPartOfStreamDictionary(boolean partOfStreamDictionary)
+    {
+        // Only changes this node; already created children keep their own flag.
+        this.partOfStreamDictionary = partOfStreamDictionary;
+    }
+
+}

Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/TraversedCOSElement.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java?rev=1883896&r1=1883895&r2=1883896&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java Sat Nov 28 13:01:07 2020
@@ -49,6 +49,7 @@ import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.io.MemoryUsageSetting;
 import org.apache.pdfbox.io.RandomAccessRead;
 import org.apache.pdfbox.pdfwriter.COSWriter;
+import org.apache.pdfbox.pdfwriter.compress.CompressParameters;
 import org.apache.pdfbox.pdmodel.common.COSArrayList;
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
 import org.apache.pdfbox.pdmodel.common.PDStream;
@@ -907,38 +908,65 @@ public class PDDocument implements Close
      */
     public void save(OutputStream output) throws IOException
     {
+        saveCompressed(output, null);
+    }
+
+    /**
+     * Compress the document and save it to a file.
+     *
+     * @param file The file to save as.
+     * @param parameters The parameters for the document's compression.
+     * @throws IOException if the output could not be written
+     */
+    public void saveCompressed(File file, CompressParameters parameters) throws IOException
+    {
+        // The file stream is owned here: saveCompressed(OutputStream, CompressParameters) can throw before its
+        // writer has taken over the stream (e.g. for an already closed document), which would leak the file
+        // handle. Closing the stream a second time after a successful save has no further effect.
+        try (OutputStream output = new BufferedOutputStream(new FileOutputStream(file)))
+        {
+            saveCompressed(output, parameters);
+        }
+    }
+
+    /**
+     * This will compress the document and save it to an output stream.
+     *
+     * @param output The stream to write to. It will be closed when done. It is recommended to wrap it in a
+     * {@link java.io.BufferedOutputStream}, unless it is already buffered.
+     * @param parameters The parameters for the document's compression.
+     * @throws IOException if the output could not be written
+     */
+    public void saveCompressed(OutputStream output, CompressParameters parameters)
+            throws IOException
+    {
         if (document.isClosed())
         {
             throw new IOException("Cannot save a document which has been closed");
         }
 
+        // object stream compression requires a cross reference stream.
+        document.setIsXRefStream(parameters != null);
         // subset designated fonts
         for (PDFont font : fontsToSubset)
         {
             font.subset();
         }
         fontsToSubset.clear();
-        
-         // save PDF
-        try (COSWriter writer = new COSWriter(output))
+
+        // save PDF
+        try (COSWriter writer = new COSWriter(output, parameters))
         {
             writer.write(this);
         }
     }
 
     /**
-     * Save the PDF as an incremental update. This is only possible if the PDF was loaded from a
-     * file or a stream, not if the document was created in PDFBox itself. There must be a path of
-     * objects that have {@link COSUpdateInfo#isNeedToBeUpdated()} set, starting from the document
-     * catalog. For signatures this is taken care by PDFBox itself.
-     *<p>
-     * Other usages of this method are for experienced users only. You will usually never need it.
-     * It is useful only if you are required to keep the current revision and append the changes. A
-     * typical use case is changing a signed file without invalidating the signature.
-     *
-     * @param output stream to write to. It will be closed when done. It
-     * <i><b>must never</b></i> point to the source file or that one will be
-     * harmed!
+     * Save the PDF as an incremental update. This is only possible if the PDF was loaded from a file or a stream, not
+     * if the document was created in PDFBox itself. There must be a path of objects that have
+     * {@link COSUpdateInfo#isNeedToBeUpdated()} set, starting from the document catalog. For signatures this is taken
+     * care by PDFBox itself.
+     * <p>
+     * Other usages of this method are for experienced users only. You will usually never need it. It is useful only if
+     * you are required to keep the current revision and append the changes. A typical use case is changing a signed
+     * file without invalidating the signature.
+     *
+     * @param output stream to write to. It will be closed when done. It <i><b>must never</b></i> point to the source
+     * file or that one will be harmed!
      * @throws IOException if the output could not be written
      * @throws IllegalStateException if the document was not loaded from a file or a stream.
      */

Added: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSDocumentCompressionTest.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSDocumentCompressionTest.java?rev=1883896&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSDocumentCompressionTest.java (added)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSDocumentCompressionTest.java Sat Nov 28 13:01:07 2020
@@ -0,0 +1,262 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdfwriter;
+
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.pdfwriter.compress.CompressParameters;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageContentStream;
+import org.apache.pdfbox.pdmodel.common.PDRectangle;
+import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
+import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
+import org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy;
+import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.io.File;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * This test saves different documents compressed and expects no errors. It also checks whether the PDF is readable
+ * after compression and whether some central contents are still present. Output files are created in
+ * "target/test-output/compression/"; source files are located in "src/test/resources/input/compression/".
+ * <p>
+ * Every document is opened in a try-with-resources statement, so it is closed even if an assertion fails.
+ *
+ * @author Christian Appl
+ */
+class COSDocumentCompressionTest
+{
+
+    /**
+     * Directory the source documents are read from.
+     */
+    static final File inDir = new File("src/test/resources/input/compression/");
+
+    /**
+     * Directory the saved documents are written to.
+     */
+    static final File outDir = new File("target/test-output/compression/");
+
+    /**
+     * Creates the output directory, including missing parent directories, if it does not exist yet.
+     */
+    public COSDocumentCompressionTest()
+    {
+        outDir.mkdirs();
+    }
+
+    /**
+     * Compress a document that contains acroform fields and check the names of the expected fields.
+     *
+     * @throws Exception Shall be thrown, when compressing or reloading the document failed.
+     */
+    @Test
+    void testCompressAcroformDoc() throws Exception
+    {
+        File source = new File(inDir, "acroform.pdf");
+        File target = new File(outDir, "acroform.pdf");
+
+        // save a compressed copy of the source document
+        try (PDDocument document = Loader.loadPDF(source))
+        {
+            document.saveCompressed(target, new CompressParameters());
+        }
+
+        // reload the compressed copy and check its contents
+        try (PDDocument document = Loader.loadPDF(target))
+        {
+            assertEquals(1, document.getNumberOfPages(),
+                    "The number of pages should not have changed, during compression.");
+            List<PDAnnotation> annotations = document.getPage(0).getAnnotations();
+            assertEquals(13, annotations.size(),
+                    "The number of annotations should not have changed");
+            assertFieldName("TextField", annotations, 0,
+                    "The 1. annotation should have been a text field.");
+            assertFieldName("Button", annotations, 1,
+                    "The 2. annotation should have been a button.");
+            assertFieldName("CheckBox1", annotations, 2,
+                    "The 3. annotation should have been a checkbox.");
+            assertFieldName("CheckBox2", annotations, 3,
+                    "The 4. annotation should have been a checkbox.");
+            assertFieldName("TextFieldMultiLine", annotations, 4,
+                    "The 5. annotation should have been a multiline textfield.");
+            assertFieldName("TextFieldMultiLineRT", annotations, 5,
+                    "The 6. annotation should have been a multiline textfield.");
+            assertGroupOptionParent(annotations, 6);
+            assertGroupOptionParent(annotations, 7);
+            assertFieldName("ListBox", annotations, 8,
+                    "The 9. annotation should have been a ListBox.");
+            assertFieldName("ListBoxMultiSelect", annotations, 9,
+                    "The 10. annotation should have been a ListBox Multiselect.");
+            assertFieldName("ComboBox", annotations, 10,
+                    "The 11. annotation should have been a ComboBox.");
+            assertFieldName("ComboBoxEditable", annotations, 11,
+                    "The 12. annotation should have been a EditableComboBox.");
+            assertFieldName("Signature", annotations, 12,
+                    "The 13. annotation should have been a Signature.");
+        }
+    }
+
+    /**
+     * Assert that the {@link COSName#T} entry of the annotation at the given index has the expected value.
+     *
+     * @param expected the expected value of the entry.
+     * @param annotations the annotations of the page under test.
+     * @param index zero-based index of the annotation to check.
+     * @param message the message to report if the values differ.
+     */
+    private static void assertFieldName(String expected, List<PDAnnotation> annotations, int index,
+            String message)
+    {
+        assertEquals(expected, annotations.get(index).getCOSObject().getNameAsString(COSName.T),
+                message);
+    }
+
+    /**
+     * Assert that the annotation at the given index has a {@link COSName#PARENT} entry and that the
+     * {@link COSName#T} entry of that parent is "GroupOption".
+     *
+     * @param annotations the annotations of the page under test.
+     * @param index zero-based index of the annotation to check.
+     */
+    private static void assertGroupOptionParent(List<PDAnnotation> annotations, int index)
+    {
+        // the failure messages count the annotations starting at 1
+        int ordinal = index + 1;
+        assertNotNull(annotations.get(index).getCOSObject().getItem(COSName.PARENT),
+                "The " + ordinal + ". annotation should have had a parent entry.");
+        assertEquals("GroupOption",
+                annotations.get(index).getCOSObject().getCOSDictionary(COSName.PARENT)
+                        .getNameAsString(COSName.T),
+                "The " + ordinal + ". annotation's parent should have been a GroupOption.");
+    }
+
+    /**
+     * Compress a document that contains an attachment and check the expected attachment.
+     *
+     * @throws Exception Shall be thrown, when compressing or reloading the document failed.
+     */
+    @Test
+    void testCompressAttachmentsDoc() throws Exception
+    {
+        File source = new File(inDir, "attachment.pdf");
+        File target = new File(outDir, "attachment.pdf");
+
+        // save a compressed copy of the source document
+        try (PDDocument document = Loader.loadPDF(source))
+        {
+            document.saveCompressed(target, new CompressParameters());
+        }
+
+        // reload the compressed copy and check its contents
+        try (PDDocument document = Loader.loadPDF(target))
+        {
+            assertEquals(2, document.getNumberOfPages(),
+                    "The number of pages should not have changed, during compression.");
+            Map<String, PDComplexFileSpecification> embeddedFiles = document.getDocumentCatalog()
+                    .getNames().getEmbeddedFiles().getNames();
+            assertEquals(1, embeddedFiles.size(),
+                    "The document should have contained an attachment");
+            PDComplexFileSpecification attachment = embeddedFiles.get("A4Unicode.pdf");
+            assertNotNull(attachment, "The document should have contained 'A4Unicode.pdf'.");
+            assertEquals(14997, attachment.getEmbeddedFile().getLength(),
+                    "The attachments length is not as expected.");
+        }
+    }
+
+    /**
+     * Compress and encrypt the given document, then reload it with the user password.
+     *
+     * @throws Exception Shall be thrown, when compressing/encrypting or reloading the document failed.
+     */
+    @Test
+    void testCompressEncryptedDoc() throws Exception
+    {
+        File source = new File(inDir, "unencrypted.pdf");
+        File target = new File(outDir, "encrypted.pdf");
+
+        // protect the document and save a compressed copy
+        try (PDDocument document = Loader.loadPDF(source, "user"))
+        {
+            document.protect(
+                    new StandardProtectionPolicy("owner", "user", new AccessPermission(0)));
+            document.saveCompressed(target, new CompressParameters());
+        }
+
+        // the reloaded document is closed even if the assertion below fails
+        try (PDDocument document = Loader.loadPDF(target, "user"))
+        {
+            // If this didn't fail, the encryption dictionary should be present and working.
+            assertEquals(2, document.getNumberOfPages());
+        }
+    }
+
+    /**
+     * Adds a page to an existing document, saves it and checks the resulting page content stream.
+     * <p>
+     * A failure while altering or saving the document fails the test directly. It is neither printed and ignored nor
+     * hidden behind a follow-up error that occurs when the target file is loaded.
+     *
+     * @throws Exception Shall be thrown, if altering, saving or reloading the document failed.
+     */
+    @Test
+    void testAlteredDoc() throws Exception
+    {
+        File source = new File(inDir, "unencrypted.pdf");
+        File target = new File(outDir, "altered.pdf");
+
+        // add a page with a short text and save the altered document
+        try (PDDocument document = Loader.loadPDF(source))
+        {
+            PDPage page = new PDPage(new PDRectangle(100, 100));
+            document.addPage(page);
+
+            try (PDPageContentStream contentStream = new PDPageContentStream(document, page))
+            {
+                contentStream.beginText();
+                contentStream.newLineAtOffset(20, 80);
+                contentStream.setFont(PDType1Font.HELVETICA, 12);
+                contentStream.showText("Test");
+                contentStream.endText();
+            }
+
+            // NOTE(review): the compressed save below is disabled in this revision, so this test only
+            // exercises the regular save - confirm the reason before enabling it:
+            // document.saveCompressed(target, new CompressParameters());
+            document.save(target);
+        }
+
+        // reload the saved copy and check the added page
+        try (PDDocument document = Loader.loadPDF(target))
+        {
+            assertEquals(3, document.getNumberOfPages(),
+                    "The number of pages should not have changed, during compression.");
+            PDPage page = document.getPage(2);
+            assertEquals(43, page.getContentStreams().next().getLength(),
+                    "The stream length of the new page is not as expected.");
+        }
+    }
+
+}

Propchange: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSDocumentCompressionTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: pdfbox/trunk/pdfbox/src/test/resources/input/compression/acroform.pdf
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/input/compression/acroform.pdf?rev=1883896&view=auto
==============================================================================
Binary file - no diff available.

Propchange: pdfbox/trunk/pdfbox/src/test/resources/input/compression/acroform.pdf
------------------------------------------------------------------------------
    svn:mime-type = application/pdf

Added: pdfbox/trunk/pdfbox/src/test/resources/input/compression/attachment.pdf
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/input/compression/attachment.pdf?rev=1883896&view=auto
==============================================================================
Binary file - no diff available.

Propchange: pdfbox/trunk/pdfbox/src/test/resources/input/compression/attachment.pdf
------------------------------------------------------------------------------
    svn:mime-type = application/pdf

Added: pdfbox/trunk/pdfbox/src/test/resources/input/compression/unencrypted.pdf
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/input/compression/unencrypted.pdf?rev=1883896&view=auto
==============================================================================
Binary file - no diff available.

Propchange: pdfbox/trunk/pdfbox/src/test/resources/input/compression/unencrypted.pdf
------------------------------------------------------------------------------
    svn:mime-type = application/pdf