You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2020/11/28 13:01:08 UTC
svn commit: r1883896 - in /pdfbox/trunk/pdfbox/src:
main/java/org/apache/pdfbox/pdfwriter/
main/java/org/apache/pdfbox/pdfwriter/compress/
main/java/org/apache/pdfbox/pdmodel/ test/java/org/apache/pdfbox/pdfwriter/
test/resources/input/compression/
Author: lehmi
Date: Sat Nov 28 13:01:07 2020
New Revision: 1883896
URL: http://svn.apache.org/viewvc?rev=1883896&view=rev
Log:
PDFBOX-4952: add support for compressed object streams as proposed by Christian Appl
Added:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSObjectPool.java (with props)
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterCompressionPool.java (with props)
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterObjectStream.java (with props)
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/CompressParameters.java (with props)
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/TraversedCOSElement.java (with props)
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSDocumentCompressionTest.java (with props)
pdfbox/trunk/pdfbox/src/test/resources/input/compression/
pdfbox/trunk/pdfbox/src/test/resources/input/compression/acroform.pdf (with props)
pdfbox/trunk/pdfbox/src/test/resources/input/compression/attachment.pdf (with props)
pdfbox/trunk/pdfbox/src/test/resources/input/compression/unencrypted.pdf (with props)
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java?rev=1883896&r1=1883895&r2=1883896&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java Sat Nov 28 13:01:07 2020
@@ -63,7 +63,11 @@ import org.apache.pdfbox.io.RandomAccess
import org.apache.pdfbox.pdfparser.PDFXRefStream;
import org.apache.pdfbox.pdfparser.xref.FreeXReference;
import org.apache.pdfbox.pdfparser.xref.NormalXReference;
+import org.apache.pdfbox.pdfparser.xref.ObjectStreamXReference;
import org.apache.pdfbox.pdfparser.xref.XReferenceEntry;
+import org.apache.pdfbox.pdfwriter.compress.COSWriterCompressionPool;
+import org.apache.pdfbox.pdfwriter.compress.COSWriterObjectStream;
+import org.apache.pdfbox.pdfwriter.compress.CompressParameters;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.encryption.ProtectionPolicy;
import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
@@ -223,6 +227,8 @@ public class COSWriter implements ICOSVi
private SignatureInterface signatureInterface;
private byte[] incrementPart;
private COSArray byteRangeArray;
+ private CompressParameters compressParameters = null;
+ private boolean blockAddingObject = false;
/**
* COSWriter constructor.
@@ -232,17 +238,29 @@ public class COSWriter implements ICOSVi
*/
public COSWriter(OutputStream outputStream)
{
+ this(outputStream, (CompressParameters) null); // null compress parameters: compression stays disabled; NOTE(review): the cast presumably disambiguates between the two-argument constructors - confirm
+ }
+
+ /**
+ * COSWriter constructor that optionally enables compression of the written document.
+ *
+ * @param outputStream The output stream to write the PDF. It will be closed when this object is closed.
+ * @param compressParameters The configuration for the document's compression. May be null, in which case
+ * {@link #isCompress()} returns false and the document is written uncompressed.
+ */
+ public COSWriter(OutputStream outputStream, CompressParameters compressParameters)
+ {
setOutput(outputStream);
setStandardOutput(new COSStandardOutputStream(output));
+ this.compressParameters = compressParameters;
}
/**
* COSWriter constructor for incremental updates. There must be a path of objects that have
- * {@link COSUpdateInfo#isNeedToBeUpdated()} set, starting from the document catalog. For
- * signatures this is taken care by PDFBox itself.
+ * {@link COSUpdateInfo#isNeedToBeUpdated()} set, starting from the document catalog. For signatures this is taken
+ * care of by PDFBox itself.
*
- * @param outputStream output stream where the new PDF data will be written. It will be closed
- * when this object is closed.
+ * @param outputStream output stream where the new PDF data will be written. It will be closed when this object is
+ * closed.
* @param inputData random access read containing source PDF data
*
* @throws IOException if something went wrong
@@ -291,6 +309,16 @@ public class COSWriter implements ICOSVi
this.objectsToWrite.addAll(objectsToWrite);
}
+    /**
+     * Tells whether the resulting document is going to be written in compressed form.
+     *
+     * @return {@code true} if compress parameters have been supplied to this writer, {@code false} otherwise.
+     */
+    public boolean isCompress()
+    {
+        if (compressParameters == null)
+        {
+            return false;
+        }
+        return true;
+    }
+
private void prepareIncrement(PDDocument doc)
{
if (doc != null)
@@ -485,9 +513,90 @@ public class COSWriter implements ICOSVi
doWriteObjects();
}
+    /**
+     * This will write the compressed body of the document.
+     * <p>
+     * All objects collected by a {@link COSWriterCompressionPool} are registered with this writer under the keys
+     * chosen by the pool. Afterwards the top level objects are written directly, the remaining objects are written
+     * packed into object streams and, if the trailer contains one, the encryption dictionary is written last with
+     * encryption switched off. Nothing is written if the trailer has no root dictionary.
+     * <p>
+     * While this method runs, {@code blockAddingObject} is set so that {@code addObjectToWrite} ignores all
+     * requests. The flag is always reset on exit, also if the root dictionary is missing or an exception is thrown.
+     *
+     * @param document The document to write the body for.
+     * @throws IOException If there is an error writing the data.
+     */
+    private void doWriteBodyCompressed(COSDocument document) throws IOException
+    {
+        COSDictionary trailer = document.getTrailer();
+        COSDictionary root = trailer.getCOSDictionary(COSName.ROOT);
+        COSDictionary encrypt = trailer.getCOSDictionary(COSName.ENCRYPT);
+        blockAddingObject = true;
+        willEncrypt = encrypt != null;
+        try
+        {
+            if (root != null)
+            {
+                COSWriterCompressionPool compressionPool = new COSWriterCompressionPool(pdDocument,
+                        compressParameters);
+                // Register the object stream entries under the keys assigned by the pool.
+                for (COSObjectKey key : compressionPool.getObjectStreamObjects())
+                {
+                    COSBase object = compressionPool.getObject(key);
+                    writtenObjects.add(object);
+                    objectKeys.put(object, key);
+                    keyObject.put(key, object);
+                }
+                // Register the top level objects under the keys assigned by the pool.
+                for (COSObjectKey key : compressionPool.getTopLevelObjects())
+                {
+                    COSBase object = compressionPool.getObject(key);
+                    writtenObjects.add(object);
+                    objectKeys.put(object, key);
+                    keyObject.put(key, object);
+                }
+                // Write the top level objects, only after every pooled object has been registered.
+                for (COSObjectKey key : compressionPool.getTopLevelObjects())
+                {
+                    currentObjectKey = key;
+                    number = this.currentObjectKey.getNumber();
+                    doWriteObject(key, keyObject.get(key));
+                }
+                // Append object streams to document. Their object numbers continue after the highest number
+                // known to the pool.
+                long highestXRefObjectNumber = compressionPool.getHighestXRefObjectNumber();
+                for (COSWriterObjectStream finalizedObjectStream : compressionPool
+                        .createObjectStreams())
+                {
+                    highestXRefObjectNumber++;
+                    // Create new COSObject for object stream.
+                    COSStream stream = finalizedObjectStream.update();
+                    // Determine key for object stream.
+                    COSObjectKey objectStreamKey = new COSObjectKey(highestXRefObjectNumber, 0);
+                    COSObject objectStream = new COSObject(stream, objectStreamKey);
+                    // Add object stream entries to the xref stream; i is the index of the entry within its
+                    // object stream.
+                    int i = 0;
+                    for (COSObjectKey key : finalizedObjectStream.getPreparedKeys())
+                    {
+                        COSBase object = compressionPool.getObject(key);
+                        addXRefEntry(new ObjectStreamXReference(i, key, object, objectStreamKey));
+                        i++;
+                    }
+                    // Include object stream in document.
+                    currentObjectKey = objectStreamKey;
+                    number = objectStreamKey.getNumber();
+                    doWriteObject(objectStreamKey, objectStream);
+                }
+                // The encryption dictionary itself is written with encryption switched off.
+                willEncrypt = false;
+                if (encrypt != null)
+                {
+                    highestXRefObjectNumber++;
+                    COSObjectKey encryptKey = new COSObjectKey(highestXRefObjectNumber, 0);
+                    setNumber(encryptKey.getNumber());
+                    currentObjectKey = encryptKey;
+                    number = currentObjectKey.getNumber();
+                    writtenObjects.add(encrypt);
+                    keyObject.put(encryptKey, encrypt);
+                    objectKeys.put(encrypt, encryptKey);
+
+                    doWriteObject(encryptKey, encrypt);
+                }
+            }
+        }
+        finally
+        {
+            // Always release the block, otherwise a missing root dictionary or a failed write would leave
+            // addObjectToWrite permanently disabled for this writer.
+            blockAddingObject = false;
+        }
+    }
+
private void doWriteObjects() throws IOException
{
- while( objectsToWrite.size() > 0 )
+ while (!objectsToWrite.isEmpty())
{
COSBase nextObject = objectsToWrite.removeFirst();
objectsToWriteSet.remove(nextObject);
@@ -497,6 +606,10 @@ public class COSWriter implements ICOSVi
private void addObjectToWrite( COSBase object )
{
+ if (blockAddingObject)
+ {
+ return;
+ }
COSBase actual = object;
if( actual instanceof COSObject )
{
@@ -532,9 +645,43 @@ public class COSWriter implements ICOSVi
}
/**
+     * Writes a single object to the standard output, using the given predefined key.
+     * <p>
+     * An xref entry pointing to the current output position is recorded first. Then the object number, the
+     * generation number and the OBJ marker are written, followed by the serialized object and the ENDOBJ marker.
+     *
+     * @param key The key of the object to write.
+     * @param obj The object to write.
+     *
+     * @throws IOException if the output cannot be written
+     */
+    public void doWriteObject(COSObjectKey key, COSBase obj) throws IOException
+    {
+        // record the start position of the object in the xref
+        addXRefEntry(new NormalXReference(getStandardOutput().getPos(), key, obj));
+
+        // object header: number, generation and the OBJ marker
+        String objectNumber = String.valueOf(key.getNumber());
+        String generationNumber = String.valueOf(key.getGeneration());
+        getStandardOutput().write(objectNumber.getBytes(StandardCharsets.ISO_8859_1));
+        getStandardOutput().write(SPACE);
+        getStandardOutput().write(generationNumber.getBytes(StandardCharsets.ISO_8859_1));
+        getStandardOutput().write(SPACE);
+        getStandardOutput().write(OBJ);
+        getStandardOutput().writeEOL();
+
+        // object content; the null test has been added to please Sonar
+        // TODO: shouldn't all public methods be guarded against passing null. Passing null to most methods will
+        // fail with an NPE
+        if (obj != null)
+        {
+            obj.accept(this);
+        }
+
+        // object footer
+        getStandardOutput().writeEOL();
+        getStandardOutput().write(ENDOBJ);
+        getStandardOutput().writeEOL();
+    }
+
+ /**
* Convenience method, so that we get false for types that can't be updated.
+ *
* @param base
- * @return
+ * @return
*/
private boolean isNeedToBeUpdated(COSBase base)
{
@@ -557,24 +704,7 @@ public class COSWriter implements ICOSVi
writtenObjects.add( obj );
// find the physical reference
currentObjectKey = getObjectKey( obj );
- // add a x ref entry
- addXRefEntry(new NormalXReference(getStandardOutput().getPos(), currentObjectKey, obj));
- // write the object
- getStandardOutput().write(String.valueOf(currentObjectKey.getNumber()).getBytes(StandardCharsets.ISO_8859_1));
- getStandardOutput().write(SPACE);
- getStandardOutput().write(String.valueOf(currentObjectKey.getGeneration()).getBytes(StandardCharsets.ISO_8859_1));
- getStandardOutput().write(SPACE);
- getStandardOutput().write(OBJ);
- getStandardOutput().writeEOL();
- // null test added to please Sonar
- // TODO: shouldn't all public methods be guarded against passing null. Passing null to most methods will
- // fail with an NPE
- if (obj != null) {
- obj.accept( this );
- }
- getStandardOutput().writeEOL();
- getStandardOutput().write(ENDOBJ);
- getStandardOutput().writeEOL();
+ doWriteObject(currentObjectKey, obj);
}
/**
@@ -586,6 +716,11 @@ public class COSWriter implements ICOSVi
*/
protected void doWriteHeader(COSDocument doc) throws IOException
{
+ if (isCompress())
+ {
+ doc.setVersion(
+ Math.max(doc.getVersion(), COSWriterCompressionPool.MINIMUM_SUPPORTED_VERSION));
+ }
String headerString;
if (fdfDocument != null)
{
@@ -923,7 +1058,7 @@ public class COSWriter implements ICOSVi
}
}
// If no new entry is found, we need to write out the last result
- if (xRefEntriesList.size() > 0)
+ if (!xRefEntriesList.isEmpty())
{
list.add(last - count + 1);
list.add(count);
@@ -1159,7 +1294,14 @@ public class COSWriter implements ICOSVi
getStandardOutput().writeCRLF();
}
- doWriteBody(doc);
+ if (isCompress())
+ {
+ doWriteBodyCompressed(doc);
+ }
+ else
+ {
+ doWriteBody(doc);
+ }
// get the previous trailer
COSDictionary trailer = doc.getTrailer();
Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSObjectPool.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSObjectPool.java?rev=1883896&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSObjectPool.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSObjectPool.java Sat Nov 28 13:01:07 2020
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdfwriter.compress;
+
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.cos.COSObjectKey;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * An instance of this class maps {@link COSBase} instances to {@link COSObjectKey}s and allows for a bidirectional
+ * lookup.
+ *
+ * @author Christian Appl
+ */
+public class COSObjectPool
+{
+ private final Map<COSObjectKey, COSBase> keyPool = new HashMap<>();
+ private final Map<COSBase, COSObjectKey> objectPool = new HashMap<>();
+
+ private long highestXRefObjectNumber = 0;
+
+ /**
+ * Creates a map of {@link COSBase} instances to {@link COSObjectKey}s, allowing bidirectional lookups. This
+ * constructor can be used for pre - initialized structures to start the assignment of new object numbers starting
+ * from the hereby given offset.
+ *
+ * @param highestXRefObjectNumber The highest known object number.
+ */
+ public COSObjectPool(long highestXRefObjectNumber)
+ {
+ this.highestXRefObjectNumber = Math.max(this.highestXRefObjectNumber,
+ highestXRefObjectNumber);
+ }
+
+ /**
+ * Update the key and object maps.
+ *
+ * @param key The key, that shall be added.
+ * @param object The object, that shall be added.
+ * @return The actual key, the object has been added for.
+ */
+ public COSObjectKey put(COSObjectKey key, COSBase object)
+ {
+ if (object == null || contains(object))
+ {
+ return null;
+ }
+ COSObjectKey actualKey = key;
+ if (actualKey == null || contains(actualKey))
+ {
+ highestXRefObjectNumber++;
+ actualKey = new COSObjectKey(highestXRefObjectNumber, 0);
+ }
+ else
+ {
+ highestXRefObjectNumber = Math.max(key.getNumber(), highestXRefObjectNumber);
+ }
+ keyPool.put(actualKey, object);
+ objectPool.put(object, actualKey);
+ return actualKey;
+ }
+
+ /**
+ * Returns the {@link COSObjectKey} for a given registered {@link COSBase}. Returns null if such an object is not
+ * registered.
+ *
+ * @param object The {@link COSBase} a {@link COSObjectKey} shall be determined for.
+ * @return key The {@link COSObjectKey}, that matches the registered {@link COSBase}, or null if such an object is
+ * not registered.
+ */
+ public COSObjectKey getKey(COSBase object)
+ {
+ COSObjectKey key = null;
+ if (object instanceof COSObject)
+ {
+ key = objectPool.get(((COSObject) object).getObject());
+ }
+ if (key == null)
+ {
+ return objectPool.get(object);
+ }
+ return key;
+ }
+
+ /**
+ * Returns true, if a {@link COSBase} is registered for the given {@link COSObjectKey}.
+ *
+ * @param key The {@link COSObjectKey} that shall be checked for a registered {@link COSBase}.
+ * @return True, if a {@link COSBase} is registered for the given {@link COSObjectKey}.
+ */
+ public boolean contains(COSObjectKey key)
+ {
+ return keyPool.containsKey(key);
+ }
+
+ /**
+ * Returns the {@link COSBase}, that is registered for the given {@link COSObjectKey}, or null if no object is
+ * registered for that key.
+ *
+ * @param key The {@link COSObjectKey} a registered {@link COSBase} shall be found for.
+ * @return The {@link COSBase}, that is registered for the given {@link COSObjectKey}, or null if no object is
+ * registered for that key.
+ */
+ public COSBase getObject(COSObjectKey key)
+ {
+ return keyPool.get(key);
+ }
+
+ /**
+ * Returns true, if the given {@link COSBase} is a registered object of this pool.
+ *
+ * @param object The {@link COSBase} that shall be checked.
+ * @return True, if such a {@link COSBase} is registered in this pool.
+ */
+ public boolean contains(COSBase object)
+ {
+ return (object instanceof COSObject
+ && objectPool.containsKey(((COSObject) object).getObject()))
+ || objectPool.containsKey(object);
+ }
+
+ /**
+ * Returns the highest known object number (see: {@link COSObjectKey} for further information), that is currently
+ * registered in this pool.
+ *
+ * @return The highest known object number (see: {@link COSObjectKey} for further information), that is currently
+ * registered in this pool.
+ */
+ public long getHighestXRefObjectNumber()
+ {
+ return highestXRefObjectNumber;
+ }
+}
Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSObjectPool.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterCompressionPool.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterCompressionPool.java?rev=1883896&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterCompressionPool.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterCompressionPool.java Sat Nov 28 13:01:07 2020
@@ -0,0 +1,307 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdfwriter.compress;
+
+import org.apache.pdfbox.cos.*;
+import org.apache.pdfbox.pdfparser.PDFXRefStream;
+import org.apache.pdfbox.pdmodel.PDDocument;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * An instance of this class compresses the contents of a given {@link PDDocument}.
+ *
+ * @author Christian Appl
+ */
+public class COSWriterCompressionPool
+{
+
+ public static final float MINIMUM_SUPPORTED_VERSION = 1.6f;
+
+ private final PDDocument document;
+ private final CompressParameters parameters;
+
+ private final COSObjectPool objectPool;
+ private final COSObjectPool originalPool;
+
+ // A list containing all objects, that shall be directly appended to the document's top level container.
+ private final List<COSObjectKey> topLevelObjects = new ArrayList<>();
+ // A list containing all objects, that may be appended to an object stream.
+ private final List<COSObjectKey> objectStreamObjects = new ArrayList<>();
+
+ /**
+ * <p>
+ * Constructs an object that can be used to compress the contents of a given {@link PDDocument}. It provides the
+ * means to:
+ * </p>
+ * <ul>
+ * <li>Compress the COSStructure of the document, by streaming {@link COSBase}s to compressed
+ * {@link COSWriterObjectStream}s</li>
+ * </ul>
+ *
+ * @param document The document, that shall be compressed.
+ * @param parameters The configuration of the compression operations, that shall be applied.
+ * @throws IOException Shall be thrown if a compression operation failed.
+ */
+ public COSWriterCompressionPool(PDDocument document, CompressParameters parameters)
+ throws IOException
+ {
+ this.document = document;
+ this.parameters = parameters != null ? parameters : new CompressParameters();
+ objectPool = new COSObjectPool(document.getDocument().getHighestXRefObjectNumber());
+ originalPool = new COSObjectPool(document.getDocument().getHighestXRefObjectNumber());
+
+ // Initialize object pool.
+ COSDocument cosDocument = document.getDocument();
+
+ COSDictionary trailer = cosDocument.getTrailer();
+ addStructure(
+ new TraversedCOSElement(new COSObject(trailer.getCOSDictionary(COSName.ROOT))));
+ addStructure(
+ new TraversedCOSElement(new COSObject(trailer.getCOSDictionary(COSName.INFO))));
+
+ Collections.sort(objectStreamObjects);
+ Collections.sort(topLevelObjects);
+ }
+
+ /**
+ * Adds the given {@link COSBase} to this pool, using the given {@link COSObjectKey} as it's referencable ID. This
+ * method shall determine an appropriate key, for yet unregistered objects, to register them. Depending on the type
+ * of object, it shall either be appended as-is or shall be appended to a compressed {@link COSWriterObjectStream}.
+ *
+ * @param key The {@link COSObjectKey} that shall be used as the {@link COSBase}s ID, if possible.
+ * @param element The {@link COSBase}, that shall be registered in this pool.
+ */
+ private COSBase addObjectToPool(COSObjectKey key, TraversedCOSElement element)
+ {
+ // Drop hollow objects.
+ COSBase base = element.getCurrentObject();
+ base = base instanceof COSObject ? ((COSObject) base).getObject() : base;
+ if (base == null || objectPool.contains(base))
+ {
+ return base;
+ }
+
+ // Check whether the object can not be appended to an object stream.
+ // An objectStream shall only contain generation 0 objects.
+ // It shall never contain the encryption dictionary.
+ // It shall never contain the document's root dictionary. (relevant for document encryption)
+ // It shall never contain other streams.
+ if ((key != null && key.getGeneration() != 0)
+ || (document.getEncryption() != null
+ && base == document.getEncryption().getCOSObject())
+ || base == this.document.getDocument().getTrailer().getCOSDictionary(COSName.ROOT)
+ || base instanceof COSStream)
+ {
+ originalPool.put(key, base);
+ COSObjectKey actualKey = objectPool.put(key, base);
+ if (actualKey == null)
+ {
+ return base;
+ }
+ topLevelObjects.add(actualKey);
+ return base;
+ }
+
+ // Determine the object key.
+ COSObjectKey actualKey = objectPool.put(key, base);
+ if (actualKey == null)
+ {
+ return base;
+ }
+
+ // Append it to an object stream.
+ this.objectStreamObjects.add(actualKey);
+ return base;
+ }
+
+ /**
+ * Attempts to find yet unregistered streams and dictionaries in the given structure.
+ *
+ * @param traversedObject A Collection of all objects, that have already been traversed, to avoid cycles.
+ * @throws IOException Shall be thrown, if compressing the object failed.
+ */
+ private COSBase addStructure(TraversedCOSElement traversedObject) throws IOException
+ {
+ COSBase current = traversedObject.getCurrentObject();
+ COSBase base = current;
+ COSBase retVal = current;
+
+ if (current instanceof COSStream
+ || (current instanceof COSDictionary && !current.isDirect()))
+ {
+ base = addObjectToPool(base.getKey(), traversedObject);
+ retVal = base;
+ }
+ else if (current instanceof COSObject)
+ {
+ base = ((COSObject) current).getObject();
+ if (base instanceof COSDictionary)
+ {
+ base = addObjectToPool(current.getKey(), traversedObject);
+ }
+ else
+ {
+ retVal = base;
+ }
+ }
+
+ if (base instanceof COSArray)
+ {
+ COSArray array = (COSArray) base;
+ for (int i = 0; i < array.size(); i++)
+ {
+ COSBase value = array.get(i);
+ if ((value instanceof COSDictionary || value instanceof COSObject
+ || value instanceof COSArray)
+ && !traversedObject.getAllTraversedObjects().contains(value))
+ {
+ COSBase writtenValue = addStructure(
+ traversedObject.appendTraversedElement(value));
+ if ((value instanceof COSStream || value instanceof COSObject)
+ && !value.equals(writtenValue))
+ {
+ array.set(i, writtenValue);
+ }
+ }
+ }
+ }
+ else if (base instanceof COSDictionary)
+ {
+ COSDictionary dictionary = (COSDictionary) base;
+ for (COSName name : dictionary.keySet())
+ {
+ COSBase value = dictionary.getItem(name);
+ if ((value instanceof COSDictionary || value instanceof COSObject
+ || value instanceof COSArray)
+ && !traversedObject.getAllTraversedObjects().contains(value))
+ {
+ COSBase writtenValue = addStructure(
+ traversedObject.appendTraversedElement(value));
+ if ((value instanceof COSStream || value instanceof COSObject)
+ && !value.equals(writtenValue))
+ {
+ dictionary.setItem(name, writtenValue);
+ }
+ }
+ }
+ }
+
+ return retVal;
+ }
+
+ /**
+ * Returns all {@link COSBase}s, that must be added to the document's top level container. Those objects are not
+ * valid to be added to an object stream.
+ *
+ * @return A list of all top level {@link COSBase}s.
+ */
+ public List<COSObjectKey> getTopLevelObjects()
+ {
+ return topLevelObjects;
+ }
+
+ /**
+ * Returns all {@link COSBase}s that can be appended to an object stream. This list is only provided to enable
+ * reflections. Contained objects should indeed be added to a compressed document via an object stream, as can be
+ * created via calling: {@link COSWriterCompressionPool#createObjectStreams()}
+ *
+ * @return A list of all {@link COSBase}s, that can be added to an object stream.
+ */
+ public List<COSObjectKey> getObjectStreamObjects()
+ {
+ return objectStreamObjects;
+ }
+
+ /**
+ * Returns true, if the given {@link COSBase} is a registered object of this compression pool.
+ *
+ * @param object The object, that shall be checked.
+ * @return True, if the given {@link COSBase} is a registered object of this compression pool.
+ */
+ public boolean contains(COSBase object)
+ {
+ return objectPool.contains(object) || originalPool.contains(object);
+ }
+
+ /**
+ * Returns the {@link COSObjectKey}, that is registered for the given {@link COSBase} in this compression pool.
+ *
+ * @param object The {@link COSBase} a {@link COSObjectKey} is registered for in this compression pool.
+ * @return The {@link COSObjectKey}, that is registered for the given {@link COSBase} in this compression pool, if
+ * such an object is contained.
+ */
+ public COSObjectKey getKey(COSBase object)
+ {
+ COSObjectKey key = objectPool.getKey(object);
+ if (key == null)
+ {
+ key = originalPool.getKey(object);
+ }
+ return key;
+ }
+
+ /**
+ * Returns the {@link COSBase}, that is registered for the given {@link COSObjectKey} in this compression pool.
+ *
+ * @param key The {@link COSObjectKey} a {@link COSBase} is registered for in this compression pool.
+ * @return The {@link COSBase}, that is registered for the given {@link COSObjectKey} in this compression pool, if
+ * such an object is contained.
+ */
+ public COSBase getObject(COSObjectKey key)
+ {
+ return objectPool.getObject(key);
+ }
+
+ /**
+ * Returns the highest object number, that is registered in this compression pool.
+ *
+ * @return The highest object number, that is registered in this compression pool.
+ */
+ public long getHighestXRefObjectNumber()
+ {
+ return objectPool.getHighestXRefObjectNumber();
+ }
+
+ /**
+ * Creates {@link COSWriterObjectStream}s for all currently registered objects of this pool, that have been marked
+ * as fit for being compressed in this manner. Such object streams may be added to a PDF document and shall be
+ * declared in a document's {@link PDFXRefStream} accordingly. The objects contained in such a stream must not be
+ * added to the document separately.
+ *
+ * @return The created {@link COSWriterObjectStream}s for all currently registered compressible objects.
+ */
+ public List<COSWriterObjectStream> createObjectStreams()
+ {
+ List<COSWriterObjectStream> objectStreams = new ArrayList<COSWriterObjectStream>();
+ COSWriterObjectStream objectStream = null;
+ for (int i = 0; i < objectStreamObjects.size(); i++)
+ {
+ COSObjectKey key = objectStreamObjects.get(i);
+ if (objectStream == null || (i % parameters.getObjectStreamSize()) == 0)
+ {
+ objectStream = new COSWriterObjectStream(this);
+ objectStreams.add(objectStream);
+ }
+ objectStream.prepareStreamObject(key, objectPool.getObject(key));
+ }
+ return objectStreams;
+ }
+}
Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterCompressionPool.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterObjectStream.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterObjectStream.java?rev=1883896&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterObjectStream.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterObjectStream.java Sat Nov 28 13:01:07 2020
@@ -0,0 +1,438 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdfwriter.compress;
+
+import org.apache.pdfbox.contentstream.operator.Operator;
+import org.apache.pdfbox.contentstream.operator.OperatorName;
+import org.apache.pdfbox.cos.*;
+import org.apache.pdfbox.pdfparser.PDFXRefStream;
+import org.apache.pdfbox.pdfwriter.COSWriter;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * An instance of this class represents an object stream, that compresses a number of {@link COSObject}s in a stream. It
+ * may be added to the top level container of a written PDF document in place of the compressed objects. The document's
+ * {@link PDFXRefStream} must be adapted accordingly.
+ * <p>
+ * Objects are collected with {@link #prepareStreamObject(COSObjectKey, COSBase)}; the stream data (a map of object
+ * numbers and byte offsets, followed by the serialized objects) is only produced when {@link #update()} is called.
+ *
+ * @author Christian Appl
+ */
+public class COSWriterObjectStream extends COSStream
+{
+
+    private final COSWriterCompressionPool compressionPool;
+    // Keys and objects are stored pairwise: preparedObjects.get(i) is written under preparedKeys.get(i).
+    private final List<COSObjectKey> preparedKeys = new ArrayList<>();
+    private final List<COSBase> preparedObjects = new ArrayList<>();
+
+    /**
+     * Creates an object stream for compressible objects from the given {@link COSWriterCompressionPool}. The objects
+     * must first be prepared for this object stream, by adding them via calling
+     * {@link COSWriterObjectStream#prepareStreamObject(COSObjectKey, COSBase)} and will be written to this
+     * {@link COSStream}, when {@link COSWriterObjectStream#update()} is called.
+     *
+     * @param compressionPool The compression pool an object stream shall be created for.
+     */
+    public COSWriterObjectStream(COSWriterCompressionPool compressionPool)
+    {
+        this.compressionPool = compressionPool;
+        setItem(COSName.TYPE, COSName.OBJ_STM);
+    }
+
+    /**
+     * Returns the number of objects, that have been written to this object stream. ({@link COSName#N})
+     *
+     * @return The number of objects, that have been written to this object stream, or 0 if the entry is not set.
+     */
+    public int getObjectCount()
+    {
+        return getInt(COSName.N, 0);
+    }
+
+    /**
+     * Sets the number of objects, that have been written to this object stream. ({@link COSName#N})
+     *
+     * @param size The number of objects, that have been written to this object stream.
+     */
+    public void setObjectCount(int size)
+    {
+        setInt(COSName.N, size);
+    }
+
+    /**
+     * Returns the byte offset of the first object contained in this object stream. ({@link COSName#FIRST})
+     *
+     * @return The byte offset of the first object contained in this object stream, or 0 if the entry is not set.
+     */
+    public int getFirstEntryOffset()
+    {
+        return getInt(COSName.FIRST, 0);
+    }
+
+    /**
+     * Sets the byte offset of the first object contained in this object stream. ({@link COSName#FIRST})
+     *
+     * @param firstEntryOffset The byte offset of the first object contained in this object stream.
+     */
+    public void setFirstEntryOffset(int firstEntryOffset)
+    {
+        setInt(COSName.FIRST, firstEntryOffset);
+    }
+
+    /**
+     * Prepares the given {@link COSObject} to be written to this object stream, using the given {@link COSObjectKey} as
+     * its ID for indirect references. If a {@link COSObject} is passed, the object it refers to is stored instead of
+     * the reference itself. The call is ignored, if either argument is null.
+     *
+     * @param key The {@link COSObjectKey}, that shall be used for indirect references to the {@link COSObject}.
+     * @param object The {@link COSObject}, that shall be written to this object stream.
+     */
+    public void prepareStreamObject(COSObjectKey key, COSBase object)
+    {
+        if (key != null && object != null)
+        {
+            preparedKeys.add(key);
+            preparedObjects
+                    .add(object instanceof COSObject ? ((COSObject) object).getObject() : object);
+        }
+    }
+
+    /**
+     * Returns all {@link COSObjectKey}s, that shall be added to the object stream, when
+     * {@link COSWriterObjectStream#update()} is called. The internal list is returned, not a copy.
+     *
+     * @return All {@link COSObjectKey}s, that shall be added to the object stream.
+     */
+    public List<COSObjectKey> getPreparedKeys()
+    {
+        return preparedKeys;
+    }
+
+    /**
+     * Returns all {@link COSObject}s, that shall be added to the object stream, when
+     * {@link COSWriterObjectStream#update()} is called. The internal list is returned, not a copy.
+     *
+     * @return All {@link COSObject}s, that shall be added to the object stream.
+     */
+    public List<COSBase> getPreparedObjects()
+    {
+        return preparedObjects;
+    }
+
+    /**
+     * Updates the underlying {@link COSStream} by writing all prepared {@link COSObject}s to this object stream. The
+     * entries {@link COSName#N} and {@link COSName#FIRST} are set accordingly and the data is Flate encoded.
+     *
+     * @return The underlying {@link COSStream} dictionary of this object stream.
+     * @throws IOException Shall be thrown, if writing the object stream failed.
+     */
+    public COSStream update() throws IOException
+    {
+        int objectCount = preparedKeys.size();
+        setObjectCount(objectCount);
+
+        // Serialize every prepared object on its own first: the offset map at the start of the stream needs the
+        // byte length of each object.
+        List<byte[]> objectsBuffer = new ArrayList<>(objectCount);
+        for (int i = 0; i < objectCount; i++)
+        {
+            try (ByteArrayOutputStream partialOutput = new ByteArrayOutputStream())
+            {
+                writeObject(partialOutput, preparedObjects.get(i), true);
+                objectsBuffer.add(partialOutput.toByteArray());
+            }
+        }
+
+        // Deduce the object stream byte offset map: "<object number> <offset>" pairs, where the offset is counted
+        // from the first serialized object.
+        byte[] offsetsMapBuffer;
+        long nextObjectOffset = 0;
+        try (ByteArrayOutputStream partialOutput = new ByteArrayOutputStream())
+        {
+            for (int i = 0; i < objectCount; i++)
+            {
+                partialOutput.write(String.valueOf(preparedKeys.get(i).getNumber())
+                        .getBytes(StandardCharsets.ISO_8859_1));
+                partialOutput.write(COSWriter.SPACE);
+                partialOutput.write(
+                        String.valueOf(nextObjectOffset).getBytes(StandardCharsets.ISO_8859_1));
+                partialOutput.write(COSWriter.SPACE);
+                nextObjectOffset += objectsBuffer.get(i).length;
+            }
+            offsetsMapBuffer = partialOutput.toByteArray();
+        }
+
+        // Write Flate compressed object stream data.
+        try (OutputStream output = createOutputStream(COSName.FLATE_DECODE))
+        {
+            output.write(offsetsMapBuffer);
+            // The first object starts directly behind the offset map.
+            setFirstEntryOffset(offsetsMapBuffer.length);
+            for (byte[] rawObject : objectsBuffer)
+            {
+                output.write(rawObject);
+            }
+        }
+        return this;
+    }
+
+    /**
+     * This method prepares and writes COS data to the object stream by selecting appropriate specialized methods for
+     * the content.
+     * <p>
+     * Every call writes exactly one object (or operator). A missing value, or a {@link COSObject} that does not
+     * resolve to an object, is written as the PDF {@code null} object instead of being skipped silently.
+     *
+     * @param output The stream, that shall be written to.
+     * @param object The content, that shall be written.
+     * @param topLevel True, if the currently written object is a top level entry of this object stream.
+     * @throws IOException Shall be thrown, when an exception occurred for the write operation, or if the content is
+     * of a type that can not be written to an object stream.
+     */
+    private void writeObject(OutputStream output, Object object, boolean topLevel)
+            throws IOException
+    {
+        if (object instanceof Operator)
+        {
+            writeOperator(output, (Operator) object);
+            return;
+        }
+        if (object != null && !(object instanceof COSBase))
+        {
+            throw new IOException("Error: Unknown type in object stream:" + object);
+        }
+        COSBase base = object instanceof COSObject ? ((COSObject) object).getObject()
+                : (COSBase) object;
+        if (base == null)
+        {
+            // Writing nothing here would leave a dictionary key without a value, shift the following elements of
+            // an array and create a zero length entry in the offset map. The PDF specification treats a reference
+            // to a missing object like the null object, so "null" keeps the output well-formed and equivalent.
+            writeCOSNull(output);
+            return;
+        }
+        if (!topLevel && this.compressionPool.contains(base))
+        {
+            // Nested objects known to the pool are written as indirect references, never inline.
+            COSObjectKey key = this.compressionPool.getKey(base);
+            if (key == null)
+            {
+                throw new IOException(
+                        "Error: Adding unknown object reference to object stream:" + object);
+            }
+            writeObjectReference(output, key);
+        }
+        else if (base instanceof COSString)
+        {
+            writeCOSString(output, (COSString) base);
+        }
+        else if (base instanceof COSFloat)
+        {
+            writeCOSFloat(output, (COSFloat) base);
+        }
+        else if (base instanceof COSInteger)
+        {
+            writeCOSInteger(output, (COSInteger) base);
+        }
+        else if (base instanceof COSBoolean)
+        {
+            writeCOSBoolean(output, (COSBoolean) base);
+        }
+        else if (base instanceof COSName)
+        {
+            writeCOSName(output, (COSName) base);
+        }
+        else if (base instanceof COSArray)
+        {
+            writeCOSArray(output, (COSArray) base);
+        }
+        else if (base instanceof COSDictionary)
+        {
+            writeCOSDictionary(output, (COSDictionary) base);
+        }
+        else if (base instanceof COSNull)
+        {
+            writeCOSNull(output);
+        }
+        else
+        {
+            throw new IOException("Error: Unknown type in object stream:" + object);
+        }
+    }
+
+    /**
+     * Write the given {@link COSString} to the given stream, followed by a space.
+     *
+     * @param output The stream, that shall be written to.
+     * @param cosString The content, that shall be written.
+     * @throws IOException Shall be thrown, when an exception occurred for the write operation.
+     */
+    private void writeCOSString(OutputStream output, COSString cosString) throws IOException
+    {
+        COSWriter.writeString(cosString, output);
+        output.write(COSWriter.SPACE);
+    }
+
+    /**
+     * Write the given {@link COSFloat} to the given stream, followed by a space.
+     *
+     * @param output The stream, that shall be written to.
+     * @param cosFloat The content, that shall be written.
+     * @throws IOException Shall be thrown, when an exception occurred for the write operation.
+     */
+    private void writeCOSFloat(OutputStream output, COSFloat cosFloat) throws IOException
+    {
+        cosFloat.writePDF(output);
+        output.write(COSWriter.SPACE);
+    }
+
+    /**
+     * Write the given {@link COSInteger} to the given stream, followed by a space.
+     *
+     * @param output The stream, that shall be written to.
+     * @param cosInteger The content, that shall be written.
+     * @throws IOException Shall be thrown, when an exception occurred for the write operation.
+     */
+    private void writeCOSInteger(OutputStream output, COSInteger cosInteger) throws IOException
+    {
+        cosInteger.writePDF(output);
+        output.write(COSWriter.SPACE);
+    }
+
+    /**
+     * Write the given {@link COSBoolean} to the given stream, followed by a space.
+     *
+     * @param output The stream, that shall be written to.
+     * @param cosBoolean The content, that shall be written.
+     * @throws IOException Shall be thrown, when an exception occurred for the write operation.
+     */
+    private void writeCOSBoolean(OutputStream output, COSBoolean cosBoolean) throws IOException
+    {
+        cosBoolean.writePDF(output);
+        output.write(COSWriter.SPACE);
+    }
+
+    /**
+     * Write the given {@link COSName} to the given stream, followed by a space.
+     *
+     * @param output The stream, that shall be written to.
+     * @param cosName The content, that shall be written.
+     * @throws IOException Shall be thrown, when an exception occurred for the write operation.
+     */
+    private void writeCOSName(OutputStream output, COSName cosName) throws IOException
+    {
+        cosName.writePDF(output);
+        output.write(COSWriter.SPACE);
+    }
+
+    /**
+     * Write the given {@link COSArray} to the given stream. Every element of the array results in exactly one
+     * written object, so that the positions of the elements are preserved.
+     *
+     * @param output The stream, that shall be written to.
+     * @param cosArray The content, that shall be written.
+     * @throws IOException Shall be thrown, when an exception occurred for the write operation.
+     */
+    private void writeCOSArray(OutputStream output, COSArray cosArray) throws IOException
+    {
+        output.write(COSWriter.ARRAY_OPEN);
+        for (COSBase value : cosArray.toList())
+        {
+            // Missing elements and unresolvable references are written as "null" by writeObject.
+            writeObject(output, value, false);
+        }
+        output.write(COSWriter.ARRAY_CLOSE);
+        output.write(COSWriter.SPACE);
+    }
+
+    /**
+     * Write the given {@link COSDictionary} to the given stream. Entries without a value are omitted; every written
+     * key is followed by exactly one value.
+     *
+     * @param output The stream, that shall be written to.
+     * @param cosDictionary The content, that shall be written.
+     * @throws IOException Shall be thrown, when an exception occurred for the write operation.
+     */
+    private void writeCOSDictionary(OutputStream output, COSDictionary cosDictionary)
+            throws IOException
+    {
+        output.write(COSWriter.DICT_OPEN);
+        for (Map.Entry<COSName, COSBase> entry : cosDictionary.entrySet())
+        {
+            if (entry.getValue() != null)
+            {
+                writeObject(output, entry.getKey(), false);
+                writeObject(output, entry.getValue(), false);
+            }
+        }
+        output.write(COSWriter.DICT_CLOSE);
+        output.write(COSWriter.SPACE);
+    }
+
+    /**
+     * Write the given {@link COSObjectKey} to the given stream as an indirect reference (object number, generation
+     * and the reference keyword, each followed by a space).
+     *
+     * @param output The stream, that shall be written to.
+     * @param indirectReference The content, that shall be written.
+     * @throws IOException Shall be thrown, when an exception occurred for the write operation.
+     */
+    private void writeObjectReference(OutputStream output, COSObjectKey indirectReference)
+            throws IOException
+    {
+        output.write(String.valueOf(indirectReference.getNumber())
+                .getBytes(StandardCharsets.ISO_8859_1));
+        output.write(COSWriter.SPACE);
+        output.write(String.valueOf(indirectReference.getGeneration())
+                .getBytes(StandardCharsets.ISO_8859_1));
+        output.write(COSWriter.SPACE);
+        output.write(COSWriter.REFERENCE);
+        output.write(COSWriter.SPACE);
+    }
+
+    /**
+     * Write {@link COSNull} to the given stream, followed by a space.
+     *
+     * @param output The stream, that shall be written to.
+     * @throws IOException Shall be thrown, when an exception occurred for the write operation.
+     */
+    private void writeCOSNull(OutputStream output) throws IOException
+    {
+        output.write("null".getBytes(StandardCharsets.ISO_8859_1));
+        output.write(COSWriter.SPACE);
+    }
+
+    /**
+     * Write the given {@link Operator} to the given stream. For the operator named
+     * {@link OperatorName#BEGIN_INLINE_IMAGE} the image parameters, the image data marker, the image data and the
+     * end marker are written as well; for every other operator only its name is written.
+     *
+     * @param output The stream, that shall be written to.
+     * @param operator The content, that shall be written.
+     * @throws IOException Shall be thrown, when an exception occurred for the write operation.
+     */
+    private void writeOperator(OutputStream output, Operator operator) throws IOException
+    {
+        if (operator.getName().equals(OperatorName.BEGIN_INLINE_IMAGE))
+        {
+            output.write(OperatorName.BEGIN_INLINE_IMAGE.getBytes(StandardCharsets.ISO_8859_1));
+            COSDictionary dic = operator.getImageParameters();
+            for (COSName key : dic.keySet())
+            {
+                Object value = dic.getDictionaryObject(key);
+                key.writePDF(output);
+                output.write(COSWriter.SPACE);
+                writeObject(output, value, false);
+            }
+            output.write(
+                    OperatorName.BEGIN_INLINE_IMAGE_DATA.getBytes(StandardCharsets.ISO_8859_1));
+            output.write(operator.getImageData());
+            output.write(OperatorName.END_INLINE_IMAGE.getBytes(StandardCharsets.ISO_8859_1));
+        }
+        else
+        {
+            output.write(operator.getName().getBytes(StandardCharsets.ISO_8859_1));
+        }
+    }
+}
Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterObjectStream.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/CompressParameters.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/CompressParameters.java?rev=1883896&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/CompressParameters.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/CompressParameters.java Sat Nov 28 13:01:07 2020
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdfwriter.compress;
+
+/**
+ * Holds the settings that are applied when a PDF document is written with compression. At present the only setting
+ * is the number of objects that a single compressed object stream may contain.
+ *
+ * @author Christian Appl
+ */
+public class CompressParameters
+{
+
+    public static final int DEFAULT_OBJECT_STREAM_SIZE = 200;
+
+    private int objectStreamSize = DEFAULT_OBJECT_STREAM_SIZE;
+
+    /**
+     * Defines how many objects a compressed object stream can contain. Values of zero or below are replaced by
+     * {@link #DEFAULT_OBJECT_STREAM_SIZE}. Higher object stream sizes may cause PDF readers to slow down during the
+     * rendering of PDF documents, therefore a reasonable value should be selected.
+     *
+     * @param objectStreamSize The number of objects, that can be contained in compressed object streams.
+     * @return The current instance, to allow method chaining.
+     */
+    public CompressParameters setObjectStreamSize(int objectStreamSize)
+    {
+        if (objectStreamSize > 0)
+        {
+            this.objectStreamSize = objectStreamSize;
+        }
+        else
+        {
+            this.objectStreamSize = DEFAULT_OBJECT_STREAM_SIZE;
+        }
+        return this;
+    }
+
+    /**
+     * Tells how many objects a compressed object stream can contain. Higher object stream sizes may cause PDF
+     * readers to slow down during the rendering of PDF documents, therefore a reasonable value should be selected.
+     *
+     * @return The number of objects, that can be contained in compressed object streams.
+     */
+    public int getObjectStreamSize()
+    {
+        return this.objectStreamSize;
+    }
+
+}
Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/CompressParameters.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/TraversedCOSElement.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/TraversedCOSElement.java?rev=1883896&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/TraversedCOSElement.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/TraversedCOSElement.java Sat Nov 28 13:01:07 2020
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdfwriter.compress;
+
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.cos.COSStream;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A node of the tree that is built while a hierarchical COS structure is traversed. Every node wraps one
+ * {@link COSBase}, knows the node it was appended to and records the nodes appended to itself. This allows to
+ * determine the position of a {@link COSBase} in the structure and provides the means to further traverse and
+ * evaluate its descendants.
+ *
+ * @author Christian Appl
+ */
+public class TraversedCOSElement
+{
+
+    private final TraversedCOSElement parent;
+    private final COSBase currentObject;
+    private final List<TraversedCOSElement> traversedChildren = new ArrayList<>();
+    private boolean partOfStreamDictionary = false;
+    // Shared by all nodes of one traversal tree: every node hands the same list on to its children.
+    private final List<COSBase> allObjects;
+
+    /**
+     * Creates the root node of a new traversal tree for the given {@link COSBase}. The root has no parent and starts
+     * with an empty list of traversed objects.
+     *
+     * @param currentObject The initial {@link COSBase}, with which the structure traversal shall begin.
+     */
+    public TraversedCOSElement(COSBase currentObject)
+    {
+        this(new ArrayList<>(), null, currentObject);
+    }
+
+    /**
+     * Creates a traversal node for the given {@link COSBase} below the given parent node.
+     *
+     * @param allObjects The list, in which the objects traversed in this tree are collected.
+     * @param parent The parent node, that does contain this node, or null for a root node.
+     * @param currentObject The {@link COSBase}, that is wrapped by this node.
+     */
+    private TraversedCOSElement(List<COSBase> allObjects, TraversedCOSElement parent,
+            COSBase currentObject)
+    {
+        this.allObjects = allObjects;
+        this.currentObject = currentObject;
+        this.parent = parent;
+    }
+
+    /**
+     * Wraps the given element in a new traversal node and appends that node as a child to the current node. The
+     * element is also added to the list of all traversed objects. The new node is marked as part of a stream
+     * dictionary, if the current node is marked as such or wraps a {@link COSStream}.
+     *
+     * @param element The element, that shall be traversed.
+     * @return The traversal node, that has been created, or the current node, if the given element is null.
+     */
+    public TraversedCOSElement appendTraversedElement(COSBase element)
+    {
+        if (element == null)
+        {
+            return this;
+        }
+        allObjects.add(element);
+        TraversedCOSElement child = new TraversedCOSElement(allObjects, this, element);
+        boolean belowStream = isPartOfStreamDictionary()
+                || getCurrentBaseObject() instanceof COSStream;
+        child.setPartOfStreamDictionary(belowStream);
+        this.traversedChildren.add(child);
+        return child;
+    }
+
+    /**
+     * Returns the {@link COSBase} wrapped by this traversal node, exactly as it was given to the node.
+     *
+     * @return The current {@link COSBase} of this traversal node.
+     */
+    public COSBase getCurrentObject()
+    {
+        return currentObject;
+    }
+
+    /**
+     * Returns the actual current {@link COSBase} of this traversal node. Meaning: If the current traversal node
+     * contains a reference to a {@link COSObject}, its actual base object will be returned instead.
+     *
+     * @return The actual current {@link COSBase} of this traversal node.
+     */
+    public COSBase getCurrentBaseObject()
+    {
+        if (currentObject instanceof COSObject)
+        {
+            return ((COSObject) currentObject).getObject();
+        }
+        return currentObject;
+    }
+
+    /**
+     * Returns the node, to which this traversal node has been appended.
+     *
+     * @return The parent node of the current traversal node, or null for the root of a traversal tree.
+     */
+    public TraversedCOSElement getParent()
+    {
+        return this.parent;
+    }
+
+    /**
+     * Returns the nodes, that have been appended to the current traversal node. The internal list is returned, not
+     * a copy.
+     *
+     * @return All known traversable/traversed children contained by the current traversal node.
+     */
+    public List<TraversedCOSElement> getTraversedChildren()
+    {
+        return traversedChildren;
+    }
+
+    /**
+     * Returns the chain of traversal nodes, that leads from the root of the traversal tree to this node. The list is
+     * newly created for every call, starts with the root node and ends with this node.
+     *
+     * @return The traversal nodes from the root down to and including this node.
+     */
+    public List<TraversedCOSElement> getTraversedElements()
+    {
+        List<TraversedCOSElement> ancestry;
+        if (this.parent != null)
+        {
+            // The parent supplies the path down to itself; this node is appended below.
+            ancestry = this.parent.getTraversedElements();
+        }
+        else
+        {
+            ancestry = new ArrayList<TraversedCOSElement>();
+        }
+        ancestry.add(this);
+        return ancestry;
+    }
+
+    /**
+     * Returns true, if the given {@link COSBase} is the very same instance as the object wrapped by this traversal
+     * node. The comparison is done by identity. Note, that this method overloads {@link Object#equals(Object)} and
+     * does not override it.
+     *
+     * @param object The object, that shall be compared.
+     * @return True, if the given {@link COSBase} is the object wrapped by this traversal node.
+     */
+    public boolean equals(COSBase object)
+    {
+        return object == this.currentObject;
+    }
+
+    /**
+     * Searches the direct children of the current traversal node for a node wrapping the given {@link COSBase}.
+     *
+     * @param object The {@link COSBase}, that shall be found.
+     * @return The first child node wrapping the searched {@link COSBase} or null, if such a node can not be found.
+     */
+    public TraversedCOSElement findAtCurrentPosition(COSBase object)
+    {
+        TraversedCOSElement match = null;
+        for (TraversedCOSElement candidate : traversedChildren)
+        {
+            if (candidate.equals(object))
+            {
+                match = candidate;
+                break;
+            }
+        }
+        return match;
+    }
+
+    /**
+     * Returns the list of all objects, that have been appended in the traversal tree this node belongs to. The
+     * internal list is returned, not a copy.
+     *
+     * @return A list of all objects, that have been traversed in the created traversal tree.
+     */
+    public List<COSBase> getAllTraversedObjects()
+    {
+        return allObjects;
+    }
+
+    /**
+     * Tells, whether this traversal node has been marked as a part of a {@link COSStream}.
+     *
+     * @return True, if this traversal node has been marked as a part of a {@link COSStream}
+     */
+    public boolean isPartOfStreamDictionary()
+    {
+        return partOfStreamDictionary;
+    }
+
+    /**
+     * Marks or unmarks this traversal node as a part of a {@link COSStream}.
+     *
+     * @param partOfStreamDictionary True, if this traversal node shall be marked as a part of a {@link COSStream}
+     */
+    public void setPartOfStreamDictionary(boolean partOfStreamDictionary)
+    {
+        this.partOfStreamDictionary = partOfStreamDictionary;
+    }
+
+}
Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/TraversedCOSElement.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java?rev=1883896&r1=1883895&r2=1883896&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java Sat Nov 28 13:01:07 2020
@@ -49,6 +49,7 @@ import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.pdfwriter.COSWriter;
+import org.apache.pdfbox.pdfwriter.compress.CompressParameters;
import org.apache.pdfbox.pdmodel.common.COSArrayList;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.PDStream;
@@ -907,38 +908,65 @@ public class PDDocument implements Close
*/
public void save(OutputStream output) throws IOException
{
+ saveCompressed(output, null);
+ }
+
+ /**
+  * Compress the document and save it to a file.
+  * <p>
+  * The file stream is opened by this method and is always closed again, even if saving fails before the document
+  * writer has taken over the stream.
+  *
+  * @param file The file to save as.
+  * @param parameters The parameters for the document's compression.
+  * @throws IOException if the output could not be written
+  */
+ public void saveCompressed(File file, CompressParameters parameters) throws IOException
+ {
+     // saveCompressed(OutputStream, CompressParameters) can throw before it creates the writer that closes the
+     // stream (closed document, failure while subsetting fonts); without this guard the file handle would leak.
+     // Closing an already closed stream a second time has no effect.
+     try (OutputStream output = new BufferedOutputStream(new FileOutputStream(file)))
+     {
+         saveCompressed(output, parameters);
+     }
+ }
+
+ /**
+ * This will compress the document and save it to an output stream.
+ * <p>
+ * Before writing, all fonts designated for subsetting are subset and the list of such fonts is cleared.
+ *
+ * @param output The stream to write to. It will be closed when done. It is recommended to wrap it in a
+ * {@link java.io.BufferedOutputStream}, unless it is already buffered.
+ * @param parameters The parameters for the document's compression. {@link #save(OutputStream)} passes
+ * {@code null} here; the document is only flagged as using a cross reference stream, if this is not null.
+ * @throws IOException if the document has already been closed or the output could not be written
+ */
+ public void saveCompressed(OutputStream output, CompressParameters parameters)
+ throws IOException
+ {
if (document.isClosed())
{
throw new IOException("Cannot save a document which has been closed");
}
+ // object stream compression requires a cross reference stream.
+ document.setIsXRefStream(parameters != null);
// subset designated fonts
for (PDFont font : fontsToSubset)
{
font.subset();
}
fontsToSubset.clear();
-
- // save PDF
- try (COSWriter writer = new COSWriter(output))
+
+ // save PDF
+ try (COSWriter writer = new COSWriter(output, parameters))
{
writer.write(this);
}
}
/**
- * Save the PDF as an incremental update. This is only possible if the PDF was loaded from a
- * file or a stream, not if the document was created in PDFBox itself. There must be a path of
- * objects that have {@link COSUpdateInfo#isNeedToBeUpdated()} set, starting from the document
- * catalog. For signatures this is taken care by PDFBox itself.
- *<p>
- * Other usages of this method are for experienced users only. You will usually never need it.
- * It is useful only if you are required to keep the current revision and append the changes. A
- * typical use case is changing a signed file without invalidating the signature.
- *
- * @param output stream to write to. It will be closed when done. It
- * <i><b>must never</b></i> point to the source file or that one will be
- * harmed!
+ * Save the PDF as an incremental update. This is only possible if the PDF was loaded from a file or a stream, not
+ * if the document was created in PDFBox itself. There must be a path of objects that have
+ * {@link COSUpdateInfo#isNeedToBeUpdated()} set, starting from the document catalog. For signatures this is taken
+ * care by PDFBox itself.
+ * <p>
+ * Other usages of this method are for experienced users only. You will usually never need it. It is useful only if
+ * you are required to keep the current revision and append the changes. A typical use case is changing a signed
+ * file without invalidating the signature.
+ *
+ * @param output stream to write to. It will be closed when done. It <i><b>must never</b></i> point to the source
+ * file or that one will be harmed!
* @throws IOException if the output could not be written
* @throws IllegalStateException if the document was not loaded from a file or a stream.
*/
Added: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSDocumentCompressionTest.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSDocumentCompressionTest.java?rev=1883896&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSDocumentCompressionTest.java (added)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSDocumentCompressionTest.java Sat Nov 28 13:01:07 2020
@@ -0,0 +1,262 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdfwriter;
+
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.pdfwriter.compress.CompressParameters;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageContentStream;
+import org.apache.pdfbox.pdmodel.common.PDRectangle;
+import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
+import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
+import org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy;
+import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.io.File;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * This test saves different documents compressed and expects this to succeed without errors. It also checks whether
+ * the PDF is readable after compression and whether some central contents are still present afterwards. Output files
+ * are created in "target/test-output/compression/"; source files are located in "src/test/resources/input/compression/".
+ *
+ * @author Christian Appl
+ */
+class COSDocumentCompressionTest
+{
+
+ static File inDir = new File("src/test/resources/input/compression/");
+ static File outDir = new File("target/test-output/compression/");
+
+ public COSDocumentCompressionTest()
+ {
+ outDir.mkdirs();
+ }
+
+    /**
+     * Compress a document, that contains acroform fields and touch the expected fields.
+     * <p>
+     * The source document is saved compressed, then the result is loaded again and the page count as well as the
+     * names of the 13 widget annotations on the first page are checked.
+     *
+     * @throws Exception Shall be thrown, when compressing the document failed.
+     */
+    @Test
+    void testCompressAcroformDoc() throws Exception
+    {
+        File source = new File(inDir, "acroform.pdf");
+        File target = new File(outDir, "acroform.pdf");
+
+        // try-with-resources: a failure while closing must not hide a failure of the save itself
+        try (PDDocument original = Loader.loadPDF(source))
+        {
+            original.saveCompressed(target, new CompressParameters());
+        }
+
+        try (PDDocument compressed = Loader.loadPDF(target))
+        {
+            assertEquals(1, compressed.getNumberOfPages(),
+                    "The number of pages should not have changed, during compression.");
+            PDPage page = compressed.getPage(0);
+            List<PDAnnotation> annotations = page.getAnnotations();
+            assertEquals(13, annotations.size(),
+                    "The number of annotations should not have changed");
+            assertEquals("TextField", annotations.get(0).getCOSObject().getNameAsString(COSName.T),
+                    "The 1. annotation should have been a text field.");
+            assertEquals("Button", annotations.get(1).getCOSObject().getNameAsString(COSName.T),
+                    "The 2. annotation should have been a button.");
+            assertEquals("CheckBox1", annotations.get(2).getCOSObject().getNameAsString(COSName.T),
+                    "The 3. annotation should have been a checkbox.");
+            assertEquals("CheckBox2", annotations.get(3).getCOSObject().getNameAsString(COSName.T),
+                    "The 4. annotation should have been a checkbox.");
+            assertEquals("TextFieldMultiLine",
+                    annotations.get(4).getCOSObject().getNameAsString(COSName.T),
+                    "The 5. annotation should have been a multiline textfield.");
+            assertEquals("TextFieldMultiLineRT",
+                    annotations.get(5).getCOSObject().getNameAsString(COSName.T),
+                    "The 6. annotation should have been a multiline textfield.");
+            // annotations 7 and 8 carry no name of their own, their name is taken from the parent entry
+            assertNotNull(annotations.get(6).getCOSObject().getItem(COSName.PARENT),
+                    "The 7. annotation should have had a parent entry.");
+            assertEquals("GroupOption",
+                    annotations.get(6).getCOSObject().getCOSDictionary(COSName.PARENT)
+                            .getNameAsString(COSName.T),
+                    "The 7. annotation's parent should have been a GroupOption.");
+            assertNotNull(annotations.get(7).getCOSObject().getItem(COSName.PARENT),
+                    "The 8. annotation should have had a parent entry.");
+            assertEquals("GroupOption",
+                    annotations.get(7).getCOSObject().getCOSDictionary(COSName.PARENT)
+                            .getNameAsString(COSName.T),
+                    "The 8. annotation's parent should have been a GroupOption.");
+            assertEquals("ListBox", annotations.get(8).getCOSObject().getNameAsString(COSName.T),
+                    "The 9. annotation should have been a ListBox.");
+            assertEquals("ListBoxMultiSelect",
+                    annotations.get(9).getCOSObject().getNameAsString(COSName.T),
+                    "The 10. annotation should have been a ListBox Multiselect.");
+            assertEquals("ComboBox", annotations.get(10).getCOSObject().getNameAsString(COSName.T),
+                    "The 11. annotation should have been a ComboBox.");
+            assertEquals("ComboBoxEditable",
+                    annotations.get(11).getCOSObject().getNameAsString(COSName.T),
+                    "The 12. annotation should have been a EditableComboBox.");
+            assertEquals("Signature", annotations.get(12).getCOSObject().getNameAsString(COSName.T),
+                    "The 13. annotation should have been a Signature.");
+        }
+    }
+
+    /**
+     * Compress a document, that contains an attachment and touch the expected attachment.
+     * <p>
+     * The source document is saved compressed, then the result is loaded again and the page count, the number of
+     * embedded files and the length of the embedded file "A4Unicode.pdf" are checked.
+     *
+     * @throws Exception Shall be thrown, when compressing the document failed.
+     */
+    @Test
+    void testCompressAttachmentsDoc() throws Exception
+    {
+        File source = new File(inDir, "attachment.pdf");
+        File target = new File(outDir, "attachment.pdf");
+
+        // try-with-resources: a failure while closing must not hide a failure of the save itself
+        try (PDDocument original = Loader.loadPDF(source))
+        {
+            original.saveCompressed(target, new CompressParameters());
+        }
+
+        try (PDDocument compressed = Loader.loadPDF(target))
+        {
+            assertEquals(2, compressed.getNumberOfPages(),
+                    "The number of pages should not have changed, during compression.");
+            Map<String, PDComplexFileSpecification> embeddedFiles = compressed.getDocumentCatalog()
+                    .getNames().getEmbeddedFiles().getNames();
+            assertEquals(1, embeddedFiles.size(),
+                    "The document should have contained an attachment");
+            // look the attachment up first, so the assertion itself stays free of side effects
+            PDComplexFileSpecification attachment = embeddedFiles.get("A4Unicode.pdf");
+            assertNotNull(attachment,
+                    "The document should have contained 'A4Unicode.pdf'.");
+            assertEquals(14997, attachment.getEmbeddedFile().getLength(),
+                    "The attachments length is not as expected.");
+        }
+    }
+
+    /**
+     * Compress and encrypt the given document, without causing an exception to be thrown.
+     * <p>
+     * The source document is protected with a {@link StandardProtectionPolicy}, saved compressed and loaded again
+     * using the user password. Afterwards the page count is checked.
+     *
+     * @throws Exception Shall be thrown, when compressing/encrypting the document failed.
+     */
+    @Test
+    void testCompressEncryptedDoc() throws Exception
+    {
+        File source = new File(inDir, "unencrypted.pdf");
+        File target = new File(outDir, "encrypted.pdf");
+
+        try (PDDocument original = Loader.loadPDF(source, "user"))
+        {
+            original.protect(
+                    new StandardProtectionPolicy("owner", "user", new AccessPermission(0)));
+            original.saveCompressed(target, new CompressParameters());
+        }
+
+        // try-with-resources guarantees that the reloaded document is closed even if the assertion fails
+        try (PDDocument encrypted = Loader.loadPDF(target, "user"))
+        {
+            // If this didn't fail, the encryption dictionary should be present and working.
+            assertEquals(2, encrypted.getNumberOfPages());
+        }
+    }
+
+    /**
+     * Adds a page to an existing document, saves it and touches the resulting page content stream.
+     * <p>
+     * A page of 100x100 units containing the text "Test" is appended to the source document. The result is saved,
+     * loaded again and the page count as well as the length of the content stream of the new page are checked.
+     *
+     * @throws Exception Shall be thrown, if saving or reloading the document failed.
+     */
+    @Test
+    void testAlteredDoc() throws Exception
+    {
+        File source = new File(inDir, "unencrypted.pdf");
+        File target = new File(outDir, "altered.pdf");
+
+        // Failures are no longer caught and printed here: a failed save has to fail the test directly instead of
+        // showing up later as a misleading error while loading the target file.
+        try (PDDocument original = Loader.loadPDF(source))
+        {
+            PDPage page = new PDPage(new PDRectangle(100, 100));
+            original.addPage(page);
+
+            try (PDPageContentStream contentStream = new PDPageContentStream(original, page))
+            {
+                contentStream.beginText();
+                contentStream.newLineAtOffset(20, 80);
+                contentStream.setFont(PDType1Font.HELVETICA, 12);
+                contentStream.showText("Test");
+                contentStream.endText();
+            }
+
+            // NOTE(review): the document is saved uncompressed although this is a compression test. The
+            // compressed variant below was already disabled in the original commit - confirm whether it can
+            // be enabled.
+            original.save(target);
+            // original.saveCompressed(target, new CompressParameters());
+        }
+
+        try (PDDocument altered = Loader.loadPDF(target))
+        {
+            assertEquals(3, altered.getNumberOfPages(),
+                    "The number of pages should not have changed, during compression.");
+            PDPage page = altered.getPage(2);
+            assertEquals(43, page.getContentStreams().next().getLength(),
+                    "The stream length of the new page is not as expected.");
+        }
+    }
+
+}
Propchange: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSDocumentCompressionTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/pdfbox/src/test/resources/input/compression/acroform.pdf
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/input/compression/acroform.pdf?rev=1883896&view=auto
==============================================================================
Binary file - no diff available.
Propchange: pdfbox/trunk/pdfbox/src/test/resources/input/compression/acroform.pdf
------------------------------------------------------------------------------
svn:mime-type = application/pdf
Added: pdfbox/trunk/pdfbox/src/test/resources/input/compression/attachment.pdf
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/input/compression/attachment.pdf?rev=1883896&view=auto
==============================================================================
Binary file - no diff available.
Propchange: pdfbox/trunk/pdfbox/src/test/resources/input/compression/attachment.pdf
------------------------------------------------------------------------------
svn:mime-type = application/pdf
Added: pdfbox/trunk/pdfbox/src/test/resources/input/compression/unencrypted.pdf
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/input/compression/unencrypted.pdf?rev=1883896&view=auto
==============================================================================
Binary file - no diff available.
Propchange: pdfbox/trunk/pdfbox/src/test/resources/input/compression/unencrypted.pdf
------------------------------------------------------------------------------
svn:mime-type = application/pdf