You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2019/08/07 13:47:07 UTC
[commons-compress] branch master updated: COMPRESS-485 keep zip
entries order in parallel zip creation
This is an automated email from the ASF dual-hosted git repository.
bodewig pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-compress.git
The following commit(s) were added to refs/heads/master by this push:
new 38342b8 COMPRESS-485 keep zip entries order in parallel zip creation
38342b8 is described below
commit 38342b8446e9c03ee758511c309f98e2a85ff599
Author: Hervé Boutemy <hb...@apache.org>
AuthorDate: Mon May 6 08:22:38 2019 +0200
COMPRESS-485 keep zip entries order in parallel zip creation
this will ease Reproducible Builds when creating zip or jar archives
thanks to Arnaud Nauwynck for the great help
---
.../archivers/zip/ParallelScatterZipCreator.java | 39 +++++++++++---------
.../archivers/zip/ScatterZipOutputStream.java | 42 ++++++++++++++++++++++
src/site/xdoc/zip.xml | 19 +++++-----
.../zip/ParallelScatterZipCreatorTest.java | 5 ++-
4 files changed, 80 insertions(+), 25 deletions(-)
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ParallelScatterZipCreator.java b/src/main/java/org/apache/commons/compress/archivers/zip/ParallelScatterZipCreator.java
index 820cd58..c5010c0 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ParallelScatterZipCreator.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ParallelScatterZipCreator.java
@@ -41,9 +41,9 @@ import static org.apache.commons.compress.archivers.zip.ZipArchiveEntryRequest.c
/**
* Creates a zip in parallel by using multiple threadlocal {@link ScatterZipOutputStream} instances.
* <p>
- * Note that this class generally makes no guarantees about the order of things written to
- * the output file. Things that need to come in a specific order (manifests, directories)
- * must be handled by the client of this class, usually by writing these things to the
+ * Note that until 1.18, this class generally made no guarantees about the order of things written to
+ * the output file. Things that needed to come in a specific order (manifests, directories)
+ * had to be handled by the client of this class, usually by writing these things to the
* {@link ZipArchiveOutputStream} <em>before</em> calling {@link #writeTo writeTo} on this class.</p>
* <p>
* The client can supply an {@link java.util.concurrent.ExecutorService}, but for reasons of
@@ -55,7 +55,7 @@ public class ParallelScatterZipCreator {
private final List<ScatterZipOutputStream> streams = synchronizedList(new ArrayList<ScatterZipOutputStream>());
private final ExecutorService es;
private final ScatterGatherBackingStoreSupplier backingStoreSupplier;
- private final List<Future<Object>> futures = new ArrayList<>();
+ private final List<Future<ScatterZipOutputStream>> futures = new ArrayList<>();
private final long startedAt = System.currentTimeMillis();
private long compressionDoneAt = 0;
@@ -157,7 +157,7 @@ public class ParallelScatterZipCreator {
*
* @param callable The callable to run, created by {@link #createCallable createCallable}, possibly wrapped by caller.
*/
- public final void submit(final Callable<Object> callable) {
+ public final void submit(final Callable<ScatterZipOutputStream> callable) {
futures.add(es.submit(callable));
}
@@ -179,17 +179,18 @@ public class ParallelScatterZipCreator {
* will be propagated through the callable.
*/
- public final Callable<Object> createCallable(final ZipArchiveEntry zipArchiveEntry, final InputStreamSupplier source) {
+ public final Callable<ScatterZipOutputStream> createCallable(final ZipArchiveEntry zipArchiveEntry, final InputStreamSupplier source) {
final int method = zipArchiveEntry.getMethod();
if (method == ZipMethod.UNKNOWN_CODE) {
throw new IllegalArgumentException("Method must be set on zipArchiveEntry: " + zipArchiveEntry);
}
final ZipArchiveEntryRequest zipArchiveEntryRequest = createZipArchiveEntryRequest(zipArchiveEntry, source);
- return new Callable<Object>() {
+ return new Callable<ScatterZipOutputStream>() {
@Override
- public Object call() throws Exception {
- tlScatterStreams.get().addArchiveEntry(zipArchiveEntryRequest);
- return null;
+ public ScatterZipOutputStream call() throws Exception {
+ ScatterZipOutputStream scatterStream = tlScatterStreams.get();
+ scatterStream.addArchiveEntry(zipArchiveEntryRequest);
+ return scatterStream;
}
};
}
@@ -210,12 +211,13 @@ public class ParallelScatterZipCreator {
* will be propagated through the callable.
* @since 1.13
*/
- public final Callable<Object> createCallable(final ZipArchiveEntryRequestSupplier zipArchiveEntryRequestSupplier) {
- return new Callable<Object>() {
+ public final Callable<ScatterZipOutputStream> createCallable(final ZipArchiveEntryRequestSupplier zipArchiveEntryRequestSupplier) {
+ return new Callable<ScatterZipOutputStream>() {
@Override
- public Object call() throws Exception {
- tlScatterStreams.get().addArchiveEntry(zipArchiveEntryRequestSupplier.get());
- return null;
+ public ScatterZipOutputStream call() throws Exception {
+ ScatterZipOutputStream scatterStream = tlScatterStreams.get();
+ scatterStream.addArchiveEntry(zipArchiveEntryRequestSupplier.get());
+ return scatterStream;
}
};
}
@@ -255,8 +257,13 @@ public class ParallelScatterZipCreator {
compressionDoneAt = System.currentTimeMillis();
synchronized (streams) {
+ // write zip entries in the order they were added (kept as futures)
+ for (final Future<ScatterZipOutputStream> future : futures) {
+ ScatterZipOutputStream scatterStream = future.get();
+ scatterStream.zipEntryWriter().writeNextZipEntry(targetStream);
+ }
+
for (final ScatterZipOutputStream scatterStream : streams) {
- scatterStream.writeTo(targetStream);
scatterStream.close();
}
}
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ScatterZipOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ScatterZipOutputStream.java
index 006c531..deed65e 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ScatterZipOutputStream.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ScatterZipOutputStream.java
@@ -27,6 +27,7 @@ import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
+import java.util.Iterator;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicBoolean;
@@ -51,6 +52,7 @@ public class ScatterZipOutputStream implements Closeable {
private final ScatterGatherBackingStore backingStore;
private final StreamCompressor streamCompressor;
private AtomicBoolean isClosed = new AtomicBoolean();
+ private ZipEntryWriter zipEntryWriter = null;
private static class CompressedEntry {
final ZipArchiveEntryRequest zipArchiveEntryRequest;
@@ -106,6 +108,7 @@ public class ScatterZipOutputStream implements Closeable {
*
* @param target The archive to receive the contents of this {@link ScatterZipOutputStream}.
* @throws IOException If writing fails
+ * @see #zipEntryWriter()
*/
public void writeTo(final ZipArchiveOutputStream target) throws IOException {
backingStore.closeForWriting();
@@ -119,6 +122,42 @@ public class ScatterZipOutputStream implements Closeable {
}
}
+ public static class ZipEntryWriter implements Closeable {
+ private final Iterator<CompressedEntry> itemsIterator;
+ private final InputStream itemsIteratorData;
+
+ public ZipEntryWriter(ScatterZipOutputStream scatter) throws IOException {
+ scatter.backingStore.closeForWriting();
+ itemsIterator = scatter.items.iterator();
+ itemsIteratorData = scatter.backingStore.getInputStream();
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (itemsIteratorData != null) {
+ itemsIteratorData.close();
+ }
+ }
+
+ public void writeNextZipEntry(final ZipArchiveOutputStream target) throws IOException {
+ CompressedEntry compressedEntry = itemsIterator.next();
+ try (final BoundedInputStream rawStream = new BoundedInputStream(itemsIteratorData, compressedEntry.compressedSize)) {
+ target.addRawArchiveEntry(compressedEntry.transferToArchiveEntry(), rawStream);
+ }
+ }
+ }
+
+ /**
+ * Get a zip entry writer for this scatter stream.
+ * @throws IOException If getting scatter stream input stream
+ * @return the ZipEntryWriter created on first call of the method
+ */
+ public ZipEntryWriter zipEntryWriter() throws IOException {
+ if (zipEntryWriter == null) {
+ zipEntryWriter = new ZipEntryWriter(this);
+ }
+ return zipEntryWriter;
+ }
/**
* Closes this stream, freeing all resources involved in the creation of this stream.
@@ -130,6 +169,9 @@ public class ScatterZipOutputStream implements Closeable {
return;
}
try {
+ if (zipEntryWriter != null) {
+ zipEntryWriter.close();
+ }
backingStore.close();
} finally {
streamCompressor.close();
diff --git a/src/site/xdoc/zip.xml b/src/site/xdoc/zip.xml
index 59ff5dc..1629ef5 100644
--- a/src/site/xdoc/zip.xml
+++ b/src/site/xdoc/zip.xml
@@ -551,14 +551,17 @@
<p>To assist this process, clients can use
<code>ParallelScatterZipCreator</code> that will handle threads
pools and correct memory model consistency so the client
- can avoid these issues. Please note that when writing well-formed
- Zip files this way, it is usually necessary to keep a
- separate <code>ScatterZipOutputStream</code> that receives all directories
- and writes this to the target <code>ZipArchiveOutputStream</code> before
- the ones created through <code>ParallelScatterZipCreator</code>. This is the responsibility of the client.</p>
-
- <p>There is no guarantee of order of the entries when writing a Zip
- file with <code>ParallelScatterZipCreator</code>.</p>
+ can avoid these issues.</p>
+
+ <p>Until version 1.18, there was no guarantee of order of the entries when writing a Zip
+ file with <code>ParallelScatterZipCreator</code>. In consequence, when writing well-formed
+ Zip files this way, it was usually necessary to keep a
+ separate <code>ScatterZipOutputStream</code> that received all directories
+ and wrote this to the target <code>ZipArchiveOutputStream</code> before
+ the ones created through <code>ParallelScatterZipCreator</code>. This was the responsibility of the client.</p>
+
+ <p>Starting with version 1.19, entries order is kept, then this specific handling of directories is not
+ necessary any more.</p>
See the examples section for a code sample demonstrating how to make a zip file.
</subsection>
diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ParallelScatterZipCreatorTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ParallelScatterZipCreatorTest.java
index 19a8fe9..7a46ede 100644
--- a/src/test/java/org/apache/commons/compress/archivers/zip/ParallelScatterZipCreatorTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/zip/ParallelScatterZipCreatorTest.java
@@ -98,12 +98,15 @@ public class ParallelScatterZipCreatorTest {
private void removeEntriesFoundInZipFile(final File result, final Map<String, byte[]> entries) throws IOException {
final ZipFile zf = new ZipFile(result);
final Enumeration<ZipArchiveEntry> entriesInPhysicalOrder = zf.getEntriesInPhysicalOrder();
+ int i = 0;
while (entriesInPhysicalOrder.hasMoreElements()){
final ZipArchiveEntry zipArchiveEntry = entriesInPhysicalOrder.nextElement();
final InputStream inputStream = zf.getInputStream(zipArchiveEntry);
final byte[] actual = IOUtils.toByteArray(inputStream);
final byte[] expected = entries.remove(zipArchiveEntry.getName());
assertArrayEquals( "For " + zipArchiveEntry.getName(), expected, actual);
+ // check order of zip entries vs order of order of addition to the parallel zip creator
+ assertEquals( "For " + zipArchiveEntry.getName(), "file" + i++, zipArchiveEntry.getName());
}
zf.close();
}
@@ -145,7 +148,7 @@ public class ParallelScatterZipCreatorTest {
return new ByteArrayInputStream(payloadBytes);
}
};
- final Callable<Object> callable;
+ final Callable<ScatterZipOutputStream> callable;
if (i % 2 == 0) {
callable = zipCreator.createCallable(za, iss);
} else {