You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2017/02/07 20:00:50 UTC
commons-compress git commit: COMPRESS-271 implement block dependency
when writing framed lz4
Repository: commons-compress
Updated Branches:
refs/heads/master a5f7089f6 -> a5720927e
COMPRESS-271 implement block dependency when writing framed lz4
Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/a5720927
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/a5720927
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/a5720927
Branch: refs/heads/master
Commit: a5720927e1d76cb920d126d8bfecd9695daed09c
Parents: a5f7089
Author: Stefan Bodewig <bo...@apache.org>
Authored: Tue Feb 7 21:00:17 2017 +0100
Committer: Stefan Bodewig <bo...@apache.org>
Committed: Tue Feb 7 21:00:17 2017 +0100
----------------------------------------------------------------------
.../lz4/BlockLZ4CompressorOutputStream.java | 17 ++++++
.../lz4/FramedLZ4CompressorOutputStream.java | 59 +++++++++++++++-----
.../lz4/FramedLZ4CompressorRoundtripTest.java | 9 ++-
3 files changed, 70 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a5720927/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java
index 5c01f83..f7ba28d 100644
--- a/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java
+++ b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java
@@ -145,6 +145,23 @@ public class BlockLZ4CompressorOutputStream extends CompressorOutputStream {
}
}
+    /**
+     * Seeds the compression window with data that precedes the real input.
+     *
+     * <p>The call has no effect unless {@code len} is positive.</p>
+     *
+     * @param data array holding the bytes to seed the window with
+     * @param off index of the first byte of {@code data} to use
+     * @param len number of bytes to use
+     * @throws IllegalStateException if the stream has already started to write data
+     * @see LZ77Compressor#prefill
+     */
+    public void prefill(byte[] data, int off, int len) {
+        if (len <= 0) {
+            return;
+        }
+        // take a private copy of the requested slice; the very same copy is handed
+        // to the compressor and then to recordLiteral
+        final byte[] window = Arrays.copyOfRange(data, off, off + len);
+        compressor.prefill(window);
+        recordLiteral(window);
+    }
+
private void addLiteralBlock(LZ77Compressor.LiteralBlock block) throws IOException {
Pair last = writeBlocksAndReturnUnfinishedPair(block.getLength());
recordLiteral(last.addLiteral(block));
http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a5720927/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java
index fc599d3..f2c0e54 100644
--- a/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java
+++ b/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java
@@ -35,11 +35,6 @@ import org.apache.commons.compress.utils.ByteUtils;
* @NotThreadSafe
*/
public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
- /*
- * TODO before releasing 1.14:
- *
- * + block dependence
- */
private static final byte[] END_MARK = new byte[4];
@@ -57,6 +52,10 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
// used for block checksum, if requested
private final XXHash32 blockHash;
+ // only created if the config requires block dependency
+ private byte[] blockDependencyBuffer;
+ private int collectedBlockDependencyBytes;
+
/**
* The block sizes supported by the format.
*/
@@ -88,7 +87,7 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
*/
public static class Parameters {
private final BlockSize blockSize;
- private final boolean withContentChecksum, withBlockChecksum;
+ private final boolean withContentChecksum, withBlockChecksum, withBlockDependency;
/**
* The default parameters of 4M block size, enabled content
@@ -96,7 +95,7 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
*
* <p>This matches the defaults of the lz4 command line utility.</p>
*/
- public static final Parameters DEFAULT = new Parameters(BlockSize.M4, true, false);
+ public static final Parameters DEFAULT = new Parameters(BlockSize.M4, true, false, false);
/**
* Sets up a custom block size for the LZ4 stream but
@@ -105,7 +104,7 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
* @param blockSize the size of a single block.
*/
public Parameters(BlockSize blockSize) {
- this(blockSize, true, false);
+ this(blockSize, true, false, false);
}
/**
* Sets up custom parameters for the LZ4 stream.
@@ -114,17 +113,23 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
* @param withBlockChecksum whether to write a block checksum.
* Note that block checksums are not supported by the lz4
* command line utility
+ * @param withBlockDependency whether a block may depend on
+ * the content of a previous block. Enabling this may improve
+ * compression ratio but makes it impossible to decompress the
+ * output in parallel.
*/
- public Parameters(BlockSize blockSize, boolean withContentChecksum, boolean withBlockChecksum) {
+ public Parameters(BlockSize blockSize, boolean withContentChecksum, boolean withBlockChecksum,
+ boolean withBlockDependency) {
this.blockSize = blockSize;
this.withContentChecksum = withContentChecksum;
this.withBlockChecksum = withBlockChecksum;
+ this.withBlockDependency = withBlockDependency;
}
@Override
public String toString() {
return "LZ4 Parameters with BlockSize " + blockSize + ", withContentChecksum " + withContentChecksum
- + ", withBlockChecksum " + withBlockChecksum;
+ + ", withBlockChecksum " + withBlockChecksum + ", withBlockDependency " + withBlockDependency;
}
}
@@ -152,6 +157,9 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
blockHash = params.withBlockChecksum ? new XXHash32() : null;
out.write(FramedLZ4CompressorInputStream.LZ4_SIGNATURE);
writeFrameDescriptor();
+ blockDependencyBuffer = params.withBlockDependency
+ ? new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE]
+ : null;
}
@Override
@@ -199,8 +207,10 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
}
private void writeFrameDescriptor() throws IOException {
- int flags = FramedLZ4CompressorInputStream.SUPPORTED_VERSION
- | FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK;
+ int flags = FramedLZ4CompressorInputStream.SUPPORTED_VERSION;
+ if (!params.withBlockDependency) {
+ flags |= FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK;
+ }
if (params.withContentChecksum) {
flags |= FramedLZ4CompressorInputStream.CONTENT_CHECKSUM_MASK;
}
@@ -217,10 +227,18 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
}
private void flushBlock() throws IOException {
+ final boolean withBlockDependency = params.withBlockDependency;
ByteArrayOutputStream baos = new ByteArrayOutputStream();
- try (OutputStream o = new BlockLZ4CompressorOutputStream(baos)) {
+ try (BlockLZ4CompressorOutputStream o = new BlockLZ4CompressorOutputStream(baos)) {
+ if (withBlockDependency) {
+ o.prefill(blockDependencyBuffer, blockDependencyBuffer.length - collectedBlockDependencyBytes,
+ collectedBlockDependencyBytes);
+ }
o.write(blockData, 0, currentIndex);
}
+ if (withBlockDependency) {
+ appendToBlockDependencyBuffer(blockData, 0, currentIndex);
+ }
byte[] b = baos.toByteArray();
if (b.length > currentIndex) { // compression increased size, maybe beyond blocksize
ByteUtils.toLittleEndian(out, currentIndex | FramedLZ4CompressorInputStream.UNCOMPRESSED_FLAG_MASK,
@@ -250,5 +268,20 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
}
}
+ private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) {
+ len = Math.min(len, blockDependencyBuffer.length);
+ if (len > 0) {
+ int keep = blockDependencyBuffer.length - len;
+ if (keep > 0) {
+ // move last keep bytes towards the start of the buffer
+ System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep);
+ }
+ // append new data
+ System.arraycopy(b, off, blockDependencyBuffer, keep, len);
+ collectedBlockDependencyBytes = Math.min(collectedBlockDependencyBytes + len,
+ blockDependencyBuffer.length);
+ }
+ }
+
}
http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a5720927/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java b/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java
index fe37a28..d54fafe 100644
--- a/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java
+++ b/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java
@@ -44,9 +44,14 @@ public final class FramedLZ4CompressorRoundtripTest extends AbstractTestCase {
new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.M1) },
new Object[] { FramedLZ4CompressorOutputStream.Parameters.DEFAULT },
// default without content checksum
- new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.M4, false, false) },
+ new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.M4,
+ false, false, false) },
// default with block checksum
- new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.M4, true, true) },
+ new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.M4,
+ true, true, false) },
+ // small blocksize (so we get enough blocks) and enabled block dependency, otherwise defaults
+ new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.K64,
+ true, false, true) },
});
}