You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2017/02/07 20:00:50 UTC

commons-compress git commit: COMPRESS-271 implement block dependency when writing framed lz4

Repository: commons-compress
Updated Branches:
  refs/heads/master a5f7089f6 -> a5720927e


COMPRESS-271 implement block dependency when writing framed lz4


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/a5720927
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/a5720927
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/a5720927

Branch: refs/heads/master
Commit: a5720927e1d76cb920d126d8bfecd9695daed09c
Parents: a5f7089
Author: Stefan Bodewig <bo...@apache.org>
Authored: Tue Feb 7 21:00:17 2017 +0100
Committer: Stefan Bodewig <bo...@apache.org>
Committed: Tue Feb 7 21:00:17 2017 +0100

----------------------------------------------------------------------
 .../lz4/BlockLZ4CompressorOutputStream.java     | 17 ++++++
 .../lz4/FramedLZ4CompressorOutputStream.java    | 59 +++++++++++++++-----
 .../lz4/FramedLZ4CompressorRoundtripTest.java   |  9 ++-
 3 files changed, 70 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a5720927/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java
index 5c01f83..f7ba28d 100644
--- a/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java
+++ b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java
@@ -145,6 +145,23 @@ public class BlockLZ4CompressorOutputStream extends CompressorOutputStream {
         }
     }
 
+    /**
+     * Adds some initial data to fill the window with.
+     *
+     * @param data the data to fill the window with.
+     * @param off offset of real data into the array
+     * @param len amount of data
+     * @throws IllegalStateException if the stream has already started to write data
+     * @see LZ77Compressor#prefill
+     */
+    public void prefill(byte[] data, int off, int len) {
+        if (len > 0) {
+            byte[] b = Arrays.copyOfRange(data, off, off + len);
+            compressor.prefill(b);
+            recordLiteral(b);
+        }
+    }
+
     private void addLiteralBlock(LZ77Compressor.LiteralBlock block) throws IOException {
         Pair last = writeBlocksAndReturnUnfinishedPair(block.getLength());
         recordLiteral(last.addLiteral(block));

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a5720927/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java
index fc599d3..f2c0e54 100644
--- a/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java
+++ b/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java
@@ -35,11 +35,6 @@ import org.apache.commons.compress.utils.ByteUtils;
  * @NotThreadSafe
  */
 public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
-    /*
-     * TODO before releasing 1.14:
-     *
-     * + block dependence
-     */
 
     private static final byte[] END_MARK = new byte[4];
 
@@ -57,6 +52,10 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
     // used for block checksum, if requested
     private final XXHash32 blockHash;
 
+    // only created if the config requires block dependency
+    private byte[] blockDependencyBuffer;
+    private int collectedBlockDependencyBytes;
+
     /**
      * The block sizes supported by the format.
      */
@@ -88,7 +87,7 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
      */
     public static class Parameters {
         private final BlockSize blockSize;
-        private final boolean withContentChecksum, withBlockChecksum;
+        private final boolean withContentChecksum, withBlockChecksum, withBlockDependency;
 
         /**
          * The default parameters of 4M block size, enabled content
@@ -96,7 +95,7 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
          *
          * <p>This matches the defaults of the lz4 command line utility.</p>
          */
-        public static final Parameters DEFAULT = new Parameters(BlockSize.M4, true, false);
+        public static final Parameters DEFAULT = new Parameters(BlockSize.M4, true, false, false);
 
         /**
          * Sets up a custom block size for the LZ4 stream but
@@ -105,7 +104,7 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
          * @param blockSize the size of a single block.
          */
         public Parameters(BlockSize blockSize) {
-            this(blockSize, true, false);
+            this(blockSize, true, false, false);
         }
         /**
          * Sets up custom parameters for the LZ4 stream.
@@ -114,17 +113,23 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
          * @param withBlockChecksum whether to write a block checksum.
          * Note that block checksums are not supported by the lz4
          * command line utility
+         * @param withBlockDependency whether a block may depend on
+         * the content of a previous block. Enabling this may improve
+         * compression ratio but makes it impossible to decompress the
+         * output in parallel.
          */
-        public Parameters(BlockSize blockSize, boolean withContentChecksum, boolean withBlockChecksum) {
+        public Parameters(BlockSize blockSize, boolean withContentChecksum, boolean withBlockChecksum,
+            boolean withBlockDependency) {
             this.blockSize = blockSize;
             this.withContentChecksum = withContentChecksum;
             this.withBlockChecksum = withBlockChecksum;
+            this.withBlockDependency = withBlockDependency;
         }
 
         @Override
         public String toString() {
             return "LZ4 Parameters with BlockSize " + blockSize + ", withContentChecksum " + withContentChecksum
-                + ", withBlockChecksum " + withBlockChecksum;
+                + ", withBlockChecksum " + withBlockChecksum + ", withBlockDependency " + withBlockDependency;
         }
     }
 
@@ -152,6 +157,9 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
         blockHash = params.withBlockChecksum ? new XXHash32() : null;
         out.write(FramedLZ4CompressorInputStream.LZ4_SIGNATURE);
         writeFrameDescriptor();
+        blockDependencyBuffer = params.withBlockDependency
+            ? new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE]
+            : null;
     }
 
     @Override
@@ -199,8 +207,10 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
     }
 
     private void writeFrameDescriptor() throws IOException {
-        int flags = FramedLZ4CompressorInputStream.SUPPORTED_VERSION
-            | FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK;
+        int flags = FramedLZ4CompressorInputStream.SUPPORTED_VERSION;
+        if (!params.withBlockDependency) {
+            flags |= FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK;
+        }
         if (params.withContentChecksum) {
             flags |= FramedLZ4CompressorInputStream.CONTENT_CHECKSUM_MASK;
         }
@@ -217,10 +227,18 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
     }
 
     private void flushBlock() throws IOException {
+        final boolean withBlockDependency = params.withBlockDependency;
         ByteArrayOutputStream baos = new ByteArrayOutputStream();
-        try (OutputStream o = new BlockLZ4CompressorOutputStream(baos)) {
+        try (BlockLZ4CompressorOutputStream o = new BlockLZ4CompressorOutputStream(baos)) {
+            if (withBlockDependency) {
+                o.prefill(blockDependencyBuffer, blockDependencyBuffer.length - collectedBlockDependencyBytes,
+                    collectedBlockDependencyBytes);
+            }
             o.write(blockData, 0, currentIndex);
         }
+        if (withBlockDependency) {
+            appendToBlockDependencyBuffer(blockData, 0, currentIndex);
+        }
         byte[] b = baos.toByteArray();
         if (b.length > currentIndex) { // compression increased size, maybe beyond blocksize
             ByteUtils.toLittleEndian(out, currentIndex | FramedLZ4CompressorInputStream.UNCOMPRESSED_FLAG_MASK,
@@ -250,5 +268,20 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
         }
     }
 
+    private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) {
+        len = Math.min(len, blockDependencyBuffer.length);
+        if (len > 0) {
+            int keep = blockDependencyBuffer.length - len;
+            if (keep > 0) {
+                // move last keep bytes towards the start of the buffer
+                System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep);
+            }
+            // append new data
+            System.arraycopy(b, off, blockDependencyBuffer, keep, len);
+            collectedBlockDependencyBytes = Math.min(collectedBlockDependencyBytes + len,
+                blockDependencyBuffer.length);
+        }
+    }
+
 }
 

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a5720927/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java b/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java
index fe37a28..d54fafe 100644
--- a/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java
+++ b/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java
@@ -44,9 +44,14 @@ public final class FramedLZ4CompressorRoundtripTest extends AbstractTestCase {
             new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.M1) },
             new Object[] { FramedLZ4CompressorOutputStream.Parameters.DEFAULT },
             // default without content checksum
-            new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.M4, false, false) },
+            new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.M4,
+                false, false, false) },
             // default with block checksum
-            new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.M4, true, true) },
+            new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.M4,
+                true, true, false) },
+            // small blocksize (so we get enough blocks) and enabled block dependency, otherwise defaults
+            new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.K64,
+                true, false, true) },
         });
     }