You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2017/04/02 11:10:20 UTC

[1/2] commons-compress git commit: misleading test name

Repository: commons-compress
Updated Branches:
  refs/heads/master b893471b9 -> f7e0a4a02


misleading test name


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/86e2bd07
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/86e2bd07
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/86e2bd07

Branch: refs/heads/master
Commit: 86e2bd0781261b5b0192bdd2c5285e0982daf8c2
Parents: b893471
Author: Stefan Bodewig <bo...@apache.org>
Authored: Sun Apr 2 12:43:15 2017 +0200
Committer: Stefan Bodewig <bo...@apache.org>
Committed: Sun Apr 2 12:43:15 2017 +0200

----------------------------------------------------------------------
 .../commons/compress/compressors/lz77support/ParametersTest.java   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/86e2bd07/src/test/java/org/apache/commons/compress/compressors/lz77support/ParametersTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/compress/compressors/lz77support/ParametersTest.java b/src/test/java/org/apache/commons/compress/compressors/lz77support/ParametersTest.java
index 2597462..44d8ac4 100644
--- a/src/test/java/org/apache/commons/compress/compressors/lz77support/ParametersTest.java
+++ b/src/test/java/org/apache/commons/compress/compressors/lz77support/ParametersTest.java
@@ -115,7 +115,7 @@ public class ParametersTest {
     }
 
     @Test(expected = IllegalArgumentException.class)
-    public void windowSizeMustNotBeAPowerOfTwo() {
+    public void windowSizeMustBeAPowerOfTwo() {
         newParameters(100, 200, 300, 400, 500);
     }
 


[2/2] commons-compress git commit: add lazy matching to LZ77 compressors

Posted by bo...@apache.org.
add lazy matching to LZ77 compressors


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/f7e0a4a0
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/f7e0a4a0
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/f7e0a4a0

Branch: refs/heads/master
Commit: f7e0a4a0212424edd1765d1abe66772af069dafb
Parents: 86e2bd0
Author: Stefan Bodewig <bo...@apache.org>
Authored: Sun Apr 2 13:09:54 2017 +0200
Committer: Stefan Bodewig <bo...@apache.org>
Committed: Sun Apr 2 13:09:54 2017 +0200

----------------------------------------------------------------------
 .../compressors/lz77support/LZ77Compressor.java | 32 +++++++++++
 .../compressors/lz77support/Parameters.java     | 58 ++++++++++++++++++--
 2 files changed, 85 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/f7e0a4a0/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java b/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
index c284f4c..5379259 100644
--- a/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
+++ b/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
@@ -394,6 +394,8 @@ public class LZ77Compressor {
 
     private void compress() throws IOException {
         final int minMatch = params.getMinBackReferenceLength();
+        final boolean lazy = params.getLazyMatching();
+        final int lazyThreshold = params.getLazyMatchingThreshold();
 
         while (lookahead >= minMatch) {
             catchUpMissedInserts();
@@ -402,6 +404,11 @@ public class LZ77Compressor {
             if (hashHead != NO_MATCH && hashHead - currentPosition <= params.getMaxOffset()) {
                 // sets matchStart as a side effect
                 matchLength = longestMatch(hashHead);
+
+                if (lazy && matchLength <= lazyThreshold && lookahead > minMatch) {
+                    // try to find a longer match using the next position
+                    matchLength = longestMatchForNextPosition(matchLength);
+                }
             }
             if (matchLength >= minMatch) {
                 if (blockStart != currentPosition) {
@@ -441,6 +448,31 @@ public class LZ77Compressor {
         return hashHead;
     }
 
+    private int longestMatchForNextPosition(final int prevMatchLength) {
+        // save a bunch of values to restore them if the next match isn't better than the current one
+        final int prevMatchStart = matchStart;
+        final int prevInsertHash = insertHash;
+
+        lookahead--;
+        currentPosition++;
+        int hashHead = insertString(currentPosition);
+        final int prevHashHead = prev[currentPosition & wMask];
+        int matchLength = longestMatch(hashHead);
+
+        if (matchLength <= prevMatchLength) {
+            // use the first match, as the next one isn't any better
+            matchLength = prevMatchLength;
+            matchStart = prevMatchStart;
+
+            // restore modified values
+            head[insertHash] = prevHashHead;
+            insertHash = prevInsertHash;
+            currentPosition--;
+            lookahead++;
+        }
+        return matchLength;
+    }
+
     private void insertStringsInMatch(int matchLength) {
         // inserts strings contained in current match
         // insertString inserts the byte 2 bytes after position, which may not yet be available -> missedInserts

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/f7e0a4a0/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java b/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java
index e4e9e47..5cdbae9 100644
--- a/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java
+++ b/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java
@@ -52,7 +52,8 @@ public final class Parameters {
     public static class Builder {
         private final int windowSize;
         private int minBackReferenceLength, maxBackReferenceLength, maxOffset, maxLiteralLength;
-        private Integer niceBackReferenceLength, maxCandidates;
+        private Integer niceBackReferenceLength, maxCandidates, lazyThreshold;
+        private Boolean lazyMatches;
 
         private Builder(int windowSize) {
             if (windowSize < 2 || !isPowerOfTwo(windowSize)) {
@@ -173,6 +174,30 @@ public final class Parameters {
         }
 
         /**
+         * Sets whether lazy matching should be performed.
+         *
+         * <p>Lazy matching means that after a back-reference for a certain position has been found the compressor will
+         * try to find a longer match for the next position.</p>
+         *
+         * <p>Lazy matching is enabled by default and disabled when tuning for speed.</p>
+         */
+        public Builder withLazyMatching(boolean lazy) {
+            lazyMatches = lazy;
+            return this;
+        }
+
+        /**
+         * Sets the threshold for lazy matching.
+         *
+         * <p>Even if lazy matching is enabled it will not be performed if the length of the back-reference found for
+         * the current position is longer than this value.</p>
+         */
+        public Builder withLazyThreshold(int threshold) {
+            lazyThreshold = threshold;
+            return this;
+        }
+
+        /**
          * Changes the default setting for "nice back-reference length" and "maximum number of candidates" for improved
          * compression speed at the cost of compression ratio.
          *
@@ -181,6 +206,8 @@ public final class Parameters {
         public Builder tunedForSpeed() {
             niceBackReferenceLength = Math.max(minBackReferenceLength, maxBackReferenceLength / 8);
             maxCandidates = Math.max(32, windowSize / 1024);
+            lazyMatches = false;
+            lazyThreshold = minBackReferenceLength;
             return this;
         }
 
@@ -191,8 +218,9 @@ public final class Parameters {
          * <p>Use this method after configuring "maximum back-reference length".</p>
          */
         public Builder tunedForCompressionRatio() {
-            niceBackReferenceLength = maxBackReferenceLength;
+            niceBackReferenceLength = lazyThreshold = maxBackReferenceLength;
             maxCandidates = Math.max(32, windowSize / 16);
+            lazyMatches = true;
             return this;
         }
 
@@ -205,17 +233,21 @@ public final class Parameters {
             int niceLen = niceBackReferenceLength != null ? niceBackReferenceLength
                 : Math.max(minBackReferenceLength, maxBackReferenceLength / 2);
             int candidates = maxCandidates != null ? maxCandidates : Math.max(256, windowSize / 128);
+            boolean lazy = lazyMatches != null ? lazyMatches : true;
+            int threshold = lazy ? (lazyThreshold != null ? lazyThreshold : niceLen) : minBackReferenceLength;
 
             return new Parameters(windowSize, minBackReferenceLength, maxBackReferenceLength,
-                maxOffset, maxLiteralLength, niceLen, candidates);
+                maxOffset, maxLiteralLength, niceLen, candidates, lazy, threshold);
         }
     }
 
     private final int windowSize, minBackReferenceLength, maxBackReferenceLength, maxOffset, maxLiteralLength,
-        niceBackReferenceLength, maxCandidates;
+        niceBackReferenceLength, maxCandidates, lazyThreshold;
+    private final boolean lazyMatching;
 
     private Parameters(int windowSize, int minBackReferenceLength, int maxBackReferenceLength, int maxOffset,
-        int maxLiteralLength, int niceBackReferenceLength, int maxCandidates) {
+            int maxLiteralLength, int niceBackReferenceLength, int maxCandidates, boolean lazyMatching,
+            int lazyThreshold) {
         this.windowSize = windowSize;
         this.minBackReferenceLength = minBackReferenceLength;
         this.maxBackReferenceLength = maxBackReferenceLength;
@@ -223,6 +255,8 @@ public final class Parameters {
         this.maxLiteralLength = maxLiteralLength;
         this.niceBackReferenceLength = niceBackReferenceLength;
         this.maxCandidates = maxCandidates;
+        this.lazyMatching = lazyMatching;
+        this.lazyThreshold = lazyThreshold;
     }
 
     /**
@@ -276,6 +310,20 @@ public final class Parameters {
         return maxCandidates;
     }
 
+    /**
+     * Gets whether to perform lazy matching.
+     */
+    public boolean getLazyMatching() {
+        return lazyMatching;
+    }
+
+    /**
+     * Gets the threshold for lazy matching.
+     */
+    public int getLazyMatchingThreshold() {
+        return lazyThreshold;
+    }
+
     private static final boolean isPowerOfTwo(int x) {
         // pre-condition: x > 0
         return (x & (x - 1)) == 0;