You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2017/02/08 17:05:17 UTC

[2/3] commons-compress git commit: improve docs

improve docs


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/b1e52489
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/b1e52489
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/b1e52489

Branch: refs/heads/master
Commit: b1e524891f908cd667b8c54b4a04b3af2e65969a
Parents: cfc68ac
Author: Stefan Bodewig <bo...@apache.org>
Authored: Wed Feb 8 17:55:08 2017 +0100
Committer: Stefan Bodewig <bo...@apache.org>
Committed: Wed Feb 8 17:55:08 2017 +0100

----------------------------------------------------------------------
 .../AbstractLZ77CompressorInputStream.java       | 19 ++++++++++++++++---
 .../compressors/lz77support/LZ77Compressor.java  | 19 ++++++++++++-------
 2 files changed, 28 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/b1e52489/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java
index cd9fb7e..284b79f 100644
--- a/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java
+++ b/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java
@@ -41,7 +41,7 @@ import org.apache.commons.compress.utils.IOUtils;
  * implementation delegates to the no-arg version, leading to infinite
  * mutual recursion and a {@code StackOverflowError} otherwise.</p>
  *
- * <p>The contract for subclasses {@code read} implementation is:</p>
+ * <p>The contract for subclasses' {@code read} implementation is:</p>
  * <ul>
  *
  *  <li>keep track of the current state of the stream. Is it inside a
@@ -77,10 +77,17 @@ public abstract class AbstractLZ77CompressorInputStream extends CompressorInputS
     /** Size of the window - must be bigger than the biggest offset expected. */
     private final int windowSize;
 
-    /** Buffer to write decompressed bytes to for back-references */
+    /**
+     * Buffer to write decompressed bytes to for back-references, will
+     * be three times windowSize big.
+     *
+     * <p>Three times so we can slide the whole buffer a windowSize to
+     * the left once we've read twice windowSize and still have enough
+     * data inside of it to satisfy back-references.</p>
+     */
     private final byte[] buf;
 
-    /** One behind the index of the last byte in the buffer that was written */
+    /** One behind the index of the last byte in the buffer that was written, i.e. the next position to write to */
     private int writeIndex;
 
     /** Index of the next byte to be read. */
@@ -171,7 +178,9 @@ public abstract class AbstractLZ77CompressorInputStream extends CompressorInputS
         if (writeIndex != 0) {
             throw new IllegalStateException("the stream has already been read from, can't prefill anymore");
         }
+        // we don't need more data than the big offset could refer to, so cap it
         int len = Math.min(windowSize, data.length);
+        // we need the last data as we are dealing with *back*-references
         System.arraycopy(data, data.length - len, buf, 0, len);
         writeIndex += len;
         readIndex += len;
@@ -214,6 +223,7 @@ public abstract class AbstractLZ77CompressorInputStream extends CompressorInputS
     }
 
     private void tryToReadLiteral(int bytesToRead) throws IOException {
+        // min of "what is still inside the literal", "what does the user want" and "how muc can fit into the buffer"
         final int reallyTryToRead = (int) Math.min(Math.min(bytesToRead, bytesRemaining),
                                                    buf.length - writeIndex);
         final int bytesRead = reallyTryToRead > 0
@@ -287,6 +297,9 @@ public abstract class AbstractLZ77CompressorInputStream extends CompressorInputS
             System.arraycopy(buf, writeIndex - backReferenceOffset, buf, writeIndex, copy);
             writeIndex += copy;
         } else {
+            // back-reference overlaps with the bytes created from it
+            // like go back two bytes and then copy six (by copying
+            // the last two bytes three time).
             final int fullRots = copy / backReferenceOffset;
             for (int i = 0; i < fullRots; i++) {
                 System.arraycopy(buf, writeIndex - backReferenceOffset, buf, writeIndex, backReferenceOffset);

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/b1e52489/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java b/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
index 0dcdb2a..63b880a 100644
--- a/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
+++ b/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
@@ -38,9 +38,9 @@ import java.util.Arrays;
  * back-references - so it can be re-used. It follows the algorithm
  * explained in section 4 of RFC 1951 (DEFLATE) and currently doesn't
  * implement the "lazy match" optimization. The three-byte hash
- * function used in this class is the same used by zlib and InfoZIP's
- * ZIP implementation of DEFLATE. Strongly inspired by InfoZIP's
- * implementation.</p>
+ * function used in this class is the same as the one used by zlib and
+ * InfoZIP's ZIP implementation of DEFLATE. The whole class is
+ * strongly inspired by InfoZIP's implementation.</p>
  *
  * <p>LZ77 is used vaguely here (as well as many other places that
  * talk about it :-), LZSS would likely be closer to the truth but
@@ -49,7 +49,7 @@ import java.util.Arrays;
  * <p>The API consists of a compressor that is fed <code>byte</code>s
  * and emits {@link Block}s to a registered callback where the blocks
  * represent either {@link LiteralBlock literal blocks}, {@link
- * BackReference back references} or {@link EOD end of data
+ * BackReference back-references} or {@link EOD end of data
  * markers}. In order to ensure the callback receives all information,
  * the {@code #finish} method must be used once all data has been fed
  * into the compressor.</p>
@@ -220,7 +220,9 @@ public class LZ77Compressor {
     private int blockStart = 0;
     // position of the current match
     private int matchStart = NO_MATCH;
-    // number of insertString calls for the up to three last bytes of the last match
+    // number of missed insertString calls for the up to three last
+    // bytes of the last match that can only be performed once more
+    // data has been read
     private int missedInserts = 0;
 
     /**
@@ -269,7 +271,7 @@ public class LZ77Compressor {
      */
     public void compress(byte[] data, int off, int len) throws IOException {
         final int wSize = params.getWindowSize();
-        while (len > wSize) {
+        while (len > wSize) { // chop into windowSize sized chunks
             doCompress(data, off, wSize);
             off += wSize;
             len -= wSize;
@@ -311,8 +313,11 @@ public class LZ77Compressor {
         if (currentPosition != 0 || lookahead != 0) {
             throw new IllegalStateException("the compressor has already started to accept data, can't prefill anymore");
         }
+
+        // don't need more than windowSize for back-references
         final int len = Math.min(params.getWindowSize(), data.length);
         System.arraycopy(data, data.length - len, window, 0, len);
+
         if (len >= NUMBER_OF_BYTES_IN_HASH) {
             initialize();
             final int stop = len - NUMBER_OF_BYTES_IN_HASH + 1;
@@ -320,7 +325,7 @@ public class LZ77Compressor {
                 insertString(i);
             }
             missedInserts = NUMBER_OF_BYTES_IN_HASH - 1;
-        } else {
+        } else { // not enough data to hash anything
             missedInserts = len;
         }
         blockStart = currentPosition = len;