You are viewing a plain text version of this content. The link to the canonical version was not preserved in this plain text rendering.
Posted to commits@commons.apache.org by ah...@apache.org on 2019/12/30 22:17:27 UTC

[commons-codec] branch master updated (a387ac8 -> 4c5b1b7)

This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git.


    from a387ac8  Test incremental hash with huge length array added to unprocessed bytes.
     new 33491ff  Overflow safe position counter in XXHash32.
     new bc82d82  Update the method to get the little-endian int.
     new 88e729e  Test for incremental XXHash32.
     new 08bfd44  Test XXHash32 with huge length array added to unprocessed bytes.
     new 4c5b1b7  Add reference file for XXHash32 using a small number of bytes.

The 5 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../org/apache/commons/codec/digest/XXHash32.java  | 49 +++++++++---------
 .../commons/codec/digest/XXHash32OverflowTest.java | 59 ++++++++++++++++++++++
 .../apache/commons/codec/digest/XXHash32Test.java  | 20 ++++++++
 src/test/resources/small.bin                       |  1 +
 4 files changed, 106 insertions(+), 23 deletions(-)
 create mode 100644 src/test/java/org/apache/commons/codec/digest/XXHash32OverflowTest.java
 create mode 100644 src/test/resources/small.bin


[commons-codec] 01/05: Overflow safe position counter in XXHash32.

Posted by ah...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git

commit 33491ff84299cc1a48031fba066e6e69f21e1861
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Mon Dec 30 21:40:17 2019 +0000

    Overflow safe position counter in XXHash32.
---
 .../java/org/apache/commons/codec/digest/XXHash32.java | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/main/java/org/apache/commons/codec/digest/XXHash32.java b/src/main/java/org/apache/commons/codec/digest/XXHash32.java
index 0d81e0a..0fce378 100644
--- a/src/main/java/org/apache/commons/codec/digest/XXHash32.java
+++ b/src/main/java/org/apache/commons/codec/digest/XXHash32.java
@@ -55,6 +55,8 @@ public class XXHash32 implements Checksum {
 
     private int totalLen;
     private int pos;
+    /** Set to true when the state array has been updated since the last reset. */
+    private boolean stateUpdated;
 
     /**
      * Creates an XXHash32 instance with a seed of 0.
@@ -77,6 +79,7 @@ public class XXHash32 implements Checksum {
         initializeState();
         totalLen = 0;
         pos = 0;
+        stateUpdated = false;
     }
 
     @Override
@@ -94,12 +97,16 @@ public class XXHash32 implements Checksum {
 
         final int end = off + len;
 
-        if (pos + len < BUF_SIZE) {
+        // Check if the unprocessed bytes and new bytes can fill a block of 16.
+        // Make this overflow safe in the event that len is Integer.MAX_VALUE.
+        // Equivalent to: (pos + len < BUF_SIZE)
+        if (pos + len - BUF_SIZE < 0) {
             System.arraycopy(b, off, buffer, pos, len);
             pos += len;
             return;
         }
 
+        // Process left-over bytes with new bytes
         if (pos > 0) {
             final int size = BUF_SIZE - pos;
             System.arraycopy(b, off, buffer, pos, size);
@@ -113,22 +120,27 @@ public class XXHash32 implements Checksum {
             off += BUF_SIZE;
         }
 
+        // Handle left-over bytes
         if (off < end) {
             pos = end - off;
             System.arraycopy(b, off, buffer, 0, pos);
+        } else {
+            pos = 0;
         }
     }
 
     @Override
     public long getValue() {
         int hash;
-        if (totalLen > BUF_SIZE) {
+        if (stateUpdated) {
+            // Hash with the state
             hash =
                 rotateLeft(state[0],  1) +
                 rotateLeft(state[1],  7) +
                 rotateLeft(state[2], 12) +
                 rotateLeft(state[3], 18);
         } else {
+            // Hash using the original seed from position 2
             hash = state[2] + PRIME5;
         }
         hash += totalLen;
@@ -178,7 +190,7 @@ public class XXHash32 implements Checksum {
         state[2] = s2;
         state[3] = s3;
 
-        pos = 0;
+        stateUpdated = true;
     }
 
     /**


[commons-codec] 04/05: Test XXHash32 with huge length array added to unprocessed bytes.

Posted by ah...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git

commit 08bfd445e88464a0d1b3de1f01f0f5903be2118f
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Mon Dec 30 21:58:16 2019 +0000

    Test XXHash32 with huge length array added to unprocessed bytes.
---
 .../commons/codec/digest/XXHash32OverflowTest.java | 59 ++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/src/test/java/org/apache/commons/codec/digest/XXHash32OverflowTest.java b/src/test/java/org/apache/commons/codec/digest/XXHash32OverflowTest.java
new file mode 100644
index 0000000..6e53e8e
--- /dev/null
+++ b/src/test/java/org/apache/commons/codec/digest/XXHash32OverflowTest.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.commons.codec.digest;
+
+import org.junit.Assert;
+import org.junit.Assume;
+import org.junit.Test;
+
+public class XXHash32OverflowTest {
+
+    /**
+     * This test hits an edge case where a very large number of bytes is added to the incremental
+     * hash. The data is constructed so that an integer counter of unprocessed bytes will
+     * overflow. If this is not handled correctly then the code throws an exception when it
+     * copies more data into the unprocessed bytes array.
+     */
+    @Test
+    public void testIncrementalHashWithUnprocessedBytesAndHugeLengthArray() {
+        // Assert the test precondition that a large array added to unprocessed bytes
+        // will overflow an integer counter. We use the smallest hugeLength possible
+        // as some VMs cannot allocate maximum length arrays.
+        final int bufferSize = 16;
+        final int unprocessedSize = bufferSize - 1;
+        final int hugeLength = Integer.MAX_VALUE - (unprocessedSize - 1);
+        Assert.assertTrue("This should overflow to negative", unprocessedSize + hugeLength < bufferSize);
+
+        // Check the test can be run
+        byte[] bytes = null;
+        try {
+            bytes = new byte[hugeLength];
+        } catch (OutOfMemoryError ignore) {
+            // Some VMs cannot allocate an array this large.
+            // Some test environments may not have enough available memory for this.
+        }
+        Assume.assumeTrue("Cannot allocate array of length " + hugeLength, bytes != null);
+
+        final XXHash32 inc = new XXHash32();
+        // Add bytes that should be unprocessed
+        inc.update(bytes, 0, unprocessedSize);
+        // Add a huge number of bytes to overflow an integer counter of unprocessed bytes.
+        inc.update(bytes, 0, hugeLength);
+    }
+}


[commons-codec] 05/05: Add reference file for XXHash32 using a small number of bytes.

Posted by ah...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git

commit 4c5b1b745dad5176b27ab3f305553a19db535fa0
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Mon Dec 30 22:17:22 2019 +0000

    Add reference file for XXHash32 using a small number of bytes.
    
    This hits the edge cases for completing the hash with leftover bytes and
    no processing in the main update method.
---
 src/test/java/org/apache/commons/codec/digest/XXHash32Test.java | 2 ++
 src/test/resources/small.bin                                    | 1 +
 2 files changed, 3 insertions(+)

diff --git a/src/test/java/org/apache/commons/codec/digest/XXHash32Test.java b/src/test/java/org/apache/commons/codec/digest/XXHash32Test.java
index 07c5ce7..c3a5fbf 100644
--- a/src/test/java/org/apache/commons/codec/digest/XXHash32Test.java
+++ b/src/test/java/org/apache/commons/codec/digest/XXHash32Test.java
@@ -61,8 +61,10 @@ public class XXHash32Test {
     public static Collection<Object[]> factory() {
         return Arrays.asList(new Object[][] {
             // reference checksums created with xxh32sum
+            // http://cyan4973.github.io/xxHash/
             { "bla.tar", "fbb5c8d1" },
             { "bla.tar.xz", "4106a208" },
+            { "small.bin", "f66c26f8" },
         });
     }
 
diff --git a/src/test/resources/small.bin b/src/test/resources/small.bin
new file mode 100644
index 0000000..56dda0f
--- /dev/null
+++ b/src/test/resources/small.bin
@@ -0,0 +1 @@
+��4e�cǗJ�
\ No newline at end of file


[commons-codec] 02/05: Update the method to get the little-endian int.

Posted by ah...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git

commit bc82d82d2a70200f5c7a084908f3587fbe8cc204
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Mon Dec 30 21:43:31 2019 +0000

    Update the method to get the little-endian int.
    
    The previous version used a generic method for variable length bytes
    always with a fixed length of 4.
---
 .../org/apache/commons/codec/digest/XXHash32.java  | 31 ++++++++--------------
 1 file changed, 11 insertions(+), 20 deletions(-)

diff --git a/src/main/java/org/apache/commons/codec/digest/XXHash32.java b/src/main/java/org/apache/commons/codec/digest/XXHash32.java
index 0fce378..140d08b 100644
--- a/src/main/java/org/apache/commons/codec/digest/XXHash32.java
+++ b/src/main/java/org/apache/commons/codec/digest/XXHash32.java
@@ -162,8 +162,18 @@ public class XXHash32 implements Checksum {
         return hash & 0xffffffffl;
     }
 
+    /**
+     * Gets the little-endian int from 4 bytes starting at the specified index.
+     *
+     * @param buffer The data
+     * @param idx The index
+     * @return The little-endian int
+     */
     private static int getInt(final byte[] buffer, final int idx) {
-        return (int) (fromLittleEndian(buffer, idx, 4) & 0xffffffffl);
+        return ((buffer[idx    ] & 0xff)      ) |
+               ((buffer[idx + 1] & 0xff) <<  8) |
+               ((buffer[idx + 2] & 0xff) << 16) |
+               ((buffer[idx + 3] & 0xff) << 24);
     }
 
     private void initializeState() {
@@ -192,23 +202,4 @@ public class XXHash32 implements Checksum {
 
         stateUpdated = true;
     }
-
-    /**
-     * Reads the given byte array as a little endian long.
-     * @param bytes the byte array to convert
-     * @param off the offset into the array that starts the value
-     * @param length the number of bytes representing the value
-     * @return the number read
-     * @throws IllegalArgumentException if len is bigger than eight
-     */
-    private static long fromLittleEndian(final byte[] bytes, final int off, final int length) {
-        if (length > 8) {
-            throw new IllegalArgumentException("can't read more than eight bytes into a long value");
-        }
-        long l = 0;
-        for (int i = 0; i < length; i++) {
-            l |= (bytes[off + i] & 0xffl) << (8 * i);
-        }
-        return l;
-    }
 }


[commons-codec] 03/05: Test for incremental XXHash32.

Posted by ah...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git

commit 88e729ecddb4869b61c19f06623d19d347c4e42b
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Mon Dec 30 21:51:25 2019 +0000

    Test for incremental XXHash32.
---
 .../org/apache/commons/codec/digest/XXHash32Test.java  | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/src/test/java/org/apache/commons/codec/digest/XXHash32Test.java b/src/test/java/org/apache/commons/codec/digest/XXHash32Test.java
index acae79b..07c5ce7 100644
--- a/src/test/java/org/apache/commons/codec/digest/XXHash32Test.java
+++ b/src/test/java/org/apache/commons/codec/digest/XXHash32Test.java
@@ -76,6 +76,24 @@ public class XXHash32Test {
         Assert.assertEquals("checksum for " + file.getName(), expectedChecksum, Long.toHexString(h.getValue()));
     }
 
+    @Test
+    public void verifyIncrementalChecksum() throws IOException {
+        final XXHash32 h = new XXHash32();
+        try (final FileInputStream s = new FileInputStream(file)) {
+            final byte[] b = toByteArray(s);
+            // Hit the case where the hash should be reset
+            h.update(b[0]);
+            h.reset();
+            // Pass in chunks
+            h.update(b[0]);
+            h.update(b, 1, b.length - 2);
+            h.update(b, b.length - 1, 1);
+            // Check the hash ignores negative length
+            h.update(b, 0, -1);
+        }
+        Assert.assertEquals("checksum for " + file.getName(), expectedChecksum, Long.toHexString(h.getValue()));
+    }
+
     private static byte[] toByteArray(final InputStream input) throws IOException {
         final ByteArrayOutputStream output = new ByteArrayOutputStream();
         copy(input, output, 10240);