You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2017/09/30 16:38:06 UTC

svn commit: r1810226 - in /commons/proper/codec/trunk/src: changes/ main/java/org/apache/commons/codec/digest/ test/java/org/apache/commons/codec/digest/ test/resources/

Author: bodewig
Date: Sat Sep 30 16:38:05 2017
New Revision: 1810226

URL: http://svn.apache.org/viewvc?rev=1810226&view=rev
Log:
CODEC-241 add support for XXHash32

Added:
    commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/digest/XXHash32.java   (with props)
    commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/digest/XXHash32Test.java   (with props)
    commons/proper/codec/trunk/src/test/resources/
    commons/proper/codec/trunk/src/test/resources/bla.tar   (with props)
    commons/proper/codec/trunk/src/test/resources/bla.tar.xz   (with props)
Modified:
    commons/proper/codec/trunk/src/changes/changes.xml

Modified: commons/proper/codec/trunk/src/changes/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/changes/changes.xml?rev=1810226&r1=1810225&r2=1810226&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/changes/changes.xml (original)
+++ commons/proper/codec/trunk/src/changes/changes.xml Sat Sep 30 16:38:05 2017
@@ -45,6 +45,7 @@ The <action> type attribute can be add,u
     <release version="1.11" date="2017-MM-DD" description="Feature and fix release.">
       <!-- The first attribute below should be the issue id; makes it easier to navigate in the IDE outline -->
 
+      <action issue="CODEC-241" type="add">Add support for XXHash32</action>
       <action issue="CODEC-234" dev="ggregory" type="update" due-to="Christopher Schultz, Sebb">Base32.decode should support lowercase letters</action>
       <action issue="CODEC-233" dev="sebb" type="update" due-to="Yossi Tamari">Soundex should support more algorithm variants</action>
       <action issue="CODEC-145" dev="sebb" type="fix" due-to="Jesse Glick">Base64.encodeBase64String could better use newStringUsAscii (ditto encodeBase64URLSafeString)</action>

Added: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/digest/XXHash32.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/digest/XXHash32.java?rev=1810226&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/digest/XXHash32.java (added)
+++ commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/digest/XXHash32.java Sat Sep 30 16:38:05 2017
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.commons.codec.digest;
+
+import static java.lang.Integer.rotateLeft;
+
+import java.util.zip.Checksum;
+
+/**
+ * Implementation of the xxhash32 hash algorithm.
+ *
+ * <p>Copied from Commons Compress 1.14
+ * <a href="https://git-wip-us.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD">https://git-wip-us.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD</a></p>
+ *
+ * @see <a href="http://cyan4973.github.io/xxHash/">xxHash</a>
+ * @NotThreadSafe
+ * @since 1.11
+ */
+public class XXHash32 implements Checksum {
+
+    /** Number of bytes consumed by one call to {@link #process(byte[], int)} (four 32-bit lanes). */
+    private static final int BUF_SIZE = 16;
+    /** Rotation applied to each lane while a full stripe is mixed in. */
+    private static final int ROTATE_BITS = 13;
+
+    // The first three primes do not fit into a signed int, hence the long literals and the cast.
+    private static final int PRIME1 = (int) 2654435761L;
+    private static final int PRIME2 = (int) 2246822519L;
+    private static final int PRIME3 = (int) 3266489917L;
+    private static final int PRIME4 =  668265263;
+    private static final int PRIME5 =  374761393;
+
+    /** Scratch array so that {@link #update(int)} can delegate to the array based update. */
+    private final byte[] oneByte = new byte[1];
+    /** The four accumulator lanes. */
+    private final int[] state = new int[4];
+    // Note: the code used to use ByteBuffer but the manual method is 50% faster
+    // See: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/2f56fb5c
+    /** Holds the bytes of an incomplete stripe between calls to update. */
+    private final byte[] buffer = new byte[BUF_SIZE];
+    private final int seed;
+
+    /**
+     * Number of bytes seen so far. This is an int and wraps around for large inputs, which is
+     * harmless because it is only ever added to the 32 bit hash.
+     */
+    private int totalLen;
+    /** Number of valid bytes currently stored in {@link #buffer}. */
+    private int pos;
+    /**
+     * Whether at least one full stripe has been mixed into {@link #state}. This - and not the
+     * wrapping {@link #totalLen} counter - decides which start value {@link #getValue()} uses.
+     */
+    private boolean stateUpdated;
+
+    /**
+     * Creates an XXHash32 instance with a seed of 0.
+     */
+    public XXHash32() {
+        this(0);
+    }
+
+    /**
+     * Creates an XXHash32 instance.
+     * @param seed the seed to use
+     */
+    public XXHash32(int seed) {
+        this.seed = seed;
+        initializeState();
+    }
+
+    /**
+     * Discards all input seen so far and returns to the state right after construction,
+     * keeping the seed.
+     */
+    @Override
+    public void reset() {
+        initializeState();
+        totalLen = 0;
+        pos = 0;
+        stateUpdated = false;
+    }
+
+    /**
+     * Adds a single byte to the hash.
+     * @param b the byte to add, only the lowest eight bits are used
+     */
+    @Override
+    public void update(int b) {
+        oneByte[0] = (byte) (b & 0xff);
+        update(oneByte, 0, 1);
+    }
+
+    /**
+     * Adds {@code len} bytes of {@code b} starting at {@code off} to the hash.
+     * @param b the array holding the data
+     * @param off offset of the first byte to use
+     * @param len number of bytes to use, values less than one are ignored
+     */
+    @Override
+    public void update(byte[] b, int off, final int len) {
+        if (len <= 0) {
+            return;
+        }
+        totalLen += len;
+
+        final int end = off + len;
+
+        // Not enough data for a full stripe yet, just remember it.
+        // Compare against the free space instead of computing pos + len which may overflow.
+        if (len < BUF_SIZE - pos) {
+            System.arraycopy(b, off, buffer, pos, len);
+            pos += len;
+            return;
+        }
+
+        // Complete the stripe left over from an earlier call first
+        if (pos > 0) {
+            final int size = BUF_SIZE - pos;
+            System.arraycopy(b, off, buffer, pos, size);
+            process(buffer, 0);
+            off += size;
+        }
+
+        // Consume full stripes directly from the caller's array
+        final int limit = end - BUF_SIZE;
+        while (off <= limit) {
+            process(b, off);
+            off += BUF_SIZE;
+        }
+
+        // Keep the trailing partial stripe for the next update or for getValue
+        if (off < end) {
+            pos = end - off;
+            System.arraycopy(b, off, buffer, 0, pos);
+        }
+    }
+
+    /**
+     * Returns the hash of all data added so far without modifying the internal state, so more
+     * data may be added afterwards.
+     * @return the hash as an unsigned 32 bit value
+     */
+    @Override
+    public long getValue() {
+        int hash;
+        if (stateUpdated) {
+            // At least sixteen bytes have been seen: merge the four lanes
+            hash =
+                rotateLeft(state[0],  1) +
+                rotateLeft(state[1],  7) +
+                rotateLeft(state[2], 12) +
+                rotateLeft(state[3], 18);
+        } else {
+            // Fewer than sixteen bytes: the lanes have never been touched
+            hash = seed + PRIME5;
+        }
+        hash += totalLen;
+
+        // Mix in the buffered remainder, first in units of four bytes ...
+        int idx = 0;
+        final int limit = pos - 4;
+        for (; idx <= limit; idx += 4) {
+            hash = rotateLeft(hash + getInt(buffer, idx) * PRIME3, 17) * PRIME4;
+        }
+        // ... then byte by byte
+        while (idx < pos) {
+            hash = rotateLeft(hash + (buffer[idx++] & 0xff) * PRIME5, 11) * PRIME1;
+        }
+
+        // Final avalanche
+        hash ^= hash >>> 15;
+        hash *= PRIME2;
+        hash ^= hash >>> 13;
+        hash *= PRIME3;
+        hash ^= hash >>> 16;
+        return hash & 0xffffffffL;
+    }
+
+    /**
+     * Reads four bytes starting at {@code idx} as a little endian int.
+     */
+    private static int getInt(byte[] buffer, int idx) {
+        return (int) (fromLittleEndian(buffer, idx, 4) & 0xffffffffL);
+    }
+
+    /**
+     * Sets the four lanes to their seed dependent start values.
+     */
+    private void initializeState() {
+        state[0] = seed + PRIME1 + PRIME2;
+        state[1] = seed + PRIME2;
+        state[2] = seed;
+        state[3] = seed - PRIME1;
+    }
+
+    /**
+     * Mixes the sixteen bytes starting at {@code offset} into the four lanes and marks the
+     * internal buffer as empty.
+     */
+    private void process(byte[] b, int offset) {
+        // local shadows for performance
+        int s0 = state[0];
+        int s1 = state[1];
+        int s2 = state[2];
+        int s3 = state[3];
+
+        s0 = rotateLeft(s0 + getInt(b, offset) * PRIME2, ROTATE_BITS) * PRIME1;
+        s1 = rotateLeft(s1 + getInt(b, offset + 4) * PRIME2, ROTATE_BITS) * PRIME1;
+        s2 = rotateLeft(s2 + getInt(b, offset + 8) * PRIME2, ROTATE_BITS) * PRIME1;
+        s3 = rotateLeft(s3 + getInt(b, offset + 12) * PRIME2, ROTATE_BITS) * PRIME1;
+
+        state[0] = s0;
+        state[1] = s1;
+        state[2] = s2;
+        state[3] = s3;
+        stateUpdated = true;
+
+        pos = 0;
+    }
+
+    /**
+     * Reads the given byte array as a little endian long.
+     * @param bytes the byte array to convert
+     * @param off the offset into the array that starts the value
+     * @param length the number of bytes representing the value
+     * @return the number read
+     * @throws IllegalArgumentException if length is bigger than eight
+     */
+    private static long fromLittleEndian(byte[] bytes, final int off, final int length) {
+        if (length > 8) {
+            throw new IllegalArgumentException("can't read more than eight bytes into a long value");
+        }
+        long l = 0;
+        for (int i = 0; i < length; i++) {
+            l |= (bytes[off + i] & 0xffL) << (8 * i);
+        }
+        return l;
+    }
+}

Propchange: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/digest/XXHash32.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/digest/XXHash32Test.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/digest/XXHash32Test.java?rev=1810226&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/digest/XXHash32Test.java (added)
+++ commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/digest/XXHash32Test.java Sat Sep 30 16:38:05 2017
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.commons.codec.digest;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.URI;
+import java.net.URL;
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+import org.junit.runner.RunWith;
+
+@RunWith(Parameterized.class)
+public class XXHash32Test {
+
+    private final File file;
+    private final String expectedChecksum;
+
+    public XXHash32Test(String path, String c) throws IOException {
+        final URL url = XXHash32Test.class.getClassLoader().getResource(path);
+        if (url == null) {
+            throw new FileNotFoundException("couldn't find " + path);
+        }
+        URI uri = null;
+        try {
+            uri = url.toURI();
+        } catch (final java.net.URISyntaxException ex) {
+            throw new IOException(ex);
+        }
+        file = new File(uri);
+        expectedChecksum = c;
+    }
+
+    @Parameters
+    public static Collection<Object[]> factory() {
+        return Arrays.asList(new Object[][] {
+            // reference checksums created with xxh32sum
+            { "bla.tar", "fbb5c8d1" },
+            { "bla.tar.xz", "4106a208" },
+        });
+    }
+
+    @Test
+    public void verifyChecksum() throws IOException {
+        XXHash32 h = new XXHash32();
+        FileInputStream s = new FileInputStream(file);
+        try {
+            byte[] b = toByteArray(s);
+            h.update(b, 0, b.length);
+        } finally {
+            s.close();
+        }
+        Assert.assertEquals("checksum for " + file.getName(), expectedChecksum, Long.toHexString(h.getValue()));
+    }
+
+    private static byte[] toByteArray(final InputStream input) throws IOException {
+        final ByteArrayOutputStream output = new ByteArrayOutputStream();
+        copy(input, output, 10240);
+        return output.toByteArray();
+    }
+
+    private static long copy(final InputStream input, final OutputStream output, final int buffersize) throws IOException {
+        final byte[] buffer = new byte[buffersize];
+        int n = 0;
+        long count=0;
+        while (-1 != (n = input.read(buffer))) {
+            output.write(buffer, 0, n);
+            count += n;
+        }
+        return count;
+    }
+}

Propchange: commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/digest/XXHash32Test.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: commons/proper/codec/trunk/src/test/resources/bla.tar
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/resources/bla.tar?rev=1810226&view=auto
==============================================================================
Binary file - no diff available.

Propchange: commons/proper/codec/trunk/src/test/resources/bla.tar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: commons/proper/codec/trunk/src/test/resources/bla.tar.xz
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/resources/bla.tar.xz?rev=1810226&view=auto
==============================================================================
Binary file - no diff available.

Propchange: commons/proper/codec/trunk/src/test/resources/bla.tar.xz
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream