You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2021/07/22 19:52:38 UTC

[datasketches-memory] branch master updated: Place public accessible, static XxHash redirect into memory. This will enable the elimination of the current use of "internal" memory code by the DS-java component.

This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/datasketches-memory.git


The following commit(s) were added to refs/heads/master by this push:
     new 5555a43  Place public accessible, static XxHash redirect into memory. This will enable the elimination of the current use of "internal" memory code by the DS-java component.
5555a43 is described below

commit 5555a433f92f74243f93d58043fc0e221b38706e
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Thu Jul 22 12:52:21 2021 -0700

    Place public accessible, static XxHash redirect into memory. This will
    enable the elimination of the current use of "internal" memory code by
    the DS-java component.
---
 .../datasketches/memory/test/XxHash64Test.java     |  26 +--
 .../org/apache/datasketches/memory/BaseState.java  |   9 +
 .../org/apache/datasketches/memory/XxHash.java     | 184 +++++++++++++++++++++
 .../memory/internal/BaseStateImpl.java             |   5 +
 .../datasketches/memory/internal/XxHash64.java     |  21 +++
 5 files changed, 234 insertions(+), 11 deletions(-)

diff --git a/datasketches-memory-java8-tests/src/test/java/org/apache/datasketches/memory/test/XxHash64Test.java b/datasketches-memory-java8-tests/src/test/java/org/apache/datasketches/memory/test/XxHash64Test.java
index 13e77ed..f20aa09 100644
--- a/datasketches-memory-java8-tests/src/test/java/org/apache/datasketches/memory/test/XxHash64Test.java
+++ b/datasketches-memory-java8-tests/src/test/java/org/apache/datasketches/memory/test/XxHash64Test.java
@@ -19,7 +19,7 @@
 
 package org.apache.datasketches.memory.test;
 
-import static org.apache.datasketches.memory.internal.XxHash64.*;
+import static org.apache.datasketches.memory.XxHash.*;
 import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.assertTrue;
 
@@ -75,7 +75,8 @@ public class XxHash64Test {
 
   /*
    * This test is adapted from
-   * <a href="https://github.com/OpenHFT/Zero-Allocation-Hashing/blob/master/src/test/java/net/openhft/hashing/XxHashCollisionTest.java">
+   * <a href="https://github.com/OpenHFT/Zero-Allocation-Hashing/blob/master/
+   * src/test/java/net/openhft/hashing/XxHashCollisionTest.java">
    * OpenHFT/Zero-Allocation-Hashing</a> to test hash compatibility with that implementation.
    * It is licensed under Apache License, version 2.0. See LICENSE.
    */
@@ -128,42 +129,45 @@ public class XxHash64Test {
   public void testArrHashes() {
     WritableMemory wmem = WritableMemory.writableWrap(barr);
     long hash0 = wmem.xxHash64(8, 8, 0);
-    long hash1 = hashBytes(barr, 8, 8, 0);
+    long hash1 = hashByteArr(barr, 8, 8, 0);
     assertEquals(hash1, hash0);
 
     char[] carr = new char[8];
     wmem.getCharArray(0, carr, 0, 8);
-    hash1 = hashChars(carr, 4, 4, 0);
+    hash1 = hashCharArr(carr, 4, 4, 0);
     assertEquals(hash1, hash0);
 
     short[] sarr = new short[8];
     wmem.getShortArray(0, sarr, 0, 8);
-    hash1 = hashShorts(sarr, 4, 4, 0);
+    hash1 = hashShortArr(sarr, 4, 4, 0);
     assertEquals(hash1, hash0);
 
     int[] iarr = new int[4];
     wmem.getIntArray(0, iarr, 0, 4);
-    hash1 = hashInts(iarr, 2, 2, 0);
+    hash1 = hashIntArr(iarr, 2, 2, 0);
     assertEquals(hash1, hash0);
 
     float[] farr = new float[4];
     wmem.getFloatArray(0, farr, 0, 4);
-    hash1 = hashFloats(farr, 2, 2, 0);
+    hash1 = hashFloatArr(farr, 2, 2, 0);
     assertEquals(hash1, hash0);
 
     long[] larr = new long[2];
     wmem.getLongArray(0, larr, 0, 2);
-    hash1 = hashLongs(larr, 1, 1, 0);
+    hash1 = hashLongArr(larr, 1, 1, 0);
+    long in = wmem.getLong(8);
+    long hash2 = hashLong(in, 00); //tests the single long hash
     assertEquals(hash1, hash0);
+    assertEquals(hash2, hash0);
 
     double[] darr = new double[2];
     wmem.getDoubleArray(0, darr, 0, 2);
-    hash1 = hashDoubles(darr, 1, 1, 0);
+    hash1 = hashDoubleArr(darr, 1, 1, 0);
     assertEquals(hash1, hash0);
 
     boolean[] blarr = new boolean[16];
     wmem.getBooleanArray(0, blarr, 0, 16); //any byte != 0 is true
-    hash1 = hashBooleans(blarr, 8, 8, 0);
+    hash1 = hashBooleanArr(blarr, 8, 8, 0);
     assertEquals(hash1, hash0);
   }
 
@@ -172,7 +176,7 @@ public class XxHash64Test {
     String s = "Now is the time for all good men to come to the aid of their country.";
     char[] arr = s.toCharArray();
     long hash0 = hashString(s, 0, s.length(), 0);
-    long hash1 = hashChars(arr, 0, arr.length, 0);
+    long hash1 = hashCharArr(arr, 0, arr.length, 0);
     assertEquals(hash1, hash0);
   }
 
diff --git a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/BaseState.java b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/BaseState.java
index 25b2ad1..259f571 100644
--- a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/BaseState.java
+++ b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/BaseState.java
@@ -159,6 +159,15 @@ public interface BaseState {
   long xxHash64(long offsetBytes, long lengthBytes, long seed);
   
   /**
+   * Returns a 64-bit hash from a single long. This method has been optimized for speed when only
+   * a single hash of a long is required.
+   * @param in A long.
+   * @param seed A long valued seed.
+   * @return the hash.
+   */
+  long xxHash64(long in, long seed);
+  
+  /**
    * Returns true if this Memory is backed by a ByteBuffer.
    * @return true if this Memory is backed by a ByteBuffer.
    */
diff --git a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/XxHash.java b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/XxHash.java
new file mode 100644
index 0000000..e17b34b
--- /dev/null
+++ b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/XxHash.java
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.memory;
+
+import static org.apache.datasketches.memory.internal.XxHash64.*;
+
+/**
+ * The XxHash is a fast, non-cryptographic, 64-bit hash function that has
+ * excellent avalanche and 2-way bit independence properties.
+ * This Java version was adapted from the C++ version and from the OpenHFT/Zero-Allocation-Hashing
+ * implementation referenced below. Every method here delegates to the internal XxHash64 class.
+ *
+ * <p>The C++ source repository:
+ * <a href="https://github.com/Cyan4973/xxHash">
+ * https://github.com/Cyan4973/xxHash</a>. It has a BSD 2-Clause License:
+ * <a href="http://www.opensource.org/licenses/bsd-license.php">
+ * http://www.opensource.org/licenses/bsd-license.php</a>.  See LICENSE.
+ *
+ * <p>Portions of this code were adapted from
+ * <a href="https://github.com/OpenHFT/Zero-Allocation-Hashing/blob/master/src/main/java/net/openhft/hashing/XxHash.java">
+ * OpenHFT/Zero-Allocation-Hashing</a>, which has an Apache 2 license as does this site. See LICENSE.
+ *
+ * @author Lee Rhodes
+ */
+public final class XxHash {
+
+  private XxHash() { /* static utility class; not meant to be instantiated */ }
+
+  /**
+   * Hash the given arr starting at the given offset and continuing for the given length using the
+   * given seed.
+   * @param arr the given array
+   * @param offsetBooleans starting at this offset
+   * @param lengthBooleans continuing for this length
+   * @param seed the given seed
+   * @return the hash
+   */
+  public static long hashBooleanArr(final boolean[] arr, final long offsetBooleans,
+      final long lengthBooleans, final long seed) {
+    return hashBooleans(arr, offsetBooleans, lengthBooleans, seed);
+  }
+
+  /**
+   * Hash the given arr starting at the given offset and continuing for the given length using the
+   * given seed.
+   * @param arr the given array
+   * @param offsetBytes starting at this offset
+   * @param lengthBytes continuing for this length
+   * @param seed the given seed
+   * @return the hash
+   */
+  public static long hashByteArr(final byte[] arr, final long offsetBytes,
+      final long lengthBytes, final long seed) {
+    return hashBytes(arr, offsetBytes, lengthBytes, seed);
+  }
+
+  /**
+   * Hash the given arr starting at the given offset and continuing for the given length using the
+   * given seed.
+   * @param arr the given array
+   * @param offsetShorts starting at this offset
+   * @param lengthShorts continuing for this length
+   * @param seed the given seed
+   * @return the hash
+   */
+  public static long hashShortArr(final short[] arr, final long offsetShorts,
+      final long lengthShorts, final long seed) {
+    return hashShorts(arr, offsetShorts, lengthShorts, seed);
+  }
+
+  /**
+   * Hash the given arr starting at the given offset and continuing for the given length using the
+   * given seed.
+   * @param arr the given array
+   * @param offsetChars starting at this offset
+   * @param lengthChars continuing for this length
+   * @param seed the given seed
+   * @return the hash
+   */
+  public static long hashCharArr(final char[] arr, final long offsetChars,
+      final long lengthChars, final long seed) {
+    return hashChars(arr, offsetChars, lengthChars, seed);
+  }
+
+  /**
+   * Hash the given arr starting at the given offset and continuing for the given length using the
+   * given seed.
+   * @param arr the given array
+   * @param offsetInts starting at this offset
+   * @param lengthInts continuing for this length
+   * @param seed the given seed
+   * @return the hash
+   */
+  public static long hashIntArr(final int[] arr, final long offsetInts,
+      final long lengthInts, final long seed) {
+    return hashInts(arr, offsetInts, lengthInts, seed);
+  }
+
+  /**
+   * Hash the given arr starting at the given offset and continuing for the given length using the
+   * given seed.
+   * @param arr the given array
+   * @param offsetLongs starting at this offset
+   * @param lengthLongs continuing for this length
+   * @param seed the given seed
+   * @return the hash
+   */
+  public static long hashLongArr(final long[] arr, final long offsetLongs,
+      final long lengthLongs, final long seed) {
+    return hashLongs(arr, offsetLongs, lengthLongs, seed);
+  }
+
+  /**
+   * Returns a 64-bit hash from a single long. This method has been optimized for speed when only
+   * a single hash of a long is required.
+   * @param in A long.
+   * @param seed A long valued seed.
+   * @return the hash.
+   */
+  public static long hashLong(final long in, final long seed) {
+    return hash(in, seed);
+  }
+
+  /**
+   * Hash the given arr starting at the given offset and continuing for the given length using the
+   * given seed.
+   * @param arr the given array
+   * @param offsetFloats starting at this offset
+   * @param lengthFloats continuing for this length
+   * @param seed the given seed
+   * @return the hash
+   */
+  public static long hashFloatArr(final float[] arr, final long offsetFloats,
+      final long lengthFloats, final long seed) {
+    return hashFloats(arr, offsetFloats, lengthFloats, seed);
+  }
+
+  /**
+   * Hash the given arr starting at the given offset and continuing for the given length using the
+   * given seed.
+   * @param arr the given array
+   * @param offsetDoubles starting at this offset
+   * @param lengthDoubles continuing for this length
+   * @param seed the given seed
+   * @return the hash
+   */
+  public static long hashDoubleArr(final double[] arr, final long offsetDoubles,
+      final long lengthDoubles, final long seed) {
+    return hashDoubles(arr, offsetDoubles, lengthDoubles, seed);
+  }
+
+  /**
+   * Hash the given string starting at the given offset and continuing for the given length using
+   * the given seed.
+   * @param str the given string
+   * @param offsetChars starting at this offset
+   * @param lengthChars continuing for this length
+   * @param seed the given seed
+   * @return the hash
+   */
+  public static long hashString(final String str, final long offsetChars,
+      final long lengthChars, final long seed) {
+    return org.apache.datasketches.memory.internal.XxHash64.hashString(str, offsetChars, lengthChars, seed);
+  }
+
+}
+
diff --git a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/BaseStateImpl.java b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/BaseStateImpl.java
index c542708..28ed3dd 100644
--- a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/BaseStateImpl.java
+++ b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/BaseStateImpl.java
@@ -210,6 +210,11 @@ public abstract class BaseStateImpl implements BaseState {
   }
 
   @Override
+  public final long xxHash64(final long in, final long seed) {
+    return XxHash64.hash(in, seed); // depends only on the arguments, not on this object's state
+  }
+  
+  @Override
   public final boolean hasByteBuffer() {
     assertValid();
     return getByteBuffer() != null;
diff --git a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/XxHash64.java b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/XxHash64.java
index 76eff68..8592382 100644
--- a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/XxHash64.java
+++ b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/XxHash64.java
@@ -171,6 +171,25 @@ public class XxHash64 {
     return finalize(hash);
   }
 
+  /**
+   * Returns a 64-bit hash from a single long. This method has been optimized for speed when only
+   * a single hash of a long is required.
+   * @param in the long value to be hashed.
+   * @param seed A long valued seed.
+   * @return the hash.
+   */
+  public static long hash(final long in, final long seed) {
+    long hash = seed + P5;
+    hash += 8; // 8 is presumably the input length in bytes (one long) - TODO confirm
+    long k1 = in;
+    k1 *= P2;
+    k1 = Long.rotateLeft(k1, 31);
+    k1 *= P1;
+    hash ^= k1; // fold the mixed input word into the running hash
+    hash = (Long.rotateLeft(hash, 27) * P1) + P4;
+    return finalize(hash); // finalize() is the mixing step the preceding hash method also ends with
+  }
+  
   private static long finalize(long hash) {
     hash ^= hash >>> 33;
     hash *= P2;
@@ -180,6 +199,8 @@ public class XxHash64 {
     return hash;
   }
 
+  
+  
   /**
    * Hash the given arr starting at the given offset and continuing for the given length using the
    * given seed.

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org