You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2021/07/19 18:11:47 UTC

[datasketches-java] branch Memory2 updated: Move DS-java:datasketches-hash/MurmurHash3v2.java to DS-memory.

This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch Memory2
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git


The following commit(s) were added to refs/heads/Memory2 by this push:
     new daeb06f  Move DS-java:datasketches-hash/MurmurHash3v2.java to DS-memory.
daeb06f is described below

commit daeb06f24ef9cae852ea8a03850feb4bac70b081
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Mon Jul 19 11:11:06 2021 -0700

    Move DS-java:datasketches-hash/MurmurHash3v2.java to DS-memory.
---
 .../apache/datasketches/hash/MurmurHash3v2.java    | 363 ---------------------
 .../datasketches/hash/MurmurHash3v2Test.java       |  23 +-
 2 files changed, 12 insertions(+), 374 deletions(-)

diff --git a/src/main/java/org/apache/datasketches/hash/MurmurHash3v2.java b/src/main/java/org/apache/datasketches/hash/MurmurHash3v2.java
deleted file mode 100644
index 3e77bc6..0000000
--- a/src/main/java/org/apache/datasketches/hash/MurmurHash3v2.java
+++ /dev/null
@@ -1,363 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.hash;
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.apache.datasketches.memory.internal.UnsafeUtil.unsafe;
-
-import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.memory.WritableMemory;
-
-/**
- * <p>The MurmurHash3 is a fast, non-cryptographic, 128-bit hash function that has
- * excellent avalanche and 2-way bit independence properties.</p>
- *
- * <p>Austin Appleby's C++
- * <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">
- * MurmurHash3_x64_128(...), final revision 150</a>,
- * which is in the Public Domain, was the inspiration for this implementation in Java.</p>
- *
- * <p>This implementation of the MurmurHash3 allows hashing of a block of on-heap Memory defined by an offset
- * and length. The calling API also allows the user to supply the small output array of two longs,
- * so that the entire hash function is static and free of object allocations.</p>
- *
- * <p>This implementation produces exactly the same hash result as the
- * {@link MurmurHash3#hash} function given compatible inputs.</p>
- *
- * @author Lee Rhodes
- */
-public final class MurmurHash3v2 {
-  private static final long C1 = 0x87c37b91114253d5L;
-  private static final long C2 = 0x4cf5ad432745937fL;
-
-  //Provided for backward compatibility
-
-  /**
-   * Returns a 128-bit hash of the input.
-   * Provided for compatibility with older version of MurmurHash3,
-   * but empty or null input now throws IllegalArgumentException.
-   * @param in long array
-   * @param seed A long valued seed.
-   * @return the hash
-   */
-  public static long[] hash(final long[] in, final long seed) {
-    if ((in == null) || (in.length == 0)) {
-      throw new IllegalArgumentException("Input in is empty or null.");
-    }
-    return hash(Memory.wrap(in), 0L, in.length << 3, seed, new long[2]);
-  }
-
-  /**
-   * Returns a 128-bit hash of the input.
-   * Provided for compatibility with older version of MurmurHash3,
-   * but empty or null input now throws IllegalArgumentException.
-   * @param in int array
-   * @param seed A long valued seed.
-   * @return the hash
-   */
-  public static long[] hash(final int[] in, final long seed) {
-    if ((in == null) || (in.length == 0)) {
-      throw new IllegalArgumentException("Input in is empty or null.");
-    }
-    return hash(Memory.wrap(in), 0L, in.length << 2, seed, new long[2]);
-  }
-
-  /**
-   * Returns a 128-bit hash of the input.
-   * Provided for compatibility with older version of MurmurHash3,
-   * but empty or null input now throws IllegalArgumentException.
-   * @param in char array
-   * @param seed A long valued seed.
-   * @return the hash
-   */
-  public static long[] hash(final char[] in, final long seed) {
-    if ((in == null) || (in.length == 0)) {
-      throw new IllegalArgumentException("Input in is empty or null.");
-    }
-    return hash(Memory.wrap(in), 0L, in.length << 1, seed, new long[2]);
-  }
-
-  /**
-   * Returns a 128-bit hash of the input.
-   * Provided for compatibility with older version of MurmurHash3,
-   * but empty or null input now throws IllegalArgumentException.
-   * @param in byte array
-   * @param seed A long valued seed.
-   * @return the hash
-   */
-  public static long[] hash(final byte[] in, final long seed) {
-    if ((in == null) || (in.length == 0)) {
-      throw new IllegalArgumentException("Input in is empty or null.");
-    }
-    return hash(Memory.wrap(in), 0L, in.length, seed, new long[2]);
-  }
-
-  //Single primitive inputs
-
-  /**
-   * Returns a 128-bit hash of the input.
-   * Note the entropy of the resulting hash cannot be more than 64 bits.
-   * @param in a long
-   * @param seed A long valued seed.
-   * @param hashOut A long array of size 2
-   * @return the hash
-   */
-  public static long[] hash(final long in, final long seed, final long[] hashOut) {
-    final long h1 = seed ^ mixK1(in);
-    final long h2 = seed;
-    return finalMix128(h1, h2, 8, hashOut);
-  }
-
-  /**
-   * Returns a 128-bit hash of the input.
-   * Note the entropy of the resulting hash cannot be more than 64 bits.
-   * @param in a double
-   * @param seed A long valued seed.
-   * @param hashOut A long array of size 2
-   * @return the hash
-   */
-  public static long[] hash(final double in, final long seed, final long[] hashOut) {
-    final double d = (in == 0.0) ? 0.0 : in;    // canonicalize -0.0, 0.0
-    final long k1 = Double.doubleToLongBits(d); // canonicalize all NaN forms
-    final long h1 = seed ^ mixK1(k1);
-    final long h2 = seed;
-    return finalMix128(h1, h2, 8, hashOut);
-  }
-
-  /**
-   * Returns a 128-bit hash of the input.
-   * An empty or null input throws IllegalArgumentException.
-   * @param in a String
-   * @param seed A long valued seed.
-   * @param hashOut A long array of size 2
-   * @return the hash
-   */
-  public static long[] hash(final String in, final long seed, final long[] hashOut) {
-    if ((in == null) || (in.length() == 0)) {
-      throw new IllegalArgumentException("Input in is empty or null.");
-    }
-    final byte[] byteArr = in.getBytes(UTF_8);
-    return hash(Memory.wrap(byteArr), 0L, byteArr.length, seed, hashOut);
-  }
-
-  //The main API call
-
-  /**
-   * Returns a 128-bit hash of the input as a long array of size 2.
-   *
-   * @param mem The input on-heap Memory. Must be non-null and non-empty, 
-   * otherwise throws IllegalArgumentException.
-   * @param offsetBytes the starting point within Memory.
-   * @param lengthBytes the total number of bytes to be hashed.
-   * @param seed A long valued seed.
-   * @param hashOut the size 2 long array for the resulting 128-bit hash
-   * @return the hash.
-   */
-  @SuppressWarnings("restriction")
-  public static long[] hash(final Memory mem, final long offsetBytes, final long lengthBytes,
-      final long seed, final long[] hashOut) {
-    if ((mem == null) || (mem.getCapacity() == 0L)) { 
-      throw new IllegalArgumentException("Input mem is empty or null.");
-    }
-    final Object uObj = ((WritableMemory) mem).getArray();
-    if (uObj == null) { 
-      throw new IllegalArgumentException("The backing resource of input mem is not on-heap."); 
-    }
-    long cumOff = mem.getCumulativeOffset() + offsetBytes;
-
-    long h1 = seed;
-    long h2 = seed;
-    long rem = lengthBytes;
-
-    // Process the 128-bit blocks (the body) into the hash
-    while (rem >= 16L) {
-      final long k1 = unsafe.getLong(uObj, cumOff);     //0, 16, 32, ...
-      final long k2 = unsafe.getLong(uObj, cumOff + 8); //8, 24, 40, ...
-      cumOff += 16L;
-      rem -= 16L;
-
-      h1 ^= mixK1(k1);
-      h1 = Long.rotateLeft(h1, 27);
-      h1 += h2;
-      h1 = (h1 * 5) + 0x52dce729L;
-
-      h2 ^= mixK2(k2);
-      h2 = Long.rotateLeft(h2, 31);
-      h2 += h1;
-      h2 = (h2 * 5) + 0x38495ab5L;
-    }
-
-    // Get the tail (if any): 1 to 15 bytes
-    if (rem > 0L) {
-      long k1 = 0;
-      long k2 = 0;
-      switch ((int) rem) {
-        case 15: {
-          k2 ^= (unsafe.getByte(uObj, cumOff + 14) & 0xFFL) << 48;
-        }
-        //$FALL-THROUGH$
-        case 14: {
-          k2 ^= (unsafe.getShort(uObj, cumOff + 12) & 0xFFFFL) << 32;
-          k2 ^= (unsafe.getInt(uObj, cumOff + 8) & 0xFFFFFFFFL);
-          k1 = unsafe.getLong(uObj, cumOff);
-          break;
-        }
-
-        case 13: {
-          k2 ^= (unsafe.getByte(uObj, cumOff + 12) & 0xFFL) << 32;
-        }
-        //$FALL-THROUGH$
-        case 12: {
-          k2 ^= (unsafe.getInt(uObj, cumOff + 8) & 0xFFFFFFFFL);
-          k1 = unsafe.getLong(uObj, cumOff);
-          break;
-        }
-
-        case 11: {
-          k2 ^= (unsafe.getByte(uObj, cumOff + 10) & 0xFFL) << 16;
-        }
-        //$FALL-THROUGH$
-        case 10: {
-          k2 ^= (unsafe.getShort(uObj, cumOff +  8) & 0xFFFFL);
-          k1 = unsafe.getLong(uObj, cumOff);
-          break;
-        }
-
-        case  9: {
-          k2 ^= (unsafe.getByte(uObj, cumOff +  8) & 0xFFL);
-        }
-        //$FALL-THROUGH$
-        case  8: {
-          k1 = unsafe.getLong(uObj, cumOff);
-          break;
-        }
-
-        case  7: {
-          k1 ^= (unsafe.getByte(uObj, cumOff +  6) & 0xFFL) << 48;
-        }
-        //$FALL-THROUGH$
-        case  6: {
-          k1 ^= (unsafe.getShort(uObj, cumOff +  4) & 0xFFFFL) << 32;
-          k1 ^= (unsafe.getInt(uObj, cumOff) & 0xFFFFFFFFL);
-          break;
-        }
-
-        case  5: {
-          k1 ^= (unsafe.getByte(uObj, cumOff +  4) & 0xFFL) << 32;
-        }
-        //$FALL-THROUGH$
-        case  4: {
-          k1 ^= (unsafe.getInt(uObj, cumOff) & 0xFFFFFFFFL);
-          break;
-        }
-
-        case  3: {
-          k1 ^= (unsafe.getByte(uObj, cumOff +  2) & 0xFFL) << 16;
-        }
-        //$FALL-THROUGH$
-        case  2: {
-          k1 ^= (unsafe.getShort(uObj, cumOff) & 0xFFFFL);
-          break;
-        }
-
-        case  1: {
-          k1 ^= (unsafe.getByte(uObj, cumOff) & 0xFFL);
-          break;
-        }
-        //default: break; //can't happen
-      }
-
-      h1 ^= mixK1(k1);
-      h2 ^= mixK2(k2);
-    }
-    return finalMix128(h1, h2, lengthBytes, hashOut);
-  }
-
-  //--Helper methods----------------------------------------------------
-
-  /**
-   * Self mix of k1
-   *
-   * @param k1 input argument
-   * @return mix
-   */
-  private static long mixK1(long k1) {
-    k1 *= C1;
-    k1 = Long.rotateLeft(k1, 31);
-    k1 *= C2;
-    return k1;
-  }
-
-  /**
-   * Self mix of k2
-   *
-   * @param k2 input argument
-   * @return mix
-   */
-  private static long mixK2(long k2) {
-    k2 *= C2;
-    k2 = Long.rotateLeft(k2, 33);
-    k2 *= C1;
-    return k2;
-  }
-
-
-  /**
-   * Final self mix of h*.
-   *
-   * @param h input to final mix
-   * @return mix
-   */
-  private static long finalMix64(long h) {
-    h ^= h >>> 33;
-    h *= 0xff51afd7ed558ccdL;
-    h ^= h >>> 33;
-    h *= 0xc4ceb9fe1a85ec53L;
-    h ^= h >>> 33;
-    return h;
-  }
-
-  /**
-   * Finalization: Add the length into the hash and mix
-   * @param h1 intermediate hash
-   * @param h2 intermediate hash
-   * @param lengthBytes the length in bytes
-   * @param hashOut the output array of 2 longs
-   * @return hashOut
-   */
-  private static long[] finalMix128(long h1, long h2, final long lengthBytes, final long[] hashOut) {
-    h1 ^= lengthBytes;
-    h2 ^= lengthBytes;
-
-    h1 += h2;
-    h2 += h1;
-
-    h1 = finalMix64(h1);
-    h2 = finalMix64(h2);
-
-    h1 += h2;
-    h2 += h1;
-
-    hashOut[0] = h1;
-    hashOut[1] = h2;
-    return hashOut;
-  }
-
-}
diff --git a/src/test/java/org/apache/datasketches/hash/MurmurHash3v2Test.java b/src/test/java/org/apache/datasketches/hash/MurmurHash3v2Test.java
index a5342b8..12b6a90 100644
--- a/src/test/java/org/apache/datasketches/hash/MurmurHash3v2Test.java
+++ b/src/test/java/org/apache/datasketches/hash/MurmurHash3v2Test.java
@@ -28,6 +28,7 @@ import static org.testng.Assert.fail;
 import org.testng.annotations.Test;
 
 import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.MurmurHash3v2;
 import org.apache.datasketches.memory.WritableMemory;
 
 /**
@@ -138,49 +139,49 @@ public class MurmurHash3v2Test {
   }
   
   private static final long[] hashV1(long[] key, long seed) {
-    return org.apache.datasketches.hash.MurmurHash3.hash(key, seed);
+    return MurmurHash3.hash(key, seed);
   }
   
   private static final long[] hashV1(int[] key, long seed) {
-    return org.apache.datasketches.hash.MurmurHash3.hash(key, seed);
+    return MurmurHash3.hash(key, seed);
   }
 
   private static final long[] hashV1(char[] key, long seed) {
-    return org.apache.datasketches.hash.MurmurHash3.hash(key, seed);
+    return MurmurHash3.hash(key, seed);
   }
   
   private static final long[] hashV1(byte[] key, long seed) {
-    return org.apache.datasketches.hash.MurmurHash3.hash(key, seed);
+    return MurmurHash3.hash(key, seed);
   }
   
   private static final long[] hashV2(long[] key, long seed) {
-    return org.apache.datasketches.hash.MurmurHash3v2.hash(key, seed);
+    return MurmurHash3v2.hash(key, seed);
   }
   
   private static final long[] hashV2(int[] key2, long seed) {
-    return org.apache.datasketches.hash.MurmurHash3v2.hash(key2, seed);
+    return MurmurHash3v2.hash(key2, seed);
   }
 
   private static final long[] hashV2(char[] key, long seed) {
-    return org.apache.datasketches.hash.MurmurHash3v2.hash(key, seed);
+    return MurmurHash3v2.hash(key, seed);
   }
   
   private static final long[] hashV2(byte[] key, long seed) {
-    return org.apache.datasketches.hash.MurmurHash3v2.hash(key, seed);
+    return MurmurHash3v2.hash(key, seed);
   }
   
   //V2 single primitives
   
   private static final long[] hashV2(long key, long seed, long[] out) {
-    return org.apache.datasketches.hash.MurmurHash3v2.hash(key, seed, out);
+    return MurmurHash3v2.hash(key, seed, out);
   }
   
 //  private static final long[] hashV2(double key, long seed, long[] out) {
-//    return org.apache.datasketches.hash.MurmurHash3v2.hash(key, seed, out);
+//    return MurmurHash3v2.hash(key, seed, out);
 //  }
   
 //  private static final long[] hashV2(String key, long seed, long[] out) {
-//    return org.apache.datasketches.hash.MurmurHash3v2.hash(key, seed, out);
+//    return MurmurHash3v2.hash(key, seed, out);
 //  }
   
   

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org