You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2021/08/10 23:16:07 UTC

[datasketches-java] branch Memory2 updated: Minor test fixes, moved MurmurHash3v2 sketch to Memory.

This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch Memory2
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git


The following commit(s) were added to refs/heads/Memory2 by this push:
     new f2ff4b5  Minor test fixes, moved MurmurHash3v2 sketch to Memory.
f2ff4b5 is described below

commit f2ff4b50698ab68776cfd7f557a9431d97eb13a7
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Tue Aug 10 16:15:35 2021 -0700

    Minor test fixes, moved MurmurHash3v2 sketch to Memory.
---
 .../apache/datasketches/hash/MurmurHash3v2.java    | 361 ---------------------
 .../org/apache/datasketches/cpc/LzTzSpeedTest.java |   2 +-
 .../datasketches/quantiles/DoublesSketchTest.java  |   2 -
 3 files changed, 1 insertion(+), 364 deletions(-)

diff --git a/src/main/java/org/apache/datasketches/hash/MurmurHash3v2.java b/src/main/java/org/apache/datasketches/hash/MurmurHash3v2.java
deleted file mode 100644
index f7bf205..0000000
--- a/src/main/java/org/apache/datasketches/hash/MurmurHash3v2.java
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.hash;
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.apache.datasketches.memory.internal.UnsafeUtil.unsafe;
-
-import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.memory.WritableMemory;
-
-/**
- * <p>The MurmurHash3 is a fast, non-cryptographic, 128-bit hash function that has
- * excellent avalanche and 2-way bit independence properties.</p>
- *
- * <p>Austin Appleby's C++
- * <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">
- * MurmurHash3_x64_128(...), final revision 150</a>,
- * which is in the Public Domain, was the inspiration for this implementation in Java.</p>
- *
- * <p>This implementation of the MurmurHash3 allows hashing of a block of Memory defined by an offset
- * and length. The calling API also allows the user to supply the small output array of two longs,
- * so that the entire hash function is static and free of object allocations.</p>
- *
- * <p>This implementation produces exactly the same hash result as the
- * {@link MurmurHash3#hash} function given compatible inputs.</p>
- *
- * @author Lee Rhodes
- */
-public final class MurmurHash3v2 {
-  private static final long C1 = 0x87c37b91114253d5L;
-  private static final long C2 = 0x4cf5ad432745937fL;
-
-  //Provided for backward compatibility
-
-  /**
-   * Returns a 128-bit hash of the input.
-   * Provided for compatibility with older version of MurmurHash3,
-   * but empty or null input now returns a hash.
-   * @param in long array
-   * @param seed A long valued seed.
-   * @return the hash
-   */
-  public static long[] hash(final long[] in, final long seed) {
-    if ((in == null) || (in.length == 0)) {
-      return emptyOrNull(seed, new long[2]);
-    }
-    return hash(Memory.wrap(in), 0L, in.length << 3, seed, new long[2]);
-  }
-
-  /**
-   * Returns a 128-bit hash of the input.
-   * Provided for compatibility with older version of MurmurHash3,
-   * but empty or null input now returns a hash.
-   * @param in int array
-   * @param seed A long valued seed.
-   * @return the hash
-   */
-  public static long[] hash(final int[] in, final long seed) {
-    if ((in == null) || (in.length == 0)) {
-      return emptyOrNull(seed, new long[2]);
-    }
-    return hash(Memory.wrap(in), 0L, in.length << 2, seed, new long[2]);
-  }
-
-  /**
-   * Returns a 128-bit hash of the input.
-   * Provided for compatibility with older version of MurmurHash3,
-   * but empty or null input now returns a hash.
-   * @param in char array
-   * @param seed A long valued seed.
-   * @return the hash
-   */
-  public static long[] hash(final char[] in, final long seed) {
-    if ((in == null) || (in.length == 0)) {
-      return emptyOrNull(seed, new long[2]);
-    }
-    return hash(Memory.wrap(in), 0L, in.length << 1, seed, new long[2]);
-  }
-
-  /**
-   * Returns a 128-bit hash of the input.
-   * Provided for compatibility with older version of MurmurHash3,
-   * but empty or null input now returns a hash.
-   * @param in byte array
-   * @param seed A long valued seed.
-   * @return the hash
-   */
-  public static long[] hash(final byte[] in, final long seed) {
-    if ((in == null) || (in.length == 0)) {
-      return emptyOrNull(seed, new long[2]);
-    }
-    return hash(Memory.wrap(in), 0L, in.length, seed, new long[2]);
-  }
-
-  //Single primitive inputs
-
-  /**
-   * Returns a 128-bit hash of the input.
-   * Note the entropy of the resulting hash cannot be more than 64 bits.
-   * @param in a long
-   * @param seed A long valued seed.
-   * @param hashOut A long array of size 2
-   * @return the hash
-   */
-  public static long[] hash(final long in, final long seed, final long[] hashOut) {
-    final long h1 = seed ^ mixK1(in);
-    final long h2 = seed;
-    return finalMix128(h1, h2, 8, hashOut);
-  }
-
-  /**
-   * Returns a 128-bit hash of the input.
-   * Note the entropy of the resulting hash cannot be more than 64 bits.
-   * @param in a double
-   * @param seed A long valued seed.
-   * @param hashOut A long array of size 2
-   * @return the hash
-   */
-  public static long[] hash(final double in, final long seed, final long[] hashOut) {
-    final double d = (in == 0.0) ? 0.0 : in;    // canonicalize -0.0, 0.0
-    final long k1 = Double.doubleToLongBits(d); // canonicalize all NaN forms
-    final long h1 = seed ^ mixK1(k1);
-    final long h2 = seed;
-    return finalMix128(h1, h2, 8, hashOut);
-  }
-
-  /**
-   * Returns a 128-bit hash of the input.
-   * @param in a String
-   * @param seed A long valued seed.
-   * @param hashOut A long array of size 2
-   * @return the hash
-   */
-  public static long[] hash(final String in, final long seed, final long[] hashOut) {
-    if ((in == null) || (in.length() == 0)) {
-      return emptyOrNull(seed, hashOut);
-    }
-    final byte[] byteArr = in.getBytes(UTF_8);
-    return hash(Memory.wrap(byteArr), 0L, byteArr.length, seed, hashOut);
-  }
-
-  //The main API call
-
-  /**
-   * Returns a 128-bit hash of the input as a long array of size 2.
-   *
-   * @param mem The input Memory. Must be non-null and non-empty.
-   * @param offsetBytes the starting point within Memory.
-   * @param lengthBytes the total number of bytes to be hashed.
-   * @param seed A long valued seed.
-   * @param hashOut the size 2 long array for the resulting 128-bit hash
-   * @return the hash.
-   */
-  @SuppressWarnings("restriction")
-  public static long[] hash(final Memory mem, final long offsetBytes, final long lengthBytes,
-      final long seed, final long[] hashOut) {
-    if ((mem == null) || (mem.getCapacity() == 0L)) {
-      return emptyOrNull(seed, hashOut);
-    }
-    final Object uObj = ((WritableMemory) mem).getArray(); //may be null
-    long cumOff = mem.getCumulativeOffset() + offsetBytes;
-
-    long h1 = seed;
-    long h2 = seed;
-    long rem = lengthBytes;
-
-    // Process the 128-bit blocks (the body) into the hash
-    while (rem >= 16L) {
-      final long k1 = unsafe.getLong(uObj, cumOff);     //0, 16, 32, ...
-      final long k2 = unsafe.getLong(uObj, cumOff + 8); //8, 24, 40, ...
-      cumOff += 16L;
-      rem -= 16L;
-
-      h1 ^= mixK1(k1);
-      h1 = Long.rotateLeft(h1, 27);
-      h1 += h2;
-      h1 = (h1 * 5) + 0x52dce729L;
-
-      h2 ^= mixK2(k2);
-      h2 = Long.rotateLeft(h2, 31);
-      h2 += h1;
-      h2 = (h2 * 5) + 0x38495ab5L;
-    }
-
-    // Get the tail (if any): 1 to 15 bytes
-    if (rem > 0L) {
-      long k1 = 0;
-      long k2 = 0;
-      switch ((int) rem) {
-        case 15: {
-          k2 ^= (unsafe.getByte(uObj, cumOff + 14) & 0xFFL) << 48;
-        }
-        //$FALL-THROUGH$
-        case 14: {
-          k2 ^= (unsafe.getShort(uObj, cumOff + 12) & 0xFFFFL) << 32;
-          k2 ^= (unsafe.getInt(uObj, cumOff + 8) & 0xFFFFFFFFL);
-          k1 = unsafe.getLong(uObj, cumOff);
-          break;
-        }
-
-        case 13: {
-          k2 ^= (unsafe.getByte(uObj, cumOff + 12) & 0xFFL) << 32;
-        }
-        //$FALL-THROUGH$
-        case 12: {
-          k2 ^= (unsafe.getInt(uObj, cumOff + 8) & 0xFFFFFFFFL);
-          k1 = unsafe.getLong(uObj, cumOff);
-          break;
-        }
-
-        case 11: {
-          k2 ^= (unsafe.getByte(uObj, cumOff + 10) & 0xFFL) << 16;
-        }
-        //$FALL-THROUGH$
-        case 10: {
-          k2 ^= (unsafe.getShort(uObj, cumOff +  8) & 0xFFFFL);
-          k1 = unsafe.getLong(uObj, cumOff);
-          break;
-        }
-
-        case  9: {
-          k2 ^= (unsafe.getByte(uObj, cumOff +  8) & 0xFFL);
-        }
-        //$FALL-THROUGH$
-        case  8: {
-          k1 = unsafe.getLong(uObj, cumOff);
-          break;
-        }
-
-        case  7: {
-          k1 ^= (unsafe.getByte(uObj, cumOff +  6) & 0xFFL) << 48;
-        }
-        //$FALL-THROUGH$
-        case  6: {
-          k1 ^= (unsafe.getShort(uObj, cumOff +  4) & 0xFFFFL) << 32;
-          k1 ^= (unsafe.getInt(uObj, cumOff) & 0xFFFFFFFFL);
-          break;
-        }
-
-        case  5: {
-          k1 ^= (unsafe.getByte(uObj, cumOff +  4) & 0xFFL) << 32;
-        }
-        //$FALL-THROUGH$
-        case  4: {
-          k1 ^= (unsafe.getInt(uObj, cumOff) & 0xFFFFFFFFL);
-          break;
-        }
-
-        case  3: {
-          k1 ^= (unsafe.getByte(uObj, cumOff +  2) & 0xFFL) << 16;
-        }
-        //$FALL-THROUGH$
-        case  2: {
-          k1 ^= (unsafe.getShort(uObj, cumOff) & 0xFFFFL);
-          break;
-        }
-
-        case  1: {
-          k1 ^= (unsafe.getByte(uObj, cumOff) & 0xFFL);
-          break;
-        }
-        //default: break; //can't happen
-      }
-
-      h1 ^= mixK1(k1);
-      h2 ^= mixK2(k2);
-    }
-    return finalMix128(h1, h2, lengthBytes, hashOut);
-  }
-
-  //--Helper methods----------------------------------------------------
-
-  /**
-   * Self mix of k1
-   *
-   * @param k1 input argument
-   * @return mix
-   */
-  private static long mixK1(long k1) {
-    k1 *= C1;
-    k1 = Long.rotateLeft(k1, 31);
-    k1 *= C2;
-    return k1;
-  }
-
-  /**
-   * Self mix of k2
-   *
-   * @param k2 input argument
-   * @return mix
-   */
-  private static long mixK2(long k2) {
-    k2 *= C2;
-    k2 = Long.rotateLeft(k2, 33);
-    k2 *= C1;
-    return k2;
-  }
-
-
-  /**
-   * Final self mix of h*.
-   *
-   * @param h input to final mix
-   * @return mix
-   */
-  private static long finalMix64(long h) {
-    h ^= h >>> 33;
-    h *= 0xff51afd7ed558ccdL;
-    h ^= h >>> 33;
-    h *= 0xc4ceb9fe1a85ec53L;
-    h ^= h >>> 33;
-    return h;
-  }
-
-  /**
-   * Finalization: Add the length into the hash and mix
-   * @param h1 intermediate hash
-   * @param h2 intermediate hash
-   * @param lengthBytes the length in bytes
-   * @param hashOut the output array of 2 longs
-   * @return hashOut
-   */
-  private static long[] finalMix128(long h1, long h2, final long lengthBytes, final long[] hashOut) {
-    h1 ^= lengthBytes;
-    h2 ^= lengthBytes;
-
-    h1 += h2;
-    h2 += h1;
-
-    h1 = finalMix64(h1);
-    h2 = finalMix64(h2);
-
-    h1 += h2;
-    h2 += h1;
-
-    hashOut[0] = h1;
-    hashOut[1] = h2;
-    return hashOut;
-  }
-
-  private static long[] emptyOrNull(final long seed, final long[] hashOut) {
-    return finalMix128(seed, seed, 0, hashOut);
-  }
-}
diff --git a/src/test/java/org/apache/datasketches/cpc/LzTzSpeedTest.java b/src/test/java/org/apache/datasketches/cpc/LzTzSpeedTest.java
index b134eea..9936609 100644
--- a/src/test/java/org/apache/datasketches/cpc/LzTzSpeedTest.java
+++ b/src/test/java/org/apache/datasketches/cpc/LzTzSpeedTest.java
@@ -164,7 +164,7 @@ public class LzTzSpeedTest {
    * @param s value to print
    */
   static void println(String s) {
-    System.out.println(s); //disable here
+    //System.out.println(s); //disable here
   }
 
   static {
diff --git a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java
index bc9638d..cc5e30b 100644
--- a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java
+++ b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java
@@ -156,9 +156,7 @@ public class DoublesSketchTest {
         try {
           sketch.update(i);
           if (sketch.isSameResource(mem)) { continue; }
-          System.out.println(i);
         } catch (NullPointerException e) {
-          System.out.println("NPE " + i);
           break;
         }
       }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org