You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2021/08/10 23:16:07 UTC
[datasketches-java] branch Memory2 updated: Minor test fixes,
moved MurmurHas3v2 sketch to Memory.
This is an automated email from the ASF dual-hosted git repository.
leerho pushed a commit to branch Memory2
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
The following commit(s) were added to refs/heads/Memory2 by this push:
new f2ff4b5 Minor test fixes, moved MurmurHas3v2 sketch to Memory.
f2ff4b5 is described below
commit f2ff4b50698ab68776cfd7f557a9431d97eb13a7
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Tue Aug 10 16:15:35 2021 -0700
Minor test fixes, moved MurmurHas3v2 sketch to Memory.
---
.../apache/datasketches/hash/MurmurHash3v2.java | 361 ---------------------
.../org/apache/datasketches/cpc/LzTzSpeedTest.java | 2 +-
.../datasketches/quantiles/DoublesSketchTest.java | 2 -
3 files changed, 1 insertion(+), 364 deletions(-)
diff --git a/src/main/java/org/apache/datasketches/hash/MurmurHash3v2.java b/src/main/java/org/apache/datasketches/hash/MurmurHash3v2.java
deleted file mode 100644
index f7bf205..0000000
--- a/src/main/java/org/apache/datasketches/hash/MurmurHash3v2.java
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.hash;
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.apache.datasketches.memory.internal.UnsafeUtil.unsafe;
-
-import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.memory.WritableMemory;
-
-/**
- * <p>The MurmurHash3 is a fast, non-cryptographic, 128-bit hash function that has
- * excellent avalanche and 2-way bit independence properties.</p>
- *
- * <p>Austin Appleby's C++
- * <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">
- * MurmurHash3_x64_128(...), final revision 150</a>,
- * which is in the Public Domain, was the inspiration for this implementation in Java.</p>
- *
- * <p>This implementation of the MurmurHash3 allows hashing of a block of Memory defined by an offset
- * and length. The calling API also allows the user to supply the small output array of two longs,
- * so that the entire hash function is static and free of object allocations.</p>
- *
- * <p>This implementation produces exactly the same hash result as the
- * {@link MurmurHash3#hash} function given compatible inputs.</p>
- *
- * @author Lee Rhodes
- */
-public final class MurmurHash3v2 {
- private static final long C1 = 0x87c37b91114253d5L;
- private static final long C2 = 0x4cf5ad432745937fL;
-
- //Provided for backward compatibility
-
- /**
- * Returns a 128-bit hash of the input.
- * Provided for compatibility with older version of MurmurHash3,
- * but empty or null input now returns a hash.
- * @param in long array
- * @param seed A long valued seed.
- * @return the hash
- */
- public static long[] hash(final long[] in, final long seed) {
- if ((in == null) || (in.length == 0)) {
- return emptyOrNull(seed, new long[2]);
- }
- return hash(Memory.wrap(in), 0L, in.length << 3, seed, new long[2]);
- }
-
- /**
- * Returns a 128-bit hash of the input.
- * Provided for compatibility with older version of MurmurHash3,
- * but empty or null input now returns a hash.
- * @param in int array
- * @param seed A long valued seed.
- * @return the hash
- */
- public static long[] hash(final int[] in, final long seed) {
- if ((in == null) || (in.length == 0)) {
- return emptyOrNull(seed, new long[2]);
- }
- return hash(Memory.wrap(in), 0L, in.length << 2, seed, new long[2]);
- }
-
- /**
- * Returns a 128-bit hash of the input.
- * Provided for compatibility with older version of MurmurHash3,
- * but empty or null input now returns a hash.
- * @param in char array
- * @param seed A long valued seed.
- * @return the hash
- */
- public static long[] hash(final char[] in, final long seed) {
- if ((in == null) || (in.length == 0)) {
- return emptyOrNull(seed, new long[2]);
- }
- return hash(Memory.wrap(in), 0L, in.length << 1, seed, new long[2]);
- }
-
- /**
- * Returns a 128-bit hash of the input.
- * Provided for compatibility with older version of MurmurHash3,
- * but empty or null input now returns a hash.
- * @param in byte array
- * @param seed A long valued seed.
- * @return the hash
- */
- public static long[] hash(final byte[] in, final long seed) {
- if ((in == null) || (in.length == 0)) {
- return emptyOrNull(seed, new long[2]);
- }
- return hash(Memory.wrap(in), 0L, in.length, seed, new long[2]);
- }
-
- //Single primitive inputs
-
- /**
- * Returns a 128-bit hash of the input.
- * Note the entropy of the resulting hash cannot be more than 64 bits.
- * @param in a long
- * @param seed A long valued seed.
- * @param hashOut A long array of size 2
- * @return the hash
- */
- public static long[] hash(final long in, final long seed, final long[] hashOut) {
- final long h1 = seed ^ mixK1(in);
- final long h2 = seed;
- return finalMix128(h1, h2, 8, hashOut);
- }
-
- /**
- * Returns a 128-bit hash of the input.
- * Note the entropy of the resulting hash cannot be more than 64 bits.
- * @param in a double
- * @param seed A long valued seed.
- * @param hashOut A long array of size 2
- * @return the hash
- */
- public static long[] hash(final double in, final long seed, final long[] hashOut) {
- final double d = (in == 0.0) ? 0.0 : in; // canonicalize -0.0, 0.0
- final long k1 = Double.doubleToLongBits(d); // canonicalize all NaN forms
- final long h1 = seed ^ mixK1(k1);
- final long h2 = seed;
- return finalMix128(h1, h2, 8, hashOut);
- }
-
- /**
- * Returns a 128-bit hash of the input.
- * @param in a String
- * @param seed A long valued seed.
- * @param hashOut A long array of size 2
- * @return the hash
- */
- public static long[] hash(final String in, final long seed, final long[] hashOut) {
- if ((in == null) || (in.length() == 0)) {
- return emptyOrNull(seed, hashOut);
- }
- final byte[] byteArr = in.getBytes(UTF_8);
- return hash(Memory.wrap(byteArr), 0L, byteArr.length, seed, hashOut);
- }
-
- //The main API call
-
- /**
- * Returns a 128-bit hash of the input as a long array of size 2.
- *
- * @param mem The input Memory. Must be non-null and non-empty.
- * @param offsetBytes the starting point within Memory.
- * @param lengthBytes the total number of bytes to be hashed.
- * @param seed A long valued seed.
- * @param hashOut the size 2 long array for the resulting 128-bit hash
- * @return the hash.
- */
- @SuppressWarnings("restriction")
- public static long[] hash(final Memory mem, final long offsetBytes, final long lengthBytes,
- final long seed, final long[] hashOut) {
- if ((mem == null) || (mem.getCapacity() == 0L)) {
- return emptyOrNull(seed, hashOut);
- }
- final Object uObj = ((WritableMemory) mem).getArray(); //may be null
- long cumOff = mem.getCumulativeOffset() + offsetBytes;
-
- long h1 = seed;
- long h2 = seed;
- long rem = lengthBytes;
-
- // Process the 128-bit blocks (the body) into the hash
- while (rem >= 16L) {
- final long k1 = unsafe.getLong(uObj, cumOff); //0, 16, 32, ...
- final long k2 = unsafe.getLong(uObj, cumOff + 8); //8, 24, 40, ...
- cumOff += 16L;
- rem -= 16L;
-
- h1 ^= mixK1(k1);
- h1 = Long.rotateLeft(h1, 27);
- h1 += h2;
- h1 = (h1 * 5) + 0x52dce729L;
-
- h2 ^= mixK2(k2);
- h2 = Long.rotateLeft(h2, 31);
- h2 += h1;
- h2 = (h2 * 5) + 0x38495ab5L;
- }
-
- // Get the tail (if any): 1 to 15 bytes
- if (rem > 0L) {
- long k1 = 0;
- long k2 = 0;
- switch ((int) rem) {
- case 15: {
- k2 ^= (unsafe.getByte(uObj, cumOff + 14) & 0xFFL) << 48;
- }
- //$FALL-THROUGH$
- case 14: {
- k2 ^= (unsafe.getShort(uObj, cumOff + 12) & 0xFFFFL) << 32;
- k2 ^= (unsafe.getInt(uObj, cumOff + 8) & 0xFFFFFFFFL);
- k1 = unsafe.getLong(uObj, cumOff);
- break;
- }
-
- case 13: {
- k2 ^= (unsafe.getByte(uObj, cumOff + 12) & 0xFFL) << 32;
- }
- //$FALL-THROUGH$
- case 12: {
- k2 ^= (unsafe.getInt(uObj, cumOff + 8) & 0xFFFFFFFFL);
- k1 = unsafe.getLong(uObj, cumOff);
- break;
- }
-
- case 11: {
- k2 ^= (unsafe.getByte(uObj, cumOff + 10) & 0xFFL) << 16;
- }
- //$FALL-THROUGH$
- case 10: {
- k2 ^= (unsafe.getShort(uObj, cumOff + 8) & 0xFFFFL);
- k1 = unsafe.getLong(uObj, cumOff);
- break;
- }
-
- case 9: {
- k2 ^= (unsafe.getByte(uObj, cumOff + 8) & 0xFFL);
- }
- //$FALL-THROUGH$
- case 8: {
- k1 = unsafe.getLong(uObj, cumOff);
- break;
- }
-
- case 7: {
- k1 ^= (unsafe.getByte(uObj, cumOff + 6) & 0xFFL) << 48;
- }
- //$FALL-THROUGH$
- case 6: {
- k1 ^= (unsafe.getShort(uObj, cumOff + 4) & 0xFFFFL) << 32;
- k1 ^= (unsafe.getInt(uObj, cumOff) & 0xFFFFFFFFL);
- break;
- }
-
- case 5: {
- k1 ^= (unsafe.getByte(uObj, cumOff + 4) & 0xFFL) << 32;
- }
- //$FALL-THROUGH$
- case 4: {
- k1 ^= (unsafe.getInt(uObj, cumOff) & 0xFFFFFFFFL);
- break;
- }
-
- case 3: {
- k1 ^= (unsafe.getByte(uObj, cumOff + 2) & 0xFFL) << 16;
- }
- //$FALL-THROUGH$
- case 2: {
- k1 ^= (unsafe.getShort(uObj, cumOff) & 0xFFFFL);
- break;
- }
-
- case 1: {
- k1 ^= (unsafe.getByte(uObj, cumOff) & 0xFFL);
- break;
- }
- //default: break; //can't happen
- }
-
- h1 ^= mixK1(k1);
- h2 ^= mixK2(k2);
- }
- return finalMix128(h1, h2, lengthBytes, hashOut);
- }
-
- //--Helper methods----------------------------------------------------
-
- /**
- * Self mix of k1
- *
- * @param k1 input argument
- * @return mix
- */
- private static long mixK1(long k1) {
- k1 *= C1;
- k1 = Long.rotateLeft(k1, 31);
- k1 *= C2;
- return k1;
- }
-
- /**
- * Self mix of k2
- *
- * @param k2 input argument
- * @return mix
- */
- private static long mixK2(long k2) {
- k2 *= C2;
- k2 = Long.rotateLeft(k2, 33);
- k2 *= C1;
- return k2;
- }
-
-
- /**
- * Final self mix of h*.
- *
- * @param h input to final mix
- * @return mix
- */
- private static long finalMix64(long h) {
- h ^= h >>> 33;
- h *= 0xff51afd7ed558ccdL;
- h ^= h >>> 33;
- h *= 0xc4ceb9fe1a85ec53L;
- h ^= h >>> 33;
- return h;
- }
-
- /**
- * Finalization: Add the length into the hash and mix
- * @param h1 intermediate hash
- * @param h2 intermediate hash
- * @param lengthBytes the length in bytes
- * @param hashOut the output array of 2 longs
- * @return hashOut
- */
- private static long[] finalMix128(long h1, long h2, final long lengthBytes, final long[] hashOut) {
- h1 ^= lengthBytes;
- h2 ^= lengthBytes;
-
- h1 += h2;
- h2 += h1;
-
- h1 = finalMix64(h1);
- h2 = finalMix64(h2);
-
- h1 += h2;
- h2 += h1;
-
- hashOut[0] = h1;
- hashOut[1] = h2;
- return hashOut;
- }
-
- private static long[] emptyOrNull(final long seed, final long[] hashOut) {
- return finalMix128(seed, seed, 0, hashOut);
- }
-}
diff --git a/src/test/java/org/apache/datasketches/cpc/LzTzSpeedTest.java b/src/test/java/org/apache/datasketches/cpc/LzTzSpeedTest.java
index b134eea..9936609 100644
--- a/src/test/java/org/apache/datasketches/cpc/LzTzSpeedTest.java
+++ b/src/test/java/org/apache/datasketches/cpc/LzTzSpeedTest.java
@@ -164,7 +164,7 @@ public class LzTzSpeedTest {
* @param s value to print
*/
static void println(String s) {
- System.out.println(s); //disable here
+ //System.out.println(s); //disable here
}
static {
diff --git a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java
index bc9638d..cc5e30b 100644
--- a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java
+++ b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java
@@ -156,9 +156,7 @@ public class DoublesSketchTest {
try {
sketch.update(i);
if (sketch.isSameResource(mem)) { continue; }
- System.out.println(i);
} catch (NullPointerException e) {
- System.out.println("NPE " + i);
break;
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org