You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2021/08/10 22:56:23 UTC
[datasketches-java] branch Memory2 updated: A few simple changes.
This is an automated email from the ASF dual-hosted git repository.
leerho pushed a commit to branch Memory2
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
The following commit(s) were added to refs/heads/Memory2 by this push:
new 3e2860a A few simple changes.
3e2860a is described below
commit 3e2860aace9a2e1c44e4e44483d7204d8145ea86
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Tue Aug 10 15:56:09 2021 -0700
A few simple changes.
---
.../apache/datasketches/hash/MurmurHash3v2.java | 361 +++++++++++++++++++++
.../datasketches/sampling/VarOptItemsSamples.java | 3 +
.../frequencies/SerDeCompatibilityTest.java | 7 +-
.../datasketches/quantiles/DoublesSketchTest.java | 26 +-
4 files changed, 390 insertions(+), 7 deletions(-)
diff --git a/src/main/java/org/apache/datasketches/hash/MurmurHash3v2.java b/src/main/java/org/apache/datasketches/hash/MurmurHash3v2.java
new file mode 100644
index 0000000..f7bf205
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/hash/MurmurHash3v2.java
@@ -0,0 +1,361 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.hash;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.datasketches.memory.internal.UnsafeUtil.unsafe;
+
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.WritableMemory;
+
+/**
+ * <p>The MurmurHash3 is a fast, non-cryptographic, 128-bit hash function that has
+ * excellent avalanche and 2-way bit independence properties.</p>
+ *
+ * <p>Austin Appleby's C++
+ * <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">
+ * MurmurHash3_x64_128(...), final revision 150</a>,
+ * which is in the Public Domain, was the inspiration for this implementation in Java.</p>
+ *
+ * <p>This implementation of the MurmurHash3 allows hashing of a block of Memory defined by an offset
+ * and length. The calling API also allows the user to supply the small output array of two longs,
+ * so that the entire hash function is static and free of object allocations.</p>
+ *
+ * <p>This implementation produces exactly the same hash result as the
+ * {@link MurmurHash3#hash} function given compatible inputs.</p>
+ *
+ * @author Lee Rhodes
+ */
+public final class MurmurHash3v2 {
+ private static final long C1 = 0x87c37b91114253d5L;
+ private static final long C2 = 0x4cf5ad432745937fL;
+
+ //Provided for backward compatibility
+
+ /**
+ * Returns a 128-bit hash of the input.
+ * Provided for compatibility with older version of MurmurHash3,
+ * but empty or null input now returns a hash.
+ * @param in long array
+ * @param seed A long valued seed.
+ * @return the hash
+ */
+ public static long[] hash(final long[] in, final long seed) {
+ if ((in == null) || (in.length == 0)) {
+ return emptyOrNull(seed, new long[2]);
+ }
+ return hash(Memory.wrap(in), 0L, in.length << 3, seed, new long[2]);
+ }
+
+ /**
+ * Returns a 128-bit hash of the input.
+ * Provided for compatibility with older version of MurmurHash3,
+ * but empty or null input now returns a hash.
+ * @param in int array
+ * @param seed A long valued seed.
+ * @return the hash
+ */
+ public static long[] hash(final int[] in, final long seed) {
+ if ((in == null) || (in.length == 0)) {
+ return emptyOrNull(seed, new long[2]);
+ }
+ return hash(Memory.wrap(in), 0L, in.length << 2, seed, new long[2]);
+ }
+
+ /**
+ * Returns a 128-bit hash of the input.
+ * Provided for compatibility with older version of MurmurHash3,
+ * but empty or null input now returns a hash.
+ * @param in char array
+ * @param seed A long valued seed.
+ * @return the hash
+ */
+ public static long[] hash(final char[] in, final long seed) {
+ if ((in == null) || (in.length == 0)) {
+ return emptyOrNull(seed, new long[2]);
+ }
+ return hash(Memory.wrap(in), 0L, in.length << 1, seed, new long[2]);
+ }
+
+ /**
+ * Returns a 128-bit hash of the input.
+ * Provided for compatibility with older version of MurmurHash3,
+ * but empty or null input now returns a hash.
+ * @param in byte array
+ * @param seed A long valued seed.
+ * @return the hash
+ */
+ public static long[] hash(final byte[] in, final long seed) {
+ if ((in == null) || (in.length == 0)) {
+ return emptyOrNull(seed, new long[2]);
+ }
+ return hash(Memory.wrap(in), 0L, in.length, seed, new long[2]);
+ }
+
+ //Single primitive inputs
+
+ /**
+ * Returns a 128-bit hash of the input.
+ * Note the entropy of the resulting hash cannot be more than 64 bits.
+ * @param in a long
+ * @param seed A long valued seed.
+ * @param hashOut A long array of size 2
+ * @return the hash
+ */
+ public static long[] hash(final long in, final long seed, final long[] hashOut) {
+ final long h1 = seed ^ mixK1(in);
+ final long h2 = seed;
+ return finalMix128(h1, h2, 8, hashOut);
+ }
+
+ /**
+ * Returns a 128-bit hash of the input.
+ * Note the entropy of the resulting hash cannot be more than 64 bits.
+ * @param in a double
+ * @param seed A long valued seed.
+ * @param hashOut A long array of size 2
+ * @return the hash
+ */
+ public static long[] hash(final double in, final long seed, final long[] hashOut) {
+ final double d = (in == 0.0) ? 0.0 : in; // canonicalize -0.0, 0.0
+ final long k1 = Double.doubleToLongBits(d); // canonicalize all NaN forms
+ final long h1 = seed ^ mixK1(k1);
+ final long h2 = seed;
+ return finalMix128(h1, h2, 8, hashOut);
+ }
+
+ /**
+ * Returns a 128-bit hash of the input.
+ * @param in a String
+ * @param seed A long valued seed.
+ * @param hashOut A long array of size 2
+ * @return the hash
+ */
+ public static long[] hash(final String in, final long seed, final long[] hashOut) {
+ if ((in == null) || (in.length() == 0)) {
+ return emptyOrNull(seed, hashOut);
+ }
+ final byte[] byteArr = in.getBytes(UTF_8);
+ return hash(Memory.wrap(byteArr), 0L, byteArr.length, seed, hashOut);
+ }
+
+ //The main API call
+
+ /**
+ * Returns a 128-bit hash of the input as a long array of size 2.
+ *
+ * @param mem The input Memory. Must be non-null and non-empty.
+ * @param offsetBytes the starting point within Memory.
+ * @param lengthBytes the total number of bytes to be hashed.
+ * @param seed A long valued seed.
+ * @param hashOut the size 2 long array for the resulting 128-bit hash
+ * @return the hash.
+ */
+ @SuppressWarnings("restriction")
+ public static long[] hash(final Memory mem, final long offsetBytes, final long lengthBytes,
+ final long seed, final long[] hashOut) {
+ if ((mem == null) || (mem.getCapacity() == 0L)) {
+ return emptyOrNull(seed, hashOut);
+ }
+ final Object uObj = ((WritableMemory) mem).getArray(); //may be null
+ long cumOff = mem.getCumulativeOffset() + offsetBytes;
+
+ long h1 = seed;
+ long h2 = seed;
+ long rem = lengthBytes;
+
+ // Process the 128-bit blocks (the body) into the hash
+ while (rem >= 16L) {
+ final long k1 = unsafe.getLong(uObj, cumOff); //0, 16, 32, ...
+ final long k2 = unsafe.getLong(uObj, cumOff + 8); //8, 24, 40, ...
+ cumOff += 16L;
+ rem -= 16L;
+
+ h1 ^= mixK1(k1);
+ h1 = Long.rotateLeft(h1, 27);
+ h1 += h2;
+ h1 = (h1 * 5) + 0x52dce729L;
+
+ h2 ^= mixK2(k2);
+ h2 = Long.rotateLeft(h2, 31);
+ h2 += h1;
+ h2 = (h2 * 5) + 0x38495ab5L;
+ }
+
+ // Get the tail (if any): 1 to 15 bytes
+ if (rem > 0L) {
+ long k1 = 0;
+ long k2 = 0;
+ switch ((int) rem) {
+ case 15: {
+ k2 ^= (unsafe.getByte(uObj, cumOff + 14) & 0xFFL) << 48;
+ }
+ //$FALL-THROUGH$
+ case 14: {
+ k2 ^= (unsafe.getShort(uObj, cumOff + 12) & 0xFFFFL) << 32;
+ k2 ^= (unsafe.getInt(uObj, cumOff + 8) & 0xFFFFFFFFL);
+ k1 = unsafe.getLong(uObj, cumOff);
+ break;
+ }
+
+ case 13: {
+ k2 ^= (unsafe.getByte(uObj, cumOff + 12) & 0xFFL) << 32;
+ }
+ //$FALL-THROUGH$
+ case 12: {
+ k2 ^= (unsafe.getInt(uObj, cumOff + 8) & 0xFFFFFFFFL);
+ k1 = unsafe.getLong(uObj, cumOff);
+ break;
+ }
+
+ case 11: {
+ k2 ^= (unsafe.getByte(uObj, cumOff + 10) & 0xFFL) << 16;
+ }
+ //$FALL-THROUGH$
+ case 10: {
+ k2 ^= (unsafe.getShort(uObj, cumOff + 8) & 0xFFFFL);
+ k1 = unsafe.getLong(uObj, cumOff);
+ break;
+ }
+
+ case 9: {
+ k2 ^= (unsafe.getByte(uObj, cumOff + 8) & 0xFFL);
+ }
+ //$FALL-THROUGH$
+ case 8: {
+ k1 = unsafe.getLong(uObj, cumOff);
+ break;
+ }
+
+ case 7: {
+ k1 ^= (unsafe.getByte(uObj, cumOff + 6) & 0xFFL) << 48;
+ }
+ //$FALL-THROUGH$
+ case 6: {
+ k1 ^= (unsafe.getShort(uObj, cumOff + 4) & 0xFFFFL) << 32;
+ k1 ^= (unsafe.getInt(uObj, cumOff) & 0xFFFFFFFFL);
+ break;
+ }
+
+ case 5: {
+ k1 ^= (unsafe.getByte(uObj, cumOff + 4) & 0xFFL) << 32;
+ }
+ //$FALL-THROUGH$
+ case 4: {
+ k1 ^= (unsafe.getInt(uObj, cumOff) & 0xFFFFFFFFL);
+ break;
+ }
+
+ case 3: {
+ k1 ^= (unsafe.getByte(uObj, cumOff + 2) & 0xFFL) << 16;
+ }
+ //$FALL-THROUGH$
+ case 2: {
+ k1 ^= (unsafe.getShort(uObj, cumOff) & 0xFFFFL);
+ break;
+ }
+
+ case 1: {
+ k1 ^= (unsafe.getByte(uObj, cumOff) & 0xFFL);
+ break;
+ }
+ //default: break; //can't happen
+ }
+
+ h1 ^= mixK1(k1);
+ h2 ^= mixK2(k2);
+ }
+ return finalMix128(h1, h2, lengthBytes, hashOut);
+ }
+
+ //--Helper methods----------------------------------------------------
+
+ /**
+ * Self mix of k1
+ *
+ * @param k1 input argument
+ * @return mix
+ */
+ private static long mixK1(long k1) {
+ k1 *= C1;
+ k1 = Long.rotateLeft(k1, 31);
+ k1 *= C2;
+ return k1;
+ }
+
+ /**
+ * Self mix of k2
+ *
+ * @param k2 input argument
+ * @return mix
+ */
+ private static long mixK2(long k2) {
+ k2 *= C2;
+ k2 = Long.rotateLeft(k2, 33);
+ k2 *= C1;
+ return k2;
+ }
+
+
+ /**
+ * Final self mix of h*.
+ *
+ * @param h input to final mix
+ * @return mix
+ */
+ private static long finalMix64(long h) {
+ h ^= h >>> 33;
+ h *= 0xff51afd7ed558ccdL;
+ h ^= h >>> 33;
+ h *= 0xc4ceb9fe1a85ec53L;
+ h ^= h >>> 33;
+ return h;
+ }
+
+ /**
+ * Finalization: Add the length into the hash and mix
+ * @param h1 intermediate hash
+ * @param h2 intermediate hash
+ * @param lengthBytes the length in bytes
+ * @param hashOut the output array of 2 longs
+ * @return hashOut
+ */
+ private static long[] finalMix128(long h1, long h2, final long lengthBytes, final long[] hashOut) {
+ h1 ^= lengthBytes;
+ h2 ^= lengthBytes;
+
+ h1 += h2;
+ h2 += h1;
+
+ h1 = finalMix64(h1);
+ h2 = finalMix64(h2);
+
+ h1 += h2;
+ h2 += h1;
+
+ hashOut[0] = h1;
+ hashOut[1] = h2;
+ return hashOut;
+ }
+
+ private static long[] emptyOrNull(final long seed, final long[] hashOut) {
+ return finalMix128(seed, seed, 0, hashOut);
+ }
+}
diff --git a/src/main/java/org/apache/datasketches/sampling/VarOptItemsSamples.java b/src/main/java/org/apache/datasketches/sampling/VarOptItemsSamples.java
index 24614fe..b5b265c 100644
--- a/src/main/java/org/apache/datasketches/sampling/VarOptItemsSamples.java
+++ b/src/main/java/org/apache/datasketches/sampling/VarOptItemsSamples.java
@@ -57,6 +57,7 @@ public class VarOptItemsSamples<T> implements Iterable<VarOptItemsSamples<T>.Wei
/**
* A convenience class to allow easy iterator access to a VarOpt sample.
*/
+ //@SuppressWarnings("synthetic-access")
public final class WeightedSample {
private final int idx_;
private double adjustedWeight_;
@@ -98,6 +99,7 @@ public class VarOptItemsSamples<T> implements Iterable<VarOptItemsSamples<T>.Wei
/**
* The standard iterator
*/
+ //@SuppressWarnings("synthetic-access")
public class VarOptItemsIterator implements Iterator<WeightedSample> {
int currIdx_;
int finalIdx_; // inclusive final index
@@ -147,6 +149,7 @@ public class VarOptItemsSamples<T> implements Iterable<VarOptItemsSamples<T>.Wei
}
}
+ //@SuppressWarnings("synthetic-access")
class WeightCorrectingRRegionIterator extends VarOptItemsIterator {
private double cumWeight = 0.0;
diff --git a/src/test/java/org/apache/datasketches/frequencies/SerDeCompatibilityTest.java b/src/test/java/org/apache/datasketches/frequencies/SerDeCompatibilityTest.java
index 3414598..2e80d3b 100644
--- a/src/test/java/org/apache/datasketches/frequencies/SerDeCompatibilityTest.java
+++ b/src/test/java/org/apache/datasketches/frequencies/SerDeCompatibilityTest.java
@@ -19,12 +19,11 @@
package org.apache.datasketches.frequencies;
-import org.testng.Assert;
-import org.testng.annotations.Test;
-
-import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.ArrayOfItemsSerDe;
import org.apache.datasketches.ArrayOfLongsSerDe;
+import org.apache.datasketches.memory.WritableMemory;
+import org.testng.Assert;
+import org.testng.annotations.Test;
@SuppressWarnings("javadoc")
public class SerDeCompatibilityTest {
diff --git a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java
index 32b47e9..bc9638d 100644
--- a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java
+++ b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java
@@ -24,11 +24,10 @@ import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNull;
import static org.testng.Assert.assertTrue;
-import org.testng.Assert;
-import org.testng.annotations.Test;
-
import org.apache.datasketches.memory.WritableHandle;
import org.apache.datasketches.memory.WritableMemory;
+import org.testng.Assert;
+import org.testng.annotations.Test;
@SuppressWarnings("javadoc")
public class DoublesSketchTest {
@@ -148,6 +147,27 @@ public class DoublesSketchTest {
}
@Test
+ public void directSketchShouldMoveOntoHeapEventually2() {
+ try (WritableHandle wdh = WritableMemory.allocateDirect(50)) {
+ WritableMemory mem = wdh.getWritable();
+ UpdateDoublesSketch sketch = DoublesSketch.builder().build(mem);
+ Assert.assertTrue(sketch.isSameResource(mem));
+ for (int i = 0; i < 1000; i++) {
+ try {
+ sketch.update(i);
+ if (sketch.isSameResource(mem)) { continue; }
+ System.out.println(i);
+ } catch (NullPointerException e) {
+ System.out.println("NPE " + i);
+ break;
+ }
+ }
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Test
public void checkEmptyDirect() {
try (WritableHandle wdh = WritableMemory.allocateDirect(1000)) {
WritableMemory mem = wdh.getWritable();
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org