You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ra...@apache.org on 2019/08/05 06:31:13 UTC
[arrow] branch master updated: ARROW-6030: [Java] Efficiently
compute hash code for ArrowBufPointer
This is an automated email from the ASF dual-hosted git repository.
ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new a4480f7 ARROW-6030: [Java] Efficiently compute hash code for ArrowBufPointer
a4480f7 is described below
commit a4480f7ebce639c950695f80bdbea1d94bd3f8f6
Author: liyafan82 <li...@gmail.com>
AuthorDate: Mon Aug 5 12:00:46 2019 +0530
ARROW-6030: [Java] Efficiently compute hash code for ArrowBufPointer
As ArrowBufHasher is introduced, we can compute the hash code of a continuous region within an ArrowBuf.
We optimize the process to make it efficient to avoid recomputation.
Closes #4939 from liyafan82/fly_0725_pthash1 and squashes the following commits:
a73432545 <liyafan82> Efficiently compute hash code for ArrowBufPointer
Authored-by: liyafan82 <li...@gmail.com>
Signed-off-by: Pindikura Ravindra <ra...@dremio.com>
---
.../apache/arrow/memory/util/ArrowBufPointer.java | 64 +++++++++-
.../arrow/memory/util/hash/DirectHasher.java | 5 +
.../arrow/memory/util/TestArrowBufPointer.java | 131 ++++++++++++++++++++-
3 files changed, 197 insertions(+), 3 deletions(-)
diff --git a/java/memory/src/main/java/org/apache/arrow/memory/util/ArrowBufPointer.java b/java/memory/src/main/java/org/apache/arrow/memory/util/ArrowBufPointer.java
index cafe4b0..3d0bf7b 100644
--- a/java/memory/src/main/java/org/apache/arrow/memory/util/ArrowBufPointer.java
+++ b/java/memory/src/main/java/org/apache/arrow/memory/util/ArrowBufPointer.java
@@ -17,6 +17,10 @@
package org.apache.arrow.memory.util;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.memory.util.hash.DirectHasher;
+import org.apache.arrow.util.Preconditions;
+
import io.netty.buffer.ArrowBuf;
/**
@@ -25,17 +29,40 @@ import io.netty.buffer.ArrowBuf;
*/
public final class ArrowBufPointer {
+ /**
+ * The hash code when the arrow buffer is null.
+ */
+ public static final int NULL_HASH_CODE = 0;
+
private ArrowBuf buf;
private int offset;
private int length;
+ private int hashCode = NULL_HASH_CODE;
+
+ private final ArrowBufHasher hasher;
+
+ /**
+ * A flag indicating if the underlying memory region has changed.
+ */
+ private boolean hashCodeChanged = false;
+
/**
* The default constructor.
*/
public ArrowBufPointer() {
+ this(DirectHasher.INSTANCE);
+ }
+ /**
+ * Constructs an arrow buffer pointer with the specified hasher.
+ * @param hasher the hasher to use.
+ */
+ public ArrowBufPointer(ArrowBufHasher hasher) {
+ Preconditions.checkNotNull(hasher);
+ this.hasher = hasher;
}
/**
@@ -45,6 +72,19 @@ public final class ArrowBufPointer {
* @param length the length off set of the memory region pointed to.
*/
public ArrowBufPointer(ArrowBuf buf, int offset, int length) {
+ this(buf, offset, length, DirectHasher.INSTANCE);
+ }
+
+ /**
+ * Constructs an Arrow buffer pointer.
+ * @param buf the underlying {@link ArrowBuf}, which can be null.
+ * @param offset the start off set of the memory region pointed to.
+ * @param length the length off set of the memory region pointed to.
+ * @param hasher the hasher used to calculate the hash code.
+ */
+ public ArrowBufPointer(ArrowBuf buf, int offset, int length, ArrowBufHasher hasher) {
+ Preconditions.checkNotNull(hasher);
+ this.hasher = hasher;
set(buf, offset, length);
}
@@ -58,6 +98,8 @@ public final class ArrowBufPointer {
this.buf = buf;
this.offset = offset;
this.length = length;
+
+ hashCodeChanged = true;
}
/**
@@ -85,6 +127,13 @@ public final class ArrowBufPointer {
return false;
}
+ if (!hasher.equals(((ArrowBufPointer) o).hasher)) {
+ // note that the hasher is incorporated in equality determination
+ // this is to avoid problems in cases where two Arrow buffer pointers are not equal
+ // while having different hashers and equal hash codes.
+ return false;
+ }
+
ArrowBufPointer other = (ArrowBufPointer) o;
if (buf == null || other.buf == null) {
if (buf == null && other.buf == null) {
@@ -100,7 +149,18 @@ public final class ArrowBufPointer {
@Override
public int hashCode() {
- // implement after ARROW-5898
- throw new UnsupportedOperationException();
+ if (!hashCodeChanged) {
+ return hashCode;
+ }
+
+ // re-compute the hash code
+ if (buf == null) {
+ hashCode = NULL_HASH_CODE;
+ } else {
+ hashCode = hasher.hashCode(buf, offset, length);
+ }
+
+ hashCodeChanged = false;
+ return hashCode;
}
}
diff --git a/java/memory/src/main/java/org/apache/arrow/memory/util/hash/DirectHasher.java b/java/memory/src/main/java/org/apache/arrow/memory/util/hash/DirectHasher.java
index 18f39c3..aa889e6 100644
--- a/java/memory/src/main/java/org/apache/arrow/memory/util/hash/DirectHasher.java
+++ b/java/memory/src/main/java/org/apache/arrow/memory/util/hash/DirectHasher.java
@@ -79,4 +79,9 @@ public class DirectHasher extends ArrowBufHasher {
return hash;
}
+
+ @Override
+ public boolean equals(Object obj) {
+ return obj != null && this.getClass() == obj.getClass();
+ }
}
diff --git a/java/memory/src/test/java/org/apache/arrow/memory/util/TestArrowBufPointer.java b/java/memory/src/test/java/org/apache/arrow/memory/util/TestArrowBufPointer.java
index f9d738e..36b78af 100644
--- a/java/memory/src/test/java/org/apache/arrow/memory/util/TestArrowBufPointer.java
+++ b/java/memory/src/test/java/org/apache/arrow/memory/util/TestArrowBufPointer.java
@@ -17,12 +17,15 @@
package org.apache.arrow.memory.util;
+import static junit.framework.TestCase.assertEquals;
import static junit.framework.TestCase.assertTrue;
-
+import static org.junit.jupiter.api.Assertions.assertFalse;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.memory.util.hash.DirectHasher;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@@ -67,4 +70,130 @@ public class TestArrowBufPointer {
}
}
}
+
+ @Test
+ public void testArrowBufPointersHashCode() {
+ final int vectorLength = 100;
+ try (ArrowBuf buf1 = allocator.buffer(vectorLength * 4);
+ ArrowBuf buf2 = allocator.buffer(vectorLength * 4)) {
+ for (int i = 0; i < vectorLength; i++) {
+ buf1.setInt(i * 4, i);
+ buf2.setInt(i * 4, i);
+ }
+
+ CounterHasher hasher1 = new CounterHasher();
+ CounterHasher hasher2 = new CounterHasher();
+
+ ArrowBufPointer pointer1 = new ArrowBufPointer(hasher1);
+ assertEquals(ArrowBufPointer.NULL_HASH_CODE, pointer1.hashCode());
+
+ ArrowBufPointer pointer2 = new ArrowBufPointer(hasher2);
+ assertEquals(ArrowBufPointer.NULL_HASH_CODE, pointer2.hashCode());
+
+ for (int i = 0; i < vectorLength; i++) {
+ pointer1.set(buf1, i * 4, 4);
+ pointer2.set(buf2, i * 4, 4);
+
+ assertEquals(pointer1.hashCode(), pointer2.hashCode());
+
+ // verify that the hash codes have been re-computed
+ assertEquals(hasher1.counter, i + 1);
+ assertEquals(hasher2.counter, i + 1);
+ }
+ }
+ }
+
+ @Test
+ public void testNullPointersHashCode() {
+ ArrowBufPointer pointer = new ArrowBufPointer();
+ assertEquals(ArrowBufPointer.NULL_HASH_CODE, pointer.hashCode());
+
+ pointer.set(null, 0, 0);
+ assertEquals(ArrowBufPointer.NULL_HASH_CODE, pointer.hashCode());
+ }
+
+ @Test
+ public void testReuseHashCode() {
+ try (ArrowBuf buf = allocator.buffer(10)) {
+ buf.setInt(0, 10);
+ buf.setInt(4, 20);
+
+ CounterHasher hasher = new CounterHasher();
+ ArrowBufPointer pointer = new ArrowBufPointer(hasher);
+
+ pointer.set(buf, 0, 4);
+ pointer.hashCode();
+
+ // hash code computed
+ assertEquals(1, hasher.counter);
+
+ // no hash code re-compute
+ pointer.hashCode();
+ assertEquals(1, hasher.counter);
+
+ // hash code re-computed
+ pointer.set(buf, 4, 4);
+ pointer.hashCode();
+ assertEquals(2, hasher.counter);
+ }
+ }
+
+ @Test
+ public void testHashersForEquality() {
+ try (ArrowBuf buf = allocator.buffer(10)) {
+ // pointer 1 uses the default hasher
+ ArrowBufPointer pointer1 = new ArrowBufPointer(buf, 0, 10);
+
+ // pointer 2 uses the counter hasher
+ ArrowBufPointer pointer2 = new ArrowBufPointer(buf, 0, 10, new CounterHasher());
+
+ // the two pointers cannot be equal, since they have different hashers
+ assertFalse(pointer1.equals(pointer2));
+ }
+ }
+
+ /**
+ * Hasher with a counter that increments each time a hash code is calculated.
+ * This is to validate that the hash code in {@link ArrowBufPointer} is reused.
+ */
+ class CounterHasher extends ArrowBufHasher {
+
+ protected int counter = 0;
+
+ @Override
+ protected int combineHashCode(int currentHashCode, int newHashCode) {
+ return 0;
+ }
+
+ @Override
+ protected int getByteHashCode(byte byteValue) {
+ return 0;
+ }
+
+ @Override
+ protected int getIntHashCode(int intValue) {
+ return 0;
+ }
+
+ @Override
+ protected int getLongHashCode(long longValue) {
+ return 0;
+ }
+
+ @Override
+ protected int finalizeHashCode(int hashCode) {
+ return 0;
+ }
+
+ @Override
+ public int hashCode(ArrowBuf buf, int offset, int length) {
+ counter += 1;
+ return DirectHasher.INSTANCE.hashCode(buf, offset, length);
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ return o != null && this.getClass() == o.getClass();
+ }
+ }
}