You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ra...@apache.org on 2019/04/15 11:20:04 UTC
[arrow] branch master updated: ARROW-5164: [Gandiva][C++] Introduce
murmur32 for 32 bit types.
This is an automated email from the ASF dual-hosted git repository.
ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 70db333 ARROW-5164: [Gandiva][C++] Introduce murmur32 for 32 bit types.
70db333 is described below
commit 70db3335dcd7de3726950635b8d25a58dd9c47be
Author: Praveen <pr...@dremio.com>
AuthorDate: Mon Apr 15 16:49:39 2019 +0530
ARROW-5164: [Gandiva][C++] Introduce murmur32 for 32 bit types.
- Use 32 bit hash functions for 32 bit types instead of truncating the 64 bit hash.
Author: Praveen <pr...@dremio.com>
Closes #4153 from praveenbingo/ARROW-5164 and squashes the following commits:
eb15a331 <Praveen> Fix gcc issues.
8c369b4e <Praveen> Fix lint issues.
73b41a08 <Praveen> ARROW-5164: Introduce murmur32 for 32 bit types.
---
cpp/src/gandiva/precompiled/hash.cc | 100 +++++++++++++++++++++++++++++++++++-
1 file changed, 98 insertions(+), 2 deletions(-)
diff --git a/cpp/src/gandiva/precompiled/hash.cc b/cpp/src/gandiva/precompiled/hash.cc
index 4ab942c..bd884a9 100644
--- a/cpp/src/gandiva/precompiled/hash.cc
+++ b/cpp/src/gandiva/precompiled/hash.cc
@@ -72,6 +72,41 @@ static inline uint64 murmur3_64(uint64 val, int32 seed) {
return h1;
}
+static inline uint32 murmur3_32(uint64 val, int32 seed) {
+ uint64 c1 = 0xcc9e2d51ull;
+ uint64 c2 = 0x1b873593ull;
+ int length = 8;
+ static uint64 UINT_MASK = 0xffffffffull;
+ uint64 lh1 = seed & UINT_MASK;
+ for (int i = 0; i < 2; i++) {
+ uint64 lk1 = ((val >> i * 32) & UINT_MASK);
+ lk1 *= c1;
+ lk1 &= UINT_MASK;
+
+ lk1 = ((lk1 << 15) & UINT_MASK) | (lk1 >> 17);
+
+ lk1 *= c2;
+ lk1 &= UINT_MASK;
+
+ lh1 ^= lk1;
+ lh1 = ((lh1 << 13) & UINT_MASK) | (lh1 >> 19);
+
+ lh1 = lh1 * 5 + 0xe6546b64L;
+ lh1 = UINT_MASK & lh1;
+ }
+ lh1 ^= length;
+
+ lh1 ^= lh1 >> 16;
+ lh1 *= 0x85ebca6bull;
+ lh1 = UINT_MASK & lh1;
+ lh1 ^= lh1 >> 13;
+ lh1 *= 0xc2b2ae35ull;
+ lh1 = UINT_MASK & lh1;
+ lh1 ^= lh1 >> 16;
+
+ return static_cast<uint32>(lh1);
+}
+
static inline uint64 double_to_long_bits(double value) {
uint64 result;
memcpy(&result, &value, sizeof(result));
@@ -83,7 +118,7 @@ FORCE_INLINE int64 hash64(double val, int64 seed) {
}
FORCE_INLINE int32 hash32(double val, int32 seed) {
- return static_cast<int32>(murmur3_64(double_to_long_bits(val), seed));
+ return murmur3_32(double_to_long_bits(val), seed);
}
// Wrappers for all the numeric/data/time arrow types
@@ -229,12 +264,73 @@ static inline uint64 murmur3_64_buf(const uint8* key, int32 len, int32 seed) {
return h1;
}
+static uint32 murmur3_32_buf(const uint8* key, int32 len, int32 seed) {
+ static const uint64 c1 = 0xcc9e2d51ull;
+ static const uint64 c2 = 0x1b873593ull;
+ static const uint64 UINT_MASK = 0xffffffffull;
+ uint64 lh1 = seed;
+ const uint32* blocks = reinterpret_cast<const uint32*>(key);
+ int nblocks = len / 4;
+ const uint8* tail = reinterpret_cast<const uint8*>(key + nblocks * 4);
+ for (int i = 0; i < nblocks; i++) {
+ uint64 lk1 = static_cast<uint64>(blocks[i]);
+
+ // k1 *= c1;
+ lk1 *= c1;
+ lk1 &= UINT_MASK;
+
+ lk1 = ((lk1 << 15) & UINT_MASK) | (lk1 >> 17);
+
+ lk1 *= c2;
+ lk1 = lk1 & UINT_MASK;
+ lh1 ^= lk1;
+ lh1 = ((lh1 << 13) & UINT_MASK) | (lh1 >> 19);
+
+ lh1 = lh1 * 5 + 0xe6546b64ull;
+ lh1 = UINT_MASK & lh1;
+ }
+
+ // tail
+ uint64 lk1 = 0;
+
+ switch (len & 3) {
+ case 3:
+ lk1 = (tail[2] & 0xff) << 16;
+ case 2:
+ lk1 |= (tail[1] & 0xff) << 8;
+ case 1:
+ lk1 |= (tail[0] & 0xff);
+ lk1 *= c1;
+ lk1 = UINT_MASK & lk1;
+ lk1 = ((lk1 << 15) & UINT_MASK) | (lk1 >> 17);
+
+ lk1 *= c2;
+ lk1 = lk1 & UINT_MASK;
+
+ lh1 ^= lk1;
+ }
+
+ // finalization
+ lh1 ^= len;
+
+ lh1 ^= lh1 >> 16;
+ lh1 *= 0x85ebca6b;
+ lh1 = UINT_MASK & lh1;
+ lh1 ^= lh1 >> 13;
+
+ lh1 *= 0xc2b2ae35;
+ lh1 = UINT_MASK & lh1;
+ lh1 ^= lh1 >> 16;
+
+ return static_cast<uint32>(lh1 & UINT_MASK);
+}
+
FORCE_INLINE int64 hash64_buf(const uint8* buf, int len, int64 seed) {
return murmur3_64_buf(buf, len, static_cast<int32>(seed));
}
FORCE_INLINE int32 hash32_buf(const uint8* buf, int len, int32 seed) {
- return static_cast<int32>(murmur3_64_buf(buf, len, seed));
+ return murmur3_32_buf(buf, len, seed);
}
// Wrappers for the varlen types