You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ra...@apache.org on 2019/06/26 01:33:24 UTC
[arrow] branch master updated: ARROW-5661: [Gandiva] [C++] support
hash functions for decimals in gandiva
This is an automated email from the ASF dual-hosted git repository.
ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new e2c3508 ARROW-5661: [Gandiva] [C++] support hash functions for decimals in gandiva
e2c3508 is described below
commit e2c35089b8b15e715ad004371ec8547abbb7a170
Author: Prudhvi Porandla <pr...@icloud.com>
AuthorDate: Wed Jun 26 07:02:58 2019 +0530
ARROW-5661: [Gandiva] [C++] support hash functions for decimals in gandiva
1. change hash functions to match Java implementation
2. hash functions for decimals
3. isnull/isnotnull, isdistinct/isnotdistinct, isnumeric for decimals
Author: Prudhvi Porandla <pr...@icloud.com>
Closes #4618 from pprudhvi/decimal-hash and squashes the following commits:
2db61e981 <Prudhvi Porandla> use EXPECT_ARROW_ARRAY_EQUALS
db7cc479d <Prudhvi Porandla> clang-format
d089ec90e <Prudhvi Porandla> Merge branch 'master' of https://github.com/apache/arrow into decimal-hash
c7ea71b0a <Prudhvi Porandla> run clang-format
9eeb24559 <Prudhvi Porandla> remove wrong scale unittest
5c525e1c7 <Prudhvi Porandla> Merge branch 'master' of https://github.com/apache/arrow into decimal-hash
5f8924524 <Prudhvi Porandla> 1. test for chained hashes 2. seed is always valid
6be7e4738 <Prudhvi Porandla> add test for isnull, isdistinct
bbb257847 <Prudhvi Porandla> Merge branch 'master' of https://github.com/apache/arrow into decimal-hash
2b0db5797 <Prudhvi Porandla> Merge branch 'master' of https://github.com/apache/arrow into decimal-hash
6e5464322 <Prudhvi Porandla> change hashWithSeed behaviour when seed or input is not set; variable name changes
eb11f971f <Prudhvi Porandla> decimal is numeric only if validity bit is set
225cbcbe7 <Prudhvi Porandla> Merge branch 'master' of https://github.com/apache/arrow into decimal-hash
c8289bbd3 <Prudhvi Porandla> use equals in is_distinct
8c6c4bb04 <Prudhvi Porandla> Merge branch 'master' of https://github.com/apache/arrow into decimal-hash
3f3820015 <Prudhvi Porandla> add isnull/isnotnull, isnumeric, isdistinct/isnotdistinct to decimal type
e45bc6093 <Prudhvi Porandla> add data, seed validity parameters
1e419b2dc <Prudhvi Porandla> change function names
e6f6c487e <Prudhvi Porandla> Merge branch 'master' of https://github.com/apache/arrow into decimal-hash
c3abc22e9 <Prudhvi Porandla> correct function names in decimal_wrapper
c0baa0284 <Prudhvi Porandla> correct function names in ir
ad5d500dd <Prudhvi Porandla> add tests for decimal hash functions
918c0231f <Prudhvi Porandla> hash functions for decimal
---
cpp/src/gandiva/decimal_ir.cc | 134 +++++++++++
cpp/src/gandiva/function_registry_common.h | 2 +-
cpp/src/gandiva/precompiled/decimal_wrapper.cc | 127 ++++++++++
cpp/src/gandiva/precompiled/hash.cc | 40 ++--
cpp/src/gandiva/tests/decimal_test.cc | 307 +++++++++++++++++++++++++
cpp/src/gandiva/tests/hash_test.cc | 4 +-
6 files changed, 595 insertions(+), 19 deletions(-)
diff --git a/cpp/src/gandiva/decimal_ir.cc b/cpp/src/gandiva/decimal_ir.cc
index bbd437d..6e4bb56 100644
--- a/cpp/src/gandiva/decimal_ir.cc
+++ b/cpp/src/gandiva/decimal_ir.cc
@@ -557,6 +557,7 @@ Status DecimalIR::AddFunctions(Engine* engine) {
auto decimal_ir = std::make_shared<DecimalIR>(engine);
auto i128 = decimal_ir->types()->i128_type();
auto i32 = decimal_ir->types()->i32_type();
+ auto i1 = decimal_ir->types()->i1_type();
auto i64 = decimal_ir->types()->i64_type();
auto f64 = decimal_ir->types()->double_type();
@@ -688,6 +689,139 @@ Status DecimalIR::AddFunctions(Engine* engine) {
{"x_precision", i32},
{"x_scale", i32},
}));
+
+ ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash_decimal128", i32,
+ {
+ {"x_value", i128},
+ {"x_precision", i32},
+ {"x_scale", i32},
+ {"x_isvalid", i1},
+ }));
+
+ ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash32_decimal128", i32,
+ {
+ {"x_value", i128},
+ {"x_precision", i32},
+ {"x_scale", i32},
+ {"x_isvalid", i1},
+ }));
+
+ ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash64_decimal128", i64,
+ {
+ {"x_value", i128},
+ {"x_precision", i32},
+ {"x_scale", i32},
+ {"x_isvalid", i1},
+ }));
+
+ ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash32WithSeed_decimal128", i32,
+ {
+ {"x_value", i128},
+ {"x_precision", i32},
+ {"x_scale", i32},
+ {"x_isvalid", i1},
+ {"seed", i32},
+ {"seed_isvalid", i1},
+ }));
+
+ ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash64WithSeed_decimal128", i64,
+ {
+ {"x_value", i128},
+ {"x_precision", i32},
+ {"x_scale", i32},
+ {"x_isvalid", i1},
+ {"seed", i64},
+ {"seed_isvalid", i1},
+ }));
+
+ ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash32AsDouble_decimal128", i32,
+ {
+ {"x_value", i128},
+ {"x_precision", i32},
+ {"x_scale", i32},
+ {"x_isvalid", i1},
+ }));
+
+ ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash64AsDouble_decimal128", i64,
+ {
+ {"x_value", i128},
+ {"x_precision", i32},
+ {"x_scale", i32},
+ {"x_isvalid", i1},
+ }));
+
+ ARROW_RETURN_NOT_OK(
+ decimal_ir->BuildDecimalFunction("hash32AsDoubleWithSeed_decimal128", i32,
+ {
+ {"x_value", i128},
+ {"x_precision", i32},
+ {"x_scale", i32},
+ {"x_isvalid", i1},
+ {"seed", i32},
+ {"seed_isvalid", i1},
+ }));
+
+ ARROW_RETURN_NOT_OK(
+ decimal_ir->BuildDecimalFunction("hash64AsDoubleWithSeed_decimal128", i64,
+ {
+ {"x_value", i128},
+ {"x_precision", i32},
+ {"x_scale", i32},
+ {"x_isvalid", i1},
+ {"seed", i64},
+ {"seed_isvalid", i1},
+ }));
+
+ ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("isnull_decimal128", i1,
+ {
+ {"x_value", i128},
+ {"x_precision", i32},
+ {"x_scale", i32},
+ {"x_isvalid", i1},
+ }));
+
+ ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("isnotnull_decimal128", i1,
+ {
+ {"x_value", i128},
+ {"x_precision", i32},
+ {"x_scale", i32},
+ {"x_isvalid", i1},
+ }));
+
+ ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("isnumeric_decimal128", i1,
+ {
+ {"x_value", i128},
+ {"x_precision", i32},
+ {"x_scale", i32},
+ {"x_isvalid", i1},
+ }));
+
+ ARROW_RETURN_NOT_OK(
+ decimal_ir->BuildDecimalFunction("is_distinct_from_decimal128_decimal128", i1,
+ {
+ {"x_value", i128},
+ {"x_precision", i32},
+ {"x_scale", i32},
+ {"x_isvalid", i1},
+ {"y_value", i128},
+ {"y_precision", i32},
+ {"y_scale", i32},
+ {"y_isvalid", i1},
+ }));
+
+ ARROW_RETURN_NOT_OK(
+ decimal_ir->BuildDecimalFunction("is_not_distinct_from_decimal128_decimal128", i1,
+ {
+ {"x_value", i128},
+ {"x_precision", i32},
+ {"x_scale", i32},
+ {"x_isvalid", i1},
+ {"y_value", i128},
+ {"y_precision", i32},
+ {"y_scale", i32},
+ {"y_isvalid", i1},
+ }));
+
return Status::OK();
}
diff --git a/cpp/src/gandiva/function_registry_common.h b/cpp/src/gandiva/function_registry_common.h
index a2ca271..f6a3d14 100644
--- a/cpp/src/gandiva/function_registry_common.h
+++ b/cpp/src/gandiva/function_registry_common.h
@@ -190,7 +190,7 @@ typedef std::unordered_map<const FunctionSignature*, const NativeFunction*, KeyH
#define NUMERIC_TYPES(INNER, NAME) \
INNER(NAME, int8), INNER(NAME, int16), INNER(NAME, int32), INNER(NAME, int64), \
INNER(NAME, uint8), INNER(NAME, uint16), INNER(NAME, uint32), INNER(NAME, uint64), \
- INNER(NAME, float32), INNER(NAME, float64)
+ INNER(NAME, float32), INNER(NAME, float64), INNER(NAME, decimal128)
// Iterate the inner macro over numeric and date/time types
#define NUMERIC_DATE_TYPES(INNER, NAME) \
diff --git a/cpp/src/gandiva/precompiled/decimal_wrapper.cc b/cpp/src/gandiva/precompiled/decimal_wrapper.cc
index 02ab915..630fe8b 100644
--- a/cpp/src/gandiva/precompiled/decimal_wrapper.cc
+++ b/cpp/src/gandiva/precompiled/decimal_wrapper.cc
@@ -231,4 +231,131 @@ void castDECIMAL_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_p
*out_low = out.low_bits();
}
+FORCE_INLINE
+int32_t hash32_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision,
+ int32_t x_scale, boolean x_isvalid) {
+ return x_isvalid
+ ? hash32_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, 0)
+ : 0;
+}
+
+FORCE_INLINE
+int32_t hash_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision,
+ int32_t x_scale, boolean x_isvalid) {
+ return hash32_decimal128_internal(x_high, x_low, x_precision, x_scale, x_isvalid);
+}
+
+FORCE_INLINE
+int64_t hash64_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision,
+ int32_t x_scale, boolean x_isvalid) {
+ return x_isvalid
+ ? hash64_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, 0)
+ : 0;
+}
+
+FORCE_INLINE
+int32_t hash32WithSeed_decimal128_internal(int64_t x_high, uint64_t x_low,
+ int32_t x_precision, int32_t x_scale,
+ boolean x_isvalid, int32_t seed,
+ boolean seed_isvalid) {
+ if (!x_isvalid) {
+ return seed;
+ }
+ return hash32_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, seed);
+}
+
+FORCE_INLINE
+int64_t hash64WithSeed_decimal128_internal(int64_t x_high, uint64_t x_low,
+ int32_t x_precision, int32_t x_scale,
+ boolean x_isvalid, int64_t seed,
+ boolean seed_isvalid) {
+ if (!x_isvalid) {
+ return seed;
+ }
+ return hash64_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, seed);
+}
+
+FORCE_INLINE
+int32_t hash32AsDouble_decimal128_internal(int64_t x_high, uint64_t x_low,
+ int32_t x_precision, int32_t x_scale,
+ boolean x_isvalid) {
+ return x_isvalid
+ ? hash32_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, 0)
+ : 0;
+}
+
+FORCE_INLINE
+int64_t hash64AsDouble_decimal128_internal(int64_t x_high, uint64_t x_low,
+ int32_t x_precision, int32_t x_scale,
+ boolean x_isvalid) {
+ return x_isvalid
+ ? hash64_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, 0)
+ : 0;
+}
+
+FORCE_INLINE
+int32_t hash32AsDoubleWithSeed_decimal128_internal(int64_t x_high, uint64_t x_low,
+ int32_t x_precision, int32_t x_scale,
+ boolean x_isvalid, int32_t seed,
+ boolean seed_isvalid) {
+ if (!x_isvalid) {
+ return seed;
+ }
+ return hash32_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, seed);
+}
+
+FORCE_INLINE
+int64_t hash64AsDoubleWithSeed_decimal128_internal(int64_t x_high, uint64_t x_low,
+ int32_t x_precision, int32_t x_scale,
+ boolean x_isvalid, int64_t seed,
+ boolean seed_isvalid) {
+ if (!x_isvalid) {
+ return seed;
+ }
+ return hash64_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, seed);
+}
+
+FORCE_INLINE
+boolean isnull_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision,
+ int32_t x_scale, boolean x_isvalid) {
+ return !x_isvalid;
+}
+
+FORCE_INLINE
+boolean isnotnull_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision,
+ int32_t x_scale, boolean x_isvalid) {
+ return x_isvalid;
+}
+
+FORCE_INLINE
+boolean isnumeric_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision,
+ int32_t x_scale, boolean x_isvalid) {
+ return x_isvalid;
+}
+
+FORCE_INLINE
+boolean is_not_distinct_from_decimal128_decimal128_internal(
+ int64_t x_high, uint64_t x_low, int32_t x_precision, int32_t x_scale,
+ boolean x_isvalid, int64_t y_high, uint64_t y_low, int32_t y_precision,
+ int32_t y_scale, boolean y_isvalid) {
+ if (x_isvalid != y_isvalid) {
+ return false;
+ }
+ if (!x_isvalid) {
+ return true;
+ }
+ return 0 == compare_internal_decimal128_decimal128(x_high, x_low, x_precision, x_scale,
+ y_high, y_low, y_precision, y_scale);
+}
+
+FORCE_INLINE
+boolean is_distinct_from_decimal128_decimal128_internal(
+ int64_t x_high, uint64_t x_low, int32_t x_precision, int32_t x_scale,
+ boolean x_isvalid, int64_t y_high, uint64_t y_low, int32_t y_precision,
+ int32_t y_scale, boolean y_isvalid) {
+ return !is_not_distinct_from_decimal128_decimal128_internal(
+ x_high, x_low, x_precision, x_scale, x_isvalid, y_high, y_low, y_precision, y_scale,
+ y_isvalid);
+}
+
} // extern "C"
diff --git a/cpp/src/gandiva/precompiled/hash.cc b/cpp/src/gandiva/precompiled/hash.cc
index bd884a9..073a909 100644
--- a/cpp/src/gandiva/precompiled/hash.cc
+++ b/cpp/src/gandiva/precompiled/hash.cc
@@ -126,13 +126,19 @@ FORCE_INLINE int32 hash32(double val, int32 seed) {
#define HASH64_WITH_SEED_OP(NAME, TYPE) \
FORCE_INLINE \
int64 NAME##_##TYPE(TYPE in, boolean is_valid, int64 seed, boolean seed_isvalid) { \
- return is_valid && seed_isvalid ? hash64(static_cast<double>(in), seed) : 0; \
+ if (!is_valid) { \
+ return seed; \
+ } \
+ return hash64(static_cast<double>(in), seed); \
}
#define HASH32_WITH_SEED_OP(NAME, TYPE) \
FORCE_INLINE \
int32 NAME##_##TYPE(TYPE in, boolean is_valid, int32 seed, boolean seed_isvalid) { \
- return is_valid && seed_isvalid ? hash32(static_cast<double>(in), seed) : 0; \
+ if (!is_valid) { \
+ return seed; \
+ } \
+ return hash32(static_cast<double>(in), seed); \
}
#define HASH64_OP(NAME, TYPE) \
@@ -335,22 +341,24 @@ FORCE_INLINE int32 hash32_buf(const uint8* buf, int len, int32 seed) {
// Wrappers for the varlen types
-#define HASH64_BUF_WITH_SEED_OP(NAME, TYPE) \
- FORCE_INLINE \
- int64 NAME##_##TYPE(TYPE in, int32 len, boolean is_valid, int64 seed, \
- boolean seed_isvalid) { \
- return is_valid && seed_isvalid \
- ? hash64_buf(reinterpret_cast<const uint8_t*>(in), len, seed) \
- : 0; \
+#define HASH64_BUF_WITH_SEED_OP(NAME, TYPE) \
+ FORCE_INLINE \
+ int64 NAME##_##TYPE(TYPE in, int32 len, boolean is_valid, int64 seed, \
+ boolean seed_isvalid) { \
+ if (!is_valid) { \
+ return seed; \
+ } \
+ return hash64_buf(reinterpret_cast<const uint8_t*>(in), len, seed); \
}
-#define HASH32_BUF_WITH_SEED_OP(NAME, TYPE) \
- FORCE_INLINE \
- int32 NAME##_##TYPE(TYPE in, int32 len, boolean is_valid, int32 seed, \
- boolean seed_isvalid) { \
- return is_valid && seed_isvalid \
- ? hash32_buf(reinterpret_cast<const uint8_t*>(in), len, seed) \
- : 0; \
+#define HASH32_BUF_WITH_SEED_OP(NAME, TYPE) \
+ FORCE_INLINE \
+ int32 NAME##_##TYPE(TYPE in, int32 len, boolean is_valid, int32 seed, \
+ boolean seed_isvalid) { \
+ if (!is_valid) { \
+ return seed; \
+ } \
+ return hash32_buf(reinterpret_cast<const uint8_t*>(in), len, seed); \
}
#define HASH64_BUF_OP(NAME, TYPE) \
diff --git a/cpp/src/gandiva/tests/decimal_test.cc b/cpp/src/gandiva/tests/decimal_test.cc
index 5fa32f1..9941fea 100644
--- a/cpp/src/gandiva/tests/decimal_test.cc
+++ b/cpp/src/gandiva/tests/decimal_test.cc
@@ -480,4 +480,311 @@ TEST_F(TestDecimal, TestCastFunctions) {
EXPECT_ARROW_ARRAY_EQUALS(array_float64, outputs[4]);
}
+// isnull, isnumeric
+TEST_F(TestDecimal, TestIsNullNumericFunctions) {
+ // schema for input fields
+ constexpr int32_t precision = 38;
+ constexpr int32_t scale = 2;
+ auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
+ auto field_dec = field("dec", decimal_type);
+ auto schema = arrow::schema({field_dec});
+
+ // build expressions
+ auto exprs = std::vector<ExpressionPtr>{
+ TreeExprBuilder::MakeExpression("isnull", {field_dec},
+ field("isnull", arrow::boolean())),
+
+ TreeExprBuilder::MakeExpression("isnotnull", {field_dec},
+ field("isnotnull", arrow::boolean())),
+ TreeExprBuilder::MakeExpression("isnumeric", {field_dec},
+ field("isnumeric", arrow::boolean()))};
+
+ // Build a projector for the expression.
+ std::shared_ptr<Projector> projector;
+ auto status = Projector::Make(schema, exprs, TestConfiguration(), &projector);
+ DCHECK_OK(status);
+
+ // Create a row-batch with some sample data
+ int num_records = 5;
+ auto validity = {false, true, true, true, false};
+
+ auto array_dec = MakeArrowArrayDecimal(
+ decimal_type, MakeDecimalVector({"1.51", "1.23", "1.23", "-1.23", "-1.24"}, scale),
+ validity);
+
+ // prepare input record batch
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_dec});
+
+ // Evaluate expression
+ arrow::ArrayVector outputs;
+ status = projector->Evaluate(*in_batch, pool_, &outputs);
+ DCHECK_OK(status);
+
+ // Validate results
+ auto is_null = outputs.at(0);
+ auto is_not_null = outputs.at(1);
+ auto is_numeric = outputs.at(2);
+
+ // isnull
+ EXPECT_ARROW_ARRAY_EQUALS(MakeArrowArrayBool({true, false, false, false, true}),
+ outputs[0]);
+
+ // isnotnull
+ EXPECT_ARROW_ARRAY_EQUALS(MakeArrowArrayBool(validity), outputs[1]);
+
+ // isnumeric
+ EXPECT_ARROW_ARRAY_EQUALS(MakeArrowArrayBool(validity), outputs[2]);
+}
+
+TEST_F(TestDecimal, TestIsDistinct) {
+ // schema for input fields
+ constexpr int32_t precision = 38;
+ constexpr int32_t scale_1 = 2;
+ auto decimal_type_1 = std::make_shared<arrow::Decimal128Type>(precision, scale_1);
+ auto field_dec_1 = field("dec_1", decimal_type_1);
+ constexpr int32_t scale_2 = 1;
+ auto decimal_type_2 = std::make_shared<arrow::Decimal128Type>(precision, scale_2);
+ auto field_dec_2 = field("dec_2", decimal_type_2);
+
+ auto schema = arrow::schema({field_dec_1, field_dec_2});
+
+ // build expressions
+ auto exprs = std::vector<ExpressionPtr>{
+ TreeExprBuilder::MakeExpression("is_distinct_from", {field_dec_1, field_dec_2},
+ field("isdistinct", arrow::boolean())),
+
+ TreeExprBuilder::MakeExpression("is_not_distinct_from", {field_dec_1, field_dec_2},
+ field("isnotdistinct", arrow::boolean()))};
+
+ // Build a projector for the expression.
+ std::shared_ptr<Projector> projector;
+ auto status = Projector::Make(schema, exprs, TestConfiguration(), &projector);
+ DCHECK_OK(status);
+
+ // Create a row-batch with some sample data
+ int num_records = 4;
+
+ auto validity_1 = {true, false, true, true};
+ auto array_dec_1 = MakeArrowArrayDecimal(
+ decimal_type_1, MakeDecimalVector({"1.51", "1.23", "1.20", "-1.20"}, scale_1),
+ validity_1);
+
+ auto validity_2 = {true, false, false, true};
+ auto array_dec_2 = MakeArrowArrayDecimal(
+ decimal_type_2, MakeDecimalVector({"1.5", "1.2", "1.2", "-1.2"}, scale_2),
+ validity_2);
+
+ // prepare input record batch
+ auto in_batch =
+ arrow::RecordBatch::Make(schema, num_records, {array_dec_1, array_dec_2});
+
+ // Evaluate expression
+ arrow::ArrayVector outputs;
+ status = projector->Evaluate(*in_batch, pool_, &outputs);
+ DCHECK_OK(status);
+
+ // Validate results
+ auto is_distinct = std::dynamic_pointer_cast<arrow::BooleanArray>(outputs.at(0));
+ auto is_not_distinct = std::dynamic_pointer_cast<arrow::BooleanArray>(outputs.at(1));
+
+ // isdistinct
+ EXPECT_ARROW_ARRAY_EQUALS(MakeArrowArrayBool({true, false, true, false}), outputs[0]);
+
+ // isnotdistinct
+ EXPECT_ARROW_ARRAY_EQUALS(MakeArrowArrayBool({false, true, false, true}), outputs[1]);
+}
+
+// decimal hashes without seed
+TEST_F(TestDecimal, TestHashFunctions) {
+ // schema for input fields
+ constexpr int32_t precision = 38;
+ constexpr int32_t scale = 2;
+ auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
+ auto field_dec = field("dec", decimal_type);
+ auto literal_seed32 = TreeExprBuilder::MakeLiteral((int32_t)10);
+ auto literal_seed64 = TreeExprBuilder::MakeLiteral((int64_t)10);
+ auto schema = arrow::schema({field_dec});
+
+ // build expressions
+ auto exprs = std::vector<ExpressionPtr>{
+ TreeExprBuilder::MakeExpression("hash", {field_dec},
+ field("hash_of_dec", arrow::int32())),
+
+ TreeExprBuilder::MakeExpression("hash64", {field_dec},
+ field("hash64_of_dec", arrow::int64())),
+
+ TreeExprBuilder::MakeExpression("hash32AsDouble", {field_dec},
+ field("hash32_as_double", arrow::int32())),
+
+ TreeExprBuilder::MakeExpression("hash64AsDouble", {field_dec},
+ field("hash64_as_double", arrow::int64()))};
+
+ // Build a projector for the expression.
+ std::shared_ptr<Projector> projector;
+ auto status = Projector::Make(schema, exprs, TestConfiguration(), &projector);
+ DCHECK_OK(status);
+
+ // Create a row-batch with some sample data
+ int num_records = 5;
+ auto validity = {false, true, true, true, true};
+
+ auto array_dec = MakeArrowArrayDecimal(
+ decimal_type, MakeDecimalVector({"1.51", "1.23", "1.23", "-1.23", "-1.24"}, scale),
+ validity);
+
+ // prepare input record batch
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_dec});
+
+ // Evaluate expression
+ arrow::ArrayVector outputs;
+ status = projector->Evaluate(*in_batch, pool_, &outputs);
+ DCHECK_OK(status);
+
+ // Validate results
+ auto int32_arr = std::dynamic_pointer_cast<arrow::Int32Array>(outputs.at(0));
+ EXPECT_EQ(int32_arr->null_count(), 0);
+ EXPECT_EQ(int32_arr->Value(0), 0);
+ EXPECT_EQ(int32_arr->Value(1), int32_arr->Value(2));
+ EXPECT_NE(int32_arr->Value(2), int32_arr->Value(3));
+ EXPECT_NE(int32_arr->Value(3), int32_arr->Value(4));
+
+ auto int64_arr = std::dynamic_pointer_cast<arrow::Int64Array>(outputs.at(1));
+ EXPECT_EQ(int64_arr->null_count(), 0);
+ EXPECT_EQ(int64_arr->Value(0), 0);
+ EXPECT_EQ(int64_arr->Value(1), int64_arr->Value(2));
+ EXPECT_NE(int64_arr->Value(2), int64_arr->Value(3));
+ EXPECT_NE(int64_arr->Value(3), int64_arr->Value(4));
+}
+
+TEST_F(TestDecimal, TestHash32WithSeed) {
+ constexpr int32_t precision = 38;
+ constexpr int32_t scale = 2;
+ auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
+ auto field_dec_1 = field("dec1", decimal_type);
+ auto field_dec_2 = field("dec2", decimal_type);
+ auto schema = arrow::schema({field_dec_1, field_dec_2});
+
+ auto res = field("hash32_with_seed", arrow::int32());
+
+ auto field_1_nodePtr = TreeExprBuilder::MakeField(field_dec_1);
+ auto field_2_nodePtr = TreeExprBuilder::MakeField(field_dec_2);
+
+ auto hash32 =
+ TreeExprBuilder::MakeFunction("hash32", {field_2_nodePtr}, arrow::int32());
+ auto hash32_with_seed =
+ TreeExprBuilder::MakeFunction("hash32", {field_1_nodePtr, hash32}, arrow::int32());
+ auto expr = TreeExprBuilder::MakeExpression(hash32, field("hash32", arrow::int32()));
+ auto exprWS = TreeExprBuilder::MakeExpression(hash32_with_seed, res);
+
+ auto exprs = std::vector<ExpressionPtr>{expr, exprWS};
+
+ // Build a projector for the expression.
+ std::shared_ptr<Projector> projector;
+ auto status = Projector::Make(schema, exprs, TestConfiguration(), &projector);
+ DCHECK_OK(status);
+
+ // Create a row-batch with some sample data
+ int num_records = 5;
+ auto validity_1 = {false, false, true, true, true};
+
+ auto array_dec_1 = MakeArrowArrayDecimal(
+ decimal_type, MakeDecimalVector({"1.51", "1.23", "1.23", "-1.23", "-1.24"}, scale),
+ validity_1);
+
+ auto validity_2 = {false, true, false, true, true};
+
+ auto array_dec_2 = MakeArrowArrayDecimal(
+ decimal_type, MakeDecimalVector({"1.51", "1.23", "1.23", "-1.23", "-1.24"}, scale),
+ validity_2);
+
+ // prepare input record batch
+ auto in_batch =
+ arrow::RecordBatch::Make(schema, num_records, {array_dec_1, array_dec_2});
+
+ // Evaluate expression
+ arrow::ArrayVector outputs;
+ status = projector->Evaluate(*in_batch, pool_, &outputs);
+ DCHECK_OK(status);
+
+ // Validate results
+ auto int32_arr = std::dynamic_pointer_cast<arrow::Int32Array>(outputs.at(0));
+ auto int32_arr_WS = std::dynamic_pointer_cast<arrow::Int32Array>(outputs.at(1));
+ EXPECT_EQ(int32_arr->null_count(), 0);
+ // seed 0, null decimal
+ EXPECT_EQ(int32_arr_WS->Value(0), 0);
+ // null decimal => hash = seed
+ EXPECT_EQ(int32_arr_WS->Value(1), int32_arr->Value(1));
+ // seed = 0 => hash = hash without seed
+ EXPECT_EQ(int32_arr_WS->Value(2), int32_arr->Value(1));
+ // different inputs => different outputs
+ EXPECT_NE(int32_arr_WS->Value(3), int32_arr_WS->Value(4));
+ // hash with, without seed are not equal
+ EXPECT_NE(int32_arr_WS->Value(4), int32_arr->Value(4));
+}
+
+TEST_F(TestDecimal, TestHash64WithSeed) {
+ constexpr int32_t precision = 38;
+ constexpr int32_t scale = 2;
+ auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
+ auto field_dec_1 = field("dec1", decimal_type);
+ auto field_dec_2 = field("dec2", decimal_type);
+ auto schema = arrow::schema({field_dec_1, field_dec_2});
+
+ auto res = field("hash64_with_seed", arrow::int64());
+
+ auto field_1_nodePtr = TreeExprBuilder::MakeField(field_dec_1);
+ auto field_2_nodePtr = TreeExprBuilder::MakeField(field_dec_2);
+
+ auto hash64 =
+ TreeExprBuilder::MakeFunction("hash64", {field_2_nodePtr}, arrow::int64());
+ auto hash64_with_seed =
+ TreeExprBuilder::MakeFunction("hash64", {field_1_nodePtr, hash64}, arrow::int64());
+ auto expr = TreeExprBuilder::MakeExpression(hash64, field("hash64", arrow::int64()));
+ auto exprWS = TreeExprBuilder::MakeExpression(hash64_with_seed, res);
+
+ auto exprs = std::vector<ExpressionPtr>{expr, exprWS};
+
+ // Build a projector for the expression.
+ std::shared_ptr<Projector> projector;
+ auto status = Projector::Make(schema, exprs, TestConfiguration(), &projector);
+ DCHECK_OK(status);
+
+ // Create a row-batch with some sample data
+ int num_records = 5;
+ auto validity_1 = {false, false, true, true, true};
+
+ auto array_dec_1 = MakeArrowArrayDecimal(
+ decimal_type, MakeDecimalVector({"1.51", "1.23", "1.23", "-1.23", "-1.24"}, scale),
+ validity_1);
+
+ auto validity_2 = {false, true, false, true, true};
+
+ auto array_dec_2 = MakeArrowArrayDecimal(
+ decimal_type, MakeDecimalVector({"1.51", "1.23", "1.23", "-1.23", "-1.24"}, scale),
+ validity_2);
+
+ // prepare input record batch
+ auto in_batch =
+ arrow::RecordBatch::Make(schema, num_records, {array_dec_1, array_dec_2});
+
+ // Evaluate expression
+ arrow::ArrayVector outputs;
+ status = projector->Evaluate(*in_batch, pool_, &outputs);
+ DCHECK_OK(status);
+
+ // Validate results
+ auto int64_arr = std::dynamic_pointer_cast<arrow::Int64Array>(outputs.at(0));
+ auto int64_arr_WS = std::dynamic_pointer_cast<arrow::Int64Array>(outputs.at(1));
+ EXPECT_EQ(int64_arr->null_count(), 0);
+ // seed 0, null decimal
+ EXPECT_EQ(int64_arr_WS->Value(0), 0);
+ // null decimal => hash = seed
+ EXPECT_EQ(int64_arr_WS->Value(1), int64_arr->Value(1));
+ // seed = 0 => hash = hash without seed
+ EXPECT_EQ(int64_arr_WS->Value(2), int64_arr->Value(1));
+ // different inputs => different outputs
+ EXPECT_NE(int64_arr_WS->Value(3), int64_arr_WS->Value(4));
+ // hash with, without seed are not equal
+ EXPECT_NE(int64_arr_WS->Value(4), int64_arr->Value(4));
+}
} // namespace gandiva
diff --git a/cpp/src/gandiva/tests/hash_test.cc b/cpp/src/gandiva/tests/hash_test.cc
index afaa885..91356f5 100644
--- a/cpp/src/gandiva/tests/hash_test.cc
+++ b/cpp/src/gandiva/tests/hash_test.cc
@@ -80,7 +80,7 @@ TEST_F(TestHash, TestSimple) {
// Validate results
auto int32_arr = std::dynamic_pointer_cast<arrow::Int32Array>(outputs.at(0));
EXPECT_EQ(int32_arr->null_count(), 0);
- EXPECT_EQ(int32_arr->Value(0), 0);
+ EXPECT_EQ(int32_arr->Value(0), 10);
for (int i = 1; i < num_records; ++i) {
EXPECT_NE(int32_arr->Value(i), int32_arr->Value(i - 1));
}
@@ -141,7 +141,7 @@ TEST_F(TestHash, TestBuf) {
auto int64_arr = std::dynamic_pointer_cast<arrow::Int64Array>(outputs.at(1));
EXPECT_EQ(int64_arr->null_count(), 0);
- EXPECT_EQ(int64_arr->Value(0), 0);
+ EXPECT_EQ(int64_arr->Value(0), 10);
for (int i = 1; i < num_records; ++i) {
EXPECT_NE(int64_arr->Value(i), int64_arr->Value(i - 1));
}