You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ra...@apache.org on 2019/06/26 01:33:24 UTC

[arrow] branch master updated: ARROW-5661: [Gandiva] [C++] support hash functions for decimals in gandiva

This is an automated email from the ASF dual-hosted git repository.

ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new e2c3508  ARROW-5661: [Gandiva] [C++] support hash functions for decimals in gandiva
e2c3508 is described below

commit e2c35089b8b15e715ad004371ec8547abbb7a170
Author: Prudhvi Porandla <pr...@icloud.com>
AuthorDate: Wed Jun 26 07:02:58 2019 +0530

    ARROW-5661: [Gandiva] [C++] support hash functions for decimals in gandiva
    
    1. change hash functions to match java implementation
    2. hash functions for decimals
    3. isnull/isnotnull, isdistinct/isnotdistinct, isnumeric for decimals
    
    Author: Prudhvi Porandla <pr...@icloud.com>
    
    Closes #4618 from pprudhvi/decimal-hash and squashes the following commits:
    
    2db61e981 <Prudhvi Porandla> use EXPECT_ARROW_ARRAY_EQUALS
    db7cc479d <Prudhvi Porandla> clang-format
    d089ec90e <Prudhvi Porandla> Merge branch 'master' of https://github.com/apache/arrow into decimal-hash
    c7ea71b0a <Prudhvi Porandla> run clang-format
    9eeb24559 <Prudhvi Porandla> remove wrong scale unittest
    5c525e1c7 <Prudhvi Porandla> Merge branch 'master' of https://github.com/apache/arrow into decimal-hash
    5f8924524 <Prudhvi Porandla> 1. test for chained hashes 2. seed is always valid
    6be7e4738 <Prudhvi Porandla> add test for isnull, isdistinct
    bbb257847 <Prudhvi Porandla> Merge branch 'master' of https://github.com/apache/arrow into decimal-hash
    2b0db5797 <Prudhvi Porandla> Merge branch 'master' of https://github.com/apache/arrow into decimal-hash
    6e5464322 <Prudhvi Porandla> change hashWithSeed behaviour when seed or input is not set; variable name changes
    eb11f971f <Prudhvi Porandla> decimal is numeric only if validity bit is set
    225cbcbe7 <Prudhvi Porandla> Merge branch 'master' of https://github.com/apache/arrow into decimal-hash
    c8289bbd3 <Prudhvi Porandla> use equals in is_distinct
    8c6c4bb04 <Prudhvi Porandla> Merge branch 'master' of https://github.com/apache/arrow into decimal-hash
    3f3820015 <Prudhvi Porandla> add isnull/isnotnull, isnumeric, isdistinct/isnotdistinct to decimal type
    e45bc6093 <Prudhvi Porandla> add data, seed validity parameters
    1e419b2dc <Prudhvi Porandla> change function names
    e6f6c487e <Prudhvi Porandla> Merge branch 'master' of https://github.com/apache/arrow into decimal-hash
    c3abc22e9 <Prudhvi Porandla> correct function names in decimal_wrapper
    c0baa0284 <Prudhvi Porandla> correct function names in ir
    ad5d500dd <Prudhvi Porandla> add tests for decimal hash functions
    918c0231f <Prudhvi Porandla> hash functions for decimal
---
 cpp/src/gandiva/decimal_ir.cc                  | 134 +++++++++++
 cpp/src/gandiva/function_registry_common.h     |   2 +-
 cpp/src/gandiva/precompiled/decimal_wrapper.cc | 127 ++++++++++
 cpp/src/gandiva/precompiled/hash.cc            |  40 ++--
 cpp/src/gandiva/tests/decimal_test.cc          | 307 +++++++++++++++++++++++++
 cpp/src/gandiva/tests/hash_test.cc             |   4 +-
 6 files changed, 595 insertions(+), 19 deletions(-)

diff --git a/cpp/src/gandiva/decimal_ir.cc b/cpp/src/gandiva/decimal_ir.cc
index bbd437d..6e4bb56 100644
--- a/cpp/src/gandiva/decimal_ir.cc
+++ b/cpp/src/gandiva/decimal_ir.cc
@@ -557,6 +557,7 @@ Status DecimalIR::AddFunctions(Engine* engine) {
   auto decimal_ir = std::make_shared<DecimalIR>(engine);
   auto i128 = decimal_ir->types()->i128_type();
   auto i32 = decimal_ir->types()->i32_type();
+  auto i1 = decimal_ir->types()->i1_type();
   auto i64 = decimal_ir->types()->i64_type();
   auto f64 = decimal_ir->types()->double_type();
 
@@ -688,6 +689,139 @@ Status DecimalIR::AddFunctions(Engine* engine) {
                                                            {"x_precision", i32},
                                                            {"x_scale", i32},
                                                        }));
+
+  ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash_decimal128", i32,
+                                                       {
+                                                           {"x_value", i128},
+                                                           {"x_precision", i32},
+                                                           {"x_scale", i32},
+                                                           {"x_isvalid", i1},
+                                                       }));
+
+  ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash32_decimal128", i32,
+                                                       {
+                                                           {"x_value", i128},
+                                                           {"x_precision", i32},
+                                                           {"x_scale", i32},
+                                                           {"x_isvalid", i1},
+                                                       }));
+
+  ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash64_decimal128", i64,
+                                                       {
+                                                           {"x_value", i128},
+                                                           {"x_precision", i32},
+                                                           {"x_scale", i32},
+                                                           {"x_isvalid", i1},
+                                                       }));
+
+  ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash32WithSeed_decimal128", i32,
+                                                       {
+                                                           {"x_value", i128},
+                                                           {"x_precision", i32},
+                                                           {"x_scale", i32},
+                                                           {"x_isvalid", i1},
+                                                           {"seed", i32},
+                                                           {"seed_isvalid", i1},
+                                                       }));
+
+  ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash64WithSeed_decimal128", i64,
+                                                       {
+                                                           {"x_value", i128},
+                                                           {"x_precision", i32},
+                                                           {"x_scale", i32},
+                                                           {"x_isvalid", i1},
+                                                           {"seed", i64},
+                                                           {"seed_isvalid", i1},
+                                                       }));
+
+  ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash32AsDouble_decimal128", i32,
+                                                       {
+                                                           {"x_value", i128},
+                                                           {"x_precision", i32},
+                                                           {"x_scale", i32},
+                                                           {"x_isvalid", i1},
+                                                       }));
+
+  ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash64AsDouble_decimal128", i64,
+                                                       {
+                                                           {"x_value", i128},
+                                                           {"x_precision", i32},
+                                                           {"x_scale", i32},
+                                                           {"x_isvalid", i1},
+                                                       }));
+
+  ARROW_RETURN_NOT_OK(
+      decimal_ir->BuildDecimalFunction("hash32AsDoubleWithSeed_decimal128", i32,
+                                       {
+                                           {"x_value", i128},
+                                           {"x_precision", i32},
+                                           {"x_scale", i32},
+                                           {"x_isvalid", i1},
+                                           {"seed", i32},
+                                           {"seed_isvalid", i1},
+                                       }));
+
+  ARROW_RETURN_NOT_OK(
+      decimal_ir->BuildDecimalFunction("hash64AsDoubleWithSeed_decimal128", i64,
+                                       {
+                                           {"x_value", i128},
+                                           {"x_precision", i32},
+                                           {"x_scale", i32},
+                                           {"x_isvalid", i1},
+                                           {"seed", i64},
+                                           {"seed_isvalid", i1},
+                                       }));
+
+  ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("isnull_decimal128", i1,
+                                                       {
+                                                           {"x_value", i128},
+                                                           {"x_precision", i32},
+                                                           {"x_scale", i32},
+                                                           {"x_isvalid", i1},
+                                                       }));
+
+  ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("isnotnull_decimal128", i1,
+                                                       {
+                                                           {"x_value", i128},
+                                                           {"x_precision", i32},
+                                                           {"x_scale", i32},
+                                                           {"x_isvalid", i1},
+                                                       }));
+
+  ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("isnumeric_decimal128", i1,
+                                                       {
+                                                           {"x_value", i128},
+                                                           {"x_precision", i32},
+                                                           {"x_scale", i32},
+                                                           {"x_isvalid", i1},
+                                                       }));
+
+  ARROW_RETURN_NOT_OK(
+      decimal_ir->BuildDecimalFunction("is_distinct_from_decimal128_decimal128", i1,
+                                       {
+                                           {"x_value", i128},
+                                           {"x_precision", i32},
+                                           {"x_scale", i32},
+                                           {"x_isvalid", i1},
+                                           {"y_value", i128},
+                                           {"y_precision", i32},
+                                           {"y_scale", i32},
+                                           {"y_isvalid", i1},
+                                       }));
+
+  ARROW_RETURN_NOT_OK(
+      decimal_ir->BuildDecimalFunction("is_not_distinct_from_decimal128_decimal128", i1,
+                                       {
+                                           {"x_value", i128},
+                                           {"x_precision", i32},
+                                           {"x_scale", i32},
+                                           {"x_isvalid", i1},
+                                           {"y_value", i128},
+                                           {"y_precision", i32},
+                                           {"y_scale", i32},
+                                           {"y_isvalid", i1},
+                                       }));
+
   return Status::OK();
 }
 
diff --git a/cpp/src/gandiva/function_registry_common.h b/cpp/src/gandiva/function_registry_common.h
index a2ca271..f6a3d14 100644
--- a/cpp/src/gandiva/function_registry_common.h
+++ b/cpp/src/gandiva/function_registry_common.h
@@ -190,7 +190,7 @@ typedef std::unordered_map<const FunctionSignature*, const NativeFunction*, KeyH
 #define NUMERIC_TYPES(INNER, NAME)                                                       \
   INNER(NAME, int8), INNER(NAME, int16), INNER(NAME, int32), INNER(NAME, int64),         \
       INNER(NAME, uint8), INNER(NAME, uint16), INNER(NAME, uint32), INNER(NAME, uint64), \
-      INNER(NAME, float32), INNER(NAME, float64)
+      INNER(NAME, float32), INNER(NAME, float64), INNER(NAME, decimal128)
 
 // Iterate the inner macro over numeric and date/time types
 #define NUMERIC_DATE_TYPES(INNER, NAME) \
diff --git a/cpp/src/gandiva/precompiled/decimal_wrapper.cc b/cpp/src/gandiva/precompiled/decimal_wrapper.cc
index 02ab915..630fe8b 100644
--- a/cpp/src/gandiva/precompiled/decimal_wrapper.cc
+++ b/cpp/src/gandiva/precompiled/decimal_wrapper.cc
@@ -231,4 +231,131 @@ void castDECIMAL_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_p
   *out_low = out.low_bits();
 }
 
+FORCE_INLINE
+int32_t hash32_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                                   int32_t x_scale, boolean x_isvalid) {
+  return x_isvalid
+             ? hash32_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, 0)
+             : 0;
+}
+
+FORCE_INLINE
+int32_t hash_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                                 int32_t x_scale, boolean x_isvalid) {
+  return hash32_decimal128_internal(x_high, x_low, x_precision, x_scale, x_isvalid);
+}
+
+FORCE_INLINE
+int64_t hash64_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                                   int32_t x_scale, boolean x_isvalid) {
+  return x_isvalid
+             ? hash64_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, 0)
+             : 0;
+}
+
+FORCE_INLINE
+int32_t hash32WithSeed_decimal128_internal(int64_t x_high, uint64_t x_low,
+                                           int32_t x_precision, int32_t x_scale,
+                                           boolean x_isvalid, int32_t seed,
+                                           boolean seed_isvalid) {
+  if (!x_isvalid) {
+    return seed;
+  }
+  return hash32_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, seed);
+}
+
+FORCE_INLINE
+int64_t hash64WithSeed_decimal128_internal(int64_t x_high, uint64_t x_low,
+                                           int32_t x_precision, int32_t x_scale,
+                                           boolean x_isvalid, int64_t seed,
+                                           boolean seed_isvalid) {
+  if (!x_isvalid) {
+    return seed;
+  }
+  return hash64_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, seed);
+}
+
+FORCE_INLINE
+int32_t hash32AsDouble_decimal128_internal(int64_t x_high, uint64_t x_low,
+                                           int32_t x_precision, int32_t x_scale,
+                                           boolean x_isvalid) {
+  return x_isvalid
+             ? hash32_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, 0)
+             : 0;
+}
+
+FORCE_INLINE
+int64_t hash64AsDouble_decimal128_internal(int64_t x_high, uint64_t x_low,
+                                           int32_t x_precision, int32_t x_scale,
+                                           boolean x_isvalid) {
+  return x_isvalid
+             ? hash64_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, 0)
+             : 0;
+}
+
+FORCE_INLINE
+int32_t hash32AsDoubleWithSeed_decimal128_internal(int64_t x_high, uint64_t x_low,
+                                                   int32_t x_precision, int32_t x_scale,
+                                                   boolean x_isvalid, int32_t seed,
+                                                   boolean seed_isvalid) {
+  if (!x_isvalid) {
+    return seed;
+  }
+  return hash32_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, seed);
+}
+
+FORCE_INLINE
+int64_t hash64AsDoubleWithSeed_decimal128_internal(int64_t x_high, uint64_t x_low,
+                                                   int32_t x_precision, int32_t x_scale,
+                                                   boolean x_isvalid, int64_t seed,
+                                                   boolean seed_isvalid) {
+  if (!x_isvalid) {
+    return seed;
+  }
+  return hash64_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, seed);
+}
+
+FORCE_INLINE
+boolean isnull_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                                   int32_t x_scale, boolean x_isvalid) {
+  return !x_isvalid;
+}
+
+FORCE_INLINE
+boolean isnotnull_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                                      int32_t x_scale, boolean x_isvalid) {
+  return x_isvalid;
+}
+
+FORCE_INLINE
+boolean isnumeric_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                                      int32_t x_scale, boolean x_isvalid) {
+  return x_isvalid;
+}
+
+FORCE_INLINE
+boolean is_not_distinct_from_decimal128_decimal128_internal(
+    int64_t x_high, uint64_t x_low, int32_t x_precision, int32_t x_scale,
+    boolean x_isvalid, int64_t y_high, uint64_t y_low, int32_t y_precision,
+    int32_t y_scale, boolean y_isvalid) {
+  if (x_isvalid != y_isvalid) {
+    return false;
+  }
+  if (!x_isvalid) {
+    return true;
+  }
+  return 0 == compare_internal_decimal128_decimal128(x_high, x_low, x_precision, x_scale,
+                                                     y_high, y_low, y_precision, y_scale);
+}
+
+FORCE_INLINE
+boolean is_distinct_from_decimal128_decimal128_internal(
+    int64_t x_high, uint64_t x_low, int32_t x_precision, int32_t x_scale,
+    boolean x_isvalid, int64_t y_high, uint64_t y_low, int32_t y_precision,
+    int32_t y_scale, boolean y_isvalid) {
+  return !is_not_distinct_from_decimal128_decimal128_internal(
+      x_high, x_low, x_precision, x_scale, x_isvalid, y_high, y_low, y_precision, y_scale,
+      y_isvalid);
+}
+
 }  // extern "C"
diff --git a/cpp/src/gandiva/precompiled/hash.cc b/cpp/src/gandiva/precompiled/hash.cc
index bd884a9..073a909 100644
--- a/cpp/src/gandiva/precompiled/hash.cc
+++ b/cpp/src/gandiva/precompiled/hash.cc
@@ -126,13 +126,19 @@ FORCE_INLINE int32 hash32(double val, int32 seed) {
 #define HASH64_WITH_SEED_OP(NAME, TYPE)                                              \
   FORCE_INLINE                                                                       \
   int64 NAME##_##TYPE(TYPE in, boolean is_valid, int64 seed, boolean seed_isvalid) { \
-    return is_valid && seed_isvalid ? hash64(static_cast<double>(in), seed) : 0;     \
+    if (!is_valid) {                                                                 \
+      return seed;                                                                   \
+    }                                                                                \
+    return hash64(static_cast<double>(in), seed);                                    \
   }
 
 #define HASH32_WITH_SEED_OP(NAME, TYPE)                                              \
   FORCE_INLINE                                                                       \
   int32 NAME##_##TYPE(TYPE in, boolean is_valid, int32 seed, boolean seed_isvalid) { \
-    return is_valid && seed_isvalid ? hash32(static_cast<double>(in), seed) : 0;     \
+    if (!is_valid) {                                                                 \
+      return seed;                                                                   \
+    }                                                                                \
+    return hash32(static_cast<double>(in), seed);                                    \
   }
 
 #define HASH64_OP(NAME, TYPE)                                 \
@@ -335,22 +341,24 @@ FORCE_INLINE int32 hash32_buf(const uint8* buf, int len, int32 seed) {
 
 // Wrappers for the varlen types
 
-#define HASH64_BUF_WITH_SEED_OP(NAME, TYPE)                                  \
-  FORCE_INLINE                                                               \
-  int64 NAME##_##TYPE(TYPE in, int32 len, boolean is_valid, int64 seed,      \
-                      boolean seed_isvalid) {                                \
-    return is_valid && seed_isvalid                                          \
-               ? hash64_buf(reinterpret_cast<const uint8_t*>(in), len, seed) \
-               : 0;                                                          \
+#define HASH64_BUF_WITH_SEED_OP(NAME, TYPE)                             \
+  FORCE_INLINE                                                          \
+  int64 NAME##_##TYPE(TYPE in, int32 len, boolean is_valid, int64 seed, \
+                      boolean seed_isvalid) {                           \
+    if (!is_valid) {                                                    \
+      return seed;                                                      \
+    }                                                                   \
+    return hash64_buf(reinterpret_cast<const uint8_t*>(in), len, seed); \
   }
 
-#define HASH32_BUF_WITH_SEED_OP(NAME, TYPE)                                  \
-  FORCE_INLINE                                                               \
-  int32 NAME##_##TYPE(TYPE in, int32 len, boolean is_valid, int32 seed,      \
-                      boolean seed_isvalid) {                                \
-    return is_valid && seed_isvalid                                          \
-               ? hash32_buf(reinterpret_cast<const uint8_t*>(in), len, seed) \
-               : 0;                                                          \
+#define HASH32_BUF_WITH_SEED_OP(NAME, TYPE)                             \
+  FORCE_INLINE                                                          \
+  int32 NAME##_##TYPE(TYPE in, int32 len, boolean is_valid, int32 seed, \
+                      boolean seed_isvalid) {                           \
+    if (!is_valid) {                                                    \
+      return seed;                                                      \
+    }                                                                   \
+    return hash32_buf(reinterpret_cast<const uint8_t*>(in), len, seed); \
   }
 
 #define HASH64_BUF_OP(NAME, TYPE)                                                   \
diff --git a/cpp/src/gandiva/tests/decimal_test.cc b/cpp/src/gandiva/tests/decimal_test.cc
index 5fa32f1..9941fea 100644
--- a/cpp/src/gandiva/tests/decimal_test.cc
+++ b/cpp/src/gandiva/tests/decimal_test.cc
@@ -480,4 +480,311 @@ TEST_F(TestDecimal, TestCastFunctions) {
   EXPECT_ARROW_ARRAY_EQUALS(array_float64, outputs[4]);
 }
 
+// isnull, isnumeric
+TEST_F(TestDecimal, TestIsNullNumericFunctions) {
+  // schema for input fields
+  constexpr int32_t precision = 38;
+  constexpr int32_t scale = 2;
+  auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
+  auto field_dec = field("dec", decimal_type);
+  auto schema = arrow::schema({field_dec});
+
+  // build expressions
+  auto exprs = std::vector<ExpressionPtr>{
+      TreeExprBuilder::MakeExpression("isnull", {field_dec},
+                                      field("isnull", arrow::boolean())),
+
+      TreeExprBuilder::MakeExpression("isnotnull", {field_dec},
+                                      field("isnotnull", arrow::boolean())),
+      TreeExprBuilder::MakeExpression("isnumeric", {field_dec},
+                                      field("isnumeric", arrow::boolean()))};
+
+  // Build a projector for the expression.
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, exprs, TestConfiguration(), &projector);
+  DCHECK_OK(status);
+
+  // Create a row-batch with some sample data
+  int num_records = 5;
+  auto validity = {false, true, true, true, false};
+
+  auto array_dec = MakeArrowArrayDecimal(
+      decimal_type, MakeDecimalVector({"1.51", "1.23", "1.23", "-1.23", "-1.24"}, scale),
+      validity);
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_dec});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  DCHECK_OK(status);
+
+  // Validate results
+  auto is_null = outputs.at(0);
+  auto is_not_null = outputs.at(1);
+  auto is_numeric = outputs.at(2);
+
+  // isnull
+  EXPECT_ARROW_ARRAY_EQUALS(MakeArrowArrayBool({true, false, false, false, true}),
+                            outputs[0]);
+
+  // isnotnull
+  EXPECT_ARROW_ARRAY_EQUALS(MakeArrowArrayBool(validity), outputs[1]);
+
+  // isnumeric
+  EXPECT_ARROW_ARRAY_EQUALS(MakeArrowArrayBool(validity), outputs[2]);
+}
+
+TEST_F(TestDecimal, TestIsDistinct) {
+  // schema for input fields
+  constexpr int32_t precision = 38;
+  constexpr int32_t scale_1 = 2;
+  auto decimal_type_1 = std::make_shared<arrow::Decimal128Type>(precision, scale_1);
+  auto field_dec_1 = field("dec_1", decimal_type_1);
+  constexpr int32_t scale_2 = 1;
+  auto decimal_type_2 = std::make_shared<arrow::Decimal128Type>(precision, scale_2);
+  auto field_dec_2 = field("dec_2", decimal_type_2);
+
+  auto schema = arrow::schema({field_dec_1, field_dec_2});
+
+  // build expressions
+  auto exprs = std::vector<ExpressionPtr>{
+      TreeExprBuilder::MakeExpression("is_distinct_from", {field_dec_1, field_dec_2},
+                                      field("isdistinct", arrow::boolean())),
+
+      TreeExprBuilder::MakeExpression("is_not_distinct_from", {field_dec_1, field_dec_2},
+                                      field("isnotdistinct", arrow::boolean()))};
+
+  // Build a projector for the expression.
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, exprs, TestConfiguration(), &projector);
+  DCHECK_OK(status);
+
+  // Create a row-batch with some sample data
+  int num_records = 4;
+
+  auto validity_1 = {true, false, true, true};
+  auto array_dec_1 = MakeArrowArrayDecimal(
+      decimal_type_1, MakeDecimalVector({"1.51", "1.23", "1.20", "-1.20"}, scale_1),
+      validity_1);
+
+  auto validity_2 = {true, false, false, true};
+  auto array_dec_2 = MakeArrowArrayDecimal(
+      decimal_type_2, MakeDecimalVector({"1.5", "1.2", "1.2", "-1.2"}, scale_2),
+      validity_2);
+
+  // prepare input record batch
+  auto in_batch =
+      arrow::RecordBatch::Make(schema, num_records, {array_dec_1, array_dec_2});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  DCHECK_OK(status);
+
+  // Validate results
+  auto is_distinct = std::dynamic_pointer_cast<arrow::BooleanArray>(outputs.at(0));
+  auto is_not_distinct = std::dynamic_pointer_cast<arrow::BooleanArray>(outputs.at(1));
+
+  // isdistinct
+  EXPECT_ARROW_ARRAY_EQUALS(MakeArrowArrayBool({true, false, true, false}), outputs[0]);
+
+  // isnotdistinct
+  EXPECT_ARROW_ARRAY_EQUALS(MakeArrowArrayBool({false, true, false, true}), outputs[1]);
+}
+
+// decimal hashes without seed
+TEST_F(TestDecimal, TestHashFunctions) {
+  // schema for input fields
+  constexpr int32_t precision = 38;
+  constexpr int32_t scale = 2;
+  auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
+  auto field_dec = field("dec", decimal_type);
+  auto literal_seed32 = TreeExprBuilder::MakeLiteral((int32_t)10);
+  auto literal_seed64 = TreeExprBuilder::MakeLiteral((int64_t)10);
+  auto schema = arrow::schema({field_dec});
+
+  // build expressions
+  auto exprs = std::vector<ExpressionPtr>{
+      TreeExprBuilder::MakeExpression("hash", {field_dec},
+                                      field("hash_of_dec", arrow::int32())),
+
+      TreeExprBuilder::MakeExpression("hash64", {field_dec},
+                                      field("hash64_of_dec", arrow::int64())),
+
+      TreeExprBuilder::MakeExpression("hash32AsDouble", {field_dec},
+                                      field("hash32_as_double", arrow::int32())),
+
+      TreeExprBuilder::MakeExpression("hash64AsDouble", {field_dec},
+                                      field("hash64_as_double", arrow::int64()))};
+
+  // Build a projector for the expression.
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, exprs, TestConfiguration(), &projector);
+  DCHECK_OK(status);
+
+  // Create a row-batch with some sample data
+  int num_records = 5;
+  auto validity = {false, true, true, true, true};
+
+  auto array_dec = MakeArrowArrayDecimal(
+      decimal_type, MakeDecimalVector({"1.51", "1.23", "1.23", "-1.23", "-1.24"}, scale),
+      validity);
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_dec});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  DCHECK_OK(status);
+
+  // Validate results
+  auto int32_arr = std::dynamic_pointer_cast<arrow::Int32Array>(outputs.at(0));
+  EXPECT_EQ(int32_arr->null_count(), 0);
+  EXPECT_EQ(int32_arr->Value(0), 0);
+  EXPECT_EQ(int32_arr->Value(1), int32_arr->Value(2));
+  EXPECT_NE(int32_arr->Value(2), int32_arr->Value(3));
+  EXPECT_NE(int32_arr->Value(3), int32_arr->Value(4));
+
+  auto int64_arr = std::dynamic_pointer_cast<arrow::Int64Array>(outputs.at(1));
+  EXPECT_EQ(int64_arr->null_count(), 0);
+  EXPECT_EQ(int64_arr->Value(0), 0);
+  EXPECT_EQ(int64_arr->Value(1), int64_arr->Value(2));
+  EXPECT_NE(int64_arr->Value(2), int64_arr->Value(3));
+  EXPECT_NE(int64_arr->Value(3), int64_arr->Value(4));
+}
+
+TEST_F(TestDecimal, TestHash32WithSeed) {
+  constexpr int32_t precision = 38;
+  constexpr int32_t scale = 2;
+  auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
+  auto field_dec_1 = field("dec1", decimal_type);
+  auto field_dec_2 = field("dec2", decimal_type);
+  auto schema = arrow::schema({field_dec_1, field_dec_2});
+
+  auto res = field("hash32_with_seed", arrow::int32());
+
+  auto field_1_nodePtr = TreeExprBuilder::MakeField(field_dec_1);
+  auto field_2_nodePtr = TreeExprBuilder::MakeField(field_dec_2);
+
+  auto hash32 =
+      TreeExprBuilder::MakeFunction("hash32", {field_2_nodePtr}, arrow::int32());
+  auto hash32_with_seed =
+      TreeExprBuilder::MakeFunction("hash32", {field_1_nodePtr, hash32}, arrow::int32());
+  auto expr = TreeExprBuilder::MakeExpression(hash32, field("hash32", arrow::int32()));
+  auto exprWS = TreeExprBuilder::MakeExpression(hash32_with_seed, res);
+
+  auto exprs = std::vector<ExpressionPtr>{expr, exprWS};
+
+  // Build a projector for the expression.
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, exprs, TestConfiguration(), &projector);
+  DCHECK_OK(status);
+
+  // Create a row-batch with some sample data
+  int num_records = 5;
+  auto validity_1 = {false, false, true, true, true};
+
+  auto array_dec_1 = MakeArrowArrayDecimal(
+      decimal_type, MakeDecimalVector({"1.51", "1.23", "1.23", "-1.23", "-1.24"}, scale),
+      validity_1);
+
+  auto validity_2 = {false, true, false, true, true};
+
+  auto array_dec_2 = MakeArrowArrayDecimal(
+      decimal_type, MakeDecimalVector({"1.51", "1.23", "1.23", "-1.23", "-1.24"}, scale),
+      validity_2);
+
+  // prepare input record batch
+  auto in_batch =
+      arrow::RecordBatch::Make(schema, num_records, {array_dec_1, array_dec_2});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  DCHECK_OK(status);
+
+  // Validate results
+  auto int32_arr = std::dynamic_pointer_cast<arrow::Int32Array>(outputs.at(0));
+  auto int32_arr_WS = std::dynamic_pointer_cast<arrow::Int32Array>(outputs.at(1));
+  EXPECT_EQ(int32_arr->null_count(), 0);
+  // seed 0, null decimal
+  EXPECT_EQ(int32_arr_WS->Value(0), 0);
+  // null decimal => hash = seed
+  EXPECT_EQ(int32_arr_WS->Value(1), int32_arr->Value(1));
+  // seed = 0 => hash = hash without seed
+  EXPECT_EQ(int32_arr_WS->Value(2), int32_arr->Value(1));
+  // different inputs => different outputs
+  EXPECT_NE(int32_arr_WS->Value(3), int32_arr_WS->Value(4));
+  // hash with, without seed are not equal
+  EXPECT_NE(int32_arr_WS->Value(4), int32_arr->Value(4));
+}
+
+TEST_F(TestDecimal, TestHash64WithSeed) {
+  constexpr int32_t precision = 38;
+  constexpr int32_t scale = 2;
+  auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
+  auto field_dec_1 = field("dec1", decimal_type);
+  auto field_dec_2 = field("dec2", decimal_type);
+  auto schema = arrow::schema({field_dec_1, field_dec_2});
+
+  auto res = field("hash64_with_seed", arrow::int64());
+
+  auto field_1_nodePtr = TreeExprBuilder::MakeField(field_dec_1);
+  auto field_2_nodePtr = TreeExprBuilder::MakeField(field_dec_2);
+
+  auto hash64 =
+      TreeExprBuilder::MakeFunction("hash64", {field_2_nodePtr}, arrow::int64());
+  auto hash64_with_seed =
+      TreeExprBuilder::MakeFunction("hash64", {field_1_nodePtr, hash64}, arrow::int64());
+  auto expr = TreeExprBuilder::MakeExpression(hash64, field("hash64", arrow::int64()));
+  auto exprWS = TreeExprBuilder::MakeExpression(hash64_with_seed, res);
+
+  auto exprs = std::vector<ExpressionPtr>{expr, exprWS};
+
+  // Build a projector for the expression.
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, exprs, TestConfiguration(), &projector);
+  DCHECK_OK(status);
+
+  // Create a row-batch with some sample data
+  int num_records = 5;
+  auto validity_1 = {false, false, true, true, true};
+
+  auto array_dec_1 = MakeArrowArrayDecimal(
+      decimal_type, MakeDecimalVector({"1.51", "1.23", "1.23", "-1.23", "-1.24"}, scale),
+      validity_1);
+
+  auto validity_2 = {false, true, false, true, true};
+
+  auto array_dec_2 = MakeArrowArrayDecimal(
+      decimal_type, MakeDecimalVector({"1.51", "1.23", "1.23", "-1.23", "-1.24"}, scale),
+      validity_2);
+
+  // prepare input record batch
+  auto in_batch =
+      arrow::RecordBatch::Make(schema, num_records, {array_dec_1, array_dec_2});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  DCHECK_OK(status);
+
+  // Validate results
+  auto int64_arr = std::dynamic_pointer_cast<arrow::Int64Array>(outputs.at(0));
+  auto int64_arr_WS = std::dynamic_pointer_cast<arrow::Int64Array>(outputs.at(1));
+  EXPECT_EQ(int64_arr->null_count(), 0);
+  // seed 0, null decimal
+  EXPECT_EQ(int64_arr_WS->Value(0), 0);
+  // null decimal => hash = seed
+  EXPECT_EQ(int64_arr_WS->Value(1), int64_arr->Value(1));
+  // seed = 0 => hash = hash without seed
+  EXPECT_EQ(int64_arr_WS->Value(2), int64_arr->Value(1));
+  // different inputs => different outputs
+  EXPECT_NE(int64_arr_WS->Value(3), int64_arr_WS->Value(4));
+  // hash with, without seed are not equal
+  EXPECT_NE(int64_arr_WS->Value(4), int64_arr->Value(4));
+}
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/tests/hash_test.cc b/cpp/src/gandiva/tests/hash_test.cc
index afaa885..91356f5 100644
--- a/cpp/src/gandiva/tests/hash_test.cc
+++ b/cpp/src/gandiva/tests/hash_test.cc
@@ -80,7 +80,7 @@ TEST_F(TestHash, TestSimple) {
   // Validate results
   auto int32_arr = std::dynamic_pointer_cast<arrow::Int32Array>(outputs.at(0));
   EXPECT_EQ(int32_arr->null_count(), 0);
-  EXPECT_EQ(int32_arr->Value(0), 0);
+  EXPECT_EQ(int32_arr->Value(0), 10);
   for (int i = 1; i < num_records; ++i) {
     EXPECT_NE(int32_arr->Value(i), int32_arr->Value(i - 1));
   }
@@ -141,7 +141,7 @@ TEST_F(TestHash, TestBuf) {
 
   auto int64_arr = std::dynamic_pointer_cast<arrow::Int64Array>(outputs.at(1));
   EXPECT_EQ(int64_arr->null_count(), 0);
-  EXPECT_EQ(int64_arr->Value(0), 0);
+  EXPECT_EQ(int64_arr->Value(0), 10);
   for (int i = 1; i < num_records; ++i) {
     EXPECT_NE(int64_arr->Value(i), int64_arr->Value(i - 1));
   }