You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pr...@apache.org on 2021/05/20 05:06:56 UTC
[arrow] branch master updated: ARROW-12621: [C++][Gandiva] Add
alias to sha1 and sha256 functions
This is an automated email from the ASF dual-hosted git repository.
praveenbingo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 23b9116 ARROW-12621: [C++][Gandiva] Add alias to sha1 and sha256 functions
23b9116 is described below
commit 23b911679e6ae6f00a50398b1586b27cf4e008df
Author: Anthony Louis <an...@simbioseventures.com>
AuthorDate: Thu May 20 10:35:59 2021 +0530
ARROW-12621: [C++][Gandiva] Add alias to sha1 and sha256 functions
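The names of the sha functions are **hashSHA1** and **hashSHA256**. The objective is to make these functions also available through the **sha** and **sha1** names for the SHA1 algorithm and through the **sha256** name for the SHA256 algorithm. (A **sha2** alias for SHA256 was originally proposed as well, but it was removed before merging; see the "Remove SHA2 alias" commit in the list below.)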
Closes #10218 from anthonylouisbsb/feature/add-alias-sha-functions and squashes the following commits:
da4f9350e <Anthony Louis> Remove SHA2 alias
559d6caaa <Anthony Louis> Add tests to check if alias are working
a101dd888 <Anthony Louis> Add alias to sha hash functions
Authored-by: Anthony Louis <an...@simbioseventures.com>
Signed-off-by: Praveen <pr...@dremio.com>
---
cpp/src/gandiva/function_registry_common.h | 12 +-
cpp/src/gandiva/tests/hash_test.cc | 188 ++++++++++++++++++++++++++++-
2 files changed, 192 insertions(+), 8 deletions(-)
diff --git a/cpp/src/gandiva/function_registry_common.h b/cpp/src/gandiva/function_registry_common.h
index d1555fb..580b2f6 100644
--- a/cpp/src/gandiva/function_registry_common.h
+++ b/cpp/src/gandiva/function_registry_common.h
@@ -213,9 +213,9 @@ typedef std::unordered_map<const FunctionSignature*, const NativeFunction*, KeyH
// - can return errors
//
// The function name includes the base name & input type name. gdv_fn_sha1_float64
-#define HASH_SHA1_NULL_NEVER(NAME, ALIASES, TYPE) \
- NativeFunction(#NAME, std::vector<std::string> ALIASES, DataTypeVector{TYPE()}, \
- utf8(), kResultNullNever, ARROW_STRINGIFY(gdv_fn_sha1_##TYPE), \
+#define HASH_SHA1_NULL_NEVER(NAME, ALIASES, TYPE) \
+ NativeFunction(#NAME, {"sha", "sha1"}, DataTypeVector{TYPE()}, utf8(), \
+ kResultNullNever, ARROW_STRINGIFY(gdv_fn_sha1_##TYPE), \
NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors)
// HashSHA256 functions that :
@@ -223,9 +223,9 @@ typedef std::unordered_map<const FunctionSignature*, const NativeFunction*, KeyH
// - can return errors
//
// The function name includes the base name & input type name. gdv_fn_sha256_float64
-#define HASH_SHA256_NULL_NEVER(NAME, ALIASES, TYPE) \
- NativeFunction(#NAME, std::vector<std::string> ALIASES, DataTypeVector{TYPE()}, \
- utf8(), kResultNullNever, ARROW_STRINGIFY(gdv_fn_sha256_##TYPE), \
+#define HASH_SHA256_NULL_NEVER(NAME, ALIASES, TYPE) \
+ NativeFunction(#NAME, {"sha256"}, DataTypeVector{TYPE()}, utf8(), kResultNullNever, \
+ ARROW_STRINGIFY(gdv_fn_sha256_##TYPE), \
NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors)
// Iterate the inner macro over all numeric types
diff --git a/cpp/src/gandiva/tests/hash_test.cc b/cpp/src/gandiva/tests/hash_test.cc
index 9f4fff8..40ebc50 100644
--- a/cpp/src/gandiva/tests/hash_test.cc
+++ b/cpp/src/gandiva/tests/hash_test.cc
@@ -15,12 +15,12 @@
// specific language governing permissions and limitations
// under the License.
+#include <gtest/gtest.h>
+
#include <sstream>
-#include <gtest/gtest.h>
#include "arrow/memory_pool.h"
#include "arrow/status.h"
-
#include "gandiva/projector.h"
#include "gandiva/tests/test_util.h"
#include "gandiva/tree_expr_builder.h"
@@ -428,4 +428,188 @@ TEST_F(TestHash, TestSha1Varlen) {
EXPECT_NE(value_at_position, response->GetScalar(i - 1).ValueOrDie()->ToString());
}
}
+
+TEST_F(TestHash, TestSha1FunctionsAlias) {
+ // Alias test: input schema with one utf8, one int64 and one float64 field
+ auto field_a = field("a", utf8());
+ auto field_b = field("c", int64());
+ auto field_c = field("e", float64());
+ auto schema = arrow::schema({field_a, field_b, field_c});
+
+ // output fields: one per function name (hashSHA1, sha1, sha) and input field
+ auto res_0 = field("res0", utf8());
+ auto res_0_sha1 = field("res0sha1", utf8());
+ auto res_0_sha = field("res0sha", utf8());
+
+ auto res_1 = field("res1", utf8());
+ auto res_1_sha1 = field("res1sha1", utf8());
+ auto res_1_sha = field("res1sha", utf8());
+
+ auto res_2 = field("res2", utf8());
+ auto res_2_sha1 = field("res2_sha1", utf8());
+ auto res_2_sha = field("res2_sha", utf8());
+
+ // build expressions.
+ // hashSHA1(x), sha1(x) and sha(x) for each input field x
+ auto node_a = TreeExprBuilder::MakeField(field_a);
+ auto hashSha1 = TreeExprBuilder::MakeFunction("hashSHA1", {node_a}, utf8());
+ auto expr_0 = TreeExprBuilder::MakeExpression(hashSha1, res_0);
+ auto sha1 = TreeExprBuilder::MakeFunction("sha1", {node_a}, utf8());
+ auto expr_0_sha1 = TreeExprBuilder::MakeExpression(sha1, res_0_sha1);
+ auto sha = TreeExprBuilder::MakeFunction("sha", {node_a}, utf8());
+ auto expr_0_sha = TreeExprBuilder::MakeExpression(sha, res_0_sha);
+
+ auto node_b = TreeExprBuilder::MakeField(field_b);
+ auto hashSha1_1 = TreeExprBuilder::MakeFunction("hashSHA1", {node_b}, utf8());
+ auto expr_1 = TreeExprBuilder::MakeExpression(hashSha1_1, res_1);
+ auto sha1_1 = TreeExprBuilder::MakeFunction("sha1", {node_b}, utf8());
+ auto expr_1_sha1 = TreeExprBuilder::MakeExpression(sha1_1, res_1_sha1);
+ auto sha_1 = TreeExprBuilder::MakeFunction("sha", {node_b}, utf8());
+ auto expr_1_sha = TreeExprBuilder::MakeExpression(sha_1, res_1_sha);
+
+ auto node_c = TreeExprBuilder::MakeField(field_c);
+ auto hashSha1_2 = TreeExprBuilder::MakeFunction("hashSHA1", {node_c}, utf8());
+ auto expr_2 = TreeExprBuilder::MakeExpression(hashSha1_2, res_2);
+ auto sha1_2 = TreeExprBuilder::MakeFunction("sha1", {node_c}, utf8());
+ auto expr_2_sha1 = TreeExprBuilder::MakeExpression(sha1_2, res_2_sha1);
+ auto sha_2 = TreeExprBuilder::MakeFunction("sha", {node_c}, utf8());
+ auto expr_2_sha = TreeExprBuilder::MakeExpression(sha_2, res_2_sha);
+
+ // Build a projector; output order per field is hashSHA1, sha, sha1.
+ std::shared_ptr<Projector> projector;
+ auto status = Projector::Make(schema,
+ {expr_0, expr_0_sha, expr_0_sha1, expr_1, expr_1_sha,
+ expr_1_sha1, expr_2, expr_2_sha, expr_2_sha1},
+ TestConfiguration(), &projector);
+ ASSERT_OK(status) << status.message();
+
+ // Create a row-batch with some sample data
+ int32_t num_records = 3;
+
+ std::string first_string =
+ "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY [ˈʏpsilɔn], "
+ "Yen [jɛn], Yoga [ˈjoːgɑ]";
+ std::string second_string =
+ "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY [ˈʏpsilɔn], "
+ "Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ";
+
+ auto array_utf8 =
+ MakeArrowArrayUtf8({"", first_string, second_string}, {false, true, true});
+
+ auto validity_array = {false, true, true};
+
+ auto array_int64 = MakeArrowArrayInt64({1, 0, 32423}, validity_array);
+
+ auto array_float64 = MakeArrowArrayFloat64({1.0, 0.0, 324893.3849}, validity_array);
+
+ // prepare input record batch
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records,
+ {array_utf8, array_int64, array_float64});
+
+ // Evaluate expression
+ arrow::ArrayVector outputs;
+ status = projector->Evaluate(*in_batch, pool_, &outputs);
+ ASSERT_OK(status);
+
+ // Check that the results of hashSHA1, sha and sha1 are equal for the first
+ // field, of utf8 type
+ EXPECT_ARROW_ARRAY_EQUALS(outputs.at(0), outputs.at(1)); // hashSha1 and sha
+ EXPECT_ARROW_ARRAY_EQUALS(outputs.at(1), outputs.at(2)); // sha and sha1
+
+ // Check that the results of hashSHA1, sha and sha1 are equal for the second
+ // field, of int64 type
+ EXPECT_ARROW_ARRAY_EQUALS(outputs.at(3), outputs.at(4)); // hashSha1 and sha
+ EXPECT_ARROW_ARRAY_EQUALS(outputs.at(4), outputs.at(5)); // sha and sha1
+
+ // Check that the results of hashSHA1, sha and sha1 are equal for the third
+ // field, of float64 type
+ EXPECT_ARROW_ARRAY_EQUALS(outputs.at(6), outputs.at(7)); // hashSha1 and sha responses
+ EXPECT_ARROW_ARRAY_EQUALS(outputs.at(7), outputs.at(8)); // sha and sha1 responses
+}
+
+TEST_F(TestHash, TestSha256FunctionsAlias) {
+ // Alias test: input schema with one utf8, one int64 and one float64 field
+ auto field_a = field("a", utf8());
+ auto field_b = field("c", int64());
+ auto field_c = field("e", float64());
+ auto schema = arrow::schema({field_a, field_b, field_c});
+
+ // output fields: one per function name (hashSHA256, sha256) and input field
+ auto res_0 = field("res0", utf8());
+ auto res_0_sha256 = field("res0sha256", utf8());
+
+ auto res_1 = field("res1", utf8());
+ auto res_1_sha256 = field("res1sha256", utf8());
+
+ auto res_2 = field("res2", utf8());
+ auto res_2_sha256 = field("res2_sha256", utf8());
+
+ // build expressions.
+ // hashSHA256(x) and sha256(x) for each input field x
+ auto node_a = TreeExprBuilder::MakeField(field_a);
+ auto hashSha2 = TreeExprBuilder::MakeFunction("hashSHA256", {node_a}, utf8());
+ auto expr_0 = TreeExprBuilder::MakeExpression(hashSha2, res_0);
+ auto sha256 = TreeExprBuilder::MakeFunction("sha256", {node_a}, utf8());
+ auto expr_0_sha256 = TreeExprBuilder::MakeExpression(sha256, res_0_sha256);
+
+ auto node_b = TreeExprBuilder::MakeField(field_b);
+ auto hashSha2_1 = TreeExprBuilder::MakeFunction("hashSHA256", {node_b}, utf8());
+ auto expr_1 = TreeExprBuilder::MakeExpression(hashSha2_1, res_1);
+ auto sha256_1 = TreeExprBuilder::MakeFunction("sha256", {node_b}, utf8());
+ auto expr_1_sha256 = TreeExprBuilder::MakeExpression(sha256_1, res_1_sha256);
+
+ auto node_c = TreeExprBuilder::MakeField(field_c);
+ auto hashSha2_2 = TreeExprBuilder::MakeFunction("hashSHA256", {node_c}, utf8());
+ auto expr_2 = TreeExprBuilder::MakeExpression(hashSha2_2, res_2);
+ auto sha256_2 = TreeExprBuilder::MakeFunction("sha256", {node_c}, utf8());
+ auto expr_2_sha256 = TreeExprBuilder::MakeExpression(sha256_2, res_2_sha256);
+
+ // Build a projector; output order per field is hashSHA256, sha256.
+ std::shared_ptr<Projector> projector;
+ auto status = Projector::Make(
+ schema, {expr_0, expr_0_sha256, expr_1, expr_1_sha256, expr_2, expr_2_sha256},
+ TestConfiguration(), &projector);
+ ASSERT_OK(status) << status.message();
+
+ // Create a row-batch with some sample data
+ int32_t num_records = 3;
+
+ std::string first_string =
+ "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY [ˈʏpsilɔn], "
+ "Yen [jɛn], Yoga [ˈjoːgɑ]";
+ std::string second_string =
+ "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY [ˈʏpsilɔn], "
+ "Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ";
+
+ auto array_utf8 =
+ MakeArrowArrayUtf8({"", first_string, second_string}, {false, true, true});
+
+ auto validity_array = {false, true, true};
+
+ auto array_int64 = MakeArrowArrayInt64({1, 0, 32423}, validity_array);
+
+ auto array_float64 = MakeArrowArrayFloat64({1.0, 0.0, 324893.3849}, validity_array);
+
+ // prepare input record batch
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records,
+ {array_utf8, array_int64, array_float64});
+
+ // Evaluate expression
+ arrow::ArrayVector outputs;
+ status = projector->Evaluate(*in_batch, pool_, &outputs);
+ ASSERT_OK(status);
+
+ // Check that the results of hashSHA256 and sha256 are equal for the first
+ // field, of utf8 type
+ EXPECT_ARROW_ARRAY_EQUALS(outputs.at(0), outputs.at(1)); // hashSha2 and sha256
+
+ // Check that the results of hashSHA256 and sha256 are equal for the second
+ // field, of int64 type
+ EXPECT_ARROW_ARRAY_EQUALS(outputs.at(2), outputs.at(3)); // hashSha2 and sha256
+
+ // Check that the results of hashSHA256 and sha256 are equal for the third
+ // field, of float64 type
+ EXPECT_ARROW_ARRAY_EQUALS(outputs.at(4),
+ outputs.at(5)); // hashSha2 and sha256 responses
+}
} // namespace gandiva