You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ra...@apache.org on 2021/11/23 08:38:51 UTC
[arrow] branch master updated: ARROW-14011: [C++][Gandiva] Add elt hive function to gandiva
This is an automated email from the ASF dual-hosted git repository.
ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 5f6c92a ARROW-14011: [C++][Gandiva] Add elt hive function to gandiva
5f6c92a is described below
commit 5f6c92a8f40fb3c26ed35e0e4cb6433e1237ff57
Author: Augusto Silva <au...@hotmail.com>
AuthorDate: Tue Nov 23 14:07:10 2021 +0530
ARROW-14011: [C++][Gandiva] Add elt hive function to gandiva
Returns the string at the given 1-based index. For example, elt(2, 'hello', 'world') returns 'world'. Returns NULL if N is less than 1 or greater than the number of string arguments.
Closes #11166 from augustoasilva/feature/add-elt-function
Authored-by: Augusto Silva <au...@hotmail.com>
Signed-off-by: Pindikura Ravindra <ra...@dremio.com>
---
cpp/src/gandiva/function_registry_string.cc | 13 +++
cpp/src/gandiva/gdv_function_stubs_test.cc | 2 +-
cpp/src/gandiva/precompiled/string_ops.cc | 121 +++++++++++++++++++++++++
cpp/src/gandiva/precompiled/string_ops_test.cc | 67 ++++++++++++++
cpp/src/gandiva/precompiled/types.h | 25 +++++
cpp/src/gandiva/tests/projector_test.cc | 82 +++++++++++++++++
6 files changed, 309 insertions(+), 1 deletion(-)
diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index 6fc022d..d0b6d96 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -106,6 +106,19 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
utf8(), kResultNullIfNull, "concat_ws_utf8_utf8_utf8_utf8_utf8",
NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
+ NativeFunction("elt", {}, DataTypeVector{int32(), utf8(), utf8()}, utf8(),
+ kResultNullInternal, "elt_int32_utf8_utf8"),
+
+ NativeFunction("elt", {}, DataTypeVector{int32(), utf8(), utf8(), utf8()}, utf8(),
+ kResultNullInternal, "elt_int32_utf8_utf8_utf8"),
+
+ NativeFunction("elt", {}, DataTypeVector{int32(), utf8(), utf8(), utf8(), utf8()},
+ utf8(), kResultNullInternal, "elt_int32_utf8_utf8_utf8_utf8"),
+
+ NativeFunction("elt", {},
+ DataTypeVector{int32(), utf8(), utf8(), utf8(), utf8(), utf8()},
+ utf8(), kResultNullInternal, "elt_int32_utf8_utf8_utf8_utf8_utf8"),
+
NativeFunction("castBIT", {"castBOOLEAN"}, DataTypeVector{utf8()}, boolean(),
kResultNullIfNull, "castBIT_utf8", NativeFunction::kNeedsContext),
diff --git a/cpp/src/gandiva/gdv_function_stubs_test.cc b/cpp/src/gandiva/gdv_function_stubs_test.cc
index f7c2198..da99cf8 100644
--- a/cpp/src/gandiva/gdv_function_stubs_test.cc
+++ b/cpp/src/gandiva/gdv_function_stubs_test.cc
@@ -19,6 +19,7 @@
#include <gmock/gmock.h>
#include <gtest/gtest.h>
+#include "arrow/util/logging.h"
#include "gandiva/execution_context.h"
@@ -765,5 +766,4 @@ TEST(TestGdvFnStubs, TestCastVarbinaryFloat8) {
::testing::HasSubstr("Failed to cast the string e to double"));
ctx.Reset();
}
-
} // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index 7790860..06f4635 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -2344,4 +2344,125 @@ const char* concat_ws_utf8_utf8_utf8_utf8_utf8(int64_t context, const char* sepa
return out;
}
+
+// Hive-style ELT over two candidate strings: returns the word whose 1-based
+// position equals `pos`.
+//
+// The function is registered with kResultNullInternal, so the validity of the
+// result is reported through `out_valid` instead of being derived by the caller.
+//
+// \param pos, pos_validity  index to select and whether that index is non-null
+// \param word1, word1_len, in1_validity  first candidate: data, length, non-null flag
+// \param word2, word2_len, in2_validity  second candidate: data, length, non-null flag
+// \param[out] out_valid  true only when a non-null word was selected
+// \param[out] out_len    length of the returned word, 0 for a NULL result
+// \return the selected input pointer (no copy is made), or nullptr when `pos` is
+//         null, `pos` is outside [1, 2], or the selected word is itself null
+FORCE_INLINE
+const char* elt_int32_utf8_utf8(int32_t pos, bool pos_validity, const char* word1,
+                                int32_t word1_len, bool in1_validity, const char* word2,
+                                int32_t word2_len, bool in2_validity, bool* out_valid,
+                                int32_t* out_len) {
+  // Start from the NULL result; it is only overwritten once a valid word is chosen.
+  *out_valid = false;
+  *out_len = 0;
+
+  // When pos_validity is false the value of `pos` is not meaningful, so it must
+  // not be used to pick a word.
+  if (!pos_validity) {
+    return nullptr;
+  }
+
+  const char* selected = nullptr;
+  int32_t selected_len = 0;
+  bool selected_valid = false;
+
+  switch (pos) {
+    case 1:
+      selected = word1;
+      selected_len = word1_len;
+      selected_valid = in1_validity;
+      break;
+    case 2:
+      selected = word2;
+      selected_len = word2_len;
+      selected_valid = in2_validity;
+      break;
+    default:
+      // Index below 1 or above the number of words.
+      return nullptr;
+  }
+
+  // Propagate NULL from the selected word instead of reporting it as valid.
+  if (!selected_valid) {
+    return nullptr;
+  }
+
+  *out_valid = true;
+  *out_len = selected_len;
+  return selected;
+}
+
+// Hive-style ELT over three candidate strings: returns the word whose 1-based
+// position equals `pos`.
+//
+// The function is registered with kResultNullInternal, so the validity of the
+// result is reported through `out_valid` instead of being derived by the caller.
+//
+// \param pos, pos_validity  index to select and whether that index is non-null
+// \param wordN, wordN_len, wordN_validity  N-th candidate (N in 1..3): data, length,
+//        non-null flag
+// \param[out] out_valid  true only when a non-null word was selected
+// \param[out] out_len    length of the returned word, 0 for a NULL result
+// \return the selected input pointer (no copy is made), or nullptr when `pos` is
+//         null, `pos` is outside [1, 3], or the selected word is itself null
+FORCE_INLINE
+const char* elt_int32_utf8_utf8_utf8(int32_t pos, bool pos_validity, const char* word1,
+                                     int32_t word1_len, bool word1_validity,
+                                     const char* word2, int32_t word2_len,
+                                     bool word2_validity, const char* word3,
+                                     int32_t word3_len, bool word3_validity,
+                                     bool* out_valid, int32_t* out_len) {
+  // Start from the NULL result; it is only overwritten once a valid word is chosen.
+  *out_valid = false;
+  *out_len = 0;
+
+  // When pos_validity is false the value of `pos` is not meaningful, so it must
+  // not be used to pick a word.
+  if (!pos_validity) {
+    return nullptr;
+  }
+
+  const char* selected = nullptr;
+  int32_t selected_len = 0;
+  bool selected_valid = false;
+
+  switch (pos) {
+    case 1:
+      selected = word1;
+      selected_len = word1_len;
+      selected_valid = word1_validity;
+      break;
+    case 2:
+      selected = word2;
+      selected_len = word2_len;
+      selected_valid = word2_validity;
+      break;
+    case 3:
+      selected = word3;
+      selected_len = word3_len;
+      selected_valid = word3_validity;
+      break;
+    default:
+      // Index below 1 or above the number of words.
+      return nullptr;
+  }
+
+  // Propagate NULL from the selected word instead of reporting it as valid.
+  if (!selected_valid) {
+    return nullptr;
+  }
+
+  *out_valid = true;
+  *out_len = selected_len;
+  return selected;
+}
+
+// Hive-style ELT over four candidate strings: returns the word whose 1-based
+// position equals `pos`.
+//
+// The function is registered with kResultNullInternal, so the validity of the
+// result is reported through `out_valid` instead of being derived by the caller.
+//
+// \param pos, pos_validity  index to select and whether that index is non-null
+// \param wordN, wordN_len, wordN_validity  N-th candidate (N in 1..4): data, length,
+//        non-null flag
+// \param[out] out_valid  true only when a non-null word was selected
+// \param[out] out_len    length of the returned word, 0 for a NULL result
+// \return the selected input pointer (no copy is made), or nullptr when `pos` is
+//         null, `pos` is outside [1, 4], or the selected word is itself null
+FORCE_INLINE
+const char* elt_int32_utf8_utf8_utf8_utf8(
+    int32_t pos, bool pos_validity, const char* word1, int32_t word1_len,
+    bool word1_validity, const char* word2, int32_t word2_len, bool word2_validity,
+    const char* word3, int32_t word3_len, bool word3_validity, const char* word4,
+    int32_t word4_len, bool word4_validity, bool* out_valid, int32_t* out_len) {
+  // Start from the NULL result; it is only overwritten once a valid word is chosen.
+  *out_valid = false;
+  *out_len = 0;
+
+  // When pos_validity is false the value of `pos` is not meaningful, so it must
+  // not be used to pick a word.
+  if (!pos_validity) {
+    return nullptr;
+  }
+
+  const char* selected = nullptr;
+  int32_t selected_len = 0;
+  bool selected_valid = false;
+
+  switch (pos) {
+    case 1:
+      selected = word1;
+      selected_len = word1_len;
+      selected_valid = word1_validity;
+      break;
+    case 2:
+      selected = word2;
+      selected_len = word2_len;
+      selected_valid = word2_validity;
+      break;
+    case 3:
+      selected = word3;
+      selected_len = word3_len;
+      selected_valid = word3_validity;
+      break;
+    case 4:
+      selected = word4;
+      selected_len = word4_len;
+      selected_valid = word4_validity;
+      break;
+    default:
+      // Index below 1 or above the number of words.
+      return nullptr;
+  }
+
+  // Propagate NULL from the selected word instead of reporting it as valid.
+  if (!selected_valid) {
+    return nullptr;
+  }
+
+  *out_valid = true;
+  *out_len = selected_len;
+  return selected;
+}
+
+// Hive-style ELT over five candidate strings: returns the word whose 1-based
+// position equals `pos`.
+//
+// The function is registered with kResultNullInternal, so the validity of the
+// result is reported through `out_valid` instead of being derived by the caller.
+//
+// \param pos, pos_validity  index to select and whether that index is non-null
+// \param wordN, wordN_len, wordN_validity  N-th candidate (N in 1..5): data, length,
+//        non-null flag
+// \param[out] out_valid  true only when a non-null word was selected
+// \param[out] out_len    length of the returned word, 0 for a NULL result
+// \return the selected input pointer (no copy is made), or nullptr when `pos` is
+//         null, `pos` is outside [1, 5], or the selected word is itself null
+FORCE_INLINE
+const char* elt_int32_utf8_utf8_utf8_utf8_utf8(
+    int32_t pos, bool pos_validity, const char* word1, int32_t word1_len,
+    bool word1_validity, const char* word2, int32_t word2_len, bool word2_validity,
+    const char* word3, int32_t word3_len, bool word3_validity, const char* word4,
+    int32_t word4_len, bool word4_validity, const char* word5, int32_t word5_len,
+    bool word5_validity, bool* out_valid, int32_t* out_len) {
+  // Start from the NULL result; it is only overwritten once a valid word is chosen.
+  *out_valid = false;
+  *out_len = 0;
+
+  // When pos_validity is false the value of `pos` is not meaningful, so it must
+  // not be used to pick a word.
+  if (!pos_validity) {
+    return nullptr;
+  }
+
+  const char* selected = nullptr;
+  int32_t selected_len = 0;
+  bool selected_valid = false;
+
+  switch (pos) {
+    case 1:
+      selected = word1;
+      selected_len = word1_len;
+      selected_valid = word1_validity;
+      break;
+    case 2:
+      selected = word2;
+      selected_len = word2_len;
+      selected_valid = word2_validity;
+      break;
+    case 3:
+      selected = word3;
+      selected_len = word3_len;
+      selected_valid = word3_validity;
+      break;
+    case 4:
+      selected = word4;
+      selected_len = word4_len;
+      selected_valid = word4_validity;
+      break;
+    case 5:
+      selected = word5;
+      selected_len = word5_len;
+      selected_valid = word5_validity;
+      break;
+    default:
+      // Index below 1 or above the number of words.
+      return nullptr;
+  }
+
+  // Propagate NULL from the selected word instead of reporting it as valid.
+  if (!selected_valid) {
+    return nullptr;
+  }
+
+  *out_valid = true;
+  *out_len = selected_len;
+  return selected;
+}
} // extern "C"
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index 15ab5b6..e4e4a7d 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -1805,4 +1805,71 @@ TEST(TestStringOps, TestConcatWs) {
EXPECT_EQ(std::string(out, out_len), "");
}
+// Exercises the elt_* precompiled functions directly. In-range positions must return
+// the matching word; out-of-range positions must produce a NULL result, i.e. a null
+// pointer, a zero length and out_valid == false.
+TEST(TestStringOps, TestEltFunction) {
+  gdv_int32 out_len = 0;
+  bool out_valid = false;
+
+  // Position 1 selects the first word.
+  const char* word1 = "john";
+  auto word1_len = static_cast<int32_t>(strlen(word1));
+  const char* word2 = "";
+  auto word2_len = static_cast<int32_t>(strlen(word2));
+  auto out_string = elt_int32_utf8_utf8(1, true, word1, word1_len, true, word2, word2_len,
+                                        true, &out_valid, &out_len);
+  EXPECT_EQ("john", std::string(out_string, out_len));
+  EXPECT_TRUE(out_valid);
+
+  // Position 2 selects the second word.
+  word1 = "hello";
+  word1_len = static_cast<int32_t>(strlen(word1));
+  word2 = "world";
+  word2_len = static_cast<int32_t>(strlen(word2));
+  out_string = elt_int32_utf8_utf8(2, true, word1, word1_len, true, word2, word2_len,
+                                   true, &out_valid, &out_len);
+  EXPECT_EQ("world", std::string(out_string, out_len));
+  EXPECT_TRUE(out_valid);
+
+  // Position above the number of words yields NULL. The pointer is checked directly
+  // rather than being wrapped in a std::string, since it is expected to be null.
+  word1 = "goodbye";
+  word1_len = static_cast<int32_t>(strlen(word1));
+  word2 = "world";
+  word2_len = static_cast<int32_t>(strlen(word2));
+  out_string = elt_int32_utf8_utf8(4, true, word1, word1_len, true, word2, word2_len,
+                                   true, &out_valid, &out_len);
+  EXPECT_TRUE(out_string == nullptr);
+  EXPECT_EQ(0, out_len);
+  EXPECT_FALSE(out_valid);
+
+  // Position below 1 yields NULL as well.
+  word1 = "hi";
+  word1_len = static_cast<int32_t>(strlen(word1));
+  word2 = "yeah";
+  word2_len = static_cast<int32_t>(strlen(word2));
+  out_string = elt_int32_utf8_utf8(0, true, word1, word1_len, true, word2, word2_len,
+                                   true, &out_valid, &out_len);
+  EXPECT_TRUE(out_string == nullptr);
+  EXPECT_EQ(0, out_len);
+  EXPECT_FALSE(out_valid);
+
+  // Three-word overload: last position.
+  const char* word3 = "wow";
+  auto word3_len = static_cast<int32_t>(strlen(word3));
+  out_string =
+      elt_int32_utf8_utf8_utf8(3, true, word1, word1_len, true, word2, word2_len, true,
+                               word3, word3_len, true, &out_valid, &out_len);
+  EXPECT_EQ("wow", std::string(out_string, out_len));
+  EXPECT_TRUE(out_valid);
+
+  // Four-word overload: last position.
+  const char* word4 = "awesome";
+  auto word4_len = static_cast<int32_t>(strlen(word4));
+  out_string = elt_int32_utf8_utf8_utf8_utf8(
+      4, true, word1, word1_len, true, word2, word2_len, true, word3, word3_len, true,
+      word4, word4_len, true, &out_valid, &out_len);
+  EXPECT_EQ("awesome", std::string(out_string, out_len));
+  EXPECT_TRUE(out_valid);
+
+  // Five-word overload: last position.
+  const char* word5 = "not-empty";
+  auto word5_len = static_cast<int32_t>(strlen(word5));
+  out_string = elt_int32_utf8_utf8_utf8_utf8_utf8(
+      5, true, word1, word1_len, true, word2, word2_len, true, word3, word3_len, true,
+      word4, word4_len, true, word5, word5_len, true, &out_valid, &out_len);
+  EXPECT_EQ("not-empty", std::string(out_string, out_len));
+  EXPECT_TRUE(out_valid);
+}
+
} // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index a35d3d3..1a35741 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -616,4 +616,29 @@ const char* concat_ws_utf8_utf8_utf8_utf8_utf8(int64_t context, const char* sepa
int32_t word4_len, const char* word5,
int32_t word5_len, int32_t* out_len);
+// elt over two words: returns word1 for pos == 1 and word2 for pos == 2, storing the
+// word's length in *out_len. Any other pos returns nullptr with *out_len = 0 and
+// *out_valid = false.
+const char* elt_int32_utf8_utf8(int32_t pos, bool pos_validity, const char* word1,
+                                int32_t word1_len, bool in1_validity, const char* word2,
+                                int32_t word2_len, bool in2_validity, bool* out_valid,
+                                int32_t* out_len);
+
+// elt over three words: same contract as the two-word overload, with pos in 1..3.
+const char* elt_int32_utf8_utf8_utf8(int32_t pos, bool pos_validity, const char* word1,
+                                     int32_t word1_len, bool word1_validity,
+                                     const char* word2, int32_t word2_len,
+                                     bool word2_validity, const char* word3,
+                                     int32_t word3_len, bool word3_validity,
+                                     bool* out_valid, int32_t* out_len);
+
+// elt over four words: same contract as the two-word overload, with pos in 1..4.
+const char* elt_int32_utf8_utf8_utf8_utf8(
+    int32_t pos, bool pos_validity, const char* word1, int32_t word1_len,
+    bool word1_validity, const char* word2, int32_t word2_len, bool word2_validity,
+    const char* word3, int32_t word3_len, bool word3_validity, const char* word4,
+    int32_t word4_len, bool word4_validity, bool* out_valid, int32_t* out_len);
+
+// elt over five words: same contract as the two-word overload, with pos in 1..5.
+const char* elt_int32_utf8_utf8_utf8_utf8_utf8(
+    int32_t pos, bool pos_validity, const char* word1, int32_t word1_len,
+    bool word1_validity, const char* word2, int32_t word2_len, bool word2_validity,
+    const char* word3, int32_t word3_len, bool word3_validity, const char* word4,
+    int32_t word4_len, bool word4_validity, const char* word5, int32_t word5_len,
+    bool word5_validity, bool* out_valid, int32_t* out_len);
+
} // extern "C"
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc
index 945b930..8046c4e 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -1683,4 +1683,86 @@ TEST_F(TestProjector, TestConcatWsFunction) {
EXPECT_ARROW_ARRAY_EQUALS(expected_out0, outputs.at(0));
}
+// Verifies that the two-string "elt" function, evaluated through a Projector, picks
+// for every row the string at the 1-based position given by the int32 column.
+TEST_F(TestProjector, TestEltFunction) {
+  auto field0 = field("f0", arrow::int32());
+  auto field1 = field("f1", arrow::utf8());
+  auto field2 = field("f2", arrow::utf8());
+
+  auto schema = arrow::schema({field0, field1, field2});
+
+  // output fields
+  auto out_field = field("out", arrow::utf8());
+
+  // Build expression
+  auto elt_expr =
+      TreeExprBuilder::MakeExpression("elt", {field0, field1, field2}, out_field);
+
+  // Every Make/Evaluate status below is checked with a fatal assertion: continuing
+  // after a failure would dereference a null projector or index an empty output
+  // vector.
+  std::shared_ptr<Projector> projector0;
+  auto status = Projector::Make(schema, {elt_expr}, TestConfiguration(), &projector0);
+  ASSERT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 4;
+
+  // Mixed positions: rows select either the first or the second string.
+  auto array0 = MakeArrowArrayInt32({1, 2, 2, 1}, {true, true, true, true});
+  auto array1 =
+      MakeArrowArrayUtf8({"john", "bigger", "goodbye", "hi"}, {true, true, true, true});
+  auto array2 =
+      MakeArrowArrayUtf8({"doe", "world", "world", "yeah"}, {true, true, true, true});
+  auto in_batch0 =
+      arrow::RecordBatch::Make(schema, num_records, {array0, array1, array2});
+
+  auto expected_out0 =
+      MakeArrowArrayUtf8({"john", "world", "world", "hi"}, {true, true, true, true});
+
+  arrow::ArrayVector outputs;
+
+  // Evaluate expression
+  status = projector0->Evaluate(*in_batch0, pool_, &outputs);
+  ASSERT_TRUE(status.ok()) << status.message();
+  EXPECT_ARROW_ARRAY_EQUALS(expected_out0, outputs.at(0));
+
+  // All rows select the first string.
+  std::shared_ptr<Projector> projector1;
+  status = Projector::Make(schema, {elt_expr}, TestConfiguration(), &projector1);
+  ASSERT_TRUE(status.ok()) << status.message();
+
+  auto array3 = MakeArrowArrayInt32({1, 1, 1, 1}, {true, true, true, true});
+  auto array4 =
+      MakeArrowArrayUtf8({"inconsequential", "insignificant", "welcome", "dependencies"},
+                         {true, true, true, true});
+  auto array5 =
+      MakeArrowArrayUtf8({"wrong", "tiny", "hi", "deps"}, {true, true, true, true});
+  auto in_batch1 =
+      arrow::RecordBatch::Make(schema, num_records, {array3, array4, array5});
+
+  auto expected_out1 =
+      MakeArrowArrayUtf8({"inconsequential", "insignificant", "welcome", "dependencies"},
+                         {true, true, true, true});
+
+  arrow::ArrayVector outputs1;
+
+  status = projector1->Evaluate(*in_batch1, pool_, &outputs1);
+  ASSERT_TRUE(status.ok()) << status.message();
+  EXPECT_ARROW_ARRAY_EQUALS(expected_out1, outputs1.at(0));
+
+  // All rows select the second string.
+  std::shared_ptr<Projector> projector2;
+  status = Projector::Make(schema, {elt_expr}, TestConfiguration(), &projector2);
+  ASSERT_TRUE(status.ok()) << status.message();
+
+  auto array6 = MakeArrowArrayInt32({2, 2, 2, 2}, {true, true, true, true});
+  auto array7 =
+      MakeArrowArrayUtf8({"inconsequential", "insignificant", "welcome", "dependencies"},
+                         {true, true, true, true});
+  auto array8 =
+      MakeArrowArrayUtf8({"wrong", "tiny", "hi", "deps"}, {true, true, true, true});
+  auto in_batch2 =
+      arrow::RecordBatch::Make(schema, num_records, {array6, array7, array8});
+
+  auto expected_out2 =
+      MakeArrowArrayUtf8({"wrong", "tiny", "hi", "deps"}, {true, true, true, true});
+
+  arrow::ArrayVector outputs2;
+  status = projector2->Evaluate(*in_batch2, pool_, &outputs2);
+  ASSERT_TRUE(status.ok()) << status.message();
+  EXPECT_ARROW_ARRAY_EQUALS(expected_out2, outputs2.at(0));
+}
+
} // namespace gandiva