You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ra...@apache.org on 2021/11/23 08:38:51 UTC

[arrow] branch master updated: ARROW-14011: [C++][Gandiva] Add elt hive function to gandiva

This is an automated email from the ASF dual-hosted git repository.

ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 5f6c92a  ARROW-14011: [C++][Gandiva] Add elt hive function to gandiva
5f6c92a is described below

commit 5f6c92a8f40fb3c26ed35e0e4cb6433e1237ff57
Author: Augusto Silva <au...@hotmail.com>
AuthorDate: Tue Nov 23 14:07:10 2021 +0530

    ARROW-14011: [C++][Gandiva] Add elt hive function to gandiva
    
    Returns the string argument at index N (1-based). For example, elt(2, 'hello', 'world') returns 'world'. Returns NULL if N is less than 1 or greater than the number of string arguments.
    
    Closes #11166 from augustoasilva/feature/add-elt-function
    
    Authored-by: Augusto Silva <au...@hotmail.com>
    Signed-off-by: Pindikura Ravindra <ra...@dremio.com>
---
 cpp/src/gandiva/function_registry_string.cc    |  13 +++
 cpp/src/gandiva/gdv_function_stubs_test.cc     |   2 +-
 cpp/src/gandiva/precompiled/string_ops.cc      | 121 +++++++++++++++++++++++++
 cpp/src/gandiva/precompiled/string_ops_test.cc |  67 ++++++++++++++
 cpp/src/gandiva/precompiled/types.h            |  25 +++++
 cpp/src/gandiva/tests/projector_test.cc        |  82 +++++++++++++++++
 6 files changed, 309 insertions(+), 1 deletion(-)

diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index 6fc022d..d0b6d96 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -106,6 +106,19 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
                      utf8(), kResultNullIfNull, "concat_ws_utf8_utf8_utf8_utf8_utf8",
                      NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
 
+      NativeFunction("elt", {}, DataTypeVector{int32(), utf8(), utf8()}, utf8(),
+                     kResultNullInternal, "elt_int32_utf8_utf8"),
+
+      NativeFunction("elt", {}, DataTypeVector{int32(), utf8(), utf8(), utf8()}, utf8(),
+                     kResultNullInternal, "elt_int32_utf8_utf8_utf8"),
+
+      NativeFunction("elt", {}, DataTypeVector{int32(), utf8(), utf8(), utf8(), utf8()},
+                     utf8(), kResultNullInternal, "elt_int32_utf8_utf8_utf8_utf8"),
+
+      NativeFunction("elt", {},
+                     DataTypeVector{int32(), utf8(), utf8(), utf8(), utf8(), utf8()},
+                     utf8(), kResultNullInternal, "elt_int32_utf8_utf8_utf8_utf8_utf8"),
+
       NativeFunction("castBIT", {"castBOOLEAN"}, DataTypeVector{utf8()}, boolean(),
                      kResultNullIfNull, "castBIT_utf8", NativeFunction::kNeedsContext),
 
diff --git a/cpp/src/gandiva/gdv_function_stubs_test.cc b/cpp/src/gandiva/gdv_function_stubs_test.cc
index f7c2198..da99cf8 100644
--- a/cpp/src/gandiva/gdv_function_stubs_test.cc
+++ b/cpp/src/gandiva/gdv_function_stubs_test.cc
@@ -19,6 +19,7 @@
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
+#include "arrow/util/logging.h"
 
 #include "gandiva/execution_context.h"
 
@@ -765,5 +766,4 @@ TEST(TestGdvFnStubs, TestCastVarbinaryFloat8) {
               ::testing::HasSubstr("Failed to cast the string e to double"));
   ctx.Reset();
 }
-
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index 7790860..06f4635 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -2344,4 +2344,125 @@ const char* concat_ws_utf8_utf8_utf8_utf8_utf8(int64_t context, const char* sepa
 
   return out;
 }
+
+FORCE_INLINE
+const char* elt_int32_utf8_utf8(int32_t pos, bool pos_validity, const char* word1,
+                                int32_t word1_len, bool in1_validity, const char* word2,
+                                int32_t word2_len, bool in2_validity, bool* out_valid,
+                                int32_t* out_len) {
+  *out_valid = true;
+
+  switch (pos) {
+    case 1:
+      *out_len = word1_len;
+      return word1;
+      break;
+    case 2:
+      *out_len = word2_len;
+      return word2;
+      break;
+    default:
+      *out_len = 0;
+      *out_valid = false;
+      return nullptr;
+  }
+}
+
+FORCE_INLINE
+const char* elt_int32_utf8_utf8_utf8(int32_t pos, bool pos_validity, const char* word1,
+                                     int32_t word1_len, bool word1_validity,
+                                     const char* word2, int32_t word2_len,
+                                     bool word2_validity, const char* word3,
+                                     int32_t word3_len, bool word3_validity,
+                                     bool* out_valid, int32_t* out_len) {
+  *out_valid = true;
+
+  switch (pos) {
+    case 1:
+      *out_len = word1_len;
+      return word1;
+      break;
+    case 2:
+      *out_len = word2_len;
+      return word2;
+      break;
+    case 3:
+      *out_len = word3_len;
+      return word3;
+      break;
+    default:
+      *out_len = 0;
+      *out_valid = false;
+      return nullptr;
+  }
+}
+
+FORCE_INLINE
+const char* elt_int32_utf8_utf8_utf8_utf8(
+    int32_t pos, bool pos_validity, const char* word1, int32_t word1_len,
+    bool word1_validity, const char* word2, int32_t word2_len, bool word2_validity,
+    const char* word3, int32_t word3_len, bool word3_validity, const char* word4,
+    int32_t word4_len, bool word4_validity, bool* out_valid, int32_t* out_len) {
+  *out_valid = true;
+
+  switch (pos) {
+    case 1:
+      *out_len = word1_len;
+      return word1;
+      break;
+    case 2:
+      *out_len = word2_len;
+      return word2;
+      break;
+    case 3:
+      *out_len = word3_len;
+      return word3;
+      break;
+    case 4:
+      *out_len = word4_len;
+      return word4;
+      break;
+    default:
+      *out_len = 0;
+      *out_valid = false;
+      return nullptr;
+  }
+}
+
+FORCE_INLINE
+const char* elt_int32_utf8_utf8_utf8_utf8_utf8(
+    int32_t pos, bool pos_validity, const char* word1, int32_t word1_len,
+    bool word1_validity, const char* word2, int32_t word2_len, bool word2_validity,
+    const char* word3, int32_t word3_len, bool word3_validity, const char* word4,
+    int32_t word4_len, bool word4_validity, const char* word5, int32_t word5_len,
+    bool word5_validity, bool* out_valid, int32_t* out_len) {
+  *out_valid = true;
+
+  switch (pos) {
+    case 1:
+      *out_len = word1_len;
+      return word1;
+      break;
+    case 2:
+      *out_len = word2_len;
+      return word2;
+      break;
+    case 3:
+      *out_len = word3_len;
+      return word3;
+      break;
+    case 4:
+      *out_len = word4_len;
+      return word4;
+      break;
+    case 5:
+      *out_len = word5_len;
+      return word5;
+      break;
+    default:
+      *out_len = 0;
+      *out_valid = false;
+      return nullptr;
+  }
+}
 }  // extern "C"
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index 15ab5b6..e4e4a7d 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -1805,4 +1805,71 @@ TEST(TestStringOps, TestConcatWs) {
   EXPECT_EQ(std::string(out, out_len), "");
 }
 
+TEST(TestStringOps, TestEltFunction) {
+  //  gandiva::ExecutionContext ctx;
+  //  int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+  gdv_int32 out_len = 0;
+  bool out_vality = false;
+
+  const char* word1 = "john";
+  auto word1_len = static_cast<int32_t>(strlen(word1));
+  const char* word2 = "";
+  auto word2_len = static_cast<int32_t>(strlen(word2));
+  auto out_string = elt_int32_utf8_utf8(1, true, word1, word1_len, true, word2, word2_len,
+                                        true, &out_vality, &out_len);
+  EXPECT_EQ("john", std::string(out_string, out_len));
+  EXPECT_EQ(out_vality, true);
+
+  word1 = "hello";
+  word1_len = static_cast<int32_t>(strlen(word1));
+  word2 = "world";
+  word2_len = static_cast<int32_t>(strlen(word2));
+  out_string = elt_int32_utf8_utf8(2, true, word1, word1_len, true, word2, word2_len,
+                                   true, &out_vality, &out_len);
+  EXPECT_EQ("world", std::string(out_string, out_len));
+  EXPECT_EQ(out_vality, true);
+
+  word1 = "goodbye";
+  word1_len = static_cast<int32_t>(strlen(word1));
+  word2 = "world";
+  word2_len = static_cast<int32_t>(strlen(word2));
+  out_string = elt_int32_utf8_utf8(4, true, word1, word1_len, true, word2, word2_len,
+                                   true, &out_vality, &out_len);
+  EXPECT_EQ("", std::string(out_string, out_len));
+  EXPECT_EQ(out_vality, false);
+
+  word1 = "hi";
+  word1_len = static_cast<int32_t>(strlen(word1));
+  word2 = "yeah";
+  word2_len = static_cast<int32_t>(strlen(word2));
+  out_string = elt_int32_utf8_utf8(0, true, word1, word1_len, true, word2, word2_len,
+                                   true, &out_vality, &out_len);
+  EXPECT_EQ("", std::string(out_string, out_len));
+  EXPECT_EQ(out_vality, false);
+
+  const char* word3 = "wow";
+  auto word3_len = static_cast<int32_t>(strlen(word3));
+  out_string =
+      elt_int32_utf8_utf8_utf8(3, true, word1, word1_len, true, word2, word2_len, true,
+                               word3, word3_len, true, &out_vality, &out_len);
+  EXPECT_EQ("wow", std::string(out_string, out_len));
+  EXPECT_EQ(out_vality, true);
+
+  const char* word4 = "awesome";
+  auto word4_len = static_cast<int32_t>(strlen(word4));
+  out_string = elt_int32_utf8_utf8_utf8_utf8(
+      4, true, word1, word1_len, true, word2, word2_len, true, word3, word3_len, true,
+      word4, word4_len, true, &out_vality, &out_len);
+  EXPECT_EQ("awesome", std::string(out_string, out_len));
+  EXPECT_EQ(out_vality, true);
+
+  const char* word5 = "not-empty";
+  auto word5_len = static_cast<int32_t>(strlen(word5));
+  out_string = elt_int32_utf8_utf8_utf8_utf8_utf8(
+      5, true, word1, word1_len, true, word2, word2_len, true, word3, word3_len, true,
+      word4, word4_len, true, word5, word5_len, true, &out_vality, &out_len);
+  EXPECT_EQ("not-empty", std::string(out_string, out_len));
+  EXPECT_EQ(out_vality, true);
+}
+
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index a35d3d3..1a35741 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -616,4 +616,29 @@ const char* concat_ws_utf8_utf8_utf8_utf8_utf8(int64_t context, const char* sepa
                                                int32_t word4_len, const char* word5,
                                                int32_t word5_len, int32_t* out_len);
 
+const char* elt_int32_utf8_utf8(int32_t pos, bool pos_validity, const char* word1,
+                                int32_t word1_len, bool in1_validity, const char* word2,
+                                int32_t word2_len, bool in2_validity, bool* out_valid,
+                                int32_t* out_len);
+
+const char* elt_int32_utf8_utf8_utf8(int32_t pos, bool pos_validity, const char* word1,
+                                     int32_t word1_len, bool word1_validity,
+                                     const char* word2, int32_t word2_len,
+                                     bool word2_validity, const char* word3,
+                                     int32_t word3_len, bool word3_validity,
+                                     bool* out_valid, int32_t* out_len);
+
+const char* elt_int32_utf8_utf8_utf8_utf8(
+    int32_t pos, bool pos_validity, const char* word1, int32_t word1_len,
+    bool word1_validity, const char* word2, int32_t word2_len, bool word2_validity,
+    const char* word3, int32_t word3_len, bool word3_validity, const char* word4,
+    int32_t word4_len, bool word4_validity, bool* out_valid, int32_t* out_len);
+
+const char* elt_int32_utf8_utf8_utf8_utf8_utf8(
+    int32_t pos, bool pos_validity, const char* word1, int32_t word1_len,
+    bool word1_validity, const char* word2, int32_t word2_len, bool word2_validity,
+    const char* word3, int32_t word3_len, bool word3_validity, const char* word4,
+    int32_t word4_len, bool word4_validity, const char* word5, int32_t word5_len,
+    bool word5_validity, bool* out_valid, int32_t* out_len);
+
 }  // extern "C"
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc
index 945b930..8046c4e 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -1683,4 +1683,86 @@ TEST_F(TestProjector, TestConcatWsFunction) {
   EXPECT_ARROW_ARRAY_EQUALS(expected_out0, outputs.at(0));
 }
 
+TEST_F(TestProjector, TestEltFunction) {
+  auto field0 = field("f0", arrow::int32());
+  auto field1 = field("f1", arrow::utf8());
+  auto field2 = field("f2", arrow::utf8());
+
+  auto schema = arrow::schema({field0, field1, field2});
+
+  // output fields
+  auto out_field = field("out", arrow::utf8());
+
+  // Build expression
+  auto elt_expr =
+      TreeExprBuilder::MakeExpression("elt", {field0, field1, field2}, out_field);
+
+  std::shared_ptr<Projector> projector0;
+  auto status = Projector::Make(schema, {elt_expr}, TestConfiguration(), &projector0);
+  EXPECT_TRUE(status.ok());
+
+  // Create a row-batch with some sample data
+  int num_records = 4;
+
+  auto array0 = MakeArrowArrayInt32({1, 2, 2, 1}, {true, true, true, true});
+  auto array1 =
+      MakeArrowArrayUtf8({"john", "bigger", "goodbye", "hi"}, {true, true, true, true});
+  auto array2 =
+      MakeArrowArrayUtf8({"doe", "world", "world", "yeah"}, {true, true, true, true});
+  auto in_batch0 =
+      arrow::RecordBatch::Make(schema, num_records, {array0, array1, array2});
+
+  auto expected_out0 =
+      MakeArrowArrayUtf8({"john", "world", "world", "hi"}, {true, true, true, true});
+
+  arrow::ArrayVector outputs;
+
+  // Evaluate expression
+  status = projector0->Evaluate(*in_batch0, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+  EXPECT_ARROW_ARRAY_EQUALS(expected_out0, outputs.at(0));
+
+  std::shared_ptr<Projector> projector1;
+  status = Projector::Make(schema, {elt_expr}, TestConfiguration(), &projector1);
+
+  auto array3 = MakeArrowArrayInt32({1, 1, 1, 1}, {true, true, true, true});
+  auto array4 =
+      MakeArrowArrayUtf8({"inconsequential", "insignificant", "welcome", "dependencies"},
+                         {true, true, true, true});
+  auto array5 =
+      MakeArrowArrayUtf8({"wrong", "tiny", "hi", "deps"}, {true, true, true, true});
+  auto in_batch1 =
+      arrow::RecordBatch::Make(schema, num_records, {array3, array4, array5});
+
+  auto expected_out1 =
+      MakeArrowArrayUtf8({"inconsequential", "insignificant", "welcome", "dependencies"},
+                         {true, true, true, true});
+
+  arrow::ArrayVector outputs1;
+
+  status = projector1->Evaluate(*in_batch1, pool_, &outputs1);
+  EXPECT_TRUE(status.ok());
+  EXPECT_ARROW_ARRAY_EQUALS(expected_out1, outputs1.at(0));
+
+  std::shared_ptr<Projector> projector2;
+  status = Projector::Make(schema, {elt_expr}, TestConfiguration(), &projector2);
+
+  auto array6 = MakeArrowArrayInt32({2, 2, 2, 2}, {true, true, true, true});
+  auto array7 =
+      MakeArrowArrayUtf8({"inconsequential", "insignificant", "welcome", "dependencies"},
+                         {true, true, true, true});
+  auto array8 =
+      MakeArrowArrayUtf8({"wrong", "tiny", "hi", "deps"}, {true, true, true, true});
+  auto in_batch2 =
+      arrow::RecordBatch::Make(schema, num_records, {array6, array7, array8});
+
+  auto expected_out2 =
+      MakeArrowArrayUtf8({"wrong", "tiny", "hi", "deps"}, {true, true, true, true});
+
+  arrow::ArrayVector outputs2;
+  status = projector2->Evaluate(*in_batch2, pool_, &outputs2);
+  EXPECT_TRUE(status.ok());
+  EXPECT_ARROW_ARRAY_EQUALS(expected_out2, outputs2.at(0));
+}
+
 }  // namespace gandiva