You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pr...@apache.org on 2021/07/06 06:50:49 UTC
[arrow] branch master updated: ARROW-12567: [C++][Gandiva] Implement ILIKE SQL function
This is an automated email from the ASF dual-hosted git repository.
praveenbingo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 0072c67 ARROW-12567: [C++][Gandiva] Implement ILIKE SQL function
0072c67 is described below
commit 0072c677fbbc85832fa7a90ab49daf7c1f99a373
Author: frank400 <j....@gmail.com>
AuthorDate: Tue Jul 6 12:19:05 2021 +0530
ARROW-12567: [C++][Gandiva] Implement ILIKE SQL function
Closes #10179 from jvictorhuguenin/feature/implement-sql-ilike and squashes the following commits:
f160880d2 <frank400> Optimize holder constructor call
97e6e2d83 <frank400> Remove unnecessary Make method
c2363b10f <frank400> Disable TryOptimize for ilike
a48414931 <frank400> Fix checkstyle on cmake file
c6a8372cd <frank400> Delete unnecessary holder
4be6cc611 <frank400> Fix redefined function
b78085a14 <frank400> Fix miss include
2efd43e2b <frank400> Implement ilike function
Authored-by: frank400 <j....@gmail.com>
Signed-off-by: Praveen <pr...@dremio.com>
---
cpp/src/gandiva/function_holder_registry.h | 1 +
cpp/src/gandiva/function_registry_string.cc | 4 ++
cpp/src/gandiva/gdv_function_stubs.cc | 17 +++++++
cpp/src/gandiva/gdv_function_stubs.h | 3 ++
cpp/src/gandiva/like_holder.cc | 21 ++++++++
cpp/src/gandiva/like_holder.h | 6 +++
cpp/src/gandiva/like_holder_test.cc | 75 +++++++++++++++++++++++++++--
7 files changed, 123 insertions(+), 4 deletions(-)
diff --git a/cpp/src/gandiva/function_holder_registry.h b/cpp/src/gandiva/function_holder_registry.h
index e1c5630..225c732 100644
--- a/cpp/src/gandiva/function_holder_registry.h
+++ b/cpp/src/gandiva/function_holder_registry.h
@@ -62,6 +62,7 @@ class FunctionHolderRegistry {
static map_type& makers() {
static map_type maker_map = {
{"like", LAMBDA_MAKER(LikeHolder)},
+ {"ilike", LAMBDA_MAKER(LikeHolder)},
{"to_date", LAMBDA_MAKER(ToDateHolder)},
{"random", LAMBDA_MAKER(RandomGeneratorHolder)},
{"rand", LAMBDA_MAKER(RandomGeneratorHolder)},
diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index 90e2231..7491e44 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -131,6 +131,10 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
kResultNullIfNull, "gdv_fn_like_utf8_utf8_utf8",
NativeFunction::kNeedsFunctionHolder),
+ NativeFunction("ilike", {}, DataTypeVector{utf8(), utf8()}, boolean(),
+ kResultNullIfNull, "gdv_fn_ilike_utf8_utf8",
+ NativeFunction::kNeedsFunctionHolder),
+
NativeFunction("ltrim", {}, DataTypeVector{utf8(), utf8()}, utf8(),
kResultNullIfNull, "ltrim_utf8_utf8", NativeFunction::kNeedsContext),
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc
index 38c31a8..3c27804 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -52,6 +52,12 @@ bool gdv_fn_like_utf8_utf8_utf8(int64_t ptr, const char* data, int data_len,
return (*holder)(std::string(data, data_len));
}
+bool gdv_fn_ilike_utf8_utf8(int64_t ptr, const char* data, int data_len,  // ilike stub: ptr carries the address of a LikeHolder
+ const char* pattern, int pattern_len) {  // pattern/pattern_len are not read in this body
+ gandiva::LikeHolder* holder = reinterpret_cast<gandiva::LikeHolder*>(ptr);
+ return (*holder)(std::string(data, data_len));  // the holder is invoked on a std::string copy of the input bytes
+}
+
double gdv_fn_random(int64_t ptr) {
gandiva::RandomGeneratorHolder* holder =
reinterpret_cast<gandiva::RandomGeneratorHolder*>(ptr);
@@ -807,6 +813,17 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
types->i1_type() /*return_type*/, args,
reinterpret_cast<void*>(gdv_fn_like_utf8_utf8_utf8));
+ // gdv_fn_ilike_utf8_utf8
+ args = {types->i64_type(), // int64_t ptr
+ types->i8_ptr_type(), // const char* data
+ types->i32_type(), // int data_len
+ types->i8_ptr_type(), // const char* pattern
+ types->i32_type()}; // int pattern_len
+
+ engine->AddGlobalMappingForFunc("gdv_fn_ilike_utf8_utf8",
+ types->i1_type() /*return_type*/, args,
+ reinterpret_cast<void*>(gdv_fn_ilike_utf8_utf8));
+
// gdv_fn_to_date_utf8_utf8
args = {types->i64_type(), // int64_t execution_context
types->i64_type(), // int64_t holder_ptr
diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h
index ee22c3f..043e940 100644
--- a/cpp/src/gandiva/gdv_function_stubs.h
+++ b/cpp/src/gandiva/gdv_function_stubs.h
@@ -50,6 +50,9 @@ bool gdv_fn_like_utf8_utf8_utf8(int64_t ptr, const char* data, int data_len,
const char* pattern, int pattern_len,
const char* escape_char, int escape_char_len);
+bool gdv_fn_ilike_utf8_utf8(int64_t ptr, const char* data, int data_len,
+ const char* pattern, int pattern_len);
+
int64_t gdv_fn_to_date_utf8_utf8_int32(int64_t context, int64_t ptr, const char* data,
int data_len, bool in1_validity,
const char* pattern, int pattern_len,
diff --git a/cpp/src/gandiva/like_holder.cc b/cpp/src/gandiva/like_holder.cc
index 5a3510e..af9ac67 100644
--- a/cpp/src/gandiva/like_holder.cc
+++ b/cpp/src/gandiva/like_holder.cc
@@ -80,6 +80,13 @@ Status LikeHolder::Make(const FunctionNode& node, std::shared_ptr<LikeHolder>* h
!IsArrowStringLiteral(literal_type),
Status::Invalid(
"'like' function requires a string literal as the second parameter"));
+
+ RE2::Options regex_op;
+ if (node.descriptor()->name() == "ilike") {
+ regex_op.set_case_sensitive(false); // set case-insensitive for ilike function.
+
+ return Make(arrow::util::get<std::string>(literal->holder()), holder, regex_op);
+ }
if (node.children().size() == 2) {
return Make(arrow::util::get<std::string>(literal->holder()), holder);
} else {
@@ -132,4 +139,18 @@ Status LikeHolder::Make(const std::string& sql_pattern, const std::string& escap
return Status::OK();
}
+Status LikeHolder::Make(const std::string& sql_pattern,  // builds a LikeHolder whose regex uses the caller-supplied RE2 options
+ std::shared_ptr<LikeHolder>* holder, RE2::Options regex_op) {  // holder is the out-parameter; regex_op is taken by value
+ std::string pcre_pattern;
+ ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, pcre_pattern));  // fills pcre_pattern from the SQL pattern
+
+ std::shared_ptr<LikeHolder> lholder;
+ lholder = std::shared_ptr<LikeHolder>(new LikeHolder(pcre_pattern, regex_op));  // the two-argument constructor forwards regex_op to RE2
+
+ ARROW_RETURN_IF(!lholder->regex_.ok(),  // a pattern RE2 reports as not ok yields Status::Invalid
+ Status::Invalid("Building RE2 pattern '", pcre_pattern, "' failed"));
+
+ *holder = lholder;  // reached only after the regex_.ok() check above passed
+ return Status::OK();
+}
} // namespace gandiva
diff --git a/cpp/src/gandiva/like_holder.h b/cpp/src/gandiva/like_holder.h
index c7982e9..73e5801 100644
--- a/cpp/src/gandiva/like_holder.h
+++ b/cpp/src/gandiva/like_holder.h
@@ -42,6 +42,9 @@ class GANDIVA_EXPORT LikeHolder : public FunctionHolder {
static Status Make(const std::string& sql_pattern, const std::string& escape_char,
std::shared_ptr<LikeHolder>* holder);
+ static Status Make(const std::string& sql_pattern, std::shared_ptr<LikeHolder>* holder,  // overload that additionally takes RE2 options
+ RE2::Options regex_op);  // e.g. options with set_case_sensitive(false) for ilike
+
// Try and optimise a function node with a "like" pattern.
static const FunctionNode TryOptimize(const FunctionNode& node);
@@ -51,6 +54,9 @@ class GANDIVA_EXPORT LikeHolder : public FunctionHolder {
private:
explicit LikeHolder(const std::string& pattern) : pattern_(pattern), regex_(pattern) {}
+ LikeHolder(const std::string& pattern, RE2::Options regex_op)  // like the one-argument constructor, but passes regex_op to RE2
+ : pattern_(pattern), regex_(pattern, regex_op) {}
+
std::string pattern_; // posix pattern string, to help debugging
RE2 regex_; // compiled regex for the pattern
diff --git a/cpp/src/gandiva/like_holder_test.cc b/cpp/src/gandiva/like_holder_test.cc
index 18e585f..a52533a 100644
--- a/cpp/src/gandiva/like_holder_test.cc
+++ b/cpp/src/gandiva/like_holder_test.cc
@@ -27,6 +27,7 @@ namespace gandiva {
class TestLikeHolder : public ::testing::Test {
public:
+ RE2::Options regex_op;
FunctionNode BuildLike(std::string pattern) {
auto field = std::make_shared<FieldNode>(arrow::field("in", arrow::utf8()));
auto pattern_node =
@@ -48,7 +49,7 @@ class TestLikeHolder : public ::testing::Test {
TEST_F(TestLikeHolder, TestMatchAny) {
std::shared_ptr<LikeHolder> like_holder;
- auto status = LikeHolder::Make("ab%", &like_holder);
+ auto status = LikeHolder::Make("ab%", &like_holder, regex_op);
EXPECT_EQ(status.ok(), true) << status.message();
auto& like = *like_holder;
@@ -63,7 +64,7 @@ TEST_F(TestLikeHolder, TestMatchAny) {
TEST_F(TestLikeHolder, TestMatchOne) {
std::shared_ptr<LikeHolder> like_holder;
- auto status = LikeHolder::Make("ab_", &like_holder);
+ auto status = LikeHolder::Make("ab_", &like_holder, regex_op);
EXPECT_EQ(status.ok(), true) << status.message();
auto& like = *like_holder;
@@ -78,7 +79,7 @@ TEST_F(TestLikeHolder, TestMatchOne) {
TEST_F(TestLikeHolder, TestPcreSpecial) {
std::shared_ptr<LikeHolder> like_holder;
- auto status = LikeHolder::Make(".*ab_", &like_holder);
+ auto status = LikeHolder::Make(".*ab_", &like_holder, regex_op);
EXPECT_EQ(status.ok(), true) << status.message();
auto& like = *like_holder;
@@ -97,7 +98,7 @@ TEST_F(TestLikeHolder, TestRegexEscape) {
TEST_F(TestLikeHolder, TestDot) {
std::shared_ptr<LikeHolder> like_holder;
- auto status = LikeHolder::Make("abc.", &like_holder);
+ auto status = LikeHolder::Make("abc.", &like_holder, regex_op);
EXPECT_EQ(status.ok(), true) << status.message();
auto& like = *like_holder;
@@ -211,4 +212,70 @@ TEST_F(TestLikeHolder, TestMultipleEscapeChar) {
auto status = LikeHolder::Make("ab\\_", "\\\\", &like_holder);
EXPECT_EQ(status.ok(), false) << status.message();
}
+class TestILikeHolder : public ::testing::Test {  // fixture shared by the ilike holder tests
+ public:
+ RE2::Options regex_op;  // default-constructed; each test calls set_case_sensitive(false) before use
+ FunctionNode BuildILike(std::string pattern) {  // builds the node ilike(in, <pattern literal>) with a boolean result
+ auto field = std::make_shared<FieldNode>(arrow::field("in", arrow::utf8()));  // utf8 input field named "in"
+ auto pattern_node =
+ std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(pattern), false);  // utf8 literal holding the pattern
+ return FunctionNode("ilike", {field, pattern_node}, arrow::boolean());
+ }
+};
+
+TEST_F(TestILikeHolder, TestMatchAny) {  // pattern "ab%" with case-insensitive options
+ std::shared_ptr<LikeHolder> like_holder;
+
+ regex_op.set_case_sensitive(false);  // the option under test
+ auto status = LikeHolder::Make("ab%", &like_holder, regex_op);
+ EXPECT_EQ(status.ok(), true) << status.message();
+
+ auto& like = *like_holder;
+ EXPECT_TRUE(like("ab"));
+ EXPECT_TRUE(like("aBc"));  // mixed case is expected to match
+ EXPECT_TRUE(like("ABCD"));  // all upper case is expected to match
+
+ EXPECT_FALSE(like("a"));
+ EXPECT_FALSE(like("cab"));
+}
+
+TEST_F(TestILikeHolder, TestMatchOne) {  // pattern "Ab_" with case-insensitive options
+ std::shared_ptr<LikeHolder> like_holder;
+
+ regex_op.set_case_sensitive(false);  // the option under test
+ auto status = LikeHolder::Make("Ab_", &like_holder, regex_op);
+ EXPECT_EQ(status.ok(), true) << status.message();
+
+ auto& like = *like_holder;
+ EXPECT_TRUE(like("abc"));  // case differs from the pattern, still expected to match
+ EXPECT_TRUE(like("aBd"));
+
+ EXPECT_FALSE(like("A"));
+ EXPECT_FALSE(like("Abcd"));  // four characters against a three-character pattern
+ EXPECT_FALSE(like("DaBc"));
+}
+
+TEST_F(TestILikeHolder, TestPcreSpecial) {  // pattern ".*aB_" with case-insensitive options
+ std::shared_ptr<LikeHolder> like_holder;
+
+ regex_op.set_case_sensitive(false);  // the option under test
+ auto status = LikeHolder::Make(".*aB_", &like_holder, regex_op);
+ EXPECT_EQ(status.ok(), true) << status.message();
+
+ auto& like = *like_holder;
+ EXPECT_TRUE(like(".*Abc")); // . and * aren't special in sql regex
+ EXPECT_FALSE(like("xxAbc"));  // ".*" in the pattern is expected not to act as a wildcard
+}
+
+TEST_F(TestILikeHolder, TestDot) {  // pattern "aBc." with case-insensitive options
+ std::shared_ptr<LikeHolder> like_holder;
+
+ regex_op.set_case_sensitive(false);  // the option under test
+ auto status = LikeHolder::Make("aBc.", &like_holder, regex_op);
+ EXPECT_EQ(status.ok(), true) << status.message();
+
+ auto& like = *like_holder;
+ EXPECT_FALSE(like("abcd"));  // the trailing '.' is expected not to match an arbitrary character
+}
+
} // namespace gandiva