You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2024/01/04 18:42:03 UTC
(impala) 02/03: IMPALA-12581: Fix issue of ILIKE and IREGEXP not working correctly with non-const pattern
This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 12f2026675f6de42d44310c57fe7037f3dc82f46
Author: Eyizoha <ey...@163.com>
AuthorDate: Wed Dec 13 11:17:42 2023 +0800
IMPALA-12581: Fix issue of ILIKE and IREGEXP not working correctly with non-const pattern
This patch fixes an issue where ILIKE and IREGEXP did not ignore case
when the pattern argument was not a constant.
For example, "SELECT 'ABC' ILIKE pattern FROM tbl" would return false
when the pattern stored in tbl is '%b%'.
Tests:
- Add TestNonConstPatternILike to test_exprs.py to verify
the fix.
Change-Id: I3d66680f5a7660e6a41859754c4230f276e66712
Reviewed-on: http://gerrit.cloudera.org:8080/20785
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/exprs/like-predicate.cc | 10 +++++++---
be/src/exprs/like-predicate.h | 3 +++
tests/query_test/test_exprs.py | 28 ++++++++++++++++++++++++++++
3 files changed, 38 insertions(+), 3 deletions(-)
diff --git a/be/src/exprs/like-predicate.cc b/be/src/exprs/like-predicate.cc
index 74271012b..993f656c4 100644
--- a/be/src/exprs/like-predicate.cc
+++ b/be/src/exprs/like-predicate.cc
@@ -64,8 +64,9 @@ void LikePredicate::LikePrepareInternal(FunctionContext* context,
FunctionContext::FunctionStateScope scope, bool case_sensitive) {
if (scope != FunctionContext::THREAD_LOCAL) return;
LikePredicateState* state = new LikePredicateState();
- state->function_ = LikeFn;
context->SetFunctionState(scope, state);
+ state->function_ = LikeFn;
+ state->case_sensitive_ = case_sensitive;
if (context->IsArgConstant(1)) {
StringVal pattern_val = *reinterpret_cast<StringVal*>(context->GetConstantArg(1));
if (pattern_val.is_null) return;
@@ -136,6 +137,7 @@ void LikePredicate::RegexPrepareInternal(FunctionContext* context,
LikePredicateState* state = new LikePredicateState();
context->SetFunctionState(scope, state);
state->function_ = RegexFn;
+ state->case_sensitive_ = case_sensitive;
if (context->IsArgConstant(1)) {
StringVal* pattern = reinterpret_cast<StringVal*>(context->GetConstantArg(1));
if (pattern->is_null) return;
@@ -330,9 +332,10 @@ BooleanVal LikePredicate::RegexMatch(FunctionContext* context,
const StringVal& operand_value, const StringVal& pattern_value,
bool is_like_pattern) {
if (operand_value.is_null || pattern_value.is_null) return BooleanVal::null();
+
+ LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
+ context->GetFunctionState(FunctionContext::THREAD_LOCAL));
if (context->IsArgConstant(1)) {
- LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
- context->GetFunctionState(FunctionContext::THREAD_LOCAL));
if (is_like_pattern) {
return RE2::FullMatch(re2::StringPiece(reinterpret_cast<const char*>(
operand_value.ptr), operand_value.len), *state->regex_.get());
@@ -343,6 +346,7 @@ BooleanVal LikePredicate::RegexMatch(FunctionContext* context,
} else {
string re_pattern;
RE2::Options opts;
+ opts.set_case_sensitive(state->case_sensitive_);
StringFunctions::SetRE2MemOpt(&opts);
if (is_like_pattern) {
ConvertLikePattern(context, pattern_value, &re_pattern);
diff --git a/be/src/exprs/like-predicate.h b/be/src/exprs/like-predicate.h
index 2a7cb7143..d4dc36c30 100644
--- a/be/src/exprs/like-predicate.h
+++ b/be/src/exprs/like-predicate.h
@@ -79,6 +79,9 @@ class LikePredicate: public Predicate {
/// Used for RLIKE and REGEXP predicates if the pattern is a constant argument.
boost::scoped_ptr<re2::RE2> regex_;
+ /// Whether matching is case-sensitive. Read when the pattern is not a constant argument.
+ bool case_sensitive_;
+
LikePredicateState() : escape_char_('\\') {
}
diff --git a/tests/query_test/test_exprs.py b/tests/query_test/test_exprs.py
index 569e584e8..9558fb172 100644
--- a/tests/query_test/test_exprs.py
+++ b/tests/query_test/test_exprs.py
@@ -290,3 +290,31 @@ class TestConstantFoldingNoTypeLoss(ImpalaTestSuite):
query = "select typeof(cast(1 as bigint) + cast(rand() as tinyint))"
result = self.execute_query_expect_success(self.client, query)
assert result.data == ["BIGINT"]
+
+
+class TestNonConstPatternILike(ImpalaTestSuite):
+ """Tests for ILIKE and IREGEXP with non-constant patterns for IMPALA-12581.
+ These tests verify that ILIKE and IREGEXP work correctly when the pattern
+ is not a constant string."""
+ @classmethod
+ def add_test_dimensions(cls):
+ super(TestNonConstPatternILike, cls).add_test_dimensions()
+
+ @classmethod
+ def get_workload(cls):
+ return 'functional-query'
+
+ def test_non_const_pattern_ilike(self, vector, unique_database):
+ tbl_name = '`{0}`.`ilike_test`'.format(unique_database)
+
+ self.execute_query_expect_success(self.client,
+ "CREATE TABLE {0} (pattern_str string)".format(tbl_name))
+ self.execute_query_expect_success(self.client,
+ "INSERT INTO TABLE {0} VALUES('%b%'), ('.*b.*')".format(tbl_name))
+
+ ilike_result = self.execute_query_expect_success(self.client,
+ "SELECT count(*) FROM {0} WHERE 'ABC' ILIKE pattern_str".format(tbl_name))
+ assert int(ilike_result.get_data()) == 1
+ iregexp_result = self.execute_query_expect_success(self.client,
+ "SELECT count(*) FROM {0} WHERE 'ABC' IREGEXP pattern_str".format(tbl_name))
+ assert int(iregexp_result.get_data()) == 1