You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2024/01/04 18:42:03 UTC

(impala) 02/03: IMPALA-12581: Fix issue of ILIKE and IREGEXP not working correctly with non-const pattern

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 12f2026675f6de42d44310c57fe7037f3dc82f46
Author: Eyizoha <ey...@163.com>
AuthorDate: Wed Dec 13 11:17:42 2023 +0800

    IMPALA-12581: Fix issue of ILIKE and IREGEXP not working correctly with non-const pattern
    
    This patch fixes ILIKE and IREGEXP matching case-sensitively when the
    pattern is not a constant: the regex built for a non-constant pattern
    did not apply the case-insensitive option.
    For example, "SELECT 'ABC' ILIKE pattern FROM tbl" would return false
    when the pattern in tbl is '%b%'.
    
    Tests:
     - Add TestNonConstPatternILike to test_exprs.py to verify the fix.
    
    Change-Id: I3d66680f5a7660e6a41859754c4230f276e66712
    Reviewed-on: http://gerrit.cloudera.org:8080/20785
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exprs/like-predicate.cc | 10 +++++++---
 be/src/exprs/like-predicate.h  |  3 +++
 tests/query_test/test_exprs.py | 28 ++++++++++++++++++++++++++++
 3 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/be/src/exprs/like-predicate.cc b/be/src/exprs/like-predicate.cc
index 74271012b..993f656c4 100644
--- a/be/src/exprs/like-predicate.cc
+++ b/be/src/exprs/like-predicate.cc
@@ -64,8 +64,9 @@ void LikePredicate::LikePrepareInternal(FunctionContext* context,
     FunctionContext::FunctionStateScope scope, bool case_sensitive) {
   if (scope != FunctionContext::THREAD_LOCAL) return;
   LikePredicateState* state = new LikePredicateState();
-  state->function_ = LikeFn;
   context->SetFunctionState(scope, state);
+  state->function_ = LikeFn;
+  state->case_sensitive_ = case_sensitive;
   if (context->IsArgConstant(1)) {
     StringVal pattern_val = *reinterpret_cast<StringVal*>(context->GetConstantArg(1));
     if (pattern_val.is_null) return;
@@ -136,6 +137,7 @@ void LikePredicate::RegexPrepareInternal(FunctionContext* context,
   LikePredicateState* state = new LikePredicateState();
   context->SetFunctionState(scope, state);
   state->function_ = RegexFn;
+  state->case_sensitive_ = case_sensitive;
   if (context->IsArgConstant(1)) {
     StringVal* pattern = reinterpret_cast<StringVal*>(context->GetConstantArg(1));
     if (pattern->is_null) return;
@@ -330,9 +332,10 @@ BooleanVal LikePredicate::RegexMatch(FunctionContext* context,
     const StringVal& operand_value, const StringVal& pattern_value,
     bool is_like_pattern) {
   if (operand_value.is_null || pattern_value.is_null) return BooleanVal::null();
+
+  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
+      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
   if (context->IsArgConstant(1)) {
-    LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-        context->GetFunctionState(FunctionContext::THREAD_LOCAL));
     if (is_like_pattern) {
       return RE2::FullMatch(re2::StringPiece(reinterpret_cast<const char*>(
           operand_value.ptr), operand_value.len), *state->regex_.get());
@@ -343,6 +346,7 @@ BooleanVal LikePredicate::RegexMatch(FunctionContext* context,
   } else {
     string re_pattern;
     RE2::Options opts;
+    opts.set_case_sensitive(state->case_sensitive_);
     StringFunctions::SetRE2MemOpt(&opts);
     if (is_like_pattern) {
       ConvertLikePattern(context, pattern_value, &re_pattern);
diff --git a/be/src/exprs/like-predicate.h b/be/src/exprs/like-predicate.h
index 2a7cb7143..d4dc36c30 100644
--- a/be/src/exprs/like-predicate.h
+++ b/be/src/exprs/like-predicate.h
@@ -79,6 +79,9 @@ class LikePredicate: public Predicate {
     /// Used for RLIKE and REGEXP predicates if the pattern is a constant argument.
     boost::scoped_ptr<re2::RE2> regex_;
 
+    /// Used for ILIKE and IREGEXP predicates if the pattern is not a constant argument.
+    bool case_sensitive_;
+
     LikePredicateState() : escape_char_('\\') {
     }
 
diff --git a/tests/query_test/test_exprs.py b/tests/query_test/test_exprs.py
index 569e584e8..9558fb172 100644
--- a/tests/query_test/test_exprs.py
+++ b/tests/query_test/test_exprs.py
@@ -290,3 +290,31 @@ class TestConstantFoldingNoTypeLoss(ImpalaTestSuite):
     query = "select typeof(cast(1 as bigint) + cast(rand() as tinyint))"
     result = self.execute_query_expect_success(self.client, query)
     assert result.data == ["BIGINT"]
+
+
+class TestNonConstPatternILike(ImpalaTestSuite):
+  """Tests for ILIKE and IREGEXP with non-constant patterns for IMPALA-12581.
+     These tests verify that ILIKE and IREGEXP work correctly when the pattern
+     is not a constant string."""
+  @classmethod
+  def add_test_dimensions(cls):
+    super(TestNonConstPatternILike, cls).add_test_dimensions()
+
+  @classmethod
+  def get_workload(cls):
+    return 'functional-query'
+
+  def test_non_const_pattern_ilike(self, vector, unique_database):
+    tbl_name = '`{0}`.`ilike_test`'.format(unique_database)
+
+    self.execute_query_expect_success(self.client,
+        "CREATE TABLE {0} (pattern_str string)".format(tbl_name))
+    self.execute_query_expect_success(self.client,
+        "INSERT INTO TABLE {0} VALUES('%b%'), ('.*b.*')".format(tbl_name))
+
+    ilike_result = self.execute_query_expect_success(self.client,
+        "SELECT count(*) FROM {0} WHERE 'ABC' ILIKE pattern_str".format(tbl_name))
+    assert int(ilike_result.get_data()) == 1
+    iregexp_result = self.execute_query_expect_success(self.client,
+        "SELECT count(*) FROM {0} WHERE 'ABC' IREGEXP pattern_str".format(tbl_name))
+    assert int(iregexp_result.get_data()) == 1