You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2023/06/06 23:27:53 UTC

[doris] branch master updated: [fix](regex) String with Chinese characters matching failed (#20493)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 49f8f20fb1 [fix](regex) String with Chinese characters matching failed (#20493)
49f8f20fb1 is described below

commit 49f8f20fb1818d1906e9517794efd2632116a8da
Author: Jerry Hu <mr...@gmail.com>
AuthorDate: Wed Jun 7 07:27:47 2023 +0800

    [fix](regex) String with Chinese characters matching failed (#20493)
---
 be/src/vec/functions/like.cpp                                       | 5 +++--
 .../sql_functions/string_functions/test_string_function_regexp.out  | 6 ++++++
 .../string_functions/test_string_function_regexp.groovy             | 3 +++
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/be/src/vec/functions/like.cpp b/be/src/vec/functions/like.cpp
index bcd8262d33..8d56c4f2d1 100644
--- a/be/src/vec/functions/like.cpp
+++ b/be/src/vec/functions/like.cpp
@@ -437,8 +437,9 @@ Status FunctionLikeBase::regexp_fn_predicate(LikeSearchState* state,
 Status FunctionLikeBase::hs_prepare(FunctionContext* context, const char* expression,
                                     hs_database_t** database, hs_scratch_t** scratch) {
     hs_compile_error_t* compile_err;
-    auto res = hs_compile(expression, HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY, HS_MODE_BLOCK, nullptr,
-                          database, &compile_err);
+    auto res = hs_compile(expression, HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8,
+                          HS_MODE_BLOCK, nullptr, database, &compile_err);
+
     if (res != HS_SUCCESS) {
         *database = nullptr;
         if (context) {
diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
index 3c7d8473ae..415b8f2822 100644
--- a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
+++ b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
@@ -73,6 +73,12 @@ a-b c
 -- !sql --
 a <b> b
 
+-- !sql_utf1 --
+true
+
+-- !sql_utf2 --
+true
+
 -- !sql_regexp_null --
 \N
 \N
diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
index ba4f941a4b..cb80939adf 100644
--- a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
+++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
@@ -63,6 +63,9 @@ suite("test_string_function_regexp") {
     qt_sql "SELECT regexp_replace_one('a b c', \" \", \"-\");"
     qt_sql "SELECT regexp_replace_one('a b b','(b)','<\\\\1>');"
 
+    qt_sql_utf1 """ select '皖12345' REGEXP '^[皖][0-9]{5}\$'; """
+    qt_sql_utf2 """ select '皖 12345' REGEXP '^[皖] [0-9]{5}\$'; """
+
     // bug fix
     sql """
         INSERT INTO ${tbName} VALUES


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org