You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2023/06/06 23:27:53 UTC
[doris] branch master updated: [fix](regex) String with Chinese characters matching failed (#20493)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 49f8f20fb1 [fix](regex) String with Chinese characters matching failed (#20493)
49f8f20fb1 is described below
commit 49f8f20fb1818d1906e9517794efd2632116a8da
Author: Jerry Hu <mr...@gmail.com>
AuthorDate: Wed Jun 7 07:27:47 2023 +0800
[fix](regex) String with Chinese characters matching failed (#20493)
---
be/src/vec/functions/like.cpp | 5 +++--
.../sql_functions/string_functions/test_string_function_regexp.out | 6 ++++++
.../string_functions/test_string_function_regexp.groovy | 3 +++
3 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/be/src/vec/functions/like.cpp b/be/src/vec/functions/like.cpp
index bcd8262d33..8d56c4f2d1 100644
--- a/be/src/vec/functions/like.cpp
+++ b/be/src/vec/functions/like.cpp
@@ -437,8 +437,9 @@ Status FunctionLikeBase::regexp_fn_predicate(LikeSearchState* state,
Status FunctionLikeBase::hs_prepare(FunctionContext* context, const char* expression,
hs_database_t** database, hs_scratch_t** scratch) {
hs_compile_error_t* compile_err;
- auto res = hs_compile(expression, HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY, HS_MODE_BLOCK, nullptr,
- database, &compile_err);
+ auto res = hs_compile(expression, HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8,
+ HS_MODE_BLOCK, nullptr, database, &compile_err);
+
if (res != HS_SUCCESS) {
*database = nullptr;
if (context) {
diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
index 3c7d8473ae..415b8f2822 100644
--- a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
+++ b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
@@ -73,6 +73,12 @@ a-b c
-- !sql --
a <b> b
+-- !sql_utf1 --
+true
+
+-- !sql_utf2 --
+true
+
-- !sql_regexp_null --
\N
\N
diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
index ba4f941a4b..cb80939adf 100644
--- a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
+++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
@@ -63,6 +63,9 @@ suite("test_string_function_regexp") {
qt_sql "SELECT regexp_replace_one('a b c', \" \", \"-\");"
qt_sql "SELECT regexp_replace_one('a b b','(b)','<\\\\1>');"
+ qt_sql_utf1 """ select '皖12345' REGEXP '^[皖][0-9]{5}\$'; """
+ qt_sql_utf2 """ select '皖 12345' REGEXP '^[皖] [0-9]{5}\$'; """
+
// bug fix
sql """
INSERT INTO ${tbName} VALUES
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org