You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/10/28 00:40:17 UTC
[doris] branch master updated: [Function](string) support sub_replace function (#13736)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 43c6428aea [Function](string) support sub_replace function (#13736)
43c6428aea is described below
commit 43c6428aea0fc70c9c80de4d31ec379c13898924
Author: zhangstar333 <87...@users.noreply.github.com>
AuthorDate: Fri Oct 28 08:40:08 2022 +0800
[Function](string) support sub_replace function (#13736)
* [Function](string) support sub_replace function
* remove conf
---
be/src/vec/functions/function_string.cpp | 2 +
be/src/vec/functions/function_string.h | 135 +++++++++++++++++++++
.../sql-functions/string-functions/sub_replace.md | 53 ++++++++
docs/sidebars.json | 1 +
.../sql-functions/string-functions/sub_replace.md | 52 ++++++++
gensrc/script/doris_builtins_functions.py | 5 +-
.../string_functions/test_string_function.out | 6 +
.../string_functions/test_string_function.groovy | 3 +
8 files changed, 256 insertions(+), 1 deletion(-)
diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp
index 7033eee2f8..f694d274ce 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -686,6 +686,8 @@ void register_function_string(SimpleFunctionFactory& factory) {
factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl>>();
factory.register_function<FunctionStringMd5AndSM3<SM3Sum>>();
factory.register_function<FunctionReplace>();
+ factory.register_function<FunctionSubReplace<SubReplaceThreeImpl>>();
+ factory.register_function<FunctionSubReplace<SubReplaceFourImpl>>();
factory.register_alias(FunctionLeft::name, "strleft");
factory.register_alias(FunctionRight::name, "strright");
diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h
index d3b278a0af..c652f8deef 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -1630,4 +1630,139 @@ struct ReverseImpl {
}
};
+/// Vectorized SQL function `sub_replace`.
+///
+/// This class is only a shell around `Impl`: the argument signature comes from
+/// `Impl::get_variadic_argument_types()` and the per-block work is done by
+/// `Impl::execute_impl(...)`. Each instantiation therefore stands for one
+/// overload of the function, all sharing the same SQL name.
+template <typename Impl>
+class FunctionSubReplace : public IFunction {
+public:
+    static constexpr auto name = "sub_replace";
+
+    static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); }
+
+    String get_name() const override { return name; }
+
+    // ---- Signature -------------------------------------------------------
+
+    bool is_variadic() const override { return true; }
+
+    /// Argument types of this overload, as declared by `Impl`.
+    DataTypes get_variadic_argument_types_impl() const override {
+        return Impl::get_variadic_argument_types();
+    }
+
+    /// The arity is the length of the signature declared by `Impl`.
+    size_t get_number_of_arguments() const override {
+        const DataTypes signature = get_variadic_argument_types_impl();
+        return signature.size();
+    }
+
+    /// The result type is always Nullable(String); `arguments` is not inspected.
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return make_nullable(std::make_shared<DataTypeString>());
+    }
+
+    // ---- Execution -------------------------------------------------------
+
+    // Default NULL handling is declined here; the shared kernel called by the
+    // Impl classes builds the result null map itself.
+    bool use_default_implementation_for_nulls() const override { return false; }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    /// Forwards the call unchanged to `Impl::execute_impl`.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
+    }
+};
+
+/// Shared four-argument kernel used by every `sub_replace` overload.
+struct SubReplaceImpl {
+    /// Evaluates sub_replace(str, new_str, start, len) row by row.
+    ///
+    /// `arguments` must contain four block positions, in the order
+    /// (str, new_str, start, len). The outcome, a Nullable(String) column, is
+    /// stored at block position `result`. Always returns Status::OK().
+    static Status replace_execute(Block& block, const ColumnNumbers& arguments, size_t result,
+                                  size_t input_rows_count) {
+        constexpr int kArgCount = 4;
+
+        auto res_column = ColumnString::create();
+        auto result_column = assert_cast<ColumnString*>(res_column.get());
+        // One flag per row; a row becomes NULL as soon as any argument is NULL.
+        auto merged_null_map = ColumnUInt8::create(input_rows_count, 0);
+
+        ColumnPtr plain_args[kArgCount];
+        for (int idx = 0; idx < kArgCount; ++idx) {
+            plain_args[idx] =
+                    block.get_by_position(arguments[idx]).column->convert_to_full_column_if_const();
+            auto* as_nullable = check_and_get_column<ColumnNullable>(*plain_args[idx]);
+            if (as_nullable == nullptr) {
+                continue;
+            }
+            // Order matters: fold the null map into the merged one BEFORE
+            // rebinding plain_args[idx]. Rebinding drops the reference to the
+            // nullable column, which would free the null map being read.
+            VectorizedUtils::update_null_map(merged_null_map->get_data(),
+                                             as_nullable->get_null_map_data());
+            plain_args[idx] = as_nullable->get_nested_column_ptr();
+        }
+
+        const auto* str_data = assert_cast<const ColumnString*>(plain_args[0].get());
+        const auto* new_str_data = assert_cast<const ColumnString*>(plain_args[1].get());
+        const auto* start_data = assert_cast<const ColumnVector<Int32>*>(plain_args[2].get());
+        const auto* len_data = assert_cast<const ColumnVector<Int32>*>(plain_args[3].get());
+
+        vector(str_data, new_str_data, start_data->get_data(), len_data->get_data(),
+               merged_null_map->get_data(), result_column, input_rows_count);
+
+        block.get_by_position(result).column =
+                ColumnNullable::create(std::move(res_column), std::move(merged_null_map));
+        return Status::OK();
+    }
+
+private:
+    /// Row loop of the kernel.
+    ///
+    /// A row yields NULL (an empty slot in `result_column` plus a set flag in
+    /// `args_null_map`) when it is already flagged NULL, when `start` or
+    /// `length` is negative, or when `start` is not smaller than the byte
+    /// length of the source string. Otherwise `length` bytes starting at byte
+    /// `start` of the source string are replaced with the mask string.
+    static void vector(const ColumnString* data_column, const ColumnString* mask_column,
+                       const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& length,
+                       NullMap& args_null_map, ColumnString* result_column,
+                       size_t input_rows_count) {
+        ColumnString::Chars& out_chars = result_column->get_chars();
+        ColumnString::Offsets& out_offsets = result_column->get_offsets();
+
+        for (size_t row = 0; row < input_rows_count; ++row) {
+            const StringRef source = data_column->get_data_at(row);
+            const StringRef replacement = mask_column->get_data_at(row);
+            const size_t source_len = source.size;
+
+            // The range test comes last, so `start` is known to be
+            // non-negative before it is compared with the unsigned length.
+            const bool computable = !args_null_map[row] && start[row] >= 0 &&
+                                    length[row] >= 0 && source_len > start[row];
+            if (!computable) {
+                // Keep the offsets aligned with the row count: empty value, NULL flag.
+                out_offsets.push_back(out_chars.size());
+                args_null_map[row] = 1;
+                continue;
+            }
+
+            std::string_view replacement_view = replacement.to_string_view();
+            std::string patched = source.to_string();
+            // std::string::replace clamps the count to the end of the string.
+            patched.replace(start[row], length[row], replacement_view);
+            result_column->insert_data(patched.data(), patched.length());
+        }
+    }
+};
+
+/// Three-argument overload: sub_replace(str, new_str, start).
+///
+/// The omitted `len` argument defaults, row by row, to the byte length of
+/// `new_str`. That length column is materialized and the call is forwarded to
+/// the four-argument kernel SubReplaceImpl::replace_execute.
+struct SubReplaceThreeImpl {
+    /// Signature of this overload: (String, String, Int32).
+    static DataTypes get_variadic_argument_types() {
+        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
+                std::make_shared<DataTypeInt32>()};
+    }
+
+    /// Appends an Int32 column named "strlen" to `block`, holding the byte
+    /// length of `new_str` (arguments[1]) for each row, then runs the shared
+    /// kernel with that column as the fourth argument.
+    ///
+    /// Returns whatever SubReplaceImpl::replace_execute returns.
+    static Status execute_impl(FunctionContext* context, Block& block,
+                               const ColumnNumbers& arguments, size_t result,
+                               size_t input_rows_count) {
+        auto params = ColumnInt32::create(input_rows_count);
+        auto& strlen_data = params->get_data();
+
+        auto new_str_col =
+                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
+        // Only the nested strings are needed here. Rows where new_str is NULL
+        // are turned into NULL results by the kernel, which merges the null
+        // maps of all four arguments.
+        if (auto* nullable = check_and_get_column<const ColumnNullable>(*new_str_col)) {
+            new_str_col = nullable->get_nested_column_ptr();
+        }
+        const auto& new_str_offsets =
+                assert_cast<const ColumnString*>(new_str_col.get())->get_offsets();
+
+        // Offsets are cumulative end positions, so a row's length is the
+        // difference between its end offset and the previous row's end offset.
+        // Tracking the previous end explicitly (starting at 0) avoids reading
+        // index -1 on the first row, and the size_t counter matches the type
+        // of input_rows_count.
+        size_t prev_end = 0;
+        for (size_t row = 0; row < input_rows_count; ++row) {
+            const size_t cur_end = new_str_offsets[row];
+            strlen_data[row] = static_cast<Int32>(cur_end - prev_end);
+            prev_end = cur_end;
+        }
+
+        block.insert({std::move(params), std::make_shared<DataTypeInt32>(), "strlen"});
+        // The freshly inserted column is the last one in the block.
+        ColumnNumbers temp_arguments = {arguments[0], arguments[1], arguments[2],
+                                        block.columns() - 1};
+        return SubReplaceImpl::replace_execute(block, temp_arguments, result, input_rows_count);
+    }
+};
+
+/// Four-argument overload: sub_replace(str, new_str, start, len).
+///
+/// All four arguments are supplied by the caller, so the call goes straight
+/// to the shared kernel.
+struct SubReplaceFourImpl {
+    /// Signature of this overload: (String, String, Int32, Int32).
+    static DataTypes get_variadic_argument_types() {
+        DataTypePtr str_type = std::make_shared<DataTypeString>();
+        DataTypePtr new_str_type = std::make_shared<DataTypeString>();
+        DataTypePtr start_type = std::make_shared<DataTypeInt32>();
+        DataTypePtr len_type = std::make_shared<DataTypeInt32>();
+        return {str_type, new_str_type, start_type, len_type};
+    }
+
+    /// Delegates to SubReplaceImpl::replace_execute; `context` is not used.
+    static Status execute_impl(FunctionContext* context, Block& block,
+                               const ColumnNumbers& arguments, size_t result,
+                               size_t input_rows_count) {
+        const Status status =
+                SubReplaceImpl::replace_execute(block, arguments, result, input_rows_count);
+        return status;
+    }
+};
+
} // namespace doris::vectorized
diff --git a/docs/en/docs/sql-manual/sql-functions/string-functions/sub_replace.md b/docs/en/docs/sql-manual/sql-functions/string-functions/sub_replace.md
new file mode 100644
index 0000000000..38ae718851
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/string-functions/sub_replace.md
@@ -0,0 +1,53 @@
+---
+{
+"title": "sub_replace",
+"language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## sub_replace
+### Description
+#### Syntax
+
+`VARCHAR sub_replace(VARCHAR str, VARCHAR new_str, INT start [, INT len])`
+
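+Returns a copy of `str` in which the `len` bytes starting at position `start` (counted from 0) are replaced with `new_str`.
+Returns NULL when any argument is NULL, when `start` or `len` is a negative integer, or when `start` is not less than the length of `str`.
+If `len` is omitted, its default value is the length of `new_str`.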
+
+### example
+
+```
+mysql> select sub_replace("this is origin str","NEW-STR",1);
++-------------------------------------------------+
+| sub_replace('this is origin str', 'NEW-STR', 1) |
++-------------------------------------------------+
+| tNEW-STRorigin str |
++-------------------------------------------------+
+
+mysql> select sub_replace("doris","***",1,2);
++-----------------------------------+
+| sub_replace('doris', '***', 1, 2) |
++-----------------------------------+
+| d***is |
++-----------------------------------+
+```
+### keywords
+ SUB_REPLACE
diff --git a/docs/sidebars.json b/docs/sidebars.json
index b02c6d5ad8..c0360c2b5c 100644
--- a/docs/sidebars.json
+++ b/docs/sidebars.json
@@ -370,6 +370,7 @@
"sql-manual/sql-functions/string-functions/concat_ws",
"sql-manual/sql-functions/string-functions/substr",
"sql-manual/sql-functions/string-functions/substring",
+ "sql-manual/sql-functions/string-functions/sub_replace",
"sql-manual/sql-functions/string-functions/append_trailing_char_if_absent",
"sql-manual/sql-functions/string-functions/ends_with",
"sql-manual/sql-functions/string-functions/starts_with",
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/sub_replace.md b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/sub_replace.md
new file mode 100644
index 0000000000..5f2a782cb0
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/sub_replace.md
@@ -0,0 +1,52 @@
+---
+{
+"title": "sub_replace",
+"language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## sub_replace
+### description
+#### Syntax
+
+`VARCHAR sub_replace(VARCHAR str, VARCHAR new_str, INT start [, INT len])`
+
+返回将str中从start(从0开始计数)开始、长度为len的部分替换为new_str后得到的新字符串。
+当任一参数为NULL、start或len为负整数,或start不小于str的长度时,返回NULL;省略len时,其默认值为new_str的长度。
+
+### example
+
+```
+mysql> select sub_replace("this is origin str","NEW-STR",1);
++-------------------------------------------------+
+| sub_replace('this is origin str', 'NEW-STR', 1) |
++-------------------------------------------------+
+| tNEW-STRorigin str |
++-------------------------------------------------+
+
+mysql> select sub_replace("doris","***",1,2);
++-----------------------------------+
+| sub_replace('doris', '***', 1, 2) |
++-----------------------------------+
+| d***is |
++-----------------------------------+
+```
+### keywords
+ SUB_REPLACE
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index 5c14145853..ec88c654b9 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -2180,7 +2180,10 @@ visible_functions = [
[['split_part'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'INT'],
'_ZN5doris15StringFunctions10split_partEPN9doris_udf15FunctionContextERKNS1_9StringValES6_RKNS1_6IntValE',
'', '', 'vec', 'ALWAYS_NULLABLE'],
- [['extract_url_parameter'], 'VARCHAR', ['VARCHAR', 'VARCHAR'],'','', '', 'vec', ''],
+ [['extract_url_parameter'], 'VARCHAR', ['VARCHAR', 'VARCHAR'],'','', '', 'vec', ''],
+
+ [['sub_replace'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'INT'],'','', '', 'vec', 'ALWAYS_NULLABLE'],
+ [['sub_replace'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'INT', 'INT'],'','', '', 'vec', 'ALWAYS_NULLABLE'],
# Longtext function
[['substr', 'substring'], 'STRING', ['STRING', 'INT'],
diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
index 1eaf68bbca..492999e714 100644
--- a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
+++ b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
@@ -272,3 +272,9 @@ a
-- !sql --
+-- !sql --
+tNEW-STRorigin str
+
+-- !sql --
+d***is
+
diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
index 2a107ef487..8894a4f5b0 100644
--- a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
+++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
@@ -139,4 +139,7 @@ suite("test_string_function") {
qt_sql "select substr('a',-1,1);"
qt_sql "select substr('a',-2,1);"
qt_sql "select substr('a',-3,1);"
+
+ qt_sql "select sub_replace(\"this is origin str\",\"NEW-STR\",1);"
+ qt_sql "select sub_replace(\"doris\",\"***\",1,2);"
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org