You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/10/28 00:40:17 UTC

[doris] branch master updated: [Function](string) support sub_replace function (#13736)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 43c6428aea [Function](string) support sub_replace function (#13736)
43c6428aea is described below

commit 43c6428aea0fc70c9c80de4d31ec379c13898924
Author: zhangstar333 <87...@users.noreply.github.com>
AuthorDate: Fri Oct 28 08:40:08 2022 +0800

    [Function](string) support sub_replace function (#13736)
    
    * [Function](string) support sub_replace function
    
    * remove conf
---
 be/src/vec/functions/function_string.cpp           |   2 +
 be/src/vec/functions/function_string.h             | 135 +++++++++++++++++++++
 .../sql-functions/string-functions/sub_replace.md  |  53 ++++++++
 docs/sidebars.json                                 |   1 +
 .../sql-functions/string-functions/sub_replace.md  |  52 ++++++++
 gensrc/script/doris_builtins_functions.py          |   5 +-
 .../string_functions/test_string_function.out      |   6 +
 .../string_functions/test_string_function.groovy   |   3 +
 8 files changed, 256 insertions(+), 1 deletion(-)

diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp
index 7033eee2f8..f694d274ce 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -686,6 +686,8 @@ void register_function_string(SimpleFunctionFactory& factory) {
     factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl>>();
     factory.register_function<FunctionStringMd5AndSM3<SM3Sum>>();
     factory.register_function<FunctionReplace>();
+    factory.register_function<FunctionSubReplace<SubReplaceThreeImpl>>();
+    factory.register_function<FunctionSubReplace<SubReplaceFourImpl>>();
 
     factory.register_alias(FunctionLeft::name, "strleft");
     factory.register_alias(FunctionRight::name, "strright");
diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h
index d3b278a0af..c652f8deef 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -1630,4 +1630,139 @@ struct ReverseImpl {
     }
 };
 
+/// IFunction adapter for the SQL function `sub_replace`.
+///
+/// The class itself contains no evaluation logic: the argument signature and the
+/// row-wise computation are both supplied by `Impl`, which must provide the static
+/// members `get_variadic_argument_types()` and `execute_impl(...)`.
+template <typename Impl>
+class FunctionSubReplace : public IFunction {
+public:
+    static constexpr auto name = "sub_replace";
+
+    /// Factory used when the function is registered.
+    static FunctionPtr create() {
+        return std::make_shared<FunctionSubReplace<Impl>>();
+    }
+
+    String get_name() const override {
+        return name;
+    }
+
+    /// The result type is Nullable(String) regardless of the argument types.
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        DataTypePtr nested_type = std::make_shared<DataTypeString>();
+        return make_nullable(nested_type);
+    }
+
+    bool is_variadic() const override {
+        return true;
+    }
+
+    /// The accepted argument types are whatever `Impl` declares.
+    DataTypes get_variadic_argument_types_impl() const override {
+        return Impl::get_variadic_argument_types();
+    }
+
+    /// The argument count is derived from the declared argument type list.
+    size_t get_number_of_arguments() const override {
+        const DataTypes declared_types = get_variadic_argument_types_impl();
+        return declared_types.size();
+    }
+
+    // Reports that the default null handling is not used for this function.
+    bool use_default_implementation_for_nulls() const override {
+        return false;
+    }
+
+    // Reports that the default constant handling is used for this function.
+    bool use_default_implementation_for_constants() const override {
+        return true;
+    }
+
+    /// Forwards the call unchanged to `Impl::execute_impl`.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
+    }
+};
+
+/// Evaluation logic shared by the `sub_replace` overloads.
+struct SubReplaceImpl {
+    /// Evaluates sub_replace over four argument columns and stores the result column.
+    ///
+    /// `arguments` must hold four positions in `block`, in the order
+    /// (str, new_str, start, len). The column written to position `result` is a
+    /// ColumnNullable wrapping a ColumnString. Always returns Status::OK().
+    static Status replace_execute(Block& block, const ColumnNumbers& arguments, size_t result,
+                                  size_t input_rows_count) {
+        constexpr int kArgCount = 4;
+
+        auto res_column = ColumnString::create();
+        auto result_column = assert_cast<ColumnString*>(res_column.get());
+        // Starts as "not null" for every row; null flags of the arguments are merged in below.
+        auto args_null_map = ColumnUInt8::create(input_rows_count, 0);
+
+        ColumnPtr argument_columns[kArgCount];
+        for (int idx = 0; idx < kArgCount; ++idx) {
+            ColumnPtr& current = argument_columns[idx];
+            current = block.get_by_position(arguments[idx])
+                              .column->convert_to_full_column_if_const();
+
+            auto* nullable = check_and_get_column<ColumnNullable>(*current);
+            if (nullable == nullptr) {
+                continue;
+            }
+            // Order matters: the null map has to be consumed BEFORE `current` is
+            // re-pointed at the nested column, because that assignment may release
+            // the nullable column that owns the null map.
+            VectorizedUtils::update_null_map(args_null_map->get_data(),
+                                             nullable->get_null_map_data());
+            current = nullable->get_nested_column_ptr();
+        }
+
+        auto data_column = assert_cast<const ColumnString*>(argument_columns[0].get());
+        auto mask_column = assert_cast<const ColumnString*>(argument_columns[1].get());
+        auto start_column = assert_cast<const ColumnVector<Int32>*>(argument_columns[2].get());
+        auto length_column = assert_cast<const ColumnVector<Int32>*>(argument_columns[3].get());
+
+        vector(data_column, mask_column, start_column->get_data(), length_column->get_data(),
+               args_null_map->get_data(), result_column, input_rows_count);
+
+        block.get_by_position(result).column =
+                ColumnNullable::create(std::move(res_column), std::move(args_null_map));
+        return Status::OK();
+    }
+
+private:
+    /// Row-wise kernel: appends one value per input row to `result_column`.
+    ///
+    /// A row becomes NULL (empty value, flag set in `args_null_map`) when any
+    /// argument was already null, when start or length is negative, or when start
+    /// is not smaller than the size of the source string. Otherwise the range
+    /// [start, start + length) of the source string is replaced by the
+    /// replacement string via std::string::replace.
+    static void vector(const ColumnString* data_column, const ColumnString* mask_column,
+                       const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& length,
+                       NullMap& args_null_map, ColumnString* result_column,
+                       size_t input_rows_count) {
+        ColumnString::Chars& res_chars = result_column->get_chars();
+        ColumnString::Offsets& res_offsets = result_column->get_offsets();
+
+        for (size_t row = 0; row < input_rows_count; ++row) {
+            StringRef source = data_column->get_data_at(row);
+            StringRef replacement = mask_column->get_data_at(row);
+            size_t source_len = source.size;
+
+            const bool yields_null = args_null_map[row] || start[row] < 0 || length[row] < 0 ||
+                                     source_len <= start[row];
+            if (yields_null) {
+                // Emit an empty value so the offsets stay aligned with the row index.
+                res_offsets.push_back(res_chars.size());
+                args_null_map[row] = 1;
+                continue;
+            }
+
+            std::string_view replacement_view = replacement.to_string_view();
+            std::string replaced = source.to_string();
+            replaced.replace(start[row], length[row], replacement_view);
+            result_column->insert_data(replaced.data(), replaced.length());
+        }
+    }
+};
+
+/// Three-argument overload: sub_replace(str, new_str, start).
+///
+/// The omitted `len` argument is synthesized per row as the length of new_str,
+/// after which the call is handled by the shared four-argument implementation.
+struct SubReplaceThreeImpl {
+    /// Declared signature: (String, String, Int32).
+    static DataTypes get_variadic_argument_types() {
+        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
+                std::make_shared<DataTypeInt32>()};
+    }
+
+    /// Builds the implicit `len` column and delegates to SubReplaceImpl::replace_execute.
+    ///
+    /// Note: a temporary Int32 column named "strlen" is appended to `block`.
+    static Status execute_impl(FunctionContext* context, Block& block,
+                               const ColumnNumbers& arguments, size_t result,
+                               size_t input_rows_count) {
+        auto params = ColumnInt32::create(input_rows_count);
+        auto& strlen_data = params->get_data();
+
+        auto str_col =
+                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
+        if (auto* nullable = check_and_get_column<const ColumnNullable>(*str_col)) {
+            // Only the offsets of the nested string column are needed here.
+            str_col = nullable->get_nested_column_ptr();
+        }
+        auto& str_offset = assert_cast<const ColumnString*>(str_col.get())->get_offsets();
+
+        // Length of row r is offset[r] - offset[r - 1], with 0 as the implicit
+        // predecessor of row 0. The previous offset is tracked explicitly so the
+        // loop needs neither a signed counter nor an access at index -1.
+        size_t previous_end = 0;
+        for (size_t row = 0; row < input_rows_count; ++row) {
+            const size_t current_end = str_offset[row];
+            strlen_data[row] = static_cast<Int32>(current_end - previous_end);
+            previous_end = current_end;
+        }
+
+        block.insert({std::move(params), std::make_shared<DataTypeInt32>(), "strlen"});
+        // The column inserted above is the last one in the block; pass it as `len`.
+        ColumnNumbers temp_arguments = {arguments[0], arguments[1], arguments[2],
+                                        block.columns() - 1};
+        return SubReplaceImpl::replace_execute(block, temp_arguments, result, input_rows_count);
+    }
+};
+
+/// Four-argument overload: sub_replace(str, new_str, start, len).
+///
+/// All arguments are supplied by the caller, so evaluation is a direct hand-off
+/// to the shared implementation.
+struct SubReplaceFourImpl {
+    /// Declared signature: (String, String, Int32, Int32).
+    static DataTypes get_variadic_argument_types() {
+        DataTypes signature = {
+                std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
+                std::make_shared<DataTypeInt32>(), std::make_shared<DataTypeInt32>()};
+        return signature;
+    }
+
+    /// Delegates to SubReplaceImpl::replace_execute; `context` is not used.
+    static Status execute_impl(FunctionContext* context, Block& block,
+                               const ColumnNumbers& arguments, size_t result,
+                               size_t input_rows_count) {
+        const Status status =
+                SubReplaceImpl::replace_execute(block, arguments, result, input_rows_count);
+        return status;
+    }
+};
+
 } // namespace doris::vectorized
diff --git a/docs/en/docs/sql-manual/sql-functions/string-functions/sub_replace.md b/docs/en/docs/sql-manual/sql-functions/string-functions/sub_replace.md
new file mode 100644
index 0000000000..38ae718851
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/string-functions/sub_replace.md
@@ -0,0 +1,53 @@
+---
+{
+"title": "sub_replace",
+"language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## sub_replace
+### Description
+#### Syntax
+
+`VARCHAR sub_replace(VARCHAR str, VARCHAR new_str, INT start [, INT len])`
+
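+Returns a new string in which the substring of `str` that starts at position `start` (counting from 0) and has length `len` is replaced by `new_str`.
+Returns NULL when `start` or `len` is a negative integer, or when `start` is not smaller than the length of `str`.
+The default value of `len` is the length of `new_str`.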
+
+### example
+
+```
+mysql> select sub_replace("this is origin str","NEW-STR",1);
++-------------------------------------------------+
+| sub_replace('this is origin str', 'NEW-STR', 1) |
++-------------------------------------------------+
+| tNEW-STRorigin str                              |
++-------------------------------------------------+
+
+mysql> select sub_replace("doris","***",1,2);
++-----------------------------------+
+| sub_replace('doris', '***', 1, 2) |
++-----------------------------------+
+| d***is                            |
++-----------------------------------+
+```
+### keywords
+    SUB_REPLACE
diff --git a/docs/sidebars.json b/docs/sidebars.json
index b02c6d5ad8..c0360c2b5c 100644
--- a/docs/sidebars.json
+++ b/docs/sidebars.json
@@ -370,6 +370,7 @@
                                 "sql-manual/sql-functions/string-functions/concat_ws",
                                 "sql-manual/sql-functions/string-functions/substr",
                                 "sql-manual/sql-functions/string-functions/substring",
+                                "sql-manual/sql-functions/string-functions/sub_replace",
                                 "sql-manual/sql-functions/string-functions/append_trailing_char_if_absent",
                                 "sql-manual/sql-functions/string-functions/ends_with",
                                 "sql-manual/sql-functions/string-functions/starts_with",
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/sub_replace.md b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/sub_replace.md
new file mode 100644
index 0000000000..5f2a782cb0
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/sub_replace.md
@@ -0,0 +1,52 @@
+---
+{
+"title": "sub_replace",
+"language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## sub_replace
+### description
+#### Syntax
+
+`VARCHAR sub_replace(VARCHAR str, VARCHAR new_str, INT start [, INT len])`
+
+返回将str中从start位置(从0开始计数)开始、长度为len的子串替换为new_str后得到的新字符串。
+当start或len为负整数,或start不小于str的长度时,返回NULL;len的默认值为new_str的长度。
+
+### example
+
+```
+mysql> select sub_replace("this is origin str","NEW-STR",1);
++-------------------------------------------------+
+| sub_replace('this is origin str', 'NEW-STR', 1) |
++-------------------------------------------------+
+| tNEW-STRorigin str                              |
++-------------------------------------------------+
+
+mysql> select sub_replace("doris","***",1,2);
++-----------------------------------+
+| sub_replace('doris', '***', 1, 2) |
++-----------------------------------+
+| d***is                            |
++-----------------------------------+
+```
+### keywords
+    SUB_REPLACE
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index 5c14145853..ec88c654b9 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -2180,7 +2180,10 @@ visible_functions = [
     [['split_part'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'INT'],
         '_ZN5doris15StringFunctions10split_partEPN9doris_udf15FunctionContextERKNS1_9StringValES6_RKNS1_6IntValE',
         '', '', 'vec', 'ALWAYS_NULLABLE'],
-     [['extract_url_parameter'], 'VARCHAR', ['VARCHAR', 'VARCHAR'],'','', '', 'vec', ''],
+    [['extract_url_parameter'], 'VARCHAR', ['VARCHAR', 'VARCHAR'],'','', '', 'vec', ''],
+
+    [['sub_replace'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'INT'],'','', '', 'vec', 'ALWAYS_NULLABLE'],
+    [['sub_replace'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'INT', 'INT'],'','', '', 'vec', 'ALWAYS_NULLABLE'],
 
     # Longtext function
     [['substr', 'substring'], 'STRING', ['STRING', 'INT'],
diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
index 1eaf68bbca..492999e714 100644
--- a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
+++ b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
@@ -272,3 +272,9 @@ a
 -- !sql --
 
 
+-- !sql --
+tNEW-STRorigin str
+
+-- !sql --
+d***is
+
diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
index 2a107ef487..8894a4f5b0 100644
--- a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
+++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
@@ -139,4 +139,7 @@ suite("test_string_function") {
     qt_sql "select substr('a',-1,1);"
     qt_sql "select substr('a',-2,1);"
     qt_sql "select substr('a',-3,1);"
+
+    qt_sql "select sub_replace(\"this is origin str\",\"NEW-STR\",1);"
+    qt_sql "select sub_replace(\"doris\",\"***\",1,2);"
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org