You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by pa...@apache.org on 2022/10/13 13:31:53 UTC

[doris] branch master updated: [feature](function) support `initcap` string function (#13193)

This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new de4315c1c5 [feature](function) support `initcap` string function (#13193)
de4315c1c5 is described below

commit de4315c1c5d8b6f52c94e792abd12a58f7541fe6
Author: luozenglin <37...@users.noreply.github.com>
AuthorDate: Thu Oct 13 21:31:44 2022 +0800

    [feature](function) support `initcap` string function (#13193)
    
    support `initcap` string function
---
 be/src/exprs/string_functions.cpp                  | 21 ++++++++++
 be/src/exprs/string_functions.h                    |  2 +
 be/src/util/simd/vstring_function.h                |  4 +-
 be/src/vec/functions/function_string.cpp           | 39 ++++++++++++++++++
 .../sql-functions/string-functions/initcap.md      | 47 ++++++++++++++++++++++
 docs/sidebars.json                                 |  1 +
 .../sql-functions/string-functions/initcap.md      | 46 +++++++++++++++++++++
 gensrc/script/doris_builtins_functions.py          |  2 +
 .../string_functions/test_string_function.out      |  3 ++
 .../string_functions/test_string_function.groovy   |  2 +
 10 files changed, 165 insertions(+), 2 deletions(-)

diff --git a/be/src/exprs/string_functions.cpp b/be/src/exprs/string_functions.cpp
index 2f3abd2e8e..8c381f5d0c 100644
--- a/be/src/exprs/string_functions.cpp
+++ b/be/src/exprs/string_functions.cpp
@@ -350,6 +350,27 @@ StringVal StringFunctions::upper(FunctionContext* context, const StringVal& str)
     return result;
 }
 
+StringVal StringFunctions::initcap(FunctionContext* context, const StringVal& str) {
+    if (str.is_null) {
+        return StringVal::null();
+    }
+    StringVal result(context, str.len);
+    // Lower-case every byte into the result buffer; word starts are re-capitalized below.
+    simd::VStringFunctions::to_lower(str.ptr, str.len, result.ptr);
+
+    // A word is a maximal run of alphanumeric bytes; any other byte ends the current word.
+    bool at_word_start = true;
+    for (int64_t pos = 0; pos < str.len; ++pos) {
+        const bool is_word_byte = ::isalnum(result.ptr[pos]) != 0;
+        if (is_word_byte && at_word_start) {
+            result.ptr[pos] = ::toupper(result.ptr[pos]);
+        }
+        at_word_start = !is_word_byte;
+    }
+
+    return result;
+}
+
 StringVal StringFunctions::reverse(FunctionContext* context, const StringVal& str) {
     if (str.is_null) {
         return StringVal::null();
diff --git a/be/src/exprs/string_functions.h b/be/src/exprs/string_functions.h
index 1e5ecf8d9e..ac9cfee632 100644
--- a/be/src/exprs/string_functions.h
+++ b/be/src/exprs/string_functions.h
@@ -81,6 +81,8 @@ public:
                                       const doris_udf::StringVal& str);
     static doris_udf::StringVal upper(doris_udf::FunctionContext* context,
                                       const doris_udf::StringVal& str);
+    static doris_udf::StringVal initcap(doris_udf::FunctionContext* context,
+                                        const doris_udf::StringVal& str);
     static doris_udf::StringVal reverse(doris_udf::FunctionContext* context,
                                         const doris_udf::StringVal& str);
     static doris_udf::StringVal trim(doris_udf::FunctionContext* context,
diff --git a/be/src/util/simd/vstring_function.h b/be/src/util/simd/vstring_function.h
index 7749f82818..e627683d1a 100644
--- a/be/src/util/simd/vstring_function.h
+++ b/be/src/util/simd/vstring_function.h
@@ -188,7 +188,7 @@ public:
         }
     }
 
-    static void to_lower(uint8_t* src, int64_t len, uint8_t* dst) {
+    static void to_lower(const uint8_t* src, int64_t len, uint8_t* dst) {
         if (len <= 0) {
             return;
         }
@@ -196,7 +196,7 @@ public:
         lowerUpper.transfer(src, src + len, dst);
     }
 
-    static void to_upper(uint8_t* src, int64_t len, uint8_t* dst) {
+    static void to_upper(const uint8_t* src, int64_t len, uint8_t* dst) {
         if (len <= 0) {
             return;
         }
diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp
index 9dd34195f5..8ab6066372 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -265,6 +265,42 @@ struct TransferImpl {
     }
 };
 
+// Name tag for `initcap` (capitalize the first letter of each word); see FunctionToInitcap.
+struct NameToInitcap {
+    static constexpr auto name = "initcap";
+};
+
+struct InitcapImpl {
+    // Writes the initcap form of every row of the input string column to res_data/res_offsets.
+    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
+        size_t offset_size = offsets.size();
+        res_offsets.resize(offsets.size());
+        // Lengths are unchanged, so copy offsets verbatim; size by element type, not pointer.
+        memcpy(res_offsets.data(), offsets.data(), offset_size * sizeof(*offsets.data()));
+
+        size_t data_length = data.size();
+        res_data.resize(data_length);
+        simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
+
+        for (size_t offset_index = 0, start_index = 0; offset_index < offset_size; ++offset_index) {
+            auto end_index = res_offsets[offset_index];
+            // Each row starts a new word, regardless of how the previous row ended.
+            bool need_capitalize = true;
+            for (size_t i = start_index; i < end_index; ++i) {
+                if (!::isalnum(res_data[i])) {
+                    need_capitalize = true;
+                } else if (need_capitalize) {
+                    res_data[i] = ::toupper(res_data[i]);
+                    need_capitalize = false;
+                }
+            }
+            start_index = end_index;
+        }
+        return Status::OK();
+    }
+};
+
 struct NameTrim {
     static constexpr auto name = "trim";
 };
@@ -588,6 +624,8 @@ using FunctionToLower = FunctionStringToString<TransferImpl<::tolower>, NameToLo
 
 using FunctionToUpper = FunctionStringToString<TransferImpl<::toupper>, NameToUpper>;
 
+using FunctionToInitcap = FunctionStringToString<InitcapImpl, NameToInitcap>;
+
 using FunctionLTrim = FunctionStringToString<TrimImpl<true, false>, NameLTrim>;
 
 using FunctionRTrim = FunctionStringToString<TrimImpl<false, true>, NameRTrim>;
@@ -619,6 +657,7 @@ void register_function_string(SimpleFunctionFactory& factory) {
     factory.register_function<FunctionUnHex>();
     factory.register_function<FunctionToLower>();
     factory.register_function<FunctionToUpper>();
+    factory.register_function<FunctionToInitcap>();
     factory.register_function<FunctionLTrim>();
     factory.register_function<FunctionRTrim>();
     factory.register_function<FunctionTrim>();
diff --git a/docs/en/docs/sql-manual/sql-functions/string-functions/initcap.md b/docs/en/docs/sql-manual/sql-functions/string-functions/initcap.md
new file mode 100644
index 0000000000..1c88aae5f9
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/string-functions/initcap.md
@@ -0,0 +1,47 @@
+---
+{
+    "title": "initcap",
+    "language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## initcap
+### description
+#### Syntax
+
+`VARCHAR initcap(VARCHAR str)`
+
+Convert the first letter of each word to upper case and the rest to lower case. 
+Words are sequences of alphanumeric characters separated by non-alphanumeric characters.
+
+### example
+
+```
+mysql> select initcap('hello hello.,HELLO123HELlo');
++---------------------------------------+
+| initcap('hello hello.,HELLO123HELlo') |
++---------------------------------------+
+| Hello Hello.,Hello123hello            |
++---------------------------------------+
+```
+### keywords
+    INITCAP
\ No newline at end of file
diff --git a/docs/sidebars.json b/docs/sidebars.json
index a3efe0dfc9..3de9e228f1 100644
--- a/docs/sidebars.json
+++ b/docs/sidebars.json
@@ -357,6 +357,7 @@
                                 "sql-manual/sql-functions/string-functions/lcase",
                                 "sql-manual/sql-functions/string-functions/upper",
                                 "sql-manual/sql-functions/string-functions/ucase",
+                                "sql-manual/sql-functions/string-functions/initcap",
                                 "sql-manual/sql-functions/string-functions/repeat",
                                 "sql-manual/sql-functions/string-functions/reverse",
                                 "sql-manual/sql-functions/string-functions/concat",
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/initcap.md b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/initcap.md
new file mode 100644
index 0000000000..cee32f2b08
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/initcap.md
@@ -0,0 +1,46 @@
+---
+{
+    "title": "initcap",
+    "language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## initcap
+### description
+#### Syntax
+
+`VARCHAR initcap(VARCHAR str)`
+
+将参数中包含的单词首字母大写,其余字母转为小写。单词是由非字母数字字符分隔的字母数字字符序列。
+
+### example
+
+```
+mysql> select initcap('hello hello.,HELLO123HELlo');
++---------------------------------------+
+| initcap('hello hello.,HELLO123HELlo') |
++---------------------------------------+
+| Hello Hello.,Hello123hello            |
++---------------------------------------+
+```
+### keywords
+    INITCAP
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index f75c1ff680..585b268ad8 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -2030,6 +2030,8 @@ visible_functions = [
             '_ZN5doris15StringFunctions5lowerEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''],
     [['upper', 'ucase'], 'VARCHAR', ['VARCHAR'],
             '_ZN5doris15StringFunctions5upperEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''],
+    [['initcap'], 'VARCHAR', ['VARCHAR'],
+            '_ZN5doris15StringFunctions7initcapEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''],
     [['trim'], 'VARCHAR', ['VARCHAR'],
             '_ZN5doris15StringFunctions4trimEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''],
     [['ltrim'], 'VARCHAR', ['VARCHAR'],
diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
index 4d6b141073..d0f2b8c71a 100644
--- a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
+++ b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
@@ -134,6 +134,9 @@ abc123
 -- !sql --
 abc123
 
+-- !sql --
+Abc123abc Abc.Abc,?|Abc
+
 -- !sql --
 Hello
 
diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
index ceb1989b10..80ef709bd3 100644
--- a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
+++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
@@ -77,6 +77,8 @@ suite("test_string_function") {
     qt_sql "SELECT lcase(\"AbC123\");"
     qt_sql "SELECT lower(\"AbC123\");"
 
+    qt_sql "SELECT initcap(\"AbC123abc abc.abc,?|abc\");"
+
     qt_sql "select left(\"Hello doris\",5);"
     qt_sql "select right(\"Hello doris\",5);"
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org