You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by li...@apache.org on 2023/01/06 13:10:16 UTC

[doris] branch branch-1.2-lts updated: [cherry-pick][feature](string_function) support split_by_string function (#15677)

This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new 0d05d55859 [cherry-pick][feature](string_function) support split_by_string function (#15677)
0d05d55859 is described below

commit 0d05d55859b327991827366493dcef10fd7fafa0
Author: yongkang.zhong <zh...@qq.com>
AuthorDate: Fri Jan 6 21:10:09 2023 +0800

    [cherry-pick][feature](string_function) support split_by_string function (#15677)
    
    Co-authored-by: liqing-coder <10...@users.noreply.github.com>
---
 be/src/vec/functions/function_string.cpp           |   1 +
 be/src/vec/functions/function_string.h             | 122 +++++++++++++++++++++
 be/test/vec/function/function_string_test.cpp      |   1 +
 .../string-functions/split_by_string.md            | 112 +++++++++++++++++++
 docs/sidebars.json                                 |   1 +
 .../string-functions/split_by_string.md            | 112 +++++++++++++++++++
 gensrc/script/doris_builtins_functions.py          |   2 +
 .../string_functions/test_split_by_string.out      |  89 +++++++++++++++
 .../string_functions/test_split_by_string.groovy   | 107 ++++++++++++++++++
 9 files changed, 547 insertions(+)

diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp
index 3898979a32..ad8499dcc7 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -683,6 +683,7 @@ void register_function_string(SimpleFunctionFactory& factory) {
     factory.register_function<FunctionToBase64>();
     factory.register_function<FunctionFromBase64>();
     factory.register_function<FunctionSplitPart>();
+    factory.register_function<FunctionSplitByString>();
     factory.register_function<FunctionStringMd5AndSM3<MD5Sum>>();
     factory.register_function<FunctionSubstringIndex>();
     factory.register_function<FunctionExtractURLParameter>();
diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h
index 73089500a6..fa6f2f9934 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -55,6 +55,7 @@
 #include "vec/columns/columns_number.h"
 #include "vec/common/assert_cast.h"
 #include "vec/common/string_ref.h"
+#include "vec/data_types/data_type_array.h"
 #include "vec/data_types/data_type_decimal.h"
 #include "vec/data_types/data_type_nullable.h"
 #include "vec/data_types/data_type_number.h"
@@ -1578,6 +1579,127 @@ public:
     }
 };
 
+// SQL function `split_by_string(s, separator)`.
+//
+// For every row, splits the first string argument by the second one and returns the
+// pieces as an array of nullable strings:
+//   * an empty input string yields an empty array;
+//   * an empty separator yields one element per byte of the input;
+//   * otherwise the input is cut at every non-overlapping occurrence of the separator,
+//     scanning left to right, so leading, trailing and consecutive separators produce
+//     empty elements.
+class FunctionSplitByString : public IFunction {
+public:
+    static constexpr auto name = "split_by_string";
+
+    static FunctionPtr create() { return std::make_shared<FunctionSplitByString>(); }
+    using NullMapType = PaddedPODArray<UInt8>;
+
+    String get_name() const override { return name; }
+
+    bool is_variadic() const override { return false; }
+
+    size_t get_number_of_arguments() const override { return 2; }
+
+    // The result type is an array whose element type is the nullable form of the type of
+    // the first argument. Both arguments are expected to be strings (debug-checked only).
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        DCHECK(is_string(arguments[0]))
+                << "first argument for function: " << name << " should be string"
+                << " and arguments[0] is " << arguments[0]->get_name();
+        DCHECK(is_string(arguments[1]))
+                << "second argument for function: " << name << " should be string"
+                << " and arguments[1] is " << arguments[1]->get_name();
+        return std::make_shared<DataTypeArray>(make_nullable(arguments[0]));
+    }
+
+    // Builds the result array column for the block and stores it at position `result`.
+    Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t /*input_rows_count*/) override {
+        DCHECK_EQ(arguments.size(), 2);
+
+        // Materialize constant arguments so that both columns can be read row by row.
+        ColumnPtr src_column =
+                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+        ColumnPtr delimiter_column =
+                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
+
+        DataTypePtr src_column_type = block.get_by_position(arguments[0]).type;
+        auto dest_column_ptr = ColumnArray::create(make_nullable(src_column_type)->create_column(),
+                                                   ColumnArray::ColumnOffsets::create());
+
+        IColumn* dest_nested_column = &dest_column_ptr->get_data();
+        auto& dest_offsets = dest_column_ptr->get_offsets();
+        DCHECK(dest_nested_column != nullptr);
+
+        // The element column was created from a nullable type above, so unwrap it into the
+        // underlying data column and its null map; _execute() fills both.
+        NullMapType* dest_nested_null_map = nullptr;
+        ColumnNullable* dest_nullable_col = static_cast<ColumnNullable*>(dest_nested_column);
+        dest_nested_column = dest_nullable_col->get_nested_column_ptr();
+        dest_nested_null_map = &dest_nullable_col->get_null_map_column().get_data();
+
+        _execute(*src_column, *delimiter_column, *dest_nested_column, dest_offsets,
+                 dest_nested_null_map);
+        block.replace_by_position(result, std::move(dest_column_ptr));
+        return Status::OK();
+    }
+
+private:
+    // Splits every row of `src_column` by the same row of `delimiter_column`.
+    //
+    // Each piece is appended to `dest_nested_column` (treated as a ColumnString) together
+    // with a "not null" flag in `dest_nested_null_map`; after each row the running number
+    // of pieces is appended to `dest_offsets`.
+    void _execute(const IColumn& src_column, const IColumn& delimiter_column,
+                  IColumn& dest_nested_column, ColumnArray::Offsets64& dest_offsets,
+                  NullMapType* dest_nested_null_map) {
+        ColumnString& dest_column_string = static_cast<ColumnString&>(dest_nested_column);
+        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
+        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
+
+        // Running totals: bytes written to the element column / elements written so far.
+        ColumnArray::Offset64 string_pos = 0;
+        ColumnArray::Offset64 dest_pos = 0;
+        const ColumnString* src_column_string = static_cast<const ColumnString*>(&src_column);
+        ColumnArray::Offset64 src_offsets_size = src_column_string->get_offsets().size();
+        // Exactly one array offset is written per input row.
+        dest_offsets.reserve(src_offsets_size);
+
+        for (size_t i = 0; i < src_offsets_size; i++) {
+            const StringRef delimiter_ref = delimiter_column.get_data_at(i);
+            const StringRef str_ref = src_column_string->get_data_at(i);
+
+            // An empty input produces an empty array, whatever the delimiter is.
+            if (str_ref.size == 0) {
+                dest_offsets.push_back(dest_pos);
+                continue;
+            }
+            if (delimiter_ref.size == 0) {
+                // Empty delimiter: emit one element per byte of the input.
+                // NOTE: this is a byte split, so a multi-byte UTF-8 character ends up
+                // spread over several elements.
+                for (size_t str_pos = 0; str_pos < str_ref.size;) {
+                    const size_t str_offset = str_pos;
+                    const size_t old_size = column_string_chars.size();
+                    str_pos++;
+                    const size_t new_size = old_size + 1;
+                    column_string_chars.resize(new_size);
+                    memcpy(column_string_chars.data() + old_size, str_ref.data + str_offset, 1);
+                    (*dest_nested_null_map).push_back(false);
+                    string_pos++;
+                    dest_pos++;
+                    column_string_offsets.push_back(string_pos);
+                }
+            } else {
+                // `<=` rather than `<`: when the input ends with a delimiter, str_pos equals
+                // str_ref.size after skipping it and one more pass emits the trailing empty
+                // element.
+                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
+                    const size_t str_offset = str_pos;
+                    const size_t old_size = column_string_chars.size();
+                    // Moves str_pos to the next delimiter (or to the end of the input).
+                    const size_t split_part_size = split_str(str_pos, str_ref, delimiter_ref);
+                    // Skip the delimiter itself; overshooting the end terminates the loop.
+                    str_pos += delimiter_ref.size;
+                    const size_t new_size = old_size + split_part_size;
+                    column_string_chars.resize(new_size);
+                    if (split_part_size > 0) {
+                        memcpy(column_string_chars.data() + old_size, str_ref.data + str_offset,
+                               split_part_size);
+                    }
+                    (*dest_nested_null_map).push_back(false);
+                    string_pos += split_part_size;
+                    dest_pos++;
+                    column_string_offsets.push_back(string_pos);
+                }
+            }
+            dest_offsets.push_back(dest_pos);
+        }
+    }
+
+    // Advances `pos` to the start of the next occurrence of `delimiter_ref` inside
+    // `str_ref`, or to the end of `str_ref` when there is none, and returns the number of
+    // bytes skipped (the length of the piece that starts at the original `pos`).
+    size_t split_str(size_t& pos, const StringRef str_ref, StringRef delimiter_ref) {
+        const size_t old_size = pos;
+        const size_t str_size = str_ref.size;
+        while (pos < str_size) {
+            // Fewer bytes left than the delimiter is long: no further match is possible.
+            // Comparing anyway would read past the end of this value and could falsely
+            // match bytes that belong to whatever is stored after it.
+            if (str_size - pos < delimiter_ref.size) {
+                pos = str_size;
+                break;
+            }
+            if (memcmp(str_ref.data + pos, delimiter_ref.data, delimiter_ref.size) == 0) {
+                break;
+            }
+            pos++;
+        }
+        return pos - old_size;
+    }
+};
+
 struct SM3Sum {
     static constexpr auto name = "sm3sum";
     using ObjectData = SM3Digest;
diff --git a/be/test/vec/function/function_string_test.cpp b/be/test/vec/function/function_string_test.cpp
index c499718ff1..0ce215a650 100644
--- a/be/test/vec/function/function_string_test.cpp
+++ b/be/test/vec/function/function_string_test.cpp
@@ -23,6 +23,7 @@
 #include "function_test_util.h"
 #include "util/encryption_util.h"
 #include "vec/core/types.h"
+#include "vec/data_types/data_type_array.h"
 #include "vec/data_types/data_type_string.h"
 
 namespace doris::vectorized {
diff --git a/docs/en/docs/sql-manual/sql-functions/string-functions/split_by_string.md b/docs/en/docs/sql-manual/sql-functions/string-functions/split_by_string.md
new file mode 100644
index 0000000000..9c473c2dc9
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/string-functions/split_by_string.md
@@ -0,0 +1,112 @@
+---
+{
+    "title": "split_by_string",
+    "language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## split_by_string 
+
+### description
+
+#### Syntax
+
+```
+split_by_string(s, separator)
+```
+Splits the string `s` into an array of substrings, using the string `separator` as the delimiter. The separator may consist of multiple characters. If the separator is empty, `s` is split into an array of single characters.
+
+#### Arguments
+`separator` — The separator. Type: `String`
+
+`s` — The string to split. Type: `String`
+
+#### Returned value(s)
+
+Returns an array of selected substrings. Empty substrings may be selected when:
+
+A non-empty separator occurs at the beginning or end of the string;
+
+There are multiple consecutive separators.
+
+Note: if the original string `s` is empty, the result is an empty array `[]`, not an array containing an empty substring.
+
+Type: `Array(String)`
+
+### notice
+
+`Only supported in vectorized engine`
+
+### example
+
+```
+SELECT split_by_string('1, 2 3, 4,5, abcde', ', ');
+select split_by_string('a1b1c1d','1');
++---------------------------------+
+| split_by_string('a1b1c1d', '1') |
++---------------------------------+
+| ['a', 'b', 'c', 'd']            |
++---------------------------------+
+
+select split_by_string(',,a,b,c,',',');
++----------------------------------+
+| split_by_string(',,a,b,c,', ',') |
++----------------------------------+
+| ['', '', 'a', 'b', 'c', '']      |
++----------------------------------+
+
+SELECT split_by_string(NULL,',');
++----------------------------+
+| split_by_string(NULL, ',') |
++----------------------------+
+| NULL                       |
++----------------------------+
+
+select split_by_string('a,b,c,abcde',',');
++-------------------------------------+
+| split_by_string('a,b,c,abcde', ',') |
++-------------------------------------+
+| ['a', 'b', 'c', 'abcde']            |
++-------------------------------------+
+
+select split_by_string('1,,2,3,,4,5,,abcde', ',,');
++---------------------------------------------+
+| split_by_string('1,,2,3,,4,5,,abcde', ',,') |
++---------------------------------------------+
+| ['1', '2,3', '4,5', 'abcde']                |
++---------------------------------------------+
+
+select split_by_string(',,,,',',,');
++-------------------------------+
+| split_by_string(',,,,', ',,') |
++-------------------------------+
+| ['', '', '']                  |
++-------------------------------+
+
+select split_by_string(',,a,,b,,c,,',',,');
++--------------------------------------+
+| split_by_string(',,a,,b,,c,,', ',,') |
++--------------------------------------+
+| ['', 'a', 'b', 'c', '']              |
++--------------------------------------+
+```
+### keywords
+
+SPLIT_BY_STRING,SPLIT
\ No newline at end of file
diff --git a/docs/sidebars.json b/docs/sidebars.json
index 0dfcd8a982..3b439925eb 100644
--- a/docs/sidebars.json
+++ b/docs/sidebars.json
@@ -410,6 +410,7 @@
                                 "sql-manual/sql-functions/string-functions/strleft",
                                 "sql-manual/sql-functions/string-functions/strright",
                                 "sql-manual/sql-functions/string-functions/split_part",
+                                "sql-manual/sql-functions/string-functions/split_by_string",
                                 "sql-manual/sql-functions/string-functions/substring_index",
                                 "sql-manual/sql-functions/string-functions/money_format",
                                 "sql-manual/sql-functions/string-functions/parse_url",
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/split_by_string.md b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/split_by_string.md
new file mode 100644
index 0000000000..033388160b
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/split_by_string.md
@@ -0,0 +1,112 @@
+---
+{
+    "title": "split_by_string",
+    "language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## split_by_string 
+
+### description
+
+#### Syntax
+
+```
+split_by_string(s, separator)
+```
+将字符串拆分为由字符串分隔的子字符串。它使用多个字符的常量字符串分隔符作为分隔符。如果字符串分隔符为空,它将字符串拆分为单个字符数组。
+
+#### Arguments
+
+`separator` — 分隔符是一个字符串,是用来分割的标志字符. 类型: `String`
+
+`s` — 需要分割的字符串. 类型: `String`
+
+#### Returned value(s)
+
+返回一个包含子字符串的数组. 以下情况会返回空的子字符串:
+
+需要分割的字符串的首尾是分隔符;
+
+多个分隔符连续出现.
+
+注意: 如果需要分割的字符串为空, 则返回空数组 `[]`, 而不是包含空子字符串的数组.
+
+Type: `Array(String)`
+
+### notice
+
+`Only supported in vectorized engine`
+
+### example
+
+```
+select split_by_string('a1b1c1d','1');
++---------------------------------+
+| split_by_string('a1b1c1d', '1') |
++---------------------------------+
+| ['a', 'b', 'c', 'd']            |
++---------------------------------+
+
+select split_by_string(',,a,b,c,',',');
++----------------------------------+
+| split_by_string(',,a,b,c,', ',') |
++----------------------------------+
+| ['', '', 'a', 'b', 'c', '']      |
++----------------------------------+
+
+SELECT split_by_string(NULL,',');
++----------------------------+
+| split_by_string(NULL, ',') |
++----------------------------+
+| NULL                       |
++----------------------------+
+
+select split_by_string('a,b,c,abcde',',');
++-------------------------------------+
+| split_by_string('a,b,c,abcde', ',') |
++-------------------------------------+
+| ['a', 'b', 'c', 'abcde']            |
++-------------------------------------+
+
+select split_by_string('1,,2,3,,4,5,,abcde', ',,');
++---------------------------------------------+
+| split_by_string('1,,2,3,,4,5,,abcde', ',,') |
++---------------------------------------------+
+| ['1', '2,3', '4,5', 'abcde']                |
++---------------------------------------------+
+
+select split_by_string(',,,,',',,');
++-------------------------------+
+| split_by_string(',,,,', ',,') |
++-------------------------------+
+| ['', '', '']                  |
++-------------------------------+
+
+select split_by_string(',,a,,b,,c,,',',,');
++--------------------------------------+
+| split_by_string(',,a,,b,,c,,', ',,') |
++--------------------------------------+
+| ['', 'a', 'b', 'c', '']              |
++--------------------------------------+
+```
+### keywords
+
+SPLIT_BY_STRING,SPLIT
\ No newline at end of file
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index 2f7fd04e71..6c9fb3eccd 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -2375,6 +2375,8 @@ visible_functions = [
     [['money_format'], 'VARCHAR', ['DECIMAL128'],
      '_ZN5doris15StringFunctions12money_formatEPN9doris_udf15FunctionContextERKNS1_12DecimalV2ValE',
      '', '', 'vec', ''],
+    [['split_by_string'],'ARRAY_VARCHAR',['STRING','STRING'],
+        '', '', '', 'vec', ''],
     [['split_part'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'INT'],
         '_ZN5doris15StringFunctions10split_partEPN9doris_udf15FunctionContextERKNS1_9StringValES6_RKNS1_6IntValE',
         '', '', 'vec', 'ALWAYS_NULLABLE'],
diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_split_by_string.out b/regression-test/data/query_p0/sql_functions/string_functions/test_split_by_string.out
new file mode 100644
index 0000000000..fd69d943f3
--- /dev/null
+++ b/regression-test/data/query_p0/sql_functions/string_functions/test_split_by_string.out
@@ -0,0 +1,89 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+['a', 'b', 'c', 'd', 'e']
+
+-- !sql --
+['1', '2', '5', '5', '3']
+
+-- !sql --
+[]
+
+-- !sql --
+[]
+
+-- !sql --
+[]
+
+-- !sql --
+['a', 'b', 'c', 'd']
+
+-- !sql --
+['', '', '', '']
+
+-- !sql --
+['a', 'b', 'c', 'abcde']
+
+-- !sql --
+['', '', 'a', 'b', 'c', '']
+
+-- !sql --
+['null']
+
+-- !sql --
+['1', '2,3', '4,5', 'abcde']
+
+-- !sql --
+['a', 'b', 'c', 'd', 'e']
+
+-- !sql --
+[]
+
+-- !sql --
+[]
+
+-- !sql --
+[]
+
+-- !sql --
+['1', '2,3', '', '', '4,5, abcde']
+
+-- !sql --
+['', '', '']
+
+-- !sql --
+['a', 'b', 'c']
+
+-- !sql --
+['a', 'b', 'c', '']
+
+-- !sql --
+['', 'a', 'b', 'c', '']
+
+-- !sql --
+['null']
+
+-- !sql --
+1	abcde		['a', 'b', 'c', 'd', 'e']
+2	12553		['1', '2', '5', '5', '3']
+3			[]
+4		,	[]
+5		a	[]
+6	a1b1c1d	1	['a', 'b', 'c', 'd']
+7	,,,	,	['', '', '', '']
+8	a,b,c	,	['a', 'b', 'c']
+9	a,b,c,	,	['a', 'b', 'c', '']
+10	\N	,	\N
+11	a,b,c,12345,	,	['a', 'b', 'c', '12345', '']
+
+-- !sql --
+1	1,,2,3,,4,5,,abcde	,,	['1', '2,3', '4,5', 'abcde']
+2	abcde		['a', 'b', 'c', 'd', 'e']
+3			[]
+4		,	[]
+5		a	[]
+6	1,,2,3,,,,,,4,5,,abcde	,,	['1', '2,3', '', '', '4,5', 'abcde']
+7	,,,	,	['', '', '', '']
+8	a,b,c	,	['a', 'b', 'c']
+9	a,b,c,	,	['a', 'b', 'c', '']
+10	\N	,	\N
+
diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_split_by_string.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_split_by_string.groovy
new file mode 100644
index 0000000000..343ebb0634
--- /dev/null
+++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_split_by_string.groovy
@@ -0,0 +1,107 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Regression suite for split_by_string: constant arguments first, then arguments read
+// from table columns. Each qt_sql result is compared with the recorded output file.
+suite("test_split_by_string") {
+    // Run the queries with the vectorized engine enabled.
+    sql "set enable_vectorized_engine = true;"
+
+    // Constant arguments with an empty or single-character separator.
+    qt_sql "select split_by_string('abcde','');"
+    qt_sql "select split_by_string('12553','');"
+    qt_sql "select split_by_string('','');"
+    qt_sql "select split_by_string('',',');"
+    qt_sql "select split_by_string('','a');"
+
+    qt_sql "select split_by_string('a1b1c1d','1');"
+    qt_sql "select split_by_string(',,,',',');"
+    qt_sql "select split_by_string('a,b,c,abcde',',');"
+    qt_sql "select split_by_string(',,a,b,c,',',');"
+    // Note: 'null' here is the four-character string literal, not SQL NULL.
+    qt_sql "select split_by_string('null',',');"
+    
+    // Constant arguments with the two-character separator ',,', plus repeats of the
+    // empty-input and empty-separator cases above.
+    qt_sql "select split_by_string('1,,2,3,,4,5,,abcde', ',,');"
+    qt_sql "select split_by_string('abcde','');"
+    qt_sql "select split_by_string('','');"
+    qt_sql "select split_by_string('',',');"
+    qt_sql "select split_by_string('','a');"
+
+    qt_sql "select split_by_string('1,,2,3,,,,,,4,5, abcde', ',,');"
+    qt_sql "select split_by_string(',,,,',',,');"
+    qt_sql "select split_by_string('a,,b,,c',',,');"
+    qt_sql "select split_by_string('a,,b,,c,,',',,');"
+    qt_sql "select split_by_string(',,a,,b,,c,,',',,');"
+    qt_sql "select split_by_string('null',',');"
+
+    // Column arguments, table 1: the separator column v2 is varchar(1), so every
+    // separator is empty or a single character.
+    def tableName1 = "test_split_by_char"
+
+    sql """DROP TABLE IF EXISTS ${tableName1}"""
+    sql """ 
+            CREATE TABLE IF NOT EXISTS ${tableName1} (
+              `k1` int(11) NULL COMMENT "",
+              `v1` varchar(20) NULL COMMENT "",
+              `v2` varchar(1) NOT NULL COMMENT ""
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`k1`)
+            DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "storage_format" = "V2"
+            )
+        """
+    sql """ INSERT INTO ${tableName1} VALUES(1, 'abcde', '') """
+    sql """ INSERT INTO ${tableName1} VALUES(2, '12553', '') """
+    sql """ INSERT INTO ${tableName1} VALUES(3, '', '') """
+    sql """ INSERT INTO ${tableName1} VALUES(4, '', ',') """
+    sql """ INSERT INTO ${tableName1} VALUES(5, '', 'a') """
+    sql """ INSERT INTO ${tableName1} VALUES(6, 'a1b1c1d', '1') """
+    sql """ INSERT INTO ${tableName1} VALUES(7, ',,,', ',') """
+    sql """ INSERT INTO ${tableName1} VALUES(8, 'a,b,c', ',') """
+    sql """ INSERT INTO ${tableName1} VALUES(9, 'a,b,c,', ',') """
+    // Row 10 stores a real SQL NULL in v1.
+    sql """ INSERT INTO ${tableName1} VALUES(10, null, ',') """
+    sql """ INSERT INTO ${tableName1} VALUES(11, 'a,b,c,12345,', ',') """
+
+    qt_sql "SELECT *, split_by_string(v1, v2) FROM ${tableName1} ORDER BY k1"
+
+    // Column arguments, table 2: v2 is varchar(10), which allows the multi-character
+    // separator ',,' used by rows 1 and 6.
+    def tableName2 = "test_split_by_string"
+
+    sql """DROP TABLE IF EXISTS ${tableName2}"""
+    sql """ 
+            CREATE TABLE IF NOT EXISTS ${tableName2} (
+              `k1` int(11) NULL COMMENT "",
+              `v1` varchar(50) NULL COMMENT "",
+              `v2` varchar(10) NOT NULL COMMENT ""
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`k1`)
+            DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "storage_format" = "V2"
+            )
+        """
+    sql """ INSERT INTO ${tableName2} VALUES(1, '1,,2,3,,4,5,,abcde', ',,') """
+    sql """ INSERT INTO ${tableName2} VALUES(2, 'abcde','') """
+    sql """ INSERT INTO ${tableName2} VALUES(3, '', '') """
+    sql """ INSERT INTO ${tableName2} VALUES(4, '', ',') """
+    sql """ INSERT INTO ${tableName2} VALUES(5, '', 'a') """
+    sql """ INSERT INTO ${tableName2} VALUES(6, '1,,2,3,,,,,,4,5,,abcde', ',,') """
+    sql """ INSERT INTO ${tableName2} VALUES(7, ',,,', ',') """
+    sql """ INSERT INTO ${tableName2} VALUES(8, 'a,b,c', ',') """
+    sql """ INSERT INTO ${tableName2} VALUES(9, 'a,b,c,', ',') """
+    // Row 10 stores a real SQL NULL in v1.
+    sql """ INSERT INTO ${tableName2} VALUES(10, null, ',') """
+
+
+    qt_sql "SELECT *, split_by_string(v1, v2) FROM ${tableName2} ORDER BY k1"
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org