You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by li...@apache.org on 2023/01/06 13:10:16 UTC
[doris] branch branch-1.2-lts updated: [cherry-pick][feature](string_function) support split_by_string function (#15677)
This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
new 0d05d55859 [cherry-pick][feature](string_function) support split_by_string function (#15677)
0d05d55859 is described below
commit 0d05d55859b327991827366493dcef10fd7fafa0
Author: yongkang.zhong <zh...@qq.com>
AuthorDate: Fri Jan 6 21:10:09 2023 +0800
[cherry-pick][feature](string_function) support split_by_string function (#15677)
Co-authored-by: liqing-coder <10...@users.noreply.github.com>
---
be/src/vec/functions/function_string.cpp | 1 +
be/src/vec/functions/function_string.h | 122 +++++++++++++++++++++
be/test/vec/function/function_string_test.cpp | 1 +
.../string-functions/split_by_string.md | 112 +++++++++++++++++++
docs/sidebars.json | 1 +
.../string-functions/split_by_string.md | 112 +++++++++++++++++++
gensrc/script/doris_builtins_functions.py | 2 +
.../string_functions/test_split_by_string.out | 89 +++++++++++++++
.../string_functions/test_split_by_string.groovy | 107 ++++++++++++++++++
9 files changed, 547 insertions(+)
diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp
index 3898979a32..ad8499dcc7 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -683,6 +683,7 @@ void register_function_string(SimpleFunctionFactory& factory) {
factory.register_function<FunctionToBase64>();
factory.register_function<FunctionFromBase64>();
factory.register_function<FunctionSplitPart>();
+ factory.register_function<FunctionSplitByString>();
factory.register_function<FunctionStringMd5AndSM3<MD5Sum>>();
factory.register_function<FunctionSubstringIndex>();
factory.register_function<FunctionExtractURLParameter>();
diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h
index 73089500a6..fa6f2f9934 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -55,6 +55,7 @@
#include "vec/columns/columns_number.h"
#include "vec/common/assert_cast.h"
#include "vec/common/string_ref.h"
+#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_decimal.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
@@ -1578,6 +1579,127 @@ public:
}
};
+/// Implements `split_by_string(str, delimiter)`: cuts every row of `str` at each occurrence
+/// of `delimiter` and returns the pieces as an array of nullable strings.
+///
+/// Rules implemented below:
+///  - an empty `str` produces an empty array;
+///  - an empty `delimiter` produces one element per byte of `str`;
+///  - otherwise a delimiter at the start/end of `str`, or two adjacent delimiters,
+///    produce empty-string elements.
+class FunctionSplitByString : public IFunction {
+public:
+    static constexpr auto name = "split_by_string";
+
+    static FunctionPtr create() { return std::make_shared<FunctionSplitByString>(); }
+    using NullMapType = PaddedPODArray<UInt8>;
+
+    String get_name() const override { return name; }
+
+    bool is_variadic() const override { return false; }
+
+    size_t get_number_of_arguments() const override { return 2; }
+
+    /// The result type is an array whose element type is the nullable form of the
+    /// first argument's type.
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        DCHECK(is_string(arguments[0]))
+                << "first argument for function: " << name << " should be string"
+                << " and arguments[0] is " << arguments[0]->get_name();
+        DCHECK(is_string(arguments[1]))
+                << "second argument for function: " << name << " should be string"
+                << " and arguments[1] is " << arguments[1]->get_name();
+        return std::make_shared<DataTypeArray>(make_nullable(arguments[0]));
+    }
+
+    /// Builds the result array column for the two argument columns and stores it at
+    /// position `result` of `block`. Always returns Status::OK().
+    Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t /*input_rows_count*/) override {
+        DCHECK_EQ(arguments.size(), 2);
+
+        // Expand constant arguments so that both inputs can be indexed row by row.
+        ColumnPtr src_column =
+                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+        ColumnPtr delimiter_column =
+                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
+
+        DataTypePtr src_column_type = block.get_by_position(arguments[0]).type;
+        auto dest_column_ptr = ColumnArray::create(make_nullable(src_column_type)->create_column(),
+                                                   ColumnArray::ColumnOffsets::create());
+
+        IColumn* dest_nested_column = &dest_column_ptr->get_data();
+        auto& dest_offsets = dest_column_ptr->get_offsets();
+        DCHECK(dest_nested_column != nullptr);
+        // Exactly one array offset is appended per input row.
+        dest_offsets.reserve(src_column->size());
+
+        // The element column was created as nullable above: unwrap it into the string
+        // data column and its null map so that both can be filled directly.
+        ColumnNullable* dest_nullable_col = reinterpret_cast<ColumnNullable*>(dest_nested_column);
+        dest_nested_column = dest_nullable_col->get_nested_column_ptr();
+        NullMapType* dest_nested_null_map = &dest_nullable_col->get_null_map_column().get_data();
+
+        _execute(*src_column, *delimiter_column, *dest_nested_column, dest_offsets,
+                 dest_nested_null_map);
+        block.replace_by_position(result, std::move(dest_column_ptr));
+        return Status::OK();
+    }
+
+private:
+    /// Splits every row of `src_column` by the matching row of `delimiter_column`.
+    ///
+    /// The pieces are appended to `dest_nested_column` (treated as a ColumnString), one
+    /// `false` entry per piece is appended to `dest_nested_null_map`, and one cumulative
+    /// element count per input row is appended to `dest_offsets`.
+    void _execute(const IColumn& src_column, const IColumn& delimiter_column,
+                  IColumn& dest_nested_column, ColumnArray::Offsets64& dest_offsets,
+                  NullMapType* dest_nested_null_map) {
+        ColumnString& dest_column_string = reinterpret_cast<ColumnString&>(dest_nested_column);
+        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
+        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
+
+        // string_pos: total bytes written to the element column so far.
+        // dest_pos:   total elements written so far (becomes the array offset of each row).
+        ColumnArray::Offset64 string_pos = 0;
+        ColumnArray::Offset64 dest_pos = 0;
+        const ColumnString* src_column_string = reinterpret_cast<const ColumnString*>(&src_column);
+        ColumnArray::Offset64 src_offsets_size = src_column_string->get_offsets().size();
+
+        for (size_t i = 0; i < src_offsets_size; i++) {
+            const StringRef delimiter_ref = delimiter_column.get_data_at(i);
+            const StringRef str_ref = src_column_string->get_data_at(i);
+
+            // An empty input string yields an empty array, whatever the delimiter is.
+            if (str_ref.size == 0) {
+                dest_offsets.push_back(dest_pos);
+                continue;
+            }
+            if (delimiter_ref.size == 0) {
+                // Empty delimiter: emit one element per byte.
+                // NOTE(review): this is byte-wise, so a multi-byte UTF-8 character would be
+                // spread over several elements - confirm whether that is intended.
+                for (size_t str_pos = 0; str_pos < str_ref.size;) {
+                    const size_t str_offset = str_pos;
+                    const size_t old_size = column_string_chars.size();
+                    str_pos++;
+                    const size_t new_size = old_size + 1;
+                    column_string_chars.resize(new_size);
+                    memcpy(column_string_chars.data() + old_size, str_ref.data + str_offset, 1);
+                    (*dest_nested_null_map).push_back(false);
+                    string_pos++;
+                    dest_pos++;
+                    column_string_offsets.push_back(string_pos);
+                }
+            } else {
+                // `<=` on purpose: when the string ends with a delimiter, str_pos lands exactly
+                // on str_ref.size and one more (empty) element has to be emitted.
+                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
+                    const size_t str_offset = str_pos;
+                    const size_t old_size = column_string_chars.size();
+                    // Advances str_pos to the next delimiter (or to the end of the string).
+                    const size_t split_part_size = split_str(str_pos, str_ref, delimiter_ref);
+                    // Skip the delimiter itself; past the end this simply terminates the loop.
+                    str_pos += delimiter_ref.size;
+                    const size_t new_size = old_size + split_part_size;
+                    column_string_chars.resize(new_size);
+                    if (split_part_size > 0) {
+                        memcpy(column_string_chars.data() + old_size, str_ref.data + str_offset,
+                               split_part_size);
+                    }
+                    (*dest_nested_null_map).push_back(false);
+                    string_pos += split_part_size;
+                    dest_pos++;
+                    column_string_offsets.push_back(string_pos);
+                }
+            }
+            dest_offsets.push_back(dest_pos);
+        }
+    }
+
+    /// Moves `pos` forward to the start of the next occurrence of `delimiter_ref` inside
+    /// `str_ref`, or to `str_ref.size` when there is none.
+    ///
+    /// @return the number of bytes skipped, i.e. the length of the piece that starts at the
+    ///         incoming value of `pos`.
+    size_t split_str(size_t& pos, const StringRef str_ref, StringRef delimiter_ref) {
+        const size_t old_size = pos;
+        const size_t str_size = str_ref.size;
+        const size_t delimiter_size = delimiter_ref.size;
+        while (pos < str_size) {
+            // Fewer bytes left than the delimiter is long: no further match is possible.
+            // Comparing anyway would make memcmp read past the end of this string and
+            // could report a bogus match on whatever bytes happen to follow it.
+            if (str_size - pos < delimiter_size) {
+                pos = str_size;
+                break;
+            }
+            if (memcmp(str_ref.data + pos, delimiter_ref.data, delimiter_size) == 0) {
+                break;
+            }
+            pos++;
+        }
+        return pos - old_size;
+    }
+};
+
struct SM3Sum {
static constexpr auto name = "sm3sum";
using ObjectData = SM3Digest;
diff --git a/be/test/vec/function/function_string_test.cpp b/be/test/vec/function/function_string_test.cpp
index c499718ff1..0ce215a650 100644
--- a/be/test/vec/function/function_string_test.cpp
+++ b/be/test/vec/function/function_string_test.cpp
@@ -23,6 +23,7 @@
#include "function_test_util.h"
#include "util/encryption_util.h"
#include "vec/core/types.h"
+#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_string.h"
namespace doris::vectorized {
diff --git a/docs/en/docs/sql-manual/sql-functions/string-functions/split_by_string.md b/docs/en/docs/sql-manual/sql-functions/string-functions/split_by_string.md
new file mode 100644
index 0000000000..9c473c2dc9
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/string-functions/split_by_string.md
@@ -0,0 +1,112 @@
+---
+{
+ "title": "split_by_string",
+ "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## split_by_string
+
+### description
+
+#### Syntax
+
+```
+split_by_string(s, separator)
+```
+Splits the string `s` into an array of substrings at every occurrence of the string `separator`, which may be longer than one character. If `separator` is empty, `s` is split into an array of single characters.
+
+#### Arguments
+`separator` — The separator. Type: `String`
+
+`s` — The string to split. Type: `String`
+
+#### Returned value(s)
+
+Returns an array of selected substrings. Empty substrings may be selected when:
+
+A non-empty separator occurs at the beginning or end of the string;
+
+There are multiple consecutive separators;
+
+Note: when the original string s is empty, the result is an empty array (`[]`) rather than an array containing an empty substring.
+
+Type: `Array(String)`
+
+### notice
+
+`Only supported in vectorized engine`
+
+### example
+
+```
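+SELECT split_by_string('1, 2 3, 4,5, abcde', ', ');
++---------------------------------------------+
+| split_by_string('1, 2 3, 4,5, abcde', ', ') |
++---------------------------------------------+
+| ['1', '2 3', '4,5', 'abcde'] |
++---------------------------------------------+
+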
+select split_by_string('a1b1c1d','1');
++---------------------------------+
+| split_by_string('a1b1c1d', '1') |
++---------------------------------+
+| ['a', 'b', 'c', 'd'] |
++---------------------------------+
+
+select split_by_string(',,a,b,c,',',');
++----------------------------------+
+| split_by_string(',,a,b,c,', ',') |
++----------------------------------+
+| ['', '', 'a', 'b', 'c', ''] |
++----------------------------------+
+
+SELECT split_by_string(NULL,',');
++----------------------------+
+| split_by_string(NULL, ',') |
++----------------------------+
+| NULL |
++----------------------------+
+
+select split_by_string('a,b,c,abcde',',');
++-------------------------------------+
+| split_by_string('a,b,c,abcde', ',') |
++-------------------------------------+
+| ['a', 'b', 'c', 'abcde'] |
++-------------------------------------+
+
+select split_by_string('1,,2,3,,4,5,,abcde', ',,');
++---------------------------------------------+
+| split_by_string('1,,2,3,,4,5,,abcde', ',,') |
++---------------------------------------------+
+| ['1', '2,3', '4,5', 'abcde'] |
++---------------------------------------------+
+
+select split_by_string(',,,,',',,');
++-------------------------------+
+| split_by_string(',,,,', ',,') |
++-------------------------------+
+| ['', '', ''] |
++-------------------------------+
+
+select split_by_string(',,a,,b,,c,,',',,');
++--------------------------------------+
+| split_by_string(',,a,,b,,c,,', ',,') |
++--------------------------------------+
+| ['', 'a', 'b', 'c', ''] |
++--------------------------------------+
+```
+### keywords
+
+SPLIT_BY_STRING,SPLIT
\ No newline at end of file
diff --git a/docs/sidebars.json b/docs/sidebars.json
index 0dfcd8a982..3b439925eb 100644
--- a/docs/sidebars.json
+++ b/docs/sidebars.json
@@ -410,6 +410,7 @@
"sql-manual/sql-functions/string-functions/strleft",
"sql-manual/sql-functions/string-functions/strright",
"sql-manual/sql-functions/string-functions/split_part",
+ "sql-manual/sql-functions/string-functions/split_by_string",
"sql-manual/sql-functions/string-functions/substring_index",
"sql-manual/sql-functions/string-functions/money_format",
"sql-manual/sql-functions/string-functions/parse_url",
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/split_by_string.md b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/split_by_string.md
new file mode 100644
index 0000000000..033388160b
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/split_by_string.md
@@ -0,0 +1,112 @@
+---
+{
+ "title": "split_by_string",
+ "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## split_by_string
+
+### description
+
+#### Syntax
+
+```
+split_by_string(s, separator)
+```
+将字符串拆分为由字符串分隔的子字符串。它使用多个字符的常量字符串分隔符作为分隔符。如果字符串分隔符为空,它将字符串拆分为单个字符数组。
+
+#### Arguments
+
+`separator` — 分隔符是一个字符串,是用来分割的标志字符. 类型: `String`
+
+`s` — 需要分割的字符串. 类型: `String`
+
+#### Returned value(s)
+
+返回一个包含子字符串的数组. 以下情况会返回空的子字符串:
+
+需要分割的字符串的首尾是分隔符;
+
+多个分隔符连续出现;
+
+注意:当需要分割的字符串为空时,返回空数组 `[]`,而不是包含空字符串的数组.
+
+Type: `Array(String)`
+
+### notice
+
+`Only supported in vectorized engine`
+
+### example
+
+```
+select split_by_string('a1b1c1d','1');
++---------------------------------+
+| split_by_string('a1b1c1d', '1') |
++---------------------------------+
+| ['a', 'b', 'c', 'd'] |
++---------------------------------+
+
+select split_by_string(',,a,b,c,',',');
++----------------------------------+
+| split_by_string(',,a,b,c,', ',') |
++----------------------------------+
+| ['', '', 'a', 'b', 'c', ''] |
++----------------------------------+
+
+SELECT split_by_string(NULL,',');
++----------------------------+
+| split_by_string(NULL, ',') |
++----------------------------+
+| NULL |
++----------------------------+
+
+select split_by_string('a,b,c,abcde',',');
++-------------------------------------+
+| split_by_string('a,b,c,abcde', ',') |
++-------------------------------------+
+| ['a', 'b', 'c', 'abcde'] |
++-------------------------------------+
+
+select split_by_string('1,,2,3,,4,5,,abcde', ',,');
++---------------------------------------------+
+| split_by_string('1,,2,3,,4,5,,abcde', ',,') |
++---------------------------------------------+
+| ['1', '2,3', '4,5', 'abcde'] |
++---------------------------------------------+
+
+select split_by_string(',,,,',',,');
++-------------------------------+
+| split_by_string(',,,,', ',,') |
++-------------------------------+
+| ['', '', ''] |
++-------------------------------+
+
+select split_by_string(',,a,,b,,c,,',',,');
++--------------------------------------+
+| split_by_string(',,a,,b,,c,,', ',,') |
++--------------------------------------+
+| ['', 'a', 'b', 'c', ''] |
++--------------------------------------+
+```
+### keywords
+
+SPLIT_BY_STRING,SPLIT
\ No newline at end of file
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index 2f7fd04e71..6c9fb3eccd 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -2375,6 +2375,8 @@ visible_functions = [
[['money_format'], 'VARCHAR', ['DECIMAL128'],
'_ZN5doris15StringFunctions12money_formatEPN9doris_udf15FunctionContextERKNS1_12DecimalV2ValE',
'', '', 'vec', ''],
+ [['split_by_string'],'ARRAY_VARCHAR',['STRING','STRING'],
+ '', '', '', 'vec', ''],
[['split_part'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'INT'],
'_ZN5doris15StringFunctions10split_partEPN9doris_udf15FunctionContextERKNS1_9StringValES6_RKNS1_6IntValE',
'', '', 'vec', 'ALWAYS_NULLABLE'],
diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_split_by_string.out b/regression-test/data/query_p0/sql_functions/string_functions/test_split_by_string.out
new file mode 100644
index 0000000000..fd69d943f3
--- /dev/null
+++ b/regression-test/data/query_p0/sql_functions/string_functions/test_split_by_string.out
@@ -0,0 +1,89 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+['a', 'b', 'c', 'd', 'e']
+
+-- !sql --
+['1', '2', '5', '5', '3']
+
+-- !sql --
+[]
+
+-- !sql --
+[]
+
+-- !sql --
+[]
+
+-- !sql --
+['a', 'b', 'c', 'd']
+
+-- !sql --
+['', '', '', '']
+
+-- !sql --
+['a', 'b', 'c', 'abcde']
+
+-- !sql --
+['', '', 'a', 'b', 'c', '']
+
+-- !sql --
+['null']
+
+-- !sql --
+['1', '2,3', '4,5', 'abcde']
+
+-- !sql --
+['a', 'b', 'c', 'd', 'e']
+
+-- !sql --
+[]
+
+-- !sql --
+[]
+
+-- !sql --
+[]
+
+-- !sql --
+['1', '2,3', '', '', '4,5, abcde']
+
+-- !sql --
+['', '', '']
+
+-- !sql --
+['a', 'b', 'c']
+
+-- !sql --
+['a', 'b', 'c', '']
+
+-- !sql --
+['', 'a', 'b', 'c', '']
+
+-- !sql --
+['null']
+
+-- !sql --
+1 abcde ['a', 'b', 'c', 'd', 'e']
+2 12553 ['1', '2', '5', '5', '3']
+3 []
+4 , []
+5 a []
+6 a1b1c1d 1 ['a', 'b', 'c', 'd']
+7 ,,, , ['', '', '', '']
+8 a,b,c , ['a', 'b', 'c']
+9 a,b,c, , ['a', 'b', 'c', '']
+10 \N , \N
+11 a,b,c,12345, , ['a', 'b', 'c', '12345', '']
+
+-- !sql --
+1 1,,2,3,,4,5,,abcde ,, ['1', '2,3', '4,5', 'abcde']
+2 abcde ['a', 'b', 'c', 'd', 'e']
+3 []
+4 , []
+5 a []
+6 1,,2,3,,,,,,4,5,,abcde ,, ['1', '2,3', '', '', '4,5', 'abcde']
+7 ,,, , ['', '', '', '']
+8 a,b,c , ['a', 'b', 'c']
+9 a,b,c, , ['a', 'b', 'c', '']
+10 \N , \N
+
diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_split_by_string.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_split_by_string.groovy
new file mode 100644
index 0000000000..343ebb0634
--- /dev/null
+++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_split_by_string.groovy
@@ -0,0 +1,107 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_split_by_string") { // regression suite for the split_by_string SQL function; results are compared with the matching .out file
+ sql "set enable_vectorized_engine = true;" // the function is documented as vectorized-engine only
+
+ // constant arguments: empty or single-character separator
+ qt_sql "select split_by_string('abcde','');" // empty separator: one element per character
+ qt_sql "select split_by_string('12553','');"
+ qt_sql "select split_by_string('','');" // empty input string: expected result is []
+ qt_sql "select split_by_string('',',');"
+ qt_sql "select split_by_string('','a');"
+
+ qt_sql "select split_by_string('a1b1c1d','1');"
+ qt_sql "select split_by_string(',,,',',');" // only separators: every element is an empty string
+ qt_sql "select split_by_string('a,b,c,abcde',',');"
+ qt_sql "select split_by_string(',,a,b,c,',',');" // leading/trailing separators give empty elements
+ qt_sql "select split_by_string('null',',');" // 'null' is a string literal here, not SQL NULL
+
+ // constant arguments: multi-character separator (the empty-input and empty-separator cases are repeated)
+ qt_sql "select split_by_string('1,,2,3,,4,5,,abcde', ',,');"
+ qt_sql "select split_by_string('abcde','');"
+ qt_sql "select split_by_string('','');"
+ qt_sql "select split_by_string('',',');"
+ qt_sql "select split_by_string('','a');"
+
+ qt_sql "select split_by_string('1,,2,3,,,,,,4,5, abcde', ',,');" // three consecutive ',,' give two empty elements
+ qt_sql "select split_by_string(',,,,',',,');"
+ qt_sql "select split_by_string('a,,b,,c',',,');"
+ qt_sql "select split_by_string('a,,b,,c,,',',,');"
+ qt_sql "select split_by_string(',,a,,b,,c,,',',,');"
+ qt_sql "select split_by_string('null',',');"
+
+ def tableName1 = "test_split_by_char" // column arguments; the separator column v2 is varchar(1)
+
+ sql """DROP TABLE IF EXISTS ${tableName1}"""
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tableName1} (
+ `k1` int(11) NULL COMMENT "",
+ `v1` varchar(20) NULL COMMENT "",
+ `v2` varchar(1) NOT NULL COMMENT ""
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`k1`)
+ DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "storage_format" = "V2"
+ )
+ """
+ sql """ INSERT INTO ${tableName1} VALUES(1, 'abcde', '') """
+ sql """ INSERT INTO ${tableName1} VALUES(2, '12553', '') """
+ sql """ INSERT INTO ${tableName1} VALUES(3, '', '') """
+ sql """ INSERT INTO ${tableName1} VALUES(4, '', ',') """
+ sql """ INSERT INTO ${tableName1} VALUES(5, '', 'a') """
+ sql """ INSERT INTO ${tableName1} VALUES(6, 'a1b1c1d', '1') """
+ sql """ INSERT INTO ${tableName1} VALUES(7, ',,,', ',') """
+ sql """ INSERT INTO ${tableName1} VALUES(8, 'a,b,c', ',') """
+ sql """ INSERT INTO ${tableName1} VALUES(9, 'a,b,c,', ',') """
+ sql """ INSERT INTO ${tableName1} VALUES(10, null, ',') """ // SQL NULL input: expected result is NULL
+ sql """ INSERT INTO ${tableName1} VALUES(11, 'a,b,c,12345,', ',') """
+
+ qt_sql "SELECT *, split_by_string(v1, v2) FROM ${tableName1} ORDER BY k1" // ORDER BY keeps the output deterministic
+
+ def tableName2 = "test_split_by_string" // column arguments; the separator column v2 is varchar(10), so multi-character separators fit
+
+ sql """DROP TABLE IF EXISTS ${tableName2}"""
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tableName2} (
+ `k1` int(11) NULL COMMENT "",
+ `v1` varchar(50) NULL COMMENT "",
+ `v2` varchar(10) NOT NULL COMMENT ""
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`k1`)
+ DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "storage_format" = "V2"
+ )
+ """
+ sql """ INSERT INTO ${tableName2} VALUES(1, '1,,2,3,,4,5,,abcde', ',,') """
+ sql """ INSERT INTO ${tableName2} VALUES(2, 'abcde','') """
+ sql """ INSERT INTO ${tableName2} VALUES(3, '', '') """
+ sql """ INSERT INTO ${tableName2} VALUES(4, '', ',') """
+ sql """ INSERT INTO ${tableName2} VALUES(5, '', 'a') """
+ sql """ INSERT INTO ${tableName2} VALUES(6, '1,,2,3,,,,,,4,5,,abcde', ',,') """
+ sql """ INSERT INTO ${tableName2} VALUES(7, ',,,', ',') """
+ sql """ INSERT INTO ${tableName2} VALUES(8, 'a,b,c', ',') """
+ sql """ INSERT INTO ${tableName2} VALUES(9, 'a,b,c,', ',') """
+ sql """ INSERT INTO ${tableName2} VALUES(10, null, ',') """ // SQL NULL input: expected result is NULL
+
+
+ qt_sql "SELECT *, split_by_string(v1, v2) FROM ${tableName2} ORDER BY k1"
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org