You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by xu...@apache.org on 2022/07/17 09:50:44 UTC
[doris] branch master updated: [feature-wip] (array-type) function concat_ws support array (#10749)
This is an automated email from the ASF dual-hosted git repository.
xuyang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 5793cb11d0 [feature-wip] (array-type) function concat_ws support array (#10749)
5793cb11d0 is described below
commit 5793cb11d07e8d46fd4675820a661d36b623d9ac
Author: zxealous <xe...@gmail.com>
AuthorDate: Sun Jul 17 17:50:39 2022 +0800
[feature-wip] (array-type) function concat_ws support array (#10749)
Issue #10052
function concat_ws support array
---
be/src/vec/functions/function_string.h | 147 ++++++++++++++++++---
be/test/vec/function/function_string_test.cpp | 17 +++
.../sql-functions/string-functions/concat_ws.md | 30 ++++-
.../sql-functions/string-functions/concat_ws.md | 28 +++-
gensrc/script/doris_builtins_functions.py | 3 +
.../string_functions/test_string_function.out | 12 ++
.../string_functions/test_string_function.groovy | 4 +
7 files changed, 215 insertions(+), 26 deletions(-)
diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h
index c545223a23..a7df35b1ed 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -30,12 +30,14 @@
#include "util/md5.h"
#include "util/sm3.h"
#include "util/url_parser.h"
+#include "vec/columns/column_array.h"
#include "vec/columns/column_decimal.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_string.h"
#include "vec/columns/columns_number.h"
#include "vec/common/assert_cast.h"
#include "vec/common/string_ref.h"
+#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_decimal.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
@@ -491,10 +493,13 @@ public:
}
};
-// concat_ws (string,string....)
+// concat_ws (string,string....) or (string, Array)
// TODO: avoid use fmtlib
class FunctionStringConcatWs : public IFunction {
public:
+ using Chars = ColumnString::Chars;
+ using Offsets = ColumnString::Offsets;
+
static constexpr auto name = "concat_ws";
static FunctionPtr create() { return std::make_shared<FunctionStringConcatWs>(); }
String get_name() const override { return name; }
@@ -520,8 +525,8 @@ public:
auto res = ColumnString::create();
bool is_null_type = block.get_by_position(arguments[0]).type.get()->is_nullable();
size_t argument_size = arguments.size();
- std::vector<const ColumnString::Offsets*> offsets_list(argument_size);
- std::vector<const ColumnString::Chars*> chars_list(argument_size);
+ std::vector<const Offsets*> offsets_list(argument_size);
+ std::vector<const Chars*> chars_list(argument_size);
std::vector<const ColumnUInt8::Container*> null_list(argument_size);
ColumnPtr argument_columns[argument_size];
@@ -540,6 +545,11 @@ public:
} else {
null_list[i] = &const_null_map->get_data();
}
+
+ if (check_column<ColumnArray>(argument_columns[i].get())) {
+ continue;
+ }
+
auto col_str = assert_cast<const ColumnString*>(argument_columns[i].get());
offsets_list[i] = &col_str->get_offsets();
chars_list[i] = &col_str->get_chars();
@@ -553,20 +563,126 @@ public:
fmt::memory_buffer buffer;
std::vector<std::string_view> views;
+ if (check_column<ColumnArray>(argument_columns[1].get())) {
+ // Determine if the nested type of the array is String
+ const ColumnArray& array_column =
+ reinterpret_cast<const ColumnArray&>(*argument_columns[1]);
+ if (!array_column.get_data().is_column_string()) {
+ return Status::NotSupported(
+ fmt::format("unsupported nested array of type {} for function {}",
+ is_column_nullable(array_column.get_data())
+ ? array_column.get_data().get_name()
+ : array_column.get_data().get_family_name(),
+ get_name()));
+ }
+ // Concat string in array
+ _execute_array(input_rows_count, array_column, buffer, views, offsets_list, chars_list,
+ null_list, res_data, res_offset);
+
+ } else {
+ // Concat string
+ _execute_string(input_rows_count, argument_size, buffer, views, offsets_list,
+ chars_list, null_list, res_data, res_offset);
+ }
+ if (is_null_type) {
+ block.get_by_position(result).column =
+ ColumnNullable::create(std::move(res), std::move(null_map));
+ } else {
+ block.get_by_position(result).column = std::move(res);
+ }
+ return Status::OK();
+ }
+
+private:
+ void _execute_array(const size_t& input_rows_count, const ColumnArray& array_column,
+ fmt::memory_buffer& buffer, std::vector<std::string_view>& views,
+ const std::vector<const Offsets*>& offsets_list,
+ const std::vector<const Chars*>& chars_list,
+ const std::vector<const ColumnUInt8::Container*>& null_list,
+ Chars& res_data, Offsets& res_offset) {
+ // Get array nested column
+ const UInt8* array_nested_null_map = nullptr;
+ ColumnPtr array_nested_column = nullptr;
+
+ if (is_column_nullable(array_column.get_data())) {
+ const auto& array_nested_null_column =
+ reinterpret_cast<const ColumnNullable&>(array_column.get_data());
+ // String's null map in array
+ array_nested_null_map =
+ array_nested_null_column.get_null_map_column().get_data().data();
+ array_nested_column = array_nested_null_column.get_nested_column_ptr();
+ } else {
+ array_nested_column = array_column.get_data_ptr();
+ }
+
+ const auto& string_column = reinterpret_cast<const ColumnString&>(*array_nested_column);
+ const Chars& string_src_chars = string_column.get_chars();
+ const Offsets& src_string_offsets = string_column.get_offsets();
+ const Offsets& src_array_offsets = array_column.get_offsets();
+ ColumnArray::Offset current_src_array_offset = 0;
+
+ // Concat string in array
for (size_t i = 0; i < input_rows_count; ++i) {
- auto& seq_offsets = *offsets_list[0];
- auto& seq_chars = *chars_list[0];
- auto& seq_nullmap = *null_list[0];
- if (seq_nullmap[i]) {
- res_data.push_back('\0');
+ auto& sep_offsets = *offsets_list[0];
+ auto& sep_chars = *chars_list[0];
+ auto& sep_nullmap = *null_list[0];
+
+ if (sep_nullmap[i]) {
res_offset[i] = res_data.size();
+ current_src_array_offset += src_array_offsets[i] - src_array_offsets[i - 1];
continue;
}
- int seq_size = seq_offsets[i] - seq_offsets[i - 1] - 1;
- const char* seq_data = reinterpret_cast<const char*>(&seq_chars[seq_offsets[i - 1]]);
+ int sep_size = sep_offsets[i] - sep_offsets[i - 1] - 1;
+ const char* sep_data = reinterpret_cast<const char*>(&sep_chars[sep_offsets[i - 1]]);
- std::string_view seq(seq_data, seq_size);
+ std::string_view sep(sep_data, sep_size);
+ buffer.clear();
+ views.clear();
+
+ for (auto next_src_array_offset = src_array_offsets[i];
+ current_src_array_offset < next_src_array_offset; ++current_src_array_offset) {
+ const auto current_src_string_offset =
+ current_src_array_offset ? src_string_offsets[current_src_array_offset - 1]
+ : 0;
+ size_t bytes_to_copy = src_string_offsets[current_src_array_offset] -
+ current_src_string_offset - 1;
+ const char* ptr =
+ reinterpret_cast<const char*>(&string_src_chars[current_src_string_offset]);
+
+ if (array_nested_null_map == nullptr ||
+ !array_nested_null_map[current_src_array_offset]) {
+ views.emplace_back(ptr, bytes_to_copy);
+ }
+ }
+
+ fmt::format_to(buffer, "{}", fmt::join(views, sep));
+
+ StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
+ res_offset);
+ }
+ }
+
+ void _execute_string(const size_t& input_rows_count, const size_t& argument_size,
+ fmt::memory_buffer& buffer, std::vector<std::string_view>& views,
+ const std::vector<const Offsets*>& offsets_list,
+ const std::vector<const Chars*>& chars_list,
+ const std::vector<const ColumnUInt8::Container*>& null_list,
+ Chars& res_data, Offsets& res_offset) {
+ // Concat string
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ auto& sep_offsets = *offsets_list[0];
+ auto& sep_chars = *chars_list[0];
+ auto& sep_nullmap = *null_list[0];
+ if (sep_nullmap[i]) {
+ res_offset[i] = res_data.size();
+ continue;
+ }
+
+ int sep_size = sep_offsets[i] - sep_offsets[i - 1] - 1;
+ const char* sep_data = reinterpret_cast<const char*>(&sep_chars[sep_offsets[i - 1]]);
+
+ std::string_view sep(sep_data, sep_size);
buffer.clear();
views.clear();
for (size_t j = 1; j < argument_size; ++j) {
@@ -580,17 +696,10 @@ public:
views.emplace_back(ptr, size);
}
}
- fmt::format_to(buffer, "{}", fmt::join(views, seq));
+ fmt::format_to(buffer, "{}", fmt::join(views, sep));
StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
res_offset);
}
- if (is_null_type) {
- block.get_by_position(result).column =
- ColumnNullable::create(std::move(res), std::move(null_map));
- } else {
- block.get_by_position(result).column = std::move(res);
- }
- return Status::OK();
}
};
diff --git a/be/test/vec/function/function_string_test.cpp b/be/test/vec/function/function_string_test.cpp
index 60cb797aae..2d259014be 100644
--- a/be/test/vec/function/function_string_test.cpp
+++ b/be/test/vec/function/function_string_test.cpp
@@ -387,6 +387,23 @@ TEST(function_string_test, function_concat_ws_test) {
check_function<DataTypeString, true>(func_name, input_types, data_set);
};
+
+ {
+ InputTypeSet input_types = {TypeIndex::String, TypeIndex::Array, TypeIndex::String};
+
+ Array vec1 = {Field("", 0), Field("", 0), Field("", 0)};
+ Array vec2 = {Field("123", 3), Field("456", 3), Field("789", 3)};
+ Array vec3 = {Field("", 0), Field("?", 1), Field("", 0)};
+ Array vec4 = {Field("abc", 3), Field("", 0), Field("def", 3)};
+ Array vec5 = {Field("abc", 3), Field("def", 3), Field("ghi", 3)};
+ DataSet data_set = {{{std::string("-"), vec1}, std::string("--")},
+ {{std::string(""), vec2}, std::string("123456789")},
+ {{std::string("-"), vec3}, std::string("-?-")},
+ {{Null(), vec4}, Null()},
+ {{std::string("-"), vec5}, std::string("abc-def-ghi")}};
+
+ check_function<DataTypeString, true>(func_name, input_types, data_set);
+ };
}
TEST(function_string_test, function_null_or_empty_test) {
diff --git a/docs/en/docs/sql-manual/sql-functions/string-functions/concat_ws.md b/docs/en/docs/sql-manual/sql-functions/string-functions/concat_ws.md
index 80a0c11dee..a8a4f57d4b 100644
--- a/docs/en/docs/sql-manual/sql-functions/string-functions/concat_ws.md
+++ b/docs/en/docs/sql-manual/sql-functions/string-functions/concat_ws.md
@@ -28,12 +28,13 @@ under the License.
### Description
#### Syntax
-`VARCHAR concat ws (VARCHAR sep, VARCHAR str,...)`
+`VARCHAR concat_ws (VARCHAR sep, VARCHAR str,...)`
+`VARCHAR concat_ws(VARCHAR sep, ARRAY array)`
-Using the first parameter SEP as a connector, the second parameter and all subsequent parameters are spliced into a string.
+Using the first parameter SEP as a connector, the second parameter and all subsequent parameters (or all strings in an ARRAY) are spliced into a string.
If the separator is NULL, return NULL.
-` The concat_ws` function does not skip empty strings, but NULL values.
+The `concat_ws` function does not skip empty strings, but it does skip NULL values.
### example
@@ -58,6 +59,27 @@ mysql> select concat_ws("or", "d", NULL,"is");
+---------------------------------+
| doris |
+---------------------------------+
+
+mysql> select concat_ws("or", ["d", "is"]);
++-----------------------------------+
+| concat_ws('or', ARRAY('d', 'is')) |
++-----------------------------------+
+| doris |
++-----------------------------------+
+
+mysql> select concat_ws(NULL, ["d", "is"]);
++-----------------------------------+
+| concat_ws(NULL, ARRAY('d', 'is')) |
++-----------------------------------+
+| NULL |
++-----------------------------------+
+
+mysql> select concat_ws("or", ["d", NULL,"is"]);
++-----------------------------------------+
+| concat_ws('or', ARRAY('d', NULL, 'is')) |
++-----------------------------------------+
+| doris |
++-----------------------------------------+
```
### keywords
-CONCAT_WS,CONCAT,WS
+CONCAT_WS,CONCAT,WS,ARRAY
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/concat_ws.md b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/concat_ws.md
index bcebf9f4a6..a21d1b3ba5 100644
--- a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/concat_ws.md
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/concat_ws.md
@@ -29,11 +29,12 @@ under the License.
#### Syntax
`VARCHAR concat_ws(VARCHAR sep, VARCHAR str,...)`
+`VARCHAR concat_ws(VARCHAR sep, ARRAY array)`
-使用第一个参数 sep 作为连接符,将第二个参数以及后续所有参数拼接成一个字符串.
+使用第一个参数 sep 作为连接符,将第二个参数以及后续所有参数(或ARRAY中的所有字符串)拼接成一个字符串。
如果分隔符是 NULL,返回 NULL。
-`concat_ws`函数不会跳过空字符串,会跳过 NULL 值
+`concat_ws`函数不会跳过空字符串,会跳过 NULL 值。
### example
@@ -58,6 +59,27 @@ mysql> select concat_ws("or", "d", NULL,"is");
+---------------------------------+
| doris |
+---------------------------------+
+
+mysql> select concat_ws("or", ["d", "is"]);
++-----------------------------------+
+| concat_ws('or', ARRAY('d', 'is')) |
++-----------------------------------+
+| doris |
++-----------------------------------+
+
+mysql> select concat_ws(NULL, ["d", "is"]);
++-----------------------------------+
+| concat_ws(NULL, ARRAY('d', 'is')) |
++-----------------------------------+
+| NULL |
++-----------------------------------+
+
+mysql> select concat_ws("or", ["d", NULL,"is"]);
++-----------------------------------------+
+| concat_ws('or', ARRAY('d', NULL, 'is')) |
++-----------------------------------------+
+| doris |
++-----------------------------------------+
```
### keywords
-CONCAT_WS,CONCAT,WS
+CONCAT_WS,CONCAT,WS,ARRAY
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index 3883e7b1ac..249638831b 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -1018,6 +1018,9 @@ visible_functions = [
[['concat_ws'], 'VARCHAR', ['VARCHAR', 'VARCHAR', '...'],
'_ZN5doris15StringFunctions9concat_wsEPN9doris_udf'
'15FunctionContextERKNS1_9StringValEiPS5_', '', '', 'vec', 'CUSTOM'],
+ [['concat_ws'], 'VARCHAR', ['VARCHAR', 'ARRAY_VARCHAR'],
+ '_ZN5doris15StringFunctions9concat_wsEPN9doris_udf'
+ '15FunctionContextERKNS1_9StringValEiPS5_', '', '', 'vec', 'CUSTOM'],
[['find_in_set'], 'INT', ['VARCHAR', 'VARCHAR'],
'_ZN5doris15StringFunctions11find_in_setEPN9doris_udf'
'15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', ''],
diff --git a/regression-test/data/query/sql_functions/string_functions/test_string_function.out b/regression-test/data/query/sql_functions/string_functions/test_string_function.out
index 70c7d33202..61e0295054 100644
--- a/regression-test/data/query/sql_functions/string_functions/test_string_function.out
+++ b/regression-test/data/query/sql_functions/string_functions/test_string_function.out
@@ -41,6 +41,18 @@ doris
-- !sql --
doris
+-- !sql --
+doris
+
+-- !sql --
+\N
+
+-- !sql --
+doris
+
+-- !sql --
+dororis
+
-- !sql --
true
diff --git a/regression-test/suites/query/sql_functions/string_functions/test_string_function.groovy b/regression-test/suites/query/sql_functions/string_functions/test_string_function.groovy
index 00351890ef..8ecbc0e3d5 100644
--- a/regression-test/suites/query/sql_functions/string_functions/test_string_function.groovy
+++ b/regression-test/suites/query/sql_functions/string_functions/test_string_function.groovy
@@ -38,6 +38,10 @@ suite("test_string_function", "query") {
qt_sql "select concat_ws(\"or\", \"d\", \"is\");"
qt_sql "select concat_ws(NULL, \"d\", \"is\");"
qt_sql "select concat_ws(\"or\", \"d\", NULL,\"is\");"
+ qt_sql "select concat_ws(\"or\", [\"d\", \"is\"]);"
+ qt_sql "select concat_ws(NULL, [\"d\", \"is\"]);"
+ qt_sql "select concat_ws(\"or\", [\"d\", NULL,\"is\"]);"
+ qt_sql "select concat_ws(\"or\", [\"d\", \"\",\"is\"]);"
qt_sql "select ends_with(\"Hello doris\", \"doris\");"
qt_sql "select ends_with(\"Hello doris\", \"Hello\");"
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org