You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/10/28 00:40:30 UTC
[doris] branch master updated: [Function](array) support array_range function (#13547)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 5dd052d386 [Function](array) support array_range function (#13547)
5dd052d386 is described below
commit 5dd052d386d40e35a44c25d5dce1b0ff1c98f23a
Author: zhangstar333 <87...@users.noreply.github.com>
AuthorDate: Fri Oct 28 08:40:24 2022 +0800
[Function](array) support array_range function (#13547)
* array_range with 3 impl
* [Function](array) support array_range function
* update
* update code
---
be/src/vec/CMakeLists.txt | 1 +
.../vec/functions/array/function_array_range.cpp | 195 +++++++++++++++++++++
.../functions/array/function_array_register.cpp | 2 +
.../sql-functions/array-functions/array_range.md | 76 ++++++++
docs/sidebars.json | 1 +
.../sql-functions/array-functions/array_range.md | 74 ++++++++
gensrc/script/doris_builtins_functions.py | 3 +
7 files changed, 352 insertions(+)
diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt
index 2ba617295c..612fb3fac4 100644
--- a/be/src/vec/CMakeLists.txt
+++ b/be/src/vec/CMakeLists.txt
@@ -153,6 +153,7 @@ set(VEC_FILES
functions/array/function_array_slice.cpp
functions/array/function_array_difference.cpp
functions/array/function_array_enumerate.cpp
+ functions/array/function_array_range.cpp
exprs/table_function/vexplode_json_array.cpp
functions/math.cpp
functions/function_bitmap.cpp
diff --git a/be/src/vec/functions/array/function_array_range.cpp b/be/src/vec/functions/array/function_array_range.cpp
new file mode 100644
index 0000000000..0e2c33f5af
--- /dev/null
+++ b/be/src/vec/functions/array/function_array_range.cpp
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "common/status.h"
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_const.h"
+#include "vec/columns/columns_number.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/function.h"
+#include "vec/functions/function_helpers.h"
+#include "vec/functions/simple_function_factory.h"
+#include "vec/utils/util.hpp"
+
+namespace doris::vectorized {
+
+/// Vectorized SQL function `array_range`.
+///
+/// The class itself only wires the function into the `IFunction` interface; the argument
+/// signature and the actual computation are supplied by `Impl`, which must provide
+/// `get_variadic_argument_types()` and `execute_impl(...)`.
+template <typename Impl>
+class FunctionArrayRange : public IFunction {
+public:
+    static constexpr auto name = "array_range";
+
+    static FunctionPtr create() { return std::make_shared<FunctionArrayRange>(); }
+
+    /// Name under which the function is registered.
+    String get_name() const override { return name; }
+
+    /// Several overloads share the same name, so the function is declared variadic.
+    bool is_variadic() const override { return true; }
+
+    /// NULL inputs are not stripped by the default null handling.
+    bool use_default_implementation_for_nulls() const override { return false; }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    /// NOTE(review): the single entry returned here is the argument *count*, i.e. an index
+    /// one past the last argument -- confirm that this is intended.
+    ColumnNumbers get_arguments_that_are_always_constant() const override {
+        ColumnNumbers always_constant;
+        always_constant.push_back(get_number_of_arguments());
+        return always_constant;
+    }
+
+    /// The arity is whatever the selected `Impl` declares.
+    size_t get_number_of_arguments() const override {
+        const DataTypes declared_types = get_variadic_argument_types_impl();
+        return declared_types.size();
+    }
+
+    DataTypes get_variadic_argument_types_impl() const override {
+        return Impl::get_variadic_argument_types();
+    }
+
+    /// The result type is always Nullable(Array(Nullable(Int32))), independent of `arguments`.
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return make_nullable(std::make_shared<DataTypeArray>(
+                make_nullable(std::make_shared<DataTypeInt32>())));
+    }
+
+    /// Forwards the call unchanged to `Impl::execute_impl`.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
+    }
+};
+
+/// Shared implementation used by every `array_range` overload.
+struct RangeImplUtil {
+    /// Computes `array_range(start, end, step)` for each row and stores the result column.
+    ///
+    /// @param block            block holding the argument columns and the result slot.
+    /// @param arguments        exactly three positions in `block`: start, end, step. Each is
+    ///                         read as an Int32 column and may be const and/or nullable.
+    /// @param result           position in `block` that receives the result column.
+    /// @param input_rows_count number of rows to produce.
+    /// @return always `Status::OK()`.
+    ///
+    /// The result column is Nullable(Array(Nullable(Int32))). A row is NULL when any of its
+    /// arguments is NULL or is rejected by `vector` (negative start/end, non-positive step).
+    static Status range_execute(Block& block, const ColumnNumbers& arguments, size_t result,
+                                size_t input_rows_count) {
+        DCHECK_EQ(arguments.size(), 3);
+        auto return_nested_type = make_nullable(std::make_shared<DataTypeInt32>());
+        auto dest_array_column_ptr = ColumnArray::create(return_nested_type->create_column(),
+                                                         ColumnArray::ColumnOffsets::create());
+        // Unwrap the freshly created Nullable(Int32) element column into its value column
+        // and its null map so that both can be filled directly.
+        IColumn* dest_nested_column = &dest_array_column_ptr->get_data();
+        ColumnNullable* dest_nested_nullable_col =
+                reinterpret_cast<ColumnNullable*>(dest_nested_column);
+        dest_nested_column = dest_nested_nullable_col->get_nested_column_ptr();
+        auto& dest_nested_null_map = dest_nested_nullable_col->get_null_map_column().get_data();
+
+        // Row-level null map of the result: a row is NULL if any argument is NULL.
+        auto args_null_map = ColumnUInt8::create(input_rows_count, 0);
+        ColumnPtr argument_columns[3];
+        for (int i = 0; i < 3; ++i) {
+            argument_columns[i] =
+                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
+            if (auto* nullable = check_and_get_column<ColumnNullable>(*argument_columns[i])) {
+                // The null map must be merged BEFORE argument_columns[i] is overwritten:
+                // the assignment below may drop the last reference to the nullable column
+                // and free the null map that `nullable` points into.
+                VectorizedUtils::update_null_map(args_null_map->get_data(),
+                                                 nullable->get_null_map_data());
+                argument_columns[i] = nullable->get_nested_column_ptr();
+            }
+        }
+        auto start_column = assert_cast<const ColumnVector<Int32>*>(argument_columns[0].get());
+        auto end_column = assert_cast<const ColumnVector<Int32>*>(argument_columns[1].get());
+        auto step_column = assert_cast<const ColumnVector<Int32>*>(argument_columns[2].get());
+
+        DCHECK(dest_nested_column != nullptr);
+        auto& dest_offsets = dest_array_column_ptr->get_offsets();
+        auto nested_column = reinterpret_cast<ColumnVector<Int32>*>(dest_nested_column);
+        // Lower bound only: every row contributes one offset; the element count is unknown.
+        dest_offsets.reserve(input_rows_count);
+        dest_nested_column->reserve(input_rows_count);
+        dest_nested_null_map.reserve(input_rows_count);
+
+        vector(start_column->get_data(), end_column->get_data(), step_column->get_data(),
+               args_null_map->get_data(), nested_column->get_data(), dest_nested_null_map,
+               dest_offsets);
+
+        block.get_by_position(result).column =
+                ColumnNullable::create(std::move(dest_array_column_ptr), std::move(args_null_map));
+        return Status::OK();
+    }
+
+private:
+    /// Fills the array element column, its null map and the array offsets row by row.
+    ///
+    /// Precondition: `nested_column`, `dest_nested_null_map` and `dest_offsets` are empty on
+    /// entry (they describe one freshly created array column), so the running element count
+    /// `nested_column.size()` is the end offset of the row just written.
+    ///
+    /// A row whose argument is NULL, whose start or end is negative, or whose step is not
+    /// strictly positive yields a NULL result: `args_null_map[row]` is set to 1 and a single
+    /// NULL placeholder element is appended. Every other row yields the values
+    /// start, start + step, ... that are strictly less than end (possibly none).
+    static void vector(const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& end,
+                       const PaddedPODArray<Int32>& step, NullMap& args_null_map,
+                       PaddedPODArray<Int32>& nested_column,
+                       PaddedPODArray<UInt8>& dest_nested_null_map,
+                       ColumnArray::Offsets64& dest_offsets) {
+        const size_t rows = start.size();
+        for (size_t row = 0; row < rows; ++row) {
+            // step == 0 must be rejected as well: the loop below would never advance and
+            // would append elements until memory is exhausted.
+            if (args_null_map[row] || start[row] < 0 || end[row] < 0 || step[row] <= 0) {
+                nested_column.push_back(0);
+                dest_nested_null_map.push_back(1);
+                args_null_map[row] = 1;
+            } else {
+                // Iterate in 64 bits: `idx + step` can exceed the Int32 range when `end` is
+                // close to INT32_MAX, and signed overflow is undefined behaviour.
+                for (Int64 idx = start[row]; idx < end[row]; idx += step[row]) {
+                    nested_column.push_back(static_cast<Int32>(idx));
+                    dest_nested_null_map.push_back(0);
+                }
+            }
+            // End offset of this row == total number of elements written so far.
+            dest_offsets.push_back(nested_column.size());
+        }
+    }
+};
+
+/// One-argument overload `array_range(end)`: start is fixed to 0 and step to 1.
+struct RangeOneImpl {
+    /// Signature: (INT).
+    static DataTypes get_variadic_argument_types() {
+        DataTypes signature;
+        signature.emplace_back(std::make_shared<DataTypeInt32>());
+        return signature;
+    }
+
+    /// Appends two helper columns to `block` (all-0 start, all-1 step) and runs the shared
+    /// three-argument implementation with (start, arguments[0], step).
+    static Status execute_impl(FunctionContext* context, Block& block,
+                               const ColumnNumbers& arguments, size_t result,
+                               size_t input_rows_count) {
+        auto zeros = ColumnInt32::create(input_rows_count, 0);
+        auto ones = ColumnInt32::create(input_rows_count, 1);
+
+        // Each helper column lands at the current end of the block.
+        const size_t start_pos = block.columns();
+        block.insert({std::move(zeros), std::make_shared<DataTypeInt32>(), "start_column"});
+        const size_t step_pos = block.columns();
+        block.insert({std::move(ones), std::make_shared<DataTypeInt32>(), "step_column"});
+
+        const ColumnNumbers full_arguments {start_pos, arguments[0], step_pos};
+        return RangeImplUtil::range_execute(block, full_arguments, result, input_rows_count);
+    }
+};
+
+/// Two-argument overload `array_range(start, end)`: step is fixed to 1.
+struct RangeTwoImpl {
+    /// Signature: (INT, INT).
+    static DataTypes get_variadic_argument_types() {
+        DataTypes signature;
+        signature.emplace_back(std::make_shared<DataTypeInt32>());
+        signature.emplace_back(std::make_shared<DataTypeInt32>());
+        return signature;
+    }
+
+    /// Appends an all-1 step column to `block` and runs the shared three-argument
+    /// implementation with (arguments[0], arguments[1], step).
+    static Status execute_impl(FunctionContext* context, Block& block,
+                               const ColumnNumbers& arguments, size_t result,
+                               size_t input_rows_count) {
+        auto ones = ColumnInt32::create(input_rows_count, 1);
+
+        // The helper column lands at the current end of the block.
+        const size_t step_pos = block.columns();
+        block.insert({std::move(ones), std::make_shared<DataTypeInt32>(), "step_column"});
+
+        const ColumnNumbers full_arguments {arguments[0], arguments[1], step_pos};
+        return RangeImplUtil::range_execute(block, full_arguments, result, input_rows_count);
+    }
+};
+
+/// Three-argument overload `array_range(start, end, step)`.
+struct RangeThreeImpl {
+    /// Signature: (INT, INT, INT).
+    static DataTypes get_variadic_argument_types() {
+        DataTypes signature;
+        signature.emplace_back(std::make_shared<DataTypeInt32>());
+        signature.emplace_back(std::make_shared<DataTypeInt32>());
+        signature.emplace_back(std::make_shared<DataTypeInt32>());
+        return signature;
+    }
+
+    /// All three arguments are supplied by the caller, so they are passed through as-is.
+    static Status execute_impl(FunctionContext* context, Block& block,
+                               const ColumnNumbers& arguments, size_t result,
+                               size_t input_rows_count) {
+        const Status status =
+                RangeImplUtil::range_execute(block, arguments, result, input_rows_count);
+        return status;
+    }
+};
+
+/// Registers the three `array_range` overloads (one, two and three INT arguments) with
+/// `factory`. All three are registered under the same function name, "array_range".
+void register_function_array_range(SimpleFunctionFactory& factory) {
+ factory.register_function<FunctionArrayRange<RangeOneImpl>>();
+ factory.register_function<FunctionArrayRange<RangeTwoImpl>>();
+ factory.register_function<FunctionArrayRange<RangeThreeImpl>>();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_array_register.cpp
index 7d718ccfe2..82c9103cb0 100644
--- a/be/src/vec/functions/array/function_array_register.cpp
+++ b/be/src/vec/functions/array/function_array_register.cpp
@@ -36,6 +36,7 @@ void register_function_array_intersect(SimpleFunctionFactory&);
void register_function_array_slice(SimpleFunctionFactory&);
void register_function_array_difference(SimpleFunctionFactory&);
void register_function_array_enumerate(SimpleFunctionFactory&);
+void register_function_array_range(SimpleFunctionFactory&);
void register_function_array(SimpleFunctionFactory& factory) {
register_function_array_element(factory);
@@ -52,6 +53,7 @@ void register_function_array(SimpleFunctionFactory& factory) {
register_function_array_slice(factory);
register_function_array_difference(factory);
register_function_array_enumerate(factory);
+ register_function_array_range(factory);
}
} // namespace doris::vectorized
diff --git a/docs/en/docs/sql-manual/sql-functions/array-functions/array_range.md b/docs/en/docs/sql-manual/sql-functions/array-functions/array_range.md
new file mode 100644
index 0000000000..0ceb58d755
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/array-functions/array_range.md
@@ -0,0 +1,76 @@
+---
+{
+ "title": "array_range",
+ "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## array_range
+
+### description
+
+#### Syntax
+
+```
+ARRAY<Int> array_range(Int end)
+ARRAY<Int> array_range(Int start, Int end)
+ARRAY<Int> array_range(Int start, Int end, Int step)
+```
+All parameters must be positive integers.
+`start` defaults to 0 and `step` defaults to 1.
+Returns an array of the integers from `start` up to `end - 1`, advancing by `step`.
+
+
+### notice
+
+`Only supported in vectorized engine`
+
+### example
+
+```
+mysql> set enable_vectorized_engine=true;
+
+mysql> select array_range(10);
++--------------------------------+
+| array_range(10) |
++--------------------------------+
+| [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] |
++--------------------------------+
+
+mysql> select array_range(10,20);
++------------------------------------------+
+| array_range(10, 20) |
++------------------------------------------+
+| [10, 11, 12, 13, 14, 15, 16, 17, 18, 19] |
++------------------------------------------+
+
+mysql> select array_range(0,20,2);
++-------------------------------------+
+| array_range(0, 20, 2) |
++-------------------------------------+
+| [0, 2, 4, 6, 8, 10, 12, 14, 16, 18] |
++-------------------------------------+
+```
+
+### keywords
+
+ARRAY, RANGE, ARRAY_RANGE
diff --git a/docs/sidebars.json b/docs/sidebars.json
index c0360c2b5c..4cf82d482b 100644
--- a/docs/sidebars.json
+++ b/docs/sidebars.json
@@ -257,6 +257,7 @@
"sql-manual/sql-functions/array-functions/countequal",
"sql-manual/sql-functions/array-functions/element_at",
"sql-manual/sql-functions/array-functions/array_avg",
+ "sql-manual/sql-functions/array-functions/array_range",
"sql-manual/sql-functions/array-functions/size",
"sql-manual/sql-functions/array-functions/array_distinct",
"sql-manual/sql-functions/array-functions/array_difference",
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_range.md b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_range.md
new file mode 100644
index 0000000000..de656aa651
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_range.md
@@ -0,0 +1,74 @@
+---
+{
+ "title": "array_range",
+ "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## array_range
+
+### description
+
+#### Syntax
+
+```
+ARRAY<Int> array_range(Int end)
+ARRAY<Int> array_range(Int start, Int end)
+ARRAY<Int> array_range(Int start, Int end, Int step)
+```
+参数均为正整数。start 默认为 0,step 默认为 1。
+最终返回一个数组,元素从 start 到 end - 1,步长为 step。
+
+### notice
+
+`仅支持向量化引擎中使用`
+
+### example
+
+```
+mysql> set enable_vectorized_engine=true;
+
+mysql> select array_range(10);
++--------------------------------+
+| array_range(10) |
++--------------------------------+
+| [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] |
++--------------------------------+
+
+mysql> select array_range(10,20);
++------------------------------------------+
+| array_range(10, 20) |
++------------------------------------------+
+| [10, 11, 12, 13, 14, 15, 16, 17, 18, 19] |
++------------------------------------------+
+
+mysql> select array_range(0,20,2);
++-------------------------------------+
+| array_range(0, 20, 2) |
++-------------------------------------+
+| [0, 2, 4, 6, 8, 10, 12, 14, 16, 18] |
++-------------------------------------+
+```
+
+### keywords
+
+ARRAY, RANGE, ARRAY_RANGE
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index ec88c654b9..8e9b447cde 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -436,6 +436,9 @@ visible_functions = [
[['array_slice', '%element_slice%'], 'ARRAY_VARCHAR', ['ARRAY_VARCHAR', 'BIGINT', 'BIGINT'], '', '', '', 'vec', ''],
[['array_slice', '%element_slice%'], 'ARRAY_STRING', ['ARRAY_STRING', 'BIGINT', 'BIGINT'], '', '', '', 'vec', ''],
+ [['array_range'], 'ARRAY_INT', ['INT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+ [['array_range'], 'ARRAY_INT', ['INT', 'INT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+ [['array_range'], 'ARRAY_INT', ['INT', 'INT', 'INT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
# reverse function for string builtin
[['reverse'], 'VARCHAR', ['VARCHAR'],
'_ZN5doris15StringFunctions7reverseEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''],
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org