You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by pa...@apache.org on 2022/10/25 07:12:17 UTC

[doris] branch master updated: [feature-array](array-type) Add array function array_enumerate (#13612)

This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 235c105554 [feature-array](array-type) Add array function array_enumerate (#13612)
235c105554 is described below

commit 235c105554d240a09cef3d21d1f3db3572c11198
Author: lihangyu <15...@163.com>
AuthorDate: Tue Oct 25 15:12:11 2022 +0800

    [feature-array](array-type) Add array function array_enumerate (#13612)
    
    Add array function array_enumerate
---
 be/src/vec/CMakeLists.txt                          |  1 +
 .../functions/array/function_array_enumerate.cpp   | 98 ++++++++++++++++++++++
 .../functions/array/function_array_register.cpp    |  2 +
 .../array-functions/array_enumerate.md             | 60 +++++++++++++
 .../array-functions/array_enumerate.md             | 59 +++++++++++++
 gensrc/script/doris_builtins_functions.py          | 17 ++++
 .../array_functions/test_array_functions.out       | 36 ++++++++
 .../array_functions/test_array_functions.groovy    |  4 +
 8 files changed, 277 insertions(+)

diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt
index 845a1c0d75..d3ae2849de 100644
--- a/be/src/vec/CMakeLists.txt
+++ b/be/src/vec/CMakeLists.txt
@@ -152,6 +152,7 @@ set(VEC_FILES
   functions/array/function_array_intersect.cpp
   functions/array/function_array_slice.cpp
   functions/array/function_array_difference.cpp
+  functions/array/function_array_enumerate.cpp
   exprs/table_function/vexplode_json_array.cpp
   functions/math.cpp
   functions/function_bitmap.cpp
diff --git a/be/src/vec/functions/array/function_array_enumerate.cpp b/be/src/vec/functions/array/function_array_enumerate.cpp
new file mode 100644
index 0000000000..8f63e46ab1
--- /dev/null
+++ b/be/src/vec/functions/array/function_array_enumerate.cpp
@@ -0,0 +1,98 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <vec/columns/column_array.h>
+#include <vec/columns/column_nullable.h>
+#include <vec/columns/columns_number.h>
+#include <vec/data_types/data_type_array.h>
+#include <vec/data_types/data_type_number.h>
+#include <vec/functions/function.h>
+#include <vec/functions/function_helpers.h>
+#include <vec/functions/simple_function_factory.h>
+
+namespace doris::vectorized {
+
+// array_enumerate(arr): for every row, returns [1, 2, ..., N] where N is the number of elements
+// in that row's array. The single argument must be an Array or Nullable(Array) column.
+class FunctionArrayEnumerate : public IFunction {
+public:
+    static constexpr auto name = "array_enumerate";
+    static FunctionPtr create() { return std::make_shared<FunctionArrayEnumerate>(); }
+    String get_name() const override { return name; }
+    bool use_default_implementation_for_nulls() const override { return false; }
+    size_t get_number_of_arguments() const override { return 1; }
+    // Returns Array(Int64); element and array are Nullable wherever the argument's are.
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        const DataTypeArray* array_type =
+                check_and_get_data_type<DataTypeArray>(remove_nullable(arguments[0]).get());
+        if (!array_type) {
+            LOG(FATAL) << "First argument for function " + get_name() +
+                                  " must be an array but it has type " + arguments[0]->get_name() +
+                                  ".";
+        }
+
+        DataTypePtr element_type = std::make_shared<DataTypeInt64>();
+        if (array_type->get_nested_type()->is_nullable()) {
+            element_type = make_nullable(element_type);
+        }
+        DataTypePtr return_type = std::make_shared<DataTypeArray>(element_type);
+        return arguments[0]->is_nullable() ? make_nullable(return_type) : return_type;
+    }
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto left_column =
+                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+        const ColumnArray* array =
+                check_and_get_column<ColumnArray>(remove_nullable(left_column->get_ptr()));
+        if (!array) {
+            return Status::RuntimeError(
+                    fmt::format("Illegal column {}, of first argument of function {}",
+                                left_column->get_name(), get_name()));
+        }
+        // Each offset is used as the end position of a row in the flattened data, so the index of
+        // element j within its row is j - prev_off + 1 (1-based).
+        auto res_nested = ColumnInt64::create();
+        ColumnInt64::Container& res_values = res_nested->get_data();
+        res_values.resize(array->get_data().size());
+        ColumnArray::Offset64 prev_off = 0;
+        for (auto off : array->get_offsets()) {
+            for (ColumnArray::Offset64 j = prev_off; j < off; ++j) res_values[j] = j - prev_off + 1;
+            prev_off = off;
+        }
+
+        ColumnPtr nested_column = res_nested->get_ptr();
+        if (array->get_data().is_nullable()) {
+            nested_column = ColumnNullable::create(nested_column,
+                                                   ColumnUInt8::create(nested_column->size(), 0));
+        }
+        ColumnPtr res_column =
+                ColumnArray::create(std::move(nested_column), array->get_offsets_ptr());
+        // Test the cast of the materialized column itself so a null pointer is never dereferenced.
+        if (const auto* nullable = check_and_get_column<ColumnNullable>(left_column)) {
+            res_column = ColumnNullable::create(
+                    res_column, nullable->get_null_map_column().clone_resized(nullable->size()));
+        }
+        block.replace_by_position(result, std::move(res_column));
+        return Status::OK();
+    }
+};
+
+void register_function_array_enumerate(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionArrayEnumerate>(); // called by register_function_array()
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_array_register.cpp
index aff9e67d36..7d718ccfe2 100644
--- a/be/src/vec/functions/array/function_array_register.cpp
+++ b/be/src/vec/functions/array/function_array_register.cpp
@@ -35,6 +35,7 @@ void register_function_array_except(SimpleFunctionFactory&);
 void register_function_array_intersect(SimpleFunctionFactory&);
 void register_function_array_slice(SimpleFunctionFactory&);
 void register_function_array_difference(SimpleFunctionFactory&);
+void register_function_array_enumerate(SimpleFunctionFactory&);
 
 void register_function_array(SimpleFunctionFactory& factory) {
     register_function_array_element(factory);
@@ -50,6 +51,7 @@ void register_function_array(SimpleFunctionFactory& factory) {
     register_function_array_intersect(factory);
     register_function_array_slice(factory);
     register_function_array_difference(factory);
+    register_function_array_enumerate(factory);
 }
 
 } // namespace doris::vectorized
diff --git a/docs/en/docs/sql-manual/sql-functions/array-functions/array_enumerate.md b/docs/en/docs/sql-manual/sql-functions/array-functions/array_enumerate.md
new file mode 100644
index 0000000000..96f7617723
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/array-functions/array_enumerate.md
@@ -0,0 +1,60 @@
+---
+{
+    "title": "ARRAY_ENUMERATE Function",
+    "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## ARRAY_ENUMERATE
+
+### Name
+
+ARRAY_ENUMERATE
+
+### description
+
+Returns the 1-based index of each element in the array, e.g. [1, 2, 3, …, length(arr)].
+
+### example
+
+```shell
+mysql> create table array_type_table(k1 INT, k2 Array<STRING>) duplicate key (k1)
+    -> distributed by hash(k1) buckets 1 properties('replication_num' = '1');
+mysql> insert into array_type_table values (0, []), ("1", [NULL]), ("2", ["1", "2", "3"]), ("3", ["1", NULL, "3"]), ("4", NULL);
+mysql> set enable_vectorized_engine = true;    # enable vectorized engine
+mysql> select k2, array_enumerate(k2) from array_type_table;
++------------------+-----------------------+
+| k2               | array_enumerate(`k2`) |
++------------------+-----------------------+
+| []               | []                    |
+| [NULL]           | [1]                   |
+| ['1', '2', '3']  | [1, 2, 3]             |
+| ['1', NULL, '3'] | [1, 2, 3]             |
+| NULL             | NULL                  |
++------------------+-----------------------+
+5 rows in set (0.01 sec)
+```
+
+### keywords
+
+ARRAY,ENUMERATE,ARRAY_ENUMERATE
+
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_enumerate.md b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_enumerate.md
new file mode 100644
index 0000000000..d7bee373e9
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_enumerate.md
@@ -0,0 +1,59 @@
+---
+{
+    "title": "ARRAY_ENUMERATE 函数",
+    "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## ARRAY_ENUMERATE 
+
+### Name
+
+ARRAY_ENUMERATE
+
+### description
+
+返回数组中每个元素的下标（从 1 开始），例如 [1, 2, 3, …, length(arr)]
+
+### example
+
+```shell
+mysql> create table array_type_table(k1 INT, k2 Array<STRING>) duplicate key (k1)
+    -> distributed by hash(k1) buckets 1 properties('replication_num' = '1');
+mysql> insert into array_type_table values (0, []), ("1", [NULL]), ("2", ["1", "2", "3"]), ("3", ["1", NULL, "3"]), ("4", NULL);
+mysql> set enable_vectorized_engine = true;    # enable vectorized engine
+mysql> select k2, array_enumerate(k2) from array_type_table;
++------------------+-----------------------+
+| k2               | array_enumerate(`k2`) |
++------------------+-----------------------+
+| []               | []                    |
+| [NULL]           | [1]                   |
+| ['1', '2', '3']  | [1, 2, 3]             |
+| ['1', NULL, '3'] | [1, 2, 3]             |
+| NULL             | NULL                  |
++------------------+-----------------------+
+5 rows in set (0.01 sec)
+```
+
+### keywords
+
+ARRAY,ENUMERATE,ARRAY_ENUMERATE
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index 68f1a4e48f..4715372acf 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -166,6 +166,23 @@ visible_functions = [
     [['array_contains'], 'BOOLEAN', ['ARRAY_VARCHAR', 'VARCHAR'], '', '', '', 'vec', ''],
     [['array_contains'], 'BOOLEAN', ['ARRAY_STRING', 'STRING'], '', '', '', 'vec', ''],
 
+
+    [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_BOOLEAN'], '', '', '', 'vec', ''],
+    [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_TINYINT'], '', '', '', 'vec', ''],
+    [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_SMALLINT'], '', '', '', 'vec', ''],
+    [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_INT'], '', '', '', 'vec', ''],
+    [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_BIGINT'], '', '', '', 'vec', ''],
+    [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_LARGEINT'], '', '', '', 'vec', ''],
+    [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_DATETIME'], '', '', '', 'vec', ''],
+    [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_DATE'], '', '', '', 'vec', ''],
+    [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_DATETIMEV2'], '', '', '', 'vec', ''],
+    [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_DATEV2'], '', '', '', 'vec', ''],
+    [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_FLOAT'], '', '', '', 'vec', ''],
+    [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_DOUBLE'], '', '', '', 'vec', ''],
+    [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_DECIMALV2'], '', '', '', 'vec', ''],
+    [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_VARCHAR'], '', '', '', 'vec', ''],
+    [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_STRING'], '', '', '', 'vec', ''],
+
     [['countequal'], 'BIGINT', ['ARRAY_BOOLEAN', 'BOOLEAN'], '', '', '', 'vec', ''],
     [['countequal'], 'BIGINT', ['ARRAY_TINYINT', 'TINYINT'], '', '', '', 'vec', ''],
     [['countequal'], 'BIGINT', ['ARRAY_SMALLINT', 'SMALLINT'], '', '', '', 'vec', ''],
diff --git a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
index 8bcc53a962..7c3fa001a1 100644
--- a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
+++ b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
@@ -143,3 +143,39 @@
 6	\N
 7	\N
 
+-- !select --
+1	[1, 2, 3]
+2	[1]
+3	[]
+4	[1, 2, 3, 4, 5, 6, 7, 8, 9]
+5	[]
+6	[1, 2, 3, 4, 5, 6, 7, 8, 9]
+7	[1, 2, 3, 4, 5]
+
+-- !select --
+1	[1]
+2	[1]
+3	[1]
+4	\N
+5	\N
+6	\N
+7	\N
+
+-- !select --
+1	[1]
+2	\N
+3	\N
+4	\N
+5	\N
+6	\N
+7	\N
+
+-- !select --
+1	[1]
+2	\N
+3	\N
+4	\N
+5	\N
+6	\N
+7	\N
+
diff --git a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
index 6684f4db81..c4729e0b20 100644
--- a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
+++ b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
@@ -62,4 +62,8 @@ suite("test_array_functions") {
     qt_select "SELECT k1, array_contains(k5, 'hi') FROM ${tableName} ORDER BY k1"
     qt_select "SELECT k1, array_contains(k5, 'hi222') FROM ${tableName} ORDER BY k1"
     qt_select "SELECT k1, array_contains(k6, null) from ${tableName} ORDER BY k1"
+    qt_select "SELECT k1, array_enumerate(k2) from ${tableName} ORDER BY k1"
+    qt_select "SELECT k1, array_enumerate(k5) from ${tableName} ORDER BY k1"
+    qt_select "SELECT k1, array_enumerate(k6) from ${tableName} ORDER BY k1"
+    qt_select "SELECT k1, array_enumerate(k7) from ${tableName} ORDER BY k1"
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org