You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by xu...@apache.org on 2022/07/17 10:08:36 UTC

[doris] branch master updated: [feature-wip](array-type) explode support more sub types (#10673)

This is an automated email from the ASF dual-hosted git repository.

xuyang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 09d19e3f0f [feature-wip](array-type) explode support more sub types (#10673)
09d19e3f0f is described below

commit 09d19e3f0fa4ed7a3260a03919696b5e34b035d8
Author: camby <10...@qq.com>
AuthorDate: Sun Jul 17 18:08:30 2022 +0800

    [feature-wip](array-type) explode support more sub types (#10673)
    
    1. explode support more sub types;
    2. explode support nullable elements;
    
    Co-authored-by: cambyzju <zh...@baidu.com>
---
 be/src/vec/exprs/table_function/vexplode.cpp       | 54 ++++++-------
 be/src/vec/exprs/table_function/vexplode.h         |  7 +-
 be/src/vec/functions/array/function_array_utils.h  |  9 +++
 be/test/vec/function/table_function_test.cpp       | 36 ++++++++-
 .../sql-functions/table-functions/explode.md       | 87 +++++++++++++++++++++
 .../sql-functions/table-functions/explode.md       | 88 ++++++++++++++++++++++
 .../java/org/apache/doris/catalog/FunctionSet.java | 11 ++-
 .../main/java/org/apache/doris/catalog/Type.java   | 21 ++++++
 .../query/sql_functions/table_function/explode.out | 31 ++++++++
 .../sql_functions/table_function/explode.groovy    | 37 +++++++++
 10 files changed, 341 insertions(+), 40 deletions(-)

diff --git a/be/src/vec/exprs/table_function/vexplode.cpp b/be/src/vec/exprs/table_function/vexplode.cpp
index 2669213119..ba33a680cc 100644
--- a/be/src/vec/exprs/table_function/vexplode.cpp
+++ b/be/src/vec/exprs/table_function/vexplode.cpp
@@ -27,24 +27,16 @@ VExplodeTableFunction::VExplodeTableFunction() {
 
 Status VExplodeTableFunction::process_init(vectorized::Block* block) {
     CHECK(_vexpr_context->root()->children().size() == 1)
-            << "VExplodeTableFunction must be have 1 children but have "
+            << "VExplodeTableFunction only support 1 child but has "
             << _vexpr_context->root()->children().size();
 
     int value_column_idx = -1;
     _vexpr_context->root()->children()[0]->execute(_vexpr_context, block, &value_column_idx);
 
-    if (block->get_by_position(value_column_idx).column->is_nullable()) {
-        auto array_nullable_column = check_and_get_column<ColumnNullable>(
-                *block->get_by_position(value_column_idx).column);
-        _array_null_map = array_nullable_column->get_null_map_column().get_data().data();
-        _array_column =
-                check_and_get_column<ColumnArray>(array_nullable_column->get_nested_column_ptr());
-    } else {
-        _array_null_map = nullptr;
-        _array_column =
-                check_and_get_column<ColumnArray>(*block->get_by_position(value_column_idx).column);
-    }
-    if (!_array_column) {
+    _array_column =
+            block->get_by_position(value_column_idx).column->convert_to_full_column_if_const();
+
+    if (!extract_column_array_info(*_array_column, _detail)) {
         return Status::NotSupported("column type {} not supported now",
                                     block->get_by_position(value_column_idx).column->get_name());
     }
@@ -56,26 +48,22 @@ Status VExplodeTableFunction::process_row(size_t row_idx) {
     DCHECK(row_idx < _array_column->size());
     _is_current_empty = false;
     _eos = false;
+    _cur_offset = 0;
+    _array_offset = (*_detail.offsets_ptr)[row_idx - 1];
+    _cur_size = (*_detail.offsets_ptr)[row_idx] - _array_offset;
 
-    if (_array_null_map && _array_null_map[row_idx]) {
+    // array is NULL, or array is empty
+    if (_cur_size == 0 || (_detail.array_nullmap_data && _detail.array_nullmap_data[row_idx])) {
         _is_current_empty = true;
-        _cur_size = 0;
-        _cur_offset = 0;
-        _pos = 0;
-    } else {
-        _cur_size =
-                _array_column->get_offsets()[row_idx] - _array_column->get_offsets()[row_idx - 1];
-        _cur_offset = 0;
-        _is_current_empty = (_cur_size == 0);
-        _pos = _array_column->get_offsets()[row_idx - 1];
     }
+
     return Status::OK();
 }
 
 Status VExplodeTableFunction::process_close() {
     _array_column = nullptr;
-    _array_null_map = nullptr;
-    _pos = 0;
+    _detail.reset();
+    _array_offset = 0;
     return Status::OK();
 }
 
@@ -91,7 +79,13 @@ Status VExplodeTableFunction::get_value(void** output) {
         return Status::OK();
     }
 
-    *output = const_cast<char*>(_array_column->get_data().get_data_at(_pos + _cur_offset).data);
+    size_t pos = _array_offset + _cur_offset;
+    if (_detail.nested_nullmap_data && _detail.nested_nullmap_data[pos]) {
+        *output = nullptr;
+    } else {
+        *output = const_cast<char*>(_detail.nested_col->get_data_at(pos).data);
+    }
+
     return Status::OK();
 }
 
@@ -101,7 +95,13 @@ Status VExplodeTableFunction::get_value_length(int64_t* length) {
         return Status::OK();
     }
 
-    *length = _array_column->get_data().get_data_at(_pos + _cur_offset).size;
+    size_t pos = _array_offset + _cur_offset;
+    if (_detail.nested_nullmap_data && _detail.nested_nullmap_data[pos]) {
+        *length = 0;
+    } else {
+        *length = _detail.nested_col->get_data_at(pos).size;
+    }
+
     return Status::OK();
 }
 
diff --git a/be/src/vec/exprs/table_function/vexplode.h b/be/src/vec/exprs/table_function/vexplode.h
index da4909994b..b82f9be2e8 100644
--- a/be/src/vec/exprs/table_function/vexplode.h
+++ b/be/src/vec/exprs/table_function/vexplode.h
@@ -22,6 +22,7 @@
 #include "vec/columns/column_array.h"
 #include "vec/columns/column_nullable.h"
 #include "vec/common/string_ref.h"
+#include "vec/functions/array/function_array_utils.h"
 
 namespace doris::vectorized {
 
@@ -39,9 +40,9 @@ public:
     virtual Status get_value_length(int64_t* length) override;
 
 private:
-    const UInt8* _array_null_map;
-    const ColumnArray* _array_column;
-    size_t _pos;
+    ColumnPtr _array_column;
+    ColumnArrayExecutionData _detail;
+    size_t _array_offset; // start offset of array[row_idx]
 };
 
 } // namespace doris::vectorized
diff --git a/be/src/vec/functions/array/function_array_utils.h b/be/src/vec/functions/array/function_array_utils.h
index e7173489e0..c0f8aca8f9 100644
--- a/be/src/vec/functions/array/function_array_utils.h
+++ b/be/src/vec/functions/array/function_array_utils.h
@@ -22,6 +22,15 @@
 namespace doris::vectorized {
 
 struct ColumnArrayExecutionData {
+public:
+    void reset() {
+        array_nullmap_data = nullptr;
+        array_col = nullptr;
+        offsets_ptr = nullptr;
+        nested_nullmap_data = nullptr;
+        nested_col = nullptr;
+    }
+
 public:
     const UInt8* array_nullmap_data = nullptr;
     const ColumnArray* array_col = nullptr;
diff --git a/be/test/vec/function/table_function_test.cpp b/be/test/vec/function/table_function_test.cpp
index 19e53dc8a4..b83c019e32 100644
--- a/be/test/vec/function/table_function_test.cpp
+++ b/be/test/vec/function/table_function_test.cpp
@@ -71,11 +71,12 @@ TEST_F(TableFunctionTest, vexplode_outer) {
     // explode_outer(Array<Int32>)
     {
         InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int32};
-        Array vec = {Int32(1), Int32(2), Int32(3)};
+        Array vec = {Int32(1), Null(), Int32(2), Int32(3)};
         InputDataSet input_set = {{vec}, {Null()}, {Array()}};
 
         InputTypeSet output_types = {TypeIndex::Int32};
-        InputDataSet output_set = {{Int32(1)}, {Int32(2)}, {Int32(3)}, {Null()}, {Null()}};
+        InputDataSet output_set = {{Int32(1)}, {Null()}, {Int32(2)},
+                                   {Int32(3)}, {Null()}, {Null()}};
 
         check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set);
     }
@@ -92,6 +93,21 @@ TEST_F(TableFunctionTest, vexplode_outer) {
 
         check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set);
     }
+
+    // explode_outer(Array<Decimal>)
+    {
+        InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Decimal128};
+        Array vec = {ut_type::DECIMALFIELD(17014116.67), ut_type::DECIMALFIELD(-17014116.67)};
+        InputDataSet input_set = {{Null()}, {Array()}, {vec}};
+
+        InputTypeSet output_types = {TypeIndex::Decimal128};
+        InputDataSet output_set = {{Null()},
+                                   {Null()},
+                                   {ut_type::DECIMAL(17014116.67)},
+                                   {ut_type::DECIMAL(-17014116.67)}};
+
+        check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set);
+    }
 }
 
 TEST_F(TableFunctionTest, vexplode) {
@@ -103,11 +119,11 @@ TEST_F(TableFunctionTest, vexplode) {
     {
         InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int32};
 
-        Array vec = {Int32(1), Int32(2), Int32(3)};
+        Array vec = {Int32(1), Null(), Int32(2), Int32(3)};
         InputDataSet input_set = {{vec}, {Null()}, {Array()}};
 
         InputTypeSet output_types = {TypeIndex::Int32};
-        InputDataSet output_set = {{Int32(1)}, {Int32(2)}, {Int32(3)}};
+        InputDataSet output_set = {{Int32(1)}, {Null()}, {Int32(2)}, {Int32(3)}};
 
         check_vec_table_function(&explode, input_types, input_set, output_types, output_set);
     }
@@ -123,6 +139,18 @@ TEST_F(TableFunctionTest, vexplode) {
 
         check_vec_table_function(&explode, input_types, input_set, output_types, output_set);
     }
+
+    // explode(Array<Date>)
+    {
+        InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Date};
+        Array vec = {Null(), str_to_date_time("2022-01-02", false)};
+        InputDataSet input_set = {{Null()}, {Array()}, {vec}};
+
+        InputTypeSet output_types = {TypeIndex::Date};
+        InputDataSet output_set = {{Null()}, {std::string("2022-01-02")}};
+
+        check_vec_table_function(&explode, input_types, input_set, output_types, output_set);
+    }
 }
 
 TEST_F(TableFunctionTest, vexplode_numbers) {
diff --git a/docs/en/docs/sql-manual/sql-functions/table-functions/explode.md b/docs/en/docs/sql-manual/sql-functions/table-functions/explode.md
new file mode 100644
index 0000000000..128076d4f2
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/table-functions/explode.md
@@ -0,0 +1,87 @@
+---
+{
+    "title": "explode",
+    "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## explode
+
+### description
+
+Table functions must be used in conjunction with Lateral View.
+
+explode array column to rows. `explode_outer` will return NULL, while `array` is NULL or empty.
+`explode` and `explode_outer` both keep the nested NULL elements of array.
+
+grammar:
+
+```
+explode(expr)
+explode_outer(expr)
+```
+
+### example
+```
+mysql> set enable_vectorized_engine = true
+mysql> set enable_array_type = true
+
+mysql> select e1 from (select 1 k1) as t lateral view explode([1,2,3]) tmp1 as e1;
++------+
+| e1   |
++------+
+|    1 |
+|    2 |
+|    3 |
++------+
+
+mysql> select e1 from (select 1 k1) as t lateral view explode_outer(null) tmp1 as e1;
++------+
+| e1   |
++------+
+| NULL |
++------+
+
+mysql> select e1 from (select 1 k1) as t lateral view explode([]) tmp1 as e1;
+Empty set (0.010 sec)
+
+mysql> select e1 from (select 1 k1) as t lateral view explode([null,1,null]) tmp1 as e1;
++------+
+| e1   |
++------+
+| NULL |
+|    1 |
+| NULL |
++------+
+
+mysql> select e1 from (select 1 k1) as t lateral view explode_outer([null,1,null]) tmp1 as e1;
++------+
+| e1   |
++------+
+| NULL |
+|    1 |
+| NULL |
++------+
+```
+
+### keywords
+EXPLODE,EXPLODE_OUTER,ARRAY
\ No newline at end of file
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/explode.md b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/explode.md
new file mode 100644
index 0000000000..e5da64bd4c
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/explode.md
@@ -0,0 +1,88 @@
+---
+{
+    "title": "explode",
+    "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## explode
+
+### description
+
+表函数,需配合 Lateral View 使用。
+
+将 array 列展开成多行。当 array 为NULL或者为空时,`explode_outer` 返回NULL。
+`explode` 和 `explode_outer` 均会返回 array 内部的NULL元素。
+
+语法:
+
+```
+explode(expr)
+explode_outer(expr)
+```
+
+### example
+
+```
+mysql> set enable_vectorized_engine = true
+mysql> set enable_array_type = true
+
+mysql> select e1 from (select 1 k1) as t lateral view explode([1,2,3]) tmp1 as e1;
++------+
+| e1   |
++------+
+|    1 |
+|    2 |
+|    3 |
++------+
+
+mysql> select e1 from (select 1 k1) as t lateral view explode_outer(null) tmp1 as e1;
++------+
+| e1   |
++------+
+| NULL |
++------+
+
+mysql> select e1 from (select 1 k1) as t lateral view explode([]) tmp1 as e1;
+Empty set (0.010 sec)
+
+mysql> select e1 from (select 1 k1) as t lateral view explode([null,1,null]) tmp1 as e1;
++------+
+| e1   |
++------+
+| NULL |
+|    1 |
+| NULL |
++------+
+
+mysql> select e1 from (select 1 k1) as t lateral view explode_outer([null,1,null]) tmp1 as e1;
++------+
+| e1   |
++------+
+| NULL |
+|    1 |
+| NULL |
++------+
+```
+
+### keywords
+EXPLODE,EXPLODE_OUTER,ARRAY
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
index ed895641bb..ffdc4f6131 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
@@ -2734,11 +2734,10 @@ public class FunctionSet<T> {
                 "_ZN5doris19DummyTableFunctions22explode_numbersEPN9doris_udf15FunctionContextERKNS1_9IntValE");
 
         initTableFunctionListWithCombinator(EXPLODE);
-        addTableFunctionWithCombinator(EXPLODE, Type.INT, Function.NullableMode.ALWAYS_NULLABLE,
-                Lists.newArrayList(new ArrayType(Type.INT)), false,
-                "_ZN5doris19DummyTableFunctions7explodeEPN9doris_udf15FunctionContextERKNS1_13CollectionValE");
-        addTableFunctionWithCombinator(EXPLODE, Type.VARCHAR, Function.NullableMode.ALWAYS_NULLABLE,
-                Lists.newArrayList(new ArrayType(Type.VARCHAR)), false,
-                "_ZN5doris19DummyTableFunctions7explodeEPN9doris_udf15FunctionContextERKNS1_13CollectionValE");
+        for (Type subType : Type.getArraySubTypes()) {
+            addTableFunctionWithCombinator(EXPLODE, subType, Function.NullableMode.ALWAYS_NULLABLE,
+                    Lists.newArrayList(new ArrayType(subType)), false,
+                    "_ZN5doris19DummyTableFunctions7explodeEPN9doris_udf15FunctionContextERKNS1_13CollectionValE");
+        }
     }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
index 4e97454fe6..2709ca09f6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
@@ -108,6 +108,7 @@ public abstract class Type {
     private static final ArrayList<ScalarType> integerTypes;
     private static final ArrayList<ScalarType> numericTypes;
     private static final ArrayList<ScalarType> supportedTypes;
+    private static final ArrayList<Type> arraySubTypes;
 
     static {
         integerTypes = Lists.newArrayList();
@@ -156,6 +157,22 @@ public abstract class Type {
         supportedTypes.add(TIME);
         supportedTypes.add(TIMEV2);
         supportedTypes.add(STRING);
+
+        arraySubTypes = Lists.newArrayList();
+        arraySubTypes.add(BOOLEAN);
+        arraySubTypes.add(TINYINT);
+        arraySubTypes.add(SMALLINT);
+        arraySubTypes.add(INT);
+        arraySubTypes.add(BIGINT);
+        arraySubTypes.add(LARGEINT);
+        arraySubTypes.add(FLOAT);
+        arraySubTypes.add(DOUBLE);
+        arraySubTypes.add(DECIMALV2);
+        arraySubTypes.add(DATE);
+        arraySubTypes.add(DATETIME);
+        arraySubTypes.add(CHAR);
+        arraySubTypes.add(VARCHAR);
+        arraySubTypes.add(STRING);
     }
 
     public static ArrayList<ScalarType> getIntegerTypes() {
@@ -170,6 +187,10 @@ public abstract class Type {
         return supportedTypes;
     }
 
+    public static ArrayList<Type> getArraySubTypes() {
+        return arraySubTypes;
+    }
+
     /**
      * The output of this is stored directly in the hive metastore as the column type.
      * The string must match exactly.
diff --git a/regression-test/data/query/sql_functions/table_function/explode.out b/regression-test/data/query/sql_functions/table_function/explode.out
new file mode 100644
index 0000000000..55bd9056c6
--- /dev/null
+++ b/regression-test/data/query/sql_functions/table_function/explode.out
@@ -0,0 +1,31 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !explode --
+1
+2
+3
+
+-- !explode_outer --
+1
+2
+3
+
+-- !explode --
+
+-- !explode_outer --
+\N
+
+-- !explode --
+
+-- !explode_outer --
+\N
+
+-- !explode --
+\N
+1
+\N
+
+-- !explode_outer --
+\N
+1
+\N
+
diff --git a/regression-test/suites/query/sql_functions/table_function/explode.groovy b/regression-test/suites/query/sql_functions/table_function/explode.groovy
new file mode 100644
index 0000000000..7b46193c4b
--- /dev/null
+++ b/regression-test/suites/query/sql_functions/table_function/explode.groovy
@@ -0,0 +1,37 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("explode") {
+    // vectorized
+    sql """ set enable_vectorized_engine = true """
+    sql """ set enable_array_type = true """
+
+    qt_explode """ select e1 from (select 1 k1) as t lateral view explode([1,2,3]) tmp1 as e1; """
+    qt_explode_outer """ select e1 from (select 1 k1) as t lateral view explode_outer([1,2,3]) tmp1 as e1; """
+
+    // array is null
+    qt_explode """ select e1 from (select 1 k1) as t lateral view explode(null) tmp1 as e1; """
+    qt_explode_outer """ select e1 from (select 1 k1) as t lateral view explode_outer(null) tmp1 as e1; """
+
+    // array is empty
+    qt_explode """ select e1 from (select 1 k1) as t lateral view explode([]) tmp1 as e1; """
+    qt_explode_outer """ select e1 from (select 1 k1) as t lateral view explode_outer([]) tmp1 as e1; """
+
+    // array with null elements
+    qt_explode """ select e1 from (select 1 k1) as t lateral view explode([null,1,null]) tmp1 as e1; """
+    qt_explode_outer """ select e1 from (select 1 k1) as t lateral view explode_outer([null,1,null]) tmp1 as e1; """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org