You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by xu...@apache.org on 2022/07/17 10:08:36 UTC
[doris] branch master updated: [feature-wip](array-type) explode support more sub types (#10673)
This is an automated email from the ASF dual-hosted git repository.
xuyang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 09d19e3f0f [feature-wip](array-type) explode support more sub types (#10673)
09d19e3f0f is described below
commit 09d19e3f0fa4ed7a3260a03919696b5e34b035d8
Author: camby <10...@qq.com>
AuthorDate: Sun Jul 17 18:08:30 2022 +0800
[feature-wip](array-type) explode support more sub types (#10673)
1. explode support more sub types;
2. explode support nullable elements;
Co-authored-by: cambyzju <zh...@baidu.com>
---
be/src/vec/exprs/table_function/vexplode.cpp | 54 ++++++-------
be/src/vec/exprs/table_function/vexplode.h | 7 +-
be/src/vec/functions/array/function_array_utils.h | 9 +++
be/test/vec/function/table_function_test.cpp | 36 ++++++++-
.../sql-functions/table-functions/explode.md | 87 +++++++++++++++++++++
.../sql-functions/table-functions/explode.md | 88 ++++++++++++++++++++++
.../java/org/apache/doris/catalog/FunctionSet.java | 11 ++-
.../main/java/org/apache/doris/catalog/Type.java | 21 ++++++
.../query/sql_functions/table_function/explode.out | 31 ++++++++
.../sql_functions/table_function/explode.groovy | 37 +++++++++
10 files changed, 341 insertions(+), 40 deletions(-)
diff --git a/be/src/vec/exprs/table_function/vexplode.cpp b/be/src/vec/exprs/table_function/vexplode.cpp
index 2669213119..ba33a680cc 100644
--- a/be/src/vec/exprs/table_function/vexplode.cpp
+++ b/be/src/vec/exprs/table_function/vexplode.cpp
@@ -27,24 +27,16 @@ VExplodeTableFunction::VExplodeTableFunction() {
Status VExplodeTableFunction::process_init(vectorized::Block* block) {
CHECK(_vexpr_context->root()->children().size() == 1)
- << "VExplodeTableFunction must be have 1 children but have "
+ << "VExplodeTableFunction only support 1 child but has "
<< _vexpr_context->root()->children().size();
int value_column_idx = -1;
_vexpr_context->root()->children()[0]->execute(_vexpr_context, block, &value_column_idx);
- if (block->get_by_position(value_column_idx).column->is_nullable()) {
- auto array_nullable_column = check_and_get_column<ColumnNullable>(
- *block->get_by_position(value_column_idx).column);
- _array_null_map = array_nullable_column->get_null_map_column().get_data().data();
- _array_column =
- check_and_get_column<ColumnArray>(array_nullable_column->get_nested_column_ptr());
- } else {
- _array_null_map = nullptr;
- _array_column =
- check_and_get_column<ColumnArray>(*block->get_by_position(value_column_idx).column);
- }
- if (!_array_column) {
+ _array_column =
+ block->get_by_position(value_column_idx).column->convert_to_full_column_if_const();
+
+ if (!extract_column_array_info(*_array_column, _detail)) {
return Status::NotSupported("column type {} not supported now",
block->get_by_position(value_column_idx).column->get_name());
}
@@ -56,26 +48,22 @@ Status VExplodeTableFunction::process_row(size_t row_idx) {
DCHECK(row_idx < _array_column->size());
_is_current_empty = false;
_eos = false;
+ _cur_offset = 0;
+ _array_offset = (*_detail.offsets_ptr)[row_idx - 1];
+ _cur_size = (*_detail.offsets_ptr)[row_idx] - _array_offset;
- if (_array_null_map && _array_null_map[row_idx]) {
+ // array is NULL, or array is empty
+ if (_cur_size == 0 || (_detail.array_nullmap_data && _detail.array_nullmap_data[row_idx])) {
_is_current_empty = true;
- _cur_size = 0;
- _cur_offset = 0;
- _pos = 0;
- } else {
- _cur_size =
- _array_column->get_offsets()[row_idx] - _array_column->get_offsets()[row_idx - 1];
- _cur_offset = 0;
- _is_current_empty = (_cur_size == 0);
- _pos = _array_column->get_offsets()[row_idx - 1];
}
+
return Status::OK();
}
Status VExplodeTableFunction::process_close() {
_array_column = nullptr;
- _array_null_map = nullptr;
- _pos = 0;
+ _detail.reset();
+ _array_offset = 0;
return Status::OK();
}
@@ -91,7 +79,13 @@ Status VExplodeTableFunction::get_value(void** output) {
return Status::OK();
}
- *output = const_cast<char*>(_array_column->get_data().get_data_at(_pos + _cur_offset).data);
+ size_t pos = _array_offset + _cur_offset;
+ if (_detail.nested_nullmap_data && _detail.nested_nullmap_data[pos]) {
+ *output = nullptr;
+ } else {
+ *output = const_cast<char*>(_detail.nested_col->get_data_at(pos).data);
+ }
+
return Status::OK();
}
@@ -101,7 +95,13 @@ Status VExplodeTableFunction::get_value_length(int64_t* length) {
return Status::OK();
}
- *length = _array_column->get_data().get_data_at(_pos + _cur_offset).size;
+ size_t pos = _array_offset + _cur_offset;
+ if (_detail.nested_nullmap_data && _detail.nested_nullmap_data[pos]) {
+ *length = 0;
+ } else {
+ *length = _detail.nested_col->get_data_at(pos).size;
+ }
+
return Status::OK();
}
diff --git a/be/src/vec/exprs/table_function/vexplode.h b/be/src/vec/exprs/table_function/vexplode.h
index da4909994b..b82f9be2e8 100644
--- a/be/src/vec/exprs/table_function/vexplode.h
+++ b/be/src/vec/exprs/table_function/vexplode.h
@@ -22,6 +22,7 @@
#include "vec/columns/column_array.h"
#include "vec/columns/column_nullable.h"
#include "vec/common/string_ref.h"
+#include "vec/functions/array/function_array_utils.h"
namespace doris::vectorized {
@@ -39,9 +40,9 @@ public:
virtual Status get_value_length(int64_t* length) override;
private:
- const UInt8* _array_null_map;
- const ColumnArray* _array_column;
- size_t _pos;
+ ColumnPtr _array_column;
+ ColumnArrayExecutionData _detail;
+ size_t _array_offset; // start offset of array[row_idx]
};
} // namespace doris::vectorized
diff --git a/be/src/vec/functions/array/function_array_utils.h b/be/src/vec/functions/array/function_array_utils.h
index e7173489e0..c0f8aca8f9 100644
--- a/be/src/vec/functions/array/function_array_utils.h
+++ b/be/src/vec/functions/array/function_array_utils.h
@@ -22,6 +22,15 @@
namespace doris::vectorized {
struct ColumnArrayExecutionData {
+public:
+ void reset() {
+ array_nullmap_data = nullptr;
+ array_col = nullptr;
+ offsets_ptr = nullptr;
+ nested_nullmap_data = nullptr;
+ nested_col = nullptr;
+ }
+
public:
const UInt8* array_nullmap_data = nullptr;
const ColumnArray* array_col = nullptr;
diff --git a/be/test/vec/function/table_function_test.cpp b/be/test/vec/function/table_function_test.cpp
index 19e53dc8a4..b83c019e32 100644
--- a/be/test/vec/function/table_function_test.cpp
+++ b/be/test/vec/function/table_function_test.cpp
@@ -71,11 +71,12 @@ TEST_F(TableFunctionTest, vexplode_outer) {
// explode_outer(Array<Int32>)
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int32};
- Array vec = {Int32(1), Int32(2), Int32(3)};
+ Array vec = {Int32(1), Null(), Int32(2), Int32(3)};
InputDataSet input_set = {{vec}, {Null()}, {Array()}};
InputTypeSet output_types = {TypeIndex::Int32};
- InputDataSet output_set = {{Int32(1)}, {Int32(2)}, {Int32(3)}, {Null()}, {Null()}};
+ InputDataSet output_set = {{Int32(1)}, {Null()}, {Int32(2)},
+ {Int32(3)}, {Null()}, {Null()}};
check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set);
}
@@ -92,6 +93,21 @@ TEST_F(TableFunctionTest, vexplode_outer) {
check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set);
}
+
+ // explode_outer(Array<Decimal>)
+ {
+ InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Decimal128};
+ Array vec = {ut_type::DECIMALFIELD(17014116.67), ut_type::DECIMALFIELD(-17014116.67)};
+ InputDataSet input_set = {{Null()}, {Array()}, {vec}};
+
+ InputTypeSet output_types = {TypeIndex::Decimal128};
+ InputDataSet output_set = {{Null()},
+ {Null()},
+ {ut_type::DECIMAL(17014116.67)},
+ {ut_type::DECIMAL(-17014116.67)}};
+
+ check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set);
+ }
}
TEST_F(TableFunctionTest, vexplode) {
@@ -103,11 +119,11 @@ TEST_F(TableFunctionTest, vexplode) {
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int32};
- Array vec = {Int32(1), Int32(2), Int32(3)};
+ Array vec = {Int32(1), Null(), Int32(2), Int32(3)};
InputDataSet input_set = {{vec}, {Null()}, {Array()}};
InputTypeSet output_types = {TypeIndex::Int32};
- InputDataSet output_set = {{Int32(1)}, {Int32(2)}, {Int32(3)}};
+ InputDataSet output_set = {{Int32(1)}, {Null()}, {Int32(2)}, {Int32(3)}};
check_vec_table_function(&explode, input_types, input_set, output_types, output_set);
}
@@ -123,6 +139,18 @@ TEST_F(TableFunctionTest, vexplode) {
check_vec_table_function(&explode, input_types, input_set, output_types, output_set);
}
+
+ // explode(Array<Date>)
+ {
+ InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Date};
+ Array vec = {Null(), str_to_date_time("2022-01-02", false)};
+ InputDataSet input_set = {{Null()}, {Array()}, {vec}};
+
+ InputTypeSet output_types = {TypeIndex::Date};
+ InputDataSet output_set = {{Null()}, {std::string("2022-01-02")}};
+
+ check_vec_table_function(&explode, input_types, input_set, output_types, output_set);
+ }
}
TEST_F(TableFunctionTest, vexplode_numbers) {
diff --git a/docs/en/docs/sql-manual/sql-functions/table-functions/explode.md b/docs/en/docs/sql-manual/sql-functions/table-functions/explode.md
new file mode 100644
index 0000000000..128076d4f2
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/table-functions/explode.md
@@ -0,0 +1,87 @@
+---
+{
+ "title": "explode",
+ "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## explode
+
+### description
+
+Table functions must be used in conjunction with Lateral View.
+
+explode array column to rows. `explode_outer` will return NULL, while `array` is NULL or empty.
+`explode` and `explode_outer` both keep the nested NULL elements of array.
+
+grammar:
+
+```
+explode(expr)
+explode_outer(expr)
+```
+
+### example
+```
+mysql> set enable_vectorized_engine = true
+mysql> set enable_array_type = true
+
+mysql> select e1 from (select 1 k1) as t lateral view explode([1,2,3]) tmp1 as e1;
++------+
+| e1 |
++------+
+| 1 |
+| 2 |
+| 3 |
++------+
+
+mysql> select e1 from (select 1 k1) as t lateral view explode_outer(null) tmp1 as e1;
++------+
+| e1 |
++------+
+| NULL |
++------+
+
+mysql> select e1 from (select 1 k1) as t lateral view explode([]) tmp1 as e1;
+Empty set (0.010 sec)
+
+mysql> select e1 from (select 1 k1) as t lateral view explode([null,1,null]) tmp1 as e1;
++------+
+| e1 |
++------+
+| NULL |
+| 1 |
+| NULL |
++------+
+
+mysql> select e1 from (select 1 k1) as t lateral view explode_outer([null,1,null]) tmp1 as e1;
++------+
+| e1 |
++------+
+| NULL |
+| 1 |
+| NULL |
++------+
+```
+
+### keywords
+EXPLODE,EXPLODE_OUTER,ARRAY
\ No newline at end of file
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/explode.md b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/explode.md
new file mode 100644
index 0000000000..e5da64bd4c
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/explode.md
@@ -0,0 +1,88 @@
+---
+{
+ "title": "explode",
+ "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## explode
+
+### description
+
+表函数,需配合 Lateral View 使用。
+
+将 array 列展开成多行。当 array 为NULL或者为空时,`explode_outer` 返回NULL。
+`explode` 和 `explode_outer` 均会返回 array 内部的NULL元素。
+
+语法:
+
+```
+explode(expr)
+explode_outer(expr)
+```
+
+### example
+
+```
+mysql> set enable_vectorized_engine = true
+mysql> set enable_array_type = true
+
+mysql> select e1 from (select 1 k1) as t lateral view explode([1,2,3]) tmp1 as e1;
++------+
+| e1 |
++------+
+| 1 |
+| 2 |
+| 3 |
++------+
+
+mysql> select e1 from (select 1 k1) as t lateral view explode_outer(null) tmp1 as e1;
++------+
+| e1 |
++------+
+| NULL |
++------+
+
+mysql> select e1 from (select 1 k1) as t lateral view explode([]) tmp1 as e1;
+Empty set (0.010 sec)
+
+mysql> select e1 from (select 1 k1) as t lateral view explode([null,1,null]) tmp1 as e1;
++------+
+| e1 |
++------+
+| NULL |
+| 1 |
+| NULL |
++------+
+
+mysql> select e1 from (select 1 k1) as t lateral view explode_outer([null,1,null]) tmp1 as e1;
++------+
+| e1 |
++------+
+| NULL |
+| 1 |
+| NULL |
++------+
+```
+
+### keywords
+EXPLODE,EXPLODE_OUTER,ARRAY
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
index ed895641bb..ffdc4f6131 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
@@ -2734,11 +2734,10 @@ public class FunctionSet<T> {
"_ZN5doris19DummyTableFunctions22explode_numbersEPN9doris_udf15FunctionContextERKNS1_9IntValE");
initTableFunctionListWithCombinator(EXPLODE);
- addTableFunctionWithCombinator(EXPLODE, Type.INT, Function.NullableMode.ALWAYS_NULLABLE,
- Lists.newArrayList(new ArrayType(Type.INT)), false,
- "_ZN5doris19DummyTableFunctions7explodeEPN9doris_udf15FunctionContextERKNS1_13CollectionValE");
- addTableFunctionWithCombinator(EXPLODE, Type.VARCHAR, Function.NullableMode.ALWAYS_NULLABLE,
- Lists.newArrayList(new ArrayType(Type.VARCHAR)), false,
- "_ZN5doris19DummyTableFunctions7explodeEPN9doris_udf15FunctionContextERKNS1_13CollectionValE");
+ for (Type subType : Type.getArraySubTypes()) {
+ addTableFunctionWithCombinator(EXPLODE, subType, Function.NullableMode.ALWAYS_NULLABLE,
+ Lists.newArrayList(new ArrayType(subType)), false,
+ "_ZN5doris19DummyTableFunctions7explodeEPN9doris_udf15FunctionContextERKNS1_13CollectionValE");
+ }
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
index 4e97454fe6..2709ca09f6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
@@ -108,6 +108,7 @@ public abstract class Type {
private static final ArrayList<ScalarType> integerTypes;
private static final ArrayList<ScalarType> numericTypes;
private static final ArrayList<ScalarType> supportedTypes;
+ private static final ArrayList<Type> arraySubTypes;
static {
integerTypes = Lists.newArrayList();
@@ -156,6 +157,22 @@ public abstract class Type {
supportedTypes.add(TIME);
supportedTypes.add(TIMEV2);
supportedTypes.add(STRING);
+
+ arraySubTypes = Lists.newArrayList();
+ arraySubTypes.add(BOOLEAN);
+ arraySubTypes.add(TINYINT);
+ arraySubTypes.add(SMALLINT);
+ arraySubTypes.add(INT);
+ arraySubTypes.add(BIGINT);
+ arraySubTypes.add(LARGEINT);
+ arraySubTypes.add(FLOAT);
+ arraySubTypes.add(DOUBLE);
+ arraySubTypes.add(DECIMALV2);
+ arraySubTypes.add(DATE);
+ arraySubTypes.add(DATETIME);
+ arraySubTypes.add(CHAR);
+ arraySubTypes.add(VARCHAR);
+ arraySubTypes.add(STRING);
}
public static ArrayList<ScalarType> getIntegerTypes() {
@@ -170,6 +187,10 @@ public abstract class Type {
return supportedTypes;
}
+ public static ArrayList<Type> getArraySubTypes() {
+ return arraySubTypes;
+ }
+
/**
* The output of this is stored directly in the hive metastore as the column type.
* The string must match exactly.
diff --git a/regression-test/data/query/sql_functions/table_function/explode.out b/regression-test/data/query/sql_functions/table_function/explode.out
new file mode 100644
index 0000000000..55bd9056c6
--- /dev/null
+++ b/regression-test/data/query/sql_functions/table_function/explode.out
@@ -0,0 +1,31 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !explode --
+1
+2
+3
+
+-- !explode_outer --
+1
+2
+3
+
+-- !explode --
+
+-- !explode_outer --
+\N
+
+-- !explode --
+
+-- !explode_outer --
+\N
+
+-- !explode --
+\N
+1
+\N
+
+-- !explode_outer --
+\N
+1
+\N
+
diff --git a/regression-test/suites/query/sql_functions/table_function/explode.groovy b/regression-test/suites/query/sql_functions/table_function/explode.groovy
new file mode 100644
index 0000000000..7b46193c4b
--- /dev/null
+++ b/regression-test/suites/query/sql_functions/table_function/explode.groovy
@@ -0,0 +1,37 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("explode") {
+ // vectorized
+ sql """ set enable_vectorized_engine = true """
+ sql """ set enable_array_type = true """
+
+ qt_explode """ select e1 from (select 1 k1) as t lateral view explode([1,2,3]) tmp1 as e1; """
+ qt_explode_outer """ select e1 from (select 1 k1) as t lateral view explode_outer([1,2,3]) tmp1 as e1; """
+
+ // array is null
+ qt_explode """ select e1 from (select 1 k1) as t lateral view explode(null) tmp1 as e1; """
+ qt_explode_outer """ select e1 from (select 1 k1) as t lateral view explode_outer(null) tmp1 as e1; """
+
+ // array is empty
+ qt_explode """ select e1 from (select 1 k1) as t lateral view explode([]) tmp1 as e1; """
+ qt_explode_outer """ select e1 from (select 1 k1) as t lateral view explode_outer([]) tmp1 as e1; """
+
+ // array with null elements
+ qt_explode """ select e1 from (select 1 k1) as t lateral view explode([null,1,null]) tmp1 as e1; """
+ qt_explode_outer """ select e1 from (select 1 k1) as t lateral view explode_outer([null,1,null]) tmp1 as e1; """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org