You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/01/31 04:16:50 UTC

[doris] 01/02: [Refactor](function) opt the exec of function with null column

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-hive-test
in repository https://gitbox.apache.org/repos/asf/doris.git

commit c5c9fa54dd37d77ba7489a4966e959ccaf775391
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Mon Jan 30 18:04:54 2023 +0800

    [Refactor](function) opt the exec of function with null column
---
 be/src/vec/exprs/vectorized_fn_call.cpp   |   2 +
 be/src/vec/functions/function.cpp         |  14 ++--
 be/src/vec/functions/function_cast.h      |  14 ++--
 be/src/vec/functions/function_helpers.cpp | 118 +++++++++++++-----------------
 be/src/vec/functions/function_helpers.h   |  26 +++----
 5 files changed, 80 insertions(+), 94 deletions(-)

diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp b/be/src/vec/exprs/vectorized_fn_call.cpp
index 8a41e332b9..641a5c3b96 100644
--- a/be/src/vec/exprs/vectorized_fn_call.cpp
+++ b/be/src/vec/exprs/vectorized_fn_call.cpp
@@ -42,6 +42,8 @@ doris::Status VectorizedFnCall::prepare(doris::RuntimeState* state,
     argument_template.reserve(_children.size());
     std::vector<std::string_view> child_expr_name;
     for (auto child : _children) {
+        // TODO: rethink we really create column here. maybe only need nullptr just to
+        // get the function
         auto column = child->data_type()->create_column();
         argument_template.emplace_back(std::move(column), child->data_type(), child->expr_name());
         child_expr_name.emplace_back(child->expr_name());
diff --git a/be/src/vec/functions/function.cpp b/be/src/vec/functions/function.cpp
index 41f3141c06..662a2a58af 100644
--- a/be/src/vec/functions/function.cpp
+++ b/be/src/vec/functions/function.cpp
@@ -217,11 +217,12 @@ Status PreparedFunctionImpl::default_implementation_for_nulls(
     }
 
     if (null_presence.has_nullable) {
-        Block temporary_block = create_block_with_nested_columns(block, args, result);
+        auto [temporary_block, new_args, new_result] =
+                create_block_with_nested_columns(block, args, result);
         RETURN_IF_ERROR(execute_without_low_cardinality_columns(
-                context, temporary_block, args, result, temporary_block.rows(), dry_run));
+                context, temporary_block, new_args, new_result, temporary_block.rows(), dry_run));
         block.get_by_position(result).column =
-                wrap_in_nullable(temporary_block.get_by_position(result).column, block, args,
+                wrap_in_nullable(temporary_block.get_by_position(new_result).column, block, args,
                                  result, input_rows_count);
         *executed = true;
         return Status::OK();
@@ -295,10 +296,9 @@ DataTypePtr FunctionBuilderImpl::get_return_type_without_low_cardinality(
         }
         if (null_presence.has_nullable) {
             ColumnNumbers numbers(arguments.size());
-            for (size_t i = 0; i < arguments.size(); i++) {
-                numbers[i] = i;
-            }
-            Block nested_block = create_block_with_nested_columns(Block(arguments), numbers);
+            std::iota(numbers.begin(), numbers.end(), 0);
+            auto [nested_block, _] =
+                    create_block_with_nested_columns(Block(arguments), numbers, false);
             auto return_type = get_return_type_impl(
                     ColumnsWithTypeAndName(nested_block.begin(), nested_block.end()));
             return make_nullable(return_type);
diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h
index e3baaecdd2..62b716d1a9 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -1592,9 +1592,10 @@ private:
                 Block tmp_block;
                 size_t tmp_res_index = 0;
                 if (source_is_nullable) {
-                    tmp_block = create_block_with_nested_columns_only_args(block, arguments);
-                    tmp_res_index = tmp_block.columns();
-                    tmp_block.insert({nullptr, nested_type, ""});
+                    auto [t_block, tmp_args, tmp_res] =
+                            create_block_with_nested_columns(block, arguments, result);
+                    tmp_block = std::move(t_block);
+                    tmp_res_index = tmp_res;
 
                     /// Perform the requested conversion.
                     RETURN_IF_ERROR(
@@ -1624,7 +1625,8 @@ private:
             return [wrapper, skip_not_null_check](FunctionContext* context, Block& block,
                                                   const ColumnNumbers& arguments,
                                                   const size_t result, size_t input_rows_count) {
-                Block tmp_block = create_block_with_nested_columns(block, arguments, result);
+                auto [tmp_block, tmp_args, tmp_res] =
+                        create_block_with_nested_columns(block, arguments, result);
 
                 /// Check that all values are not-NULL.
                 /// Check can be skipped in case if LowCardinality dictionary is transformed.
@@ -1640,8 +1642,8 @@ private:
                     }
                 }
 
-                RETURN_IF_ERROR(wrapper(context, tmp_block, arguments, result, input_rows_count));
-                block.get_by_position(result).column = tmp_block.get_by_position(result).column;
+                RETURN_IF_ERROR(wrapper(context, tmp_block, tmp_args, tmp_res, input_rows_count));
+                block.get_by_position(result).column = tmp_block.get_by_position(tmp_res).column;
                 return Status::OK();
             };
         } else {
diff --git a/be/src/vec/functions/function_helpers.cpp b/be/src/vec/functions/function_helpers.cpp
index fcfdd4b3a2..560f54e1a5 100644
--- a/be/src/vec/functions/function_helpers.cpp
+++ b/be/src/vec/functions/function_helpers.cpp
@@ -26,82 +26,68 @@
 
 namespace doris::vectorized {
 
-Block create_block_with_nested_columns_only_args(const Block& block, const ColumnNumbers& args) {
-    std::set<size_t> args_set(args.begin(), args.end());
+std::tuple<Block, ColumnNumbers> create_block_with_nested_columns(const Block& block,
+                                                                  const ColumnNumbers& args,
+                                                                  const bool need_check_same) {
     Block res;
-
-    for (auto i : args_set) {
-        const auto& col = block.get_by_position(i);
-
-        if (col.type->is_nullable()) {
-            const DataTypePtr& nested_type =
-                    static_cast<const DataTypeNullable&>(*col.type).get_nested_type();
-
-            if (!col.column) {
-                res.insert({nullptr, nested_type, col.name});
-            } else if (auto* nullable = check_and_get_column<ColumnNullable>(*col.column)) {
-                const auto& nested_col = nullable->get_nested_column_ptr();
-                res.insert({nested_col, nested_type, col.name});
-            } else if (auto* const_column = check_and_get_column<ColumnConst>(*col.column)) {
-                const auto& nested_col =
-                        check_and_get_column<ColumnNullable>(const_column->get_data_column())
-                                ->get_nested_column_ptr();
-                res.insert({ColumnConst::create(nested_col, col.column->size()), nested_type,
-                            col.name});
-            } else {
-                LOG(FATAL) << "Illegal column for DataTypeNullable";
+    ColumnNumbers res_args(args.size());
+
+    // only build temp block by args column, if args[i] == args[j]
+    // just keep one
+    for (size_t i = 0; i < args.size(); ++i) {
+        bool is_in_res = false;
+        size_t pre_loc = 0;
+
+        if (need_check_same) {
+            for (int j = 0; j < i; ++j) {
+                if (args[j] == args[i]) {
+                    is_in_res = true;
+                    pre_loc = j;
+                    break;
+                }
             }
-        } else {
-            res.insert(col);
         }
-    }
-
-    return res;
-}
 
-static Block create_block_with_nested_columns_impl(const Block& block,
-                                                   const std::unordered_set<size_t>& args) {
-    Block res;
-    size_t columns = block.columns();
-
-    for (size_t i = 0; i < columns; ++i) {
-        const auto& col = block.get_by_position(i);
-
-        if (args.count(i) && col.type->is_nullable()) {
-            const DataTypePtr& nested_type =
-                    static_cast<const DataTypeNullable&>(*col.type).get_nested_type();
-
-            if (!col.column) {
-                res.insert({nullptr, nested_type, col.name});
-            } else if (auto* nullable = check_and_get_column<ColumnNullable>(*col.column)) {
-                const auto& nested_col = nullable->get_nested_column_ptr();
-                res.insert({nested_col, nested_type, col.name});
-            } else if (auto* const_column = check_and_get_column<ColumnConst>(*col.column)) {
-                const auto& nested_col =
-                        check_and_get_column<ColumnNullable>(const_column->get_data_column())
-                                ->get_nested_column_ptr();
-                res.insert({ColumnConst::create(nested_col, col.column->size()), nested_type,
-                            col.name});
+        if (!is_in_res) {
+            const auto& col = block.get_by_position(i);
+            if (col.type->is_nullable()) {
+                const DataTypePtr& nested_type =
+                        static_cast<const DataTypeNullable&>(*col.type).get_nested_type();
+
+                if (!col.column) {
+                    res.insert({nullptr, nested_type, col.name});
+                } else if (auto* nullable = check_and_get_column<ColumnNullable>(*col.column)) {
+                    const auto& nested_col = nullable->get_nested_column_ptr();
+                    res.insert({nested_col, nested_type, col.name});
+                } else if (auto* const_column = check_and_get_column<ColumnConst>(*col.column)) {
+                    const auto& nested_col =
+                            check_and_get_column<ColumnNullable>(const_column->get_data_column())
+                                    ->get_nested_column_ptr();
+                    res.insert({ColumnConst::create(nested_col, col.column->size()), nested_type,
+                                col.name});
+                } else {
+                    LOG(FATAL) << "Illegal column for DataTypeNullable";
+                }
             } else {
-                LOG(FATAL) << "Illegal column for DataTypeNullable";
+                res.insert(col);
             }
-        } else
-            res.insert(col);
-    }
 
-    return res;
-}
+            res_args[i] = res.columns() - 1;
+        } else {
+            res_args[i] = pre_loc;
+        }
+    }
 
-Block create_block_with_nested_columns(const Block& block, const ColumnNumbers& args) {
-    std::unordered_set<size_t> args_set(args.begin(), args.end());
-    return create_block_with_nested_columns_impl(block, args_set);
+    return {res, res_args};
 }
 
-Block create_block_with_nested_columns(const Block& block, const ColumnNumbers& args,
-                                       size_t result) {
-    std::unordered_set<size_t> args_set(args.begin(), args.end());
-    args_set.insert(result);
-    return create_block_with_nested_columns_impl(block, args_set);
+std::tuple<Block, ColumnNumbers, size_t> create_block_with_nested_columns(const Block& block,
+                                                                          const ColumnNumbers& args,
+                                                                          size_t result) {
+    auto [res, res_args] = create_block_with_nested_columns(block, args, true);
+    // insert result column in temp block
+    res.insert(block.get_by_position(result));
+    return {res, res_args, res.columns() - 1};
 }
 
 void validate_argument_type(const IFunction& func, const DataTypes& arguments,
diff --git a/be/src/vec/functions/function_helpers.h b/be/src/vec/functions/function_helpers.h
index 777bd415dc..474ac435bf 100644
--- a/be/src/vec/functions/function_helpers.h
+++ b/be/src/vec/functions/function_helpers.h
@@ -86,21 +86,17 @@ inline std::enable_if_t<IsDecimalNumber<T>, Field> to_field(const T& x, UInt32 s
 
 Columns convert_const_tuple_to_constant_elements(const ColumnConst& column);
 
-/// Returns the copy of a given block in which each column specified in
-/// the "arguments" parameter is replaced with its respective nested
-/// column if it is nullable.
-Block create_block_with_nested_columns(const Block& block, const ColumnNumbers& args);
-
-/// Similar function as above. Additionally transform the result type if needed.
-Block create_block_with_nested_columns(const Block& block, const ColumnNumbers& args,
-                                       size_t result);
-
-/// Returns the copy of a given block in only args column specified in
-/// the "arguments" parameter is replaced with its respective nested
-/// column if it is nullable.
-/// TODO: the old funciton `create_block_with_nested_columns` have performance problem, replace all
-/// by the function and delete old one.
-Block create_block_with_nested_columns_only_args(const Block& block, const ColumnNumbers& args);
+/// Returns the copy of a tmp block and temp args order same as args
+/// in which only args column each column specified in the "arguments"
+/// parameter is replaced with its respective nested column if it is nullable.
+std::tuple<Block, ColumnNumbers> create_block_with_nested_columns(const Block& block,
+                                                                  const ColumnNumbers& args,
+                                                                  const bool need_check_same);
+
+// Same as above and return the new_res loc in tuple
+std::tuple<Block, ColumnNumbers, size_t> create_block_with_nested_columns(const Block& block,
+                                                                          const ColumnNumbers& args,
+                                                                          size_t result);
 
 /// Checks argument type at specified index with predicate.
 /// throws if there is no argument at specified index or if predicate returns false.


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org