You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/07/23 03:50:14 UTC

[doris] branch master updated: [Vectorized][Refactor] Refactor the function of `tuple_is_null`, only do work in hash join node (#11109)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new fdb4193e1b [Vectorized][Refactor] Refactor the function of `tuple_is_null`, only do work in hash join node (#11109)
fdb4193e1b is described below

commit fdb4193e1b0961b104913b17e636a6a3fe02bac9
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Sat Jul 23 11:50:07 2022 +0800

    [Vectorized][Refactor] Refactor the function of `tuple_is_null`, only do work in hash join node (#11109)
---
 be/src/vec/columns/column.h                        |  3 -
 be/src/vec/columns/column_nullable.cpp             | 10 +--
 be/src/vec/columns/column_nullable.h               |  2 -
 be/src/vec/columns/column_vector.cpp               |  4 +-
 be/src/vec/exec/join/vhash_join_node.cpp           | 97 ++++++++++++++++++++--
 be/src/vec/exec/join/vhash_join_node.h             | 10 ++-
 be/src/vec/exprs/vtuple_is_null_predicate.cpp      | 57 ++-----------
 be/src/vec/exprs/vtuple_is_null_predicate.h        |  4 +-
 .../doris/analysis/TupleIsNullPredicate.java       | 59 ++++++++++++-
 .../org/apache/doris/planner/HashJoinNode.java     |  5 +-
 gensrc/thrift/Exprs.thrift                         |  6 ++
 11 files changed, 178 insertions(+), 79 deletions(-)

diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index 006389ded2..4513b12ec4 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -34,8 +34,6 @@ namespace doris::vectorized {
 
 class Arena;
 class Field;
-// TODO: Remove the trickly hint, after FE support better way to remove function tuple_is_null
-constexpr uint8_t JOIN_NULL_HINT = 2;
 
 /// Declares interface to store columns in memory.
 class IColumn : public COW<IColumn> {
@@ -178,7 +176,6 @@ public:
     /// indices_begin + indices_end represent the row indices of column src
     /// Warning:
     ///       if *indices == -1 means the row is null, only use in outer join, do not use in any other place
-    ///       insert JOIN_NULL_HINT in null map to hint the null is produced by outer join
     virtual void insert_indices_from(const IColumn& src, const int* indices_begin,
                                      const int* indices_end) = 0;
 
diff --git a/be/src/vec/columns/column_nullable.cpp b/be/src/vec/columns/column_nullable.cpp
index b8058a8d97..14eebe3150 100644
--- a/be/src/vec/columns/column_nullable.cpp
+++ b/be/src/vec/columns/column_nullable.cpp
@@ -116,10 +116,7 @@ StringRef ColumnNullable::serialize_value_into_arena(size_t n, Arena& arena,
     static constexpr auto s = sizeof(arr[0]);
 
     auto pos = arena.alloc_continue(s, begin);
-    // Value of `NULL` may be 1 or JOIN_NULL_HINT, we serialize both to 1.
-    // Because we need same key for both `NULL` values while processing `group by`.
-    UInt8* val = reinterpret_cast<UInt8*>(pos);
-    *val = (arr[n] ? 1 : 0);
+    memcpy(pos, &arr[n], s);
 
     if (arr[n]) return StringRef(pos, s);
 
@@ -129,11 +126,6 @@ StringRef ColumnNullable::serialize_value_into_arena(size_t n, Arena& arena,
     return StringRef(nested_ref.data - s, nested_ref.size + s);
 }
 
-void ColumnNullable::insert_join_null_data() {
-    get_nested_column().insert_default();
-    get_null_map_data().push_back(JOIN_NULL_HINT);
-}
-
 const char* ColumnNullable::deserialize_and_insert_from_arena(const char* pos) {
     UInt8 val = *reinterpret_cast<const UInt8*>(pos);
     pos += sizeof(val);
diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h
index 06e3c9fa8a..6bd37d036f 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -84,8 +84,6 @@ public:
 
     /// Will insert null value if pos=nullptr
     void insert_data(const char* pos, size_t length) override;
-    /// JOIN_NULL_HINT in null map means null is generated by join, only use in tuple is null
-    void insert_join_null_data();
 
     void insert_many_strings(const StringRef* strings, size_t num) override;
 
diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp
index c740a515a0..51ed9371d1 100644
--- a/be/src/vec/columns/column_vector.cpp
+++ b/be/src/vec/columns/column_vector.cpp
@@ -280,9 +280,7 @@ void ColumnVector<T>::insert_indices_from(const IColumn& src, const int* indices
             // Now Uint8 use to identify null and non null
             // 1. nullable column : offset == -1 means is null at the here, set true here
             // 2. real data column : offset == -1 what at is meaningless
-            // 3. JOIN_NULL_HINT only use in outer join to hint the null is produced by outer join
-            data[origin_size + i] =
-                    (offset == -1) ? T {JOIN_NULL_HINT} : src_vec.get_element(offset);
+            data[origin_size + i] = (offset == -1) ? T {1} : src_vec.get_element(offset);
         } else {
             data[origin_size + i] = (offset == -1) ? T {0} : src_vec.get_element(offset);
         }
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 61f3be16bf..26eb565fae 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -23,6 +23,7 @@
 #include "runtime/runtime_filter_mgr.h"
 #include "util/defer_op.h"
 #include "vec/core/materialize_block.h"
+#include "vec/data_types/data_type_number.h"
 #include "vec/exprs/vexpr.h"
 #include "vec/exprs/vexpr_context.h"
 #include "vec/utils/template_helpers.hpp"
@@ -178,6 +179,12 @@ struct ProcessHashTableProbe {
               _items_counts(join_node->_items_counts),
               _build_block_offsets(join_node->_build_block_offsets),
               _build_block_rows(join_node->_build_block_rows),
+              _tuple_is_null_left_flags(
+                      reinterpret_cast<ColumnUInt8&>(*join_node->_tuple_is_null_left_flag_column)
+                              .get_data()),
+              _tuple_is_null_right_flags(
+                      reinterpret_cast<ColumnUInt8&>(*join_node->_tuple_is_null_right_flag_column)
+                              .get_data()),
               _rows_returned_counter(join_node->_rows_returned_counter),
               _search_hashtable_timer(join_node->_search_hashtable_timer),
               _build_side_output_timer(join_node->_build_side_output_timer),
@@ -214,7 +221,7 @@ struct ProcessHashTableProbe {
                                 if (_build_block_offsets[j] == -1) {
                                     DCHECK(mcol[i + column_offset]->is_nullable());
                                     assert_cast<ColumnNullable*>(mcol[i + column_offset].get())
-                                            ->insert_join_null_data();
+                                            ->insert_default();
                                 } else {
                                     auto& column = *_build_blocks[_build_block_offsets[j]]
                                                             .get_by_position(i)
@@ -246,9 +253,19 @@ struct ProcessHashTableProbe {
                 }
             }
         }
+
+        // Fill the right-side tuple_is_null flags: 1 where _build_block_rows[i] == -1, else 0
+        if constexpr (probe_all && !have_other_join_conjunct) {
+            _tuple_is_null_right_flags.resize(size);
+            auto* __restrict null_data = _tuple_is_null_right_flags.data();
+            for (int i = 0; i < size; ++i) {
+                null_data[i] = _build_block_rows[i] == -1;
+            }
+        }
     }
 
     // output probe side result column
+    template <bool have_other_join_conjunct = false>
     void probe_side_output_column(MutableColumns& mcol, const std::vector<bool>& output_slot_flags,
                                   int size) {
         for (int i = 0; i < output_slot_flags.size(); ++i) {
@@ -259,6 +276,10 @@ struct ProcessHashTableProbe {
                 mcol[i]->resize(size);
             }
         }
+
+        if constexpr (JoinOpType::value == TJoinOp::RIGHT_OUTER_JOIN && !have_other_join_conjunct) {
+            _tuple_is_null_left_flags.resize_fill(size, 0);
+        }
     }
     // Only process the join with no other join conjunt, because of no other join conjunt
     // the output block struct is same with mutable block. we can do more opt on it and simplify
@@ -484,7 +505,8 @@ struct ProcessHashTableProbe {
         }
         {
             SCOPED_TIMER(_probe_side_output_timer);
-            probe_side_output_column(mcol, _join_node->_left_output_slot_flags, current_offset);
+            probe_side_output_column<true>(mcol, _join_node->_left_output_slot_flags,
+                                           current_offset);
         }
         output_block->swap(mutable_block.to_block());
 
@@ -500,6 +522,9 @@ struct ProcessHashTableProbe {
                 auto new_filter_column = ColumnVector<UInt8>::create();
                 auto& filter_map = new_filter_column->get_data();
 
+                auto null_map_column = ColumnVector<UInt8>::create(column->size(), 0);
+                auto* __restrict null_map_data = null_map_column->get_data().data();
+
                 for (int i = 0; i < column->size(); ++i) {
                     auto join_hit = visited_map[i] != nullptr;
                     auto other_hit = column->get_bool(i);
@@ -514,6 +539,7 @@ struct ProcessHashTableProbe {
                                     ->get_null_map_data()[i] = true;
                         }
                     }
+                    null_map_data[i] = !join_hit || !other_hit;
 
                     if (join_hit) {
                         *visited_map[i] |= other_hit;
@@ -527,6 +553,12 @@ struct ProcessHashTableProbe {
                         filter_map.push_back(true);
                     }
                 }
+
+                for (int i = 0; i < column->size(); ++i) {
+                    if (filter_map[i]) {
+                        _tuple_is_null_right_flags.emplace_back(null_map_data[i]);
+                    }
+                }
                 output_block->get_by_position(result_column_id).column =
                         std::move(new_filter_column);
             } else if constexpr (JoinOpType::value == TJoinOp::LEFT_SEMI_JOIN) {
@@ -571,12 +603,20 @@ struct ProcessHashTableProbe {
                 output_block->get_by_position(result_column_id).column =
                         std::move(new_filter_column);
             } else if constexpr (JoinOpType::value == TJoinOp::RIGHT_SEMI_JOIN ||
-                                 JoinOpType::value == TJoinOp::RIGHT_ANTI_JOIN ||
-                                 JoinOpType::value == TJoinOp::RIGHT_OUTER_JOIN) {
+                                 JoinOpType::value == TJoinOp::RIGHT_ANTI_JOIN) {
                 for (int i = 0; i < column->size(); ++i) {
                     DCHECK(visited_map[i]);
                     *visited_map[i] |= column->get_bool(i);
                 }
+            } else if constexpr (JoinOpType::value == TJoinOp::RIGHT_OUTER_JOIN) {
+                auto filter_size = 0;
+                for (int i = 0; i < column->size(); ++i) {
+                    DCHECK(visited_map[i]);
+                    auto result = column->get_bool(i);
+                    *visited_map[i] |= result;
+                    filter_size += result;
+                }
+                _tuple_is_null_left_flags.resize_fill(filter_size, 0);
             } else {
                 // inner join do nothing
             }
@@ -642,10 +682,9 @@ struct ProcessHashTableProbe {
         if constexpr (JoinOpType::value == TJoinOp::RIGHT_OUTER_JOIN ||
                       JoinOpType::value == TJoinOp::FULL_OUTER_JOIN) {
             for (int i = 0; i < right_col_idx; ++i) {
-                for (int j = 0; j < block_size; ++j) {
-                    assert_cast<ColumnNullable*>(mcol[i].get())->insert_join_null_data();
-                }
+                assert_cast<ColumnNullable*>(mcol[i].get())->insert_many_defaults(block_size);
             }
+            _tuple_is_null_left_flags.resize_fill(block_size, 1);
         }
         *eos = iter == hash_table_ctx.hash_table.end();
 
@@ -667,6 +706,10 @@ private:
     std::vector<uint32_t>& _items_counts;
     std::vector<int8_t>& _build_block_offsets;
     std::vector<int>& _build_block_rows;
+    // only needs to be set for RIGHT_OUTER_JOIN and FULL_OUTER_JOIN
+    ColumnUInt8::Container& _tuple_is_null_left_flags;
+    // only needs to be set for LEFT_OUTER_JOIN and FULL_OUTER_JOIN
+    ColumnUInt8::Container& _tuple_is_null_right_flags;
 
     ProfileCounter* _rows_returned_counter;
     ProfileCounter* _search_hashtable_timer;
@@ -726,7 +769,6 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) {
     if (tnode.hash_join_node.join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
         return Status::InternalError("Do not support null aware left anti join");
     }
-    _row_desc_for_other_join_conjunt = RowDescriptor(child(0)->row_desc(), child(1)->row_desc());
 
     const bool build_stores_null = _join_op == TJoinOp::RIGHT_OUTER_JOIN ||
                                    _join_op == TJoinOp::FULL_OUTER_JOIN ||
@@ -797,6 +839,11 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) {
     init_output_slots_flags(child(0)->row_desc().tuple_descriptors(), _left_output_slot_flags);
     init_output_slots_flags(child(1)->row_desc().tuple_descriptors(), _right_output_slot_flags);
 
+    // only used in outer joins: bool columns holding the flags for the `tuple_is_null` function
+    if (_is_outer_join) {
+        _tuple_is_null_left_flag_column = ColumnUInt8::create();
+        _tuple_is_null_right_flag_column = ColumnUInt8::create();
+    }
     return Status::OK();
 }
 
@@ -997,9 +1044,11 @@ Status HashJoinNode::get_next(RuntimeState* state, Block* output_block, bool* eo
         return Status::OK();
     }
 
+    _add_tuple_is_null_column(&temp_block);
     RETURN_IF_ERROR(
             VExprContext::filter_block(_vconjunct_ctx_ptr, &temp_block, temp_block.columns()));
     RETURN_IF_ERROR(_build_output_block(&temp_block, output_block));
+    _reset_tuple_is_null_column();
     reached_limit(output_block, eos);
 
     return st;
@@ -1439,4 +1488,36 @@ Status HashJoinNode::_build_output_block(Block* origin_block, Block* output_bloc
     return Status::OK();
 }
 
+void HashJoinNode::_add_tuple_is_null_column(doris::vectorized::Block* block) {
+    if (_is_outer_join) {
+        auto p0 = _tuple_is_null_left_flag_column->assume_mutable();
+        auto p1 = _tuple_is_null_right_flag_column->assume_mutable();
+        auto& left_null_map = reinterpret_cast<ColumnUInt8&>(*p0);
+        auto& right_null_map = reinterpret_cast<ColumnUInt8&>(*p1);
+        auto left_size = left_null_map.size();
+        auto right_size = right_null_map.size();
+
+        if (left_size == 0) {
+            DCHECK_EQ(right_size, block->rows());
+            left_null_map.get_data().resize_fill(right_size, 0);
+        }
+        if (right_size == 0) {
+            DCHECK_EQ(left_size, block->rows());
+            right_null_map.get_data().resize_fill(left_size, 0);
+        }
+
+        block->insert({std::move(p0), std::make_shared<vectorized::DataTypeUInt8>(),
+                       "left_tuples_is_null"});
+        block->insert({std::move(p1), std::make_shared<vectorized::DataTypeUInt8>(),
+                       "right_tuples_is_null"});
+    }
+}
+
+void HashJoinNode::_reset_tuple_is_null_column() {
+    if (_is_outer_join) {
+        reinterpret_cast<ColumnUInt8&>(*_tuple_is_null_left_flag_column).clear();
+        reinterpret_cast<ColumnUInt8&>(*_tuple_is_null_right_flag_column).clear();
+    }
+}
+
 } // namespace doris::vectorized
diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
index d3034e0bf0..36dec27fbb 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -230,7 +230,6 @@ private:
     const bool _is_outer_join;
     bool _have_other_join_conjunct = false;
 
-    RowDescriptor _row_desc_for_other_join_conjunt;
     Block _join_block;
 
     std::vector<uint32_t> _items_counts;
@@ -244,6 +243,9 @@ private:
     RowDescriptor _intermediate_row_desc;
     RowDescriptor _output_row_desc;
 
+    MutableColumnPtr _tuple_is_null_left_flag_column;
+    MutableColumnPtr _tuple_is_null_right_flag_column;
+
 private:
     void _hash_table_build_thread(RuntimeState* state, std::promise<Status>* status);
 
@@ -267,6 +269,12 @@ private:
 
     Status _build_output_block(Block* origin_block, Block* output_block);
 
+    // add the tuple_is_null flag columns to the Block for the filter conjuncts and output exprs
+    void _add_tuple_is_null_column(Block* block);
+
+    // reset the tuple_is_null flag columns for the next call
+    void _reset_tuple_is_null_column();
+
     static std::vector<uint16_t> _convert_block_to_null(Block& block);
 
     template <class HashTableContext>
diff --git a/be/src/vec/exprs/vtuple_is_null_predicate.cpp b/be/src/vec/exprs/vtuple_is_null_predicate.cpp
index eb5679ec6f..6342dd32dc 100644
--- a/be/src/vec/exprs/vtuple_is_null_predicate.cpp
+++ b/be/src/vec/exprs/vtuple_is_null_predicate.cpp
@@ -28,54 +28,24 @@
 namespace doris::vectorized {
 
 VTupleIsNullPredicate::VTupleIsNullPredicate(const TExprNode& node)
-        : VExpr(node),
-          _expr_name(function_name),
-          _tuple_ids(node.tuple_is_null_pred.tuple_ids.begin(),
-                     node.tuple_is_null_pred.tuple_ids.end()) {}
+        : VExpr(node), _expr_name(function_name) {
+    DCHECK(node.tuple_is_null_pred.__isset.null_side);
+    _is_left_null_side = node.tuple_is_null_pred.null_side == TNullSide::LEFT;
+    _column_to_check = 0;
+}
 
 Status VTupleIsNullPredicate::prepare(RuntimeState* state, const RowDescriptor& desc,
                                       VExprContext* context) {
     RETURN_IF_ERROR(VExpr::prepare(state, desc, context));
     DCHECK_EQ(0, _children.size());
-    DCHECK_GT(_tuple_ids.size(), 0);
-
-    _column_to_check.reserve(_tuple_ids.size());
-    // Resolve tuple ids to column id, one tuple only need check one column to speed up
-    for (auto tuple_id : _tuple_ids) {
-        uint32_t loc = 0;
-        for (auto& tuple_desc : desc.tuple_descriptors()) {
-            if (tuple_desc->id() == tuple_id) {
-                _column_to_check.emplace_back(loc);
-                break;
-            }
-            loc += tuple_desc->slots().size();
-        }
-    }
+    _column_to_check =
+            _is_left_null_side ? desc.num_materialized_slots() : desc.num_materialized_slots() + 1;
 
     return Status::OK();
 }
 
 Status VTupleIsNullPredicate::execute(VExprContext* context, Block* block, int* result_column_id) {
-    size_t num_columns_without_result = block->columns();
-    auto target_rows = block->rows();
-    auto ans = ColumnVector<UInt8>::create(target_rows, 1);
-    auto* __restrict ans_map = ans->get_data().data();
-
-    for (auto col_id : _column_to_check) {
-        auto* __restrict null_map =
-                reinterpret_cast<const ColumnNullable&>(*block->get_by_position(col_id).column)
-                        .get_null_map_column()
-                        .get_data()
-                        .data();
-
-        for (int i = 0; i < target_rows; ++i) {
-            ans_map[i] &= null_map[i] == JOIN_NULL_HINT;
-        }
-    }
-
-    // prepare a column to save result
-    block->insert({std::move(ans), _data_type, _expr_name});
-    *result_column_id = num_columns_without_result;
+    *result_column_id = _column_to_check;
     return Status::OK();
 }
 
@@ -86,16 +56,7 @@ const std::string& VTupleIsNullPredicate::expr_name() const {
 std::string VTupleIsNullPredicate::debug_string() const {
     std::stringstream out;
     out << "TupleIsNullPredicate(_column_to_check=[";
-
-    bool first = true;
-    for (int i = 0; i < _column_to_check.size(); ++i) {
-        if (first) {
-            out << _column_to_check[i];
-            first = false;
-        } else {
-            out << ", " << _column_to_check[i];
-        }
-    }
+    out << _column_to_check;
     out << "])";
     return out.str();
 }
diff --git a/be/src/vec/exprs/vtuple_is_null_predicate.h b/be/src/vec/exprs/vtuple_is_null_predicate.h
index 3690b52898..2e83fb88ad 100644
--- a/be/src/vec/exprs/vtuple_is_null_predicate.h
+++ b/be/src/vec/exprs/vtuple_is_null_predicate.h
@@ -41,8 +41,8 @@ public:
 
 private:
     std::string _expr_name;
-    std::vector<TupleId> _tuple_ids;
-    std::vector<uint32_t> _column_to_check;
+    bool _is_left_null_side;
+    uint32_t _column_to_check;
 
 private:
     static const constexpr char* function_name = "tuple_is_null";
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleIsNullPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleIsNullPredicate.java
index 1add0bbccc..fd92081afd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleIsNullPredicate.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleIsNullPredicate.java
@@ -24,6 +24,7 @@ import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.UserException;
 import org.apache.doris.thrift.TExprNode;
 import org.apache.doris.thrift.TExprNodeType;
+import org.apache.doris.thrift.TNullSide;
 import org.apache.doris.thrift.TTupleIsNullPredicate;
 
 import com.google.common.base.Joiner;
@@ -41,17 +42,22 @@ import java.util.Objects;
  * The given tupleIds must be materialized and nullable at the appropriate PlanNode.
  */
 public class TupleIsNullPredicate extends Predicate {
-
     private List<TupleId> tupleIds = Lists.newArrayList();
+    private TNullSide nullSide = null;
 
     public TupleIsNullPredicate(List<TupleId> tupleIds) {
         Preconditions.checkState(tupleIds != null && !tupleIds.isEmpty());
         this.tupleIds.addAll(tupleIds);
     }
 
+    public TupleIsNullPredicate(TNullSide nullSide) {
+        this.nullSide = nullSide;
+    }
+
     protected TupleIsNullPredicate(TupleIsNullPredicate other) {
         super(other);
         tupleIds.addAll(other.tupleIds);
+        nullSide = other.nullSide;
     }
 
     @Override
@@ -83,9 +89,13 @@ public class TupleIsNullPredicate extends Predicate {
     protected void toThrift(TExprNode msg) {
         msg.node_type = TExprNodeType.TUPLE_IS_NULL_PRED;
         msg.tuple_is_null_pred = new TTupleIsNullPredicate();
+        msg.tuple_is_null_pred.setTupleIds(Lists.newArrayList());
         for (TupleId tid : tupleIds) {
             msg.tuple_is_null_pred.addToTupleIds(tid.asInt());
         }
+        if (nullSide != null) {
+            msg.tuple_is_null_pred.setNullSide(nullSide);
+        }
     }
 
     public List<TupleId> getTupleIds() {
@@ -135,6 +145,26 @@ public class TupleIsNullPredicate extends Predicate {
         return result;
     }
 
+    /**
+     * Makes each input expr nullable, if necessary, by wrapping it as follows:
+     * IF(TupleIsNull(nullSide), NULL, expr)
+     * <p>
+     * The given inputExprs are expected to be bound
+     * by null side tuple id once fully substituted against base tables. However, inputExprs may not yet
+     * be fully substituted at this point.
+     * <p>
+     * Returns a new list with the nullable exprs. Only used in the vectorized exec engine.
+     */
+    public static List<Expr> wrapExprs(List<Expr> inputExprs,
+                                       TNullSide nullSide, Analyzer analyzer) throws UserException {
+        // Perform the wrapping.
+        List<Expr> result = Lists.newArrayListWithCapacity(inputExprs.size());
+        for (Expr e : inputExprs) {
+            result.add(wrapExpr(e, nullSide, analyzer));
+        }
+        return result;
+    }
+
     /**
      * Returns a new analyzed conditional expr 'IF(TupleIsNull(tids), NULL, expr)',
      * if required to make expr nullable. Otherwise, returns expr.
@@ -162,6 +192,33 @@ public class TupleIsNullPredicate extends Predicate {
         return ifExpr;
     }
 
+    /**
+     * Returns a new analyzed conditional expr 'IF(TupleIsNull(nullSide), NULL, expr)',
+     * if required to make expr nullable. Otherwise, returns expr. Only used in the vectorized exec engine.
+     */
+    public static Expr wrapExpr(Expr expr, TNullSide nullSide, Analyzer analyzer)
+            throws UserException {
+        if (!requiresNullWrapping(expr, analyzer)) {
+            return expr;
+        }
+        List<Expr> params = Lists.newArrayList();
+        params.add(new TupleIsNullPredicate(nullSide));
+        params.add(new NullLiteral());
+        params.add(expr);
+        Expr ifExpr = new FunctionCallExpr("if", params);
+        ifExpr.analyzeNoThrow(analyzer);
+        // If the IF function's type differs from the type of expr, the query would return an incorrect result.
+        // Example:
+        //   the type of expr is date
+        //   the type of function is int
+        //   So, the upper fragment will receive an int value instead of a date while the result expr is date.
+        // If there is no cast function, the result of query will be incorrect.
+        if (expr.getType().getPrimitiveType() != ifExpr.getType().getPrimitiveType()) {
+            ifExpr = ifExpr.uncheckedCastTo(expr.getType());
+        }
+        return ifExpr;
+    }
+
     /**
      * Returns true if the given expr evaluates to a non-NULL value if all its contained
      * SlotRefs evaluate to NULL, false otherwise.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java
index 78eb563ec7..f835922cb8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java
@@ -46,6 +46,7 @@ import org.apache.doris.statistics.StatsRecursiveDerive;
 import org.apache.doris.thrift.TEqJoinCondition;
 import org.apache.doris.thrift.TExplainLevel;
 import org.apache.doris.thrift.THashJoinNode;
+import org.apache.doris.thrift.TNullSide;
 import org.apache.doris.thrift.TPlanNode;
 import org.apache.doris.thrift.TPlanNodeType;
 
@@ -486,7 +487,7 @@ public class HashJoinNode extends PlanNode {
         // Then: add tuple is null in left child columns
         if (leftNullable && getChild(0).tblRefIds.size() == 1 && analyzer.isInlineView(getChild(0).tblRefIds.get(0))) {
             List<Expr> tupleIsNullLhs = TupleIsNullPredicate
-                    .wrapExprs(vSrcToOutputSMap.getLhs().subList(0, leftNullableNumber), getChild(0).getTupleIds(),
+                    .wrapExprs(vSrcToOutputSMap.getLhs().subList(0, leftNullableNumber), TNullSide.LEFT,
                             analyzer);
             tupleIsNullLhs
                     .addAll(vSrcToOutputSMap.getLhs().subList(leftNullableNumber, vSrcToOutputSMap.getLhs().size()));
@@ -500,7 +501,7 @@ public class HashJoinNode extends PlanNode {
                 int rightBeginIndex = vSrcToOutputSMap.size() - rightNullableNumber;
                 List<Expr> tupleIsNullLhs = TupleIsNullPredicate
                         .wrapExprs(vSrcToOutputSMap.getLhs().subList(rightBeginIndex, vSrcToOutputSMap.size()),
-                                getChild(1).getTupleIds(), analyzer);
+                                TNullSide.RIGHT, analyzer);
                 List<Expr> newLhsList = Lists.newArrayList();
                 if (rightBeginIndex > 0) {
                     newLhsList.addAll(vSrcToOutputSMap.getLhs().subList(0, rightBeginIndex));
diff --git a/gensrc/thrift/Exprs.thrift b/gensrc/thrift/Exprs.thrift
index 50c9119410..c2211631e1 100644
--- a/gensrc/thrift/Exprs.thrift
+++ b/gensrc/thrift/Exprs.thrift
@@ -121,8 +121,14 @@ struct TLiteralPredicate {
   2: required bool is_null
 }
 
+enum TNullSide {
+   LEFT,
+   RIGHT
+}
+
 struct TTupleIsNullPredicate {
   1: required list<Types.TTupleId> tuple_ids
+  2: optional TNullSide null_side
 }
 
 struct TSlotRef {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org