You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/07/23 03:50:14 UTC
[doris] branch master updated: [Vectorized][Refactor] Refactor the function of `tuple_is_null`, only do work in hash join node (#11109)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new fdb4193e1b [Vectorized][Refactor] Refactor the function of `tuple_is_null`, only do work in hash join node (#11109)
fdb4193e1b is described below
commit fdb4193e1b0961b104913b17e636a6a3fe02bac9
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Sat Jul 23 11:50:07 2022 +0800
[Vectorized][Refactor] Refactor the function of `tuple_is_null`, only do work in hash join node (#11109)
---
be/src/vec/columns/column.h | 3 -
be/src/vec/columns/column_nullable.cpp | 10 +--
be/src/vec/columns/column_nullable.h | 2 -
be/src/vec/columns/column_vector.cpp | 4 +-
be/src/vec/exec/join/vhash_join_node.cpp | 97 ++++++++++++++++++++--
be/src/vec/exec/join/vhash_join_node.h | 10 ++-
be/src/vec/exprs/vtuple_is_null_predicate.cpp | 57 ++-----------
be/src/vec/exprs/vtuple_is_null_predicate.h | 4 +-
.../doris/analysis/TupleIsNullPredicate.java | 59 ++++++++++++-
.../org/apache/doris/planner/HashJoinNode.java | 5 +-
gensrc/thrift/Exprs.thrift | 6 ++
11 files changed, 178 insertions(+), 79 deletions(-)
diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index 006389ded2..4513b12ec4 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -34,8 +34,6 @@ namespace doris::vectorized {
class Arena;
class Field;
-// TODO: Remove the trickly hint, after FE support better way to remove function tuple_is_null
-constexpr uint8_t JOIN_NULL_HINT = 2;
/// Declares interface to store columns in memory.
class IColumn : public COW<IColumn> {
@@ -178,7 +176,6 @@ public:
/// indices_begin + indices_end represent the row indices of column src
/// Warning:
/// if *indices == -1 means the row is null, only use in outer join, do not use in any other place
- /// insert JOIN_NULL_HINT in null map to hint the null is produced by outer join
virtual void insert_indices_from(const IColumn& src, const int* indices_begin,
const int* indices_end) = 0;
diff --git a/be/src/vec/columns/column_nullable.cpp b/be/src/vec/columns/column_nullable.cpp
index b8058a8d97..14eebe3150 100644
--- a/be/src/vec/columns/column_nullable.cpp
+++ b/be/src/vec/columns/column_nullable.cpp
@@ -116,10 +116,7 @@ StringRef ColumnNullable::serialize_value_into_arena(size_t n, Arena& arena,
static constexpr auto s = sizeof(arr[0]);
auto pos = arena.alloc_continue(s, begin);
- // Value of `NULL` may be 1 or JOIN_NULL_HINT, we serialize both to 1.
- // Because we need same key for both `NULL` values while processing `group by`.
- UInt8* val = reinterpret_cast<UInt8*>(pos);
- *val = (arr[n] ? 1 : 0);
+ memcpy(pos, &arr[n], s);
if (arr[n]) return StringRef(pos, s);
@@ -129,11 +126,6 @@ StringRef ColumnNullable::serialize_value_into_arena(size_t n, Arena& arena,
return StringRef(nested_ref.data - s, nested_ref.size + s);
}
-void ColumnNullable::insert_join_null_data() {
- get_nested_column().insert_default();
- get_null_map_data().push_back(JOIN_NULL_HINT);
-}
-
const char* ColumnNullable::deserialize_and_insert_from_arena(const char* pos) {
UInt8 val = *reinterpret_cast<const UInt8*>(pos);
pos += sizeof(val);
diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h
index 06e3c9fa8a..6bd37d036f 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -84,8 +84,6 @@ public:
/// Will insert null value if pos=nullptr
void insert_data(const char* pos, size_t length) override;
- /// JOIN_NULL_HINT in null map means null is generated by join, only use in tuple is null
- void insert_join_null_data();
void insert_many_strings(const StringRef* strings, size_t num) override;
diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp
index c740a515a0..51ed9371d1 100644
--- a/be/src/vec/columns/column_vector.cpp
+++ b/be/src/vec/columns/column_vector.cpp
@@ -280,9 +280,7 @@ void ColumnVector<T>::insert_indices_from(const IColumn& src, const int* indices
// Now Uint8 use to identify null and non null
// 1. nullable column : offset == -1 means is null at the here, set true here
// 2. real data column : offset == -1 what at is meaningless
- // 3. JOIN_NULL_HINT only use in outer join to hint the null is produced by outer join
- data[origin_size + i] =
- (offset == -1) ? T {JOIN_NULL_HINT} : src_vec.get_element(offset);
+ data[origin_size + i] = (offset == -1) ? T {1} : src_vec.get_element(offset);
} else {
data[origin_size + i] = (offset == -1) ? T {0} : src_vec.get_element(offset);
}
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 61f3be16bf..26eb565fae 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -23,6 +23,7 @@
#include "runtime/runtime_filter_mgr.h"
#include "util/defer_op.h"
#include "vec/core/materialize_block.h"
+#include "vec/data_types/data_type_number.h"
#include "vec/exprs/vexpr.h"
#include "vec/exprs/vexpr_context.h"
#include "vec/utils/template_helpers.hpp"
@@ -178,6 +179,12 @@ struct ProcessHashTableProbe {
_items_counts(join_node->_items_counts),
_build_block_offsets(join_node->_build_block_offsets),
_build_block_rows(join_node->_build_block_rows),
+ _tuple_is_null_left_flags(
+ reinterpret_cast<ColumnUInt8&>(*join_node->_tuple_is_null_left_flag_column)
+ .get_data()),
+ _tuple_is_null_right_flags(
+ reinterpret_cast<ColumnUInt8&>(*join_node->_tuple_is_null_right_flag_column)
+ .get_data()),
_rows_returned_counter(join_node->_rows_returned_counter),
_search_hashtable_timer(join_node->_search_hashtable_timer),
_build_side_output_timer(join_node->_build_side_output_timer),
@@ -214,7 +221,7 @@ struct ProcessHashTableProbe {
if (_build_block_offsets[j] == -1) {
DCHECK(mcol[i + column_offset]->is_nullable());
assert_cast<ColumnNullable*>(mcol[i + column_offset].get())
- ->insert_join_null_data();
+ ->insert_default();
} else {
auto& column = *_build_blocks[_build_block_offsets[j]]
.get_by_position(i)
@@ -246,9 +253,19 @@ struct ProcessHashTableProbe {
}
}
}
+
+ // Fill the right-side tuple-is-null flags: a row is flagged when its build row index is -1
+ if constexpr (probe_all && !have_other_join_conjunct) {
+ _tuple_is_null_right_flags.resize(size);
+ auto* __restrict null_data = _tuple_is_null_right_flags.data();
+ for (int i = 0; i < size; ++i) {
+ null_data[i] = _build_block_rows[i] == -1;
+ }
+ }
}
// output probe side result column
+ template <bool have_other_join_conjunct = false>
void probe_side_output_column(MutableColumns& mcol, const std::vector<bool>& output_slot_flags,
int size) {
for (int i = 0; i < output_slot_flags.size(); ++i) {
@@ -259,6 +276,10 @@ struct ProcessHashTableProbe {
mcol[i]->resize(size);
}
}
+
+ if constexpr (JoinOpType::value == TJoinOp::RIGHT_OUTER_JOIN && !have_other_join_conjunct) {
+ _tuple_is_null_left_flags.resize_fill(size, 0);
+ }
}
// Only process the join with no other join conjunt, because of no other join conjunt
// the output block struct is same with mutable block. we can do more opt on it and simplify
@@ -484,7 +505,8 @@ struct ProcessHashTableProbe {
}
{
SCOPED_TIMER(_probe_side_output_timer);
- probe_side_output_column(mcol, _join_node->_left_output_slot_flags, current_offset);
+ probe_side_output_column<true>(mcol, _join_node->_left_output_slot_flags,
+ current_offset);
}
output_block->swap(mutable_block.to_block());
@@ -500,6 +522,9 @@ struct ProcessHashTableProbe {
auto new_filter_column = ColumnVector<UInt8>::create();
auto& filter_map = new_filter_column->get_data();
+ auto null_map_column = ColumnVector<UInt8>::create(column->size(), 0);
+ auto* __restrict null_map_data = null_map_column->get_data().data();
+
for (int i = 0; i < column->size(); ++i) {
auto join_hit = visited_map[i] != nullptr;
auto other_hit = column->get_bool(i);
@@ -514,6 +539,7 @@ struct ProcessHashTableProbe {
->get_null_map_data()[i] = true;
}
}
+ null_map_data[i] = !join_hit || !other_hit;
if (join_hit) {
*visited_map[i] |= other_hit;
@@ -527,6 +553,12 @@ struct ProcessHashTableProbe {
filter_map.push_back(true);
}
}
+
+ for (int i = 0; i < column->size(); ++i) {
+ if (filter_map[i]) {
+ _tuple_is_null_right_flags.emplace_back(null_map_data[i]);
+ }
+ }
output_block->get_by_position(result_column_id).column =
std::move(new_filter_column);
} else if constexpr (JoinOpType::value == TJoinOp::LEFT_SEMI_JOIN) {
@@ -571,12 +603,20 @@ struct ProcessHashTableProbe {
output_block->get_by_position(result_column_id).column =
std::move(new_filter_column);
} else if constexpr (JoinOpType::value == TJoinOp::RIGHT_SEMI_JOIN ||
- JoinOpType::value == TJoinOp::RIGHT_ANTI_JOIN ||
- JoinOpType::value == TJoinOp::RIGHT_OUTER_JOIN) {
+ JoinOpType::value == TJoinOp::RIGHT_ANTI_JOIN) {
for (int i = 0; i < column->size(); ++i) {
DCHECK(visited_map[i]);
*visited_map[i] |= column->get_bool(i);
}
+ } else if constexpr (JoinOpType::value == TJoinOp::RIGHT_OUTER_JOIN) {
+ auto filter_size = 0;
+ for (int i = 0; i < column->size(); ++i) {
+ DCHECK(visited_map[i]);
+ auto result = column->get_bool(i);
+ *visited_map[i] |= result;
+ filter_size += result;
+ }
+ _tuple_is_null_left_flags.resize_fill(filter_size, 0);
} else {
// inner join do nothing
}
@@ -642,10 +682,9 @@ struct ProcessHashTableProbe {
if constexpr (JoinOpType::value == TJoinOp::RIGHT_OUTER_JOIN ||
JoinOpType::value == TJoinOp::FULL_OUTER_JOIN) {
for (int i = 0; i < right_col_idx; ++i) {
- for (int j = 0; j < block_size; ++j) {
- assert_cast<ColumnNullable*>(mcol[i].get())->insert_join_null_data();
- }
+ assert_cast<ColumnNullable*>(mcol[i].get())->insert_many_defaults(block_size);
}
+ _tuple_is_null_left_flags.resize_fill(block_size, 1);
}
*eos = iter == hash_table_ctx.hash_table.end();
@@ -667,6 +706,10 @@ private:
std::vector<uint32_t>& _items_counts;
std::vector<int8_t>& _build_block_offsets;
std::vector<int>& _build_block_rows;
+ // only needs to be set for RIGHT_OUTER_JOIN and FULL_OUTER_JOIN
+ ColumnUInt8::Container& _tuple_is_null_left_flags;
+ // only needs to be set for LEFT_OUTER_JOIN and FULL_OUTER_JOIN
+ ColumnUInt8::Container& _tuple_is_null_right_flags;
ProfileCounter* _rows_returned_counter;
ProfileCounter* _search_hashtable_timer;
@@ -726,7 +769,6 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) {
if (tnode.hash_join_node.join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
return Status::InternalError("Do not support null aware left anti join");
}
- _row_desc_for_other_join_conjunt = RowDescriptor(child(0)->row_desc(), child(1)->row_desc());
const bool build_stores_null = _join_op == TJoinOp::RIGHT_OUTER_JOIN ||
_join_op == TJoinOp::FULL_OUTER_JOIN ||
@@ -797,6 +839,11 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) {
init_output_slots_flags(child(0)->row_desc().tuple_descriptors(), _left_output_slot_flags);
init_output_slots_flags(child(1)->row_desc().tuple_descriptors(), _right_output_slot_flags);
+ // only used in outer joins, as the bool flag columns that back the `tuple_is_null` function
+ if (_is_outer_join) {
+ _tuple_is_null_left_flag_column = ColumnUInt8::create();
+ _tuple_is_null_right_flag_column = ColumnUInt8::create();
+ }
return Status::OK();
}
@@ -997,9 +1044,11 @@ Status HashJoinNode::get_next(RuntimeState* state, Block* output_block, bool* eo
return Status::OK();
}
+ _add_tuple_is_null_column(&temp_block);
RETURN_IF_ERROR(
VExprContext::filter_block(_vconjunct_ctx_ptr, &temp_block, temp_block.columns()));
RETURN_IF_ERROR(_build_output_block(&temp_block, output_block));
+ _reset_tuple_is_null_column();
reached_limit(output_block, eos);
return st;
@@ -1439,4 +1488,36 @@ Status HashJoinNode::_build_output_block(Block* origin_block, Block* output_bloc
return Status::OK();
}
+void HashJoinNode::_add_tuple_is_null_column(doris::vectorized::Block* block) {
+ if (_is_outer_join) {
+ auto p0 = _tuple_is_null_left_flag_column->assume_mutable();
+ auto p1 = _tuple_is_null_right_flag_column->assume_mutable();
+ auto& left_null_map = reinterpret_cast<ColumnUInt8&>(*p0);
+ auto& right_null_map = reinterpret_cast<ColumnUInt8&>(*p1);
+ auto left_size = left_null_map.size();
+ auto right_size = right_null_map.size();
+
+ if (left_size == 0) {
+ DCHECK_EQ(right_size, block->rows());
+ left_null_map.get_data().resize_fill(right_size, 0);
+ }
+ if (right_size == 0) {
+ DCHECK_EQ(left_size, block->rows());
+ right_null_map.get_data().resize_fill(left_size, 0);
+ }
+
+ block->insert({std::move(p0), std::make_shared<vectorized::DataTypeUInt8>(),
+ "left_tuples_is_null"});
+ block->insert({std::move(p1), std::make_shared<vectorized::DataTypeUInt8>(),
+ "right_tuples_is_null"});
+ }
+}
+
+void HashJoinNode::_reset_tuple_is_null_column() {
+ if (_is_outer_join) {
+ reinterpret_cast<ColumnUInt8&>(*_tuple_is_null_left_flag_column).clear();
+ reinterpret_cast<ColumnUInt8&>(*_tuple_is_null_right_flag_column).clear();
+ }
+}
+
} // namespace doris::vectorized
diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
index d3034e0bf0..36dec27fbb 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -230,7 +230,6 @@ private:
const bool _is_outer_join;
bool _have_other_join_conjunct = false;
- RowDescriptor _row_desc_for_other_join_conjunt;
Block _join_block;
std::vector<uint32_t> _items_counts;
@@ -244,6 +243,9 @@ private:
RowDescriptor _intermediate_row_desc;
RowDescriptor _output_row_desc;
+ MutableColumnPtr _tuple_is_null_left_flag_column;
+ MutableColumnPtr _tuple_is_null_right_flag_column;
+
private:
void _hash_table_build_thread(RuntimeState* state, std::promise<Status>* status);
@@ -267,6 +269,12 @@ private:
Status _build_output_block(Block* origin_block, Block* output_block);
+ // add the tuple-is-null flag columns to the Block for use by filter conjuncts and output exprs
+ void _add_tuple_is_null_column(Block* block);
+
+ // clear the tuple-is-null flag columns ahead of the next call
+ void _reset_tuple_is_null_column();
+
static std::vector<uint16_t> _convert_block_to_null(Block& block);
template <class HashTableContext>
diff --git a/be/src/vec/exprs/vtuple_is_null_predicate.cpp b/be/src/vec/exprs/vtuple_is_null_predicate.cpp
index eb5679ec6f..6342dd32dc 100644
--- a/be/src/vec/exprs/vtuple_is_null_predicate.cpp
+++ b/be/src/vec/exprs/vtuple_is_null_predicate.cpp
@@ -28,54 +28,24 @@
namespace doris::vectorized {
VTupleIsNullPredicate::VTupleIsNullPredicate(const TExprNode& node)
- : VExpr(node),
- _expr_name(function_name),
- _tuple_ids(node.tuple_is_null_pred.tuple_ids.begin(),
- node.tuple_is_null_pred.tuple_ids.end()) {}
+ : VExpr(node), _expr_name(function_name) {
+ DCHECK(node.tuple_is_null_pred.__isset.null_side);
+ _is_left_null_side = node.tuple_is_null_pred.null_side == TNullSide::LEFT;
+ _column_to_check = 0;
+}
Status VTupleIsNullPredicate::prepare(RuntimeState* state, const RowDescriptor& desc,
VExprContext* context) {
RETURN_IF_ERROR(VExpr::prepare(state, desc, context));
DCHECK_EQ(0, _children.size());
- DCHECK_GT(_tuple_ids.size(), 0);
-
- _column_to_check.reserve(_tuple_ids.size());
- // Resolve tuple ids to column id, one tuple only need check one column to speed up
- for (auto tuple_id : _tuple_ids) {
- uint32_t loc = 0;
- for (auto& tuple_desc : desc.tuple_descriptors()) {
- if (tuple_desc->id() == tuple_id) {
- _column_to_check.emplace_back(loc);
- break;
- }
- loc += tuple_desc->slots().size();
- }
- }
+ _column_to_check =
+ _is_left_null_side ? desc.num_materialized_slots() : desc.num_materialized_slots() + 1;
return Status::OK();
}
Status VTupleIsNullPredicate::execute(VExprContext* context, Block* block, int* result_column_id) {
- size_t num_columns_without_result = block->columns();
- auto target_rows = block->rows();
- auto ans = ColumnVector<UInt8>::create(target_rows, 1);
- auto* __restrict ans_map = ans->get_data().data();
-
- for (auto col_id : _column_to_check) {
- auto* __restrict null_map =
- reinterpret_cast<const ColumnNullable&>(*block->get_by_position(col_id).column)
- .get_null_map_column()
- .get_data()
- .data();
-
- for (int i = 0; i < target_rows; ++i) {
- ans_map[i] &= null_map[i] == JOIN_NULL_HINT;
- }
- }
-
- // prepare a column to save result
- block->insert({std::move(ans), _data_type, _expr_name});
- *result_column_id = num_columns_without_result;
+ *result_column_id = _column_to_check;
return Status::OK();
}
@@ -86,16 +56,7 @@ const std::string& VTupleIsNullPredicate::expr_name() const {
std::string VTupleIsNullPredicate::debug_string() const {
std::stringstream out;
out << "TupleIsNullPredicate(_column_to_check=[";
-
- bool first = true;
- for (int i = 0; i < _column_to_check.size(); ++i) {
- if (first) {
- out << _column_to_check[i];
- first = false;
- } else {
- out << ", " << _column_to_check[i];
- }
- }
+ out << _column_to_check;
out << "])";
return out.str();
}
diff --git a/be/src/vec/exprs/vtuple_is_null_predicate.h b/be/src/vec/exprs/vtuple_is_null_predicate.h
index 3690b52898..2e83fb88ad 100644
--- a/be/src/vec/exprs/vtuple_is_null_predicate.h
+++ b/be/src/vec/exprs/vtuple_is_null_predicate.h
@@ -41,8 +41,8 @@ public:
private:
std::string _expr_name;
- std::vector<TupleId> _tuple_ids;
- std::vector<uint32_t> _column_to_check;
+ bool _is_left_null_side;
+ uint32_t _column_to_check;
private:
static const constexpr char* function_name = "tuple_is_null";
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleIsNullPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleIsNullPredicate.java
index 1add0bbccc..fd92081afd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleIsNullPredicate.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleIsNullPredicate.java
@@ -24,6 +24,7 @@ import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.UserException;
import org.apache.doris.thrift.TExprNode;
import org.apache.doris.thrift.TExprNodeType;
+import org.apache.doris.thrift.TNullSide;
import org.apache.doris.thrift.TTupleIsNullPredicate;
import com.google.common.base.Joiner;
@@ -41,17 +42,22 @@ import java.util.Objects;
* The given tupleIds must be materialized and nullable at the appropriate PlanNode.
*/
public class TupleIsNullPredicate extends Predicate {
-
private List<TupleId> tupleIds = Lists.newArrayList();
+ private TNullSide nullSide = null;
public TupleIsNullPredicate(List<TupleId> tupleIds) {
Preconditions.checkState(tupleIds != null && !tupleIds.isEmpty());
this.tupleIds.addAll(tupleIds);
}
+ public TupleIsNullPredicate(TNullSide nullSide) {
+ this.nullSide = nullSide;
+ }
+
protected TupleIsNullPredicate(TupleIsNullPredicate other) {
super(other);
tupleIds.addAll(other.tupleIds);
+ nullSide = other.nullSide;
}
@Override
@@ -83,9 +89,13 @@ public class TupleIsNullPredicate extends Predicate {
protected void toThrift(TExprNode msg) {
msg.node_type = TExprNodeType.TUPLE_IS_NULL_PRED;
msg.tuple_is_null_pred = new TTupleIsNullPredicate();
+ msg.tuple_is_null_pred.setTupleIds(Lists.newArrayList());
for (TupleId tid : tupleIds) {
msg.tuple_is_null_pred.addToTupleIds(tid.asInt());
}
+ if (nullSide != null) {
+ msg.tuple_is_null_pred.setNullSide(nullSide);
+ }
}
public List<TupleId> getTupleIds() {
@@ -135,6 +145,26 @@ public class TupleIsNullPredicate extends Predicate {
return result;
}
+ /**
+ * Makes each input expr nullable, if necessary, by wrapping it as follows:
+ * IF(TupleIsNull(nullSide), NULL, expr)
+ * <p>
+ * The given inputExprs are expected to be bound
+ * by null side tuple id once fully substituted against base tables. However, inputExprs may not yet
+ * be fully substituted at this point.
+ * <p>
+ * Returns a new list with the nullable exprs. Only used in the vectorized execution engine.
+ */
+ public static List<Expr> wrapExprs(List<Expr> inputExprs,
+ TNullSide nullSide, Analyzer analyzer) throws UserException {
+ // Perform the wrapping.
+ List<Expr> result = Lists.newArrayListWithCapacity(inputExprs.size());
+ for (Expr e : inputExprs) {
+ result.add(wrapExpr(e, nullSide, analyzer));
+ }
+ return result;
+ }
+
/**
* Returns a new analyzed conditional expr 'IF(TupleIsNull(tids), NULL, expr)',
* if required to make expr nullable. Otherwise, returns expr.
@@ -162,6 +192,33 @@ public class TupleIsNullPredicate extends Predicate {
return ifExpr;
}
+ /**
+ * Returns a new analyzed conditional expr 'IF(TupleIsNull(nullSide), NULL, expr)',
+ * if required to make expr nullable. Otherwise, returns expr. Only used in the vectorized execution engine.
+ */
+ public static Expr wrapExpr(Expr expr, TNullSide nullSide, Analyzer analyzer)
+ throws UserException {
+ if (!requiresNullWrapping(expr, analyzer)) {
+ return expr;
+ }
+ List<Expr> params = Lists.newArrayList();
+ params.add(new TupleIsNullPredicate(nullSide));
+ params.add(new NullLiteral());
+ params.add(expr);
+ Expr ifExpr = new FunctionCallExpr("if", params);
+ ifExpr.analyzeNoThrow(analyzer);
+ // If the type of the IF function differs from the type of expr, the query returns an incorrect result.
+ // Example:
+ // the type of expr is date
+ // the type of function is int
+ // So, the upper fragment will receive an int value instead of a date while the result expr is date.
+ // Without the cast below, the result of the query would be incorrect.
+ if (expr.getType().getPrimitiveType() != ifExpr.getType().getPrimitiveType()) {
+ ifExpr = ifExpr.uncheckedCastTo(expr.getType());
+ }
+ return ifExpr;
+ }
+
/**
* Returns true if the given expr evaluates to a non-NULL value if all its contained
* SlotRefs evaluate to NULL, false otherwise.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java
index 78eb563ec7..f835922cb8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java
@@ -46,6 +46,7 @@ import org.apache.doris.statistics.StatsRecursiveDerive;
import org.apache.doris.thrift.TEqJoinCondition;
import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.THashJoinNode;
+import org.apache.doris.thrift.TNullSide;
import org.apache.doris.thrift.TPlanNode;
import org.apache.doris.thrift.TPlanNodeType;
@@ -486,7 +487,7 @@ public class HashJoinNode extends PlanNode {
// Then: add tuple is null in left child columns
if (leftNullable && getChild(0).tblRefIds.size() == 1 && analyzer.isInlineView(getChild(0).tblRefIds.get(0))) {
List<Expr> tupleIsNullLhs = TupleIsNullPredicate
- .wrapExprs(vSrcToOutputSMap.getLhs().subList(0, leftNullableNumber), getChild(0).getTupleIds(),
+ .wrapExprs(vSrcToOutputSMap.getLhs().subList(0, leftNullableNumber), TNullSide.LEFT,
analyzer);
tupleIsNullLhs
.addAll(vSrcToOutputSMap.getLhs().subList(leftNullableNumber, vSrcToOutputSMap.getLhs().size()));
@@ -500,7 +501,7 @@ public class HashJoinNode extends PlanNode {
int rightBeginIndex = vSrcToOutputSMap.size() - rightNullableNumber;
List<Expr> tupleIsNullLhs = TupleIsNullPredicate
.wrapExprs(vSrcToOutputSMap.getLhs().subList(rightBeginIndex, vSrcToOutputSMap.size()),
- getChild(1).getTupleIds(), analyzer);
+ TNullSide.RIGHT, analyzer);
List<Expr> newLhsList = Lists.newArrayList();
if (rightBeginIndex > 0) {
newLhsList.addAll(vSrcToOutputSMap.getLhs().subList(0, rightBeginIndex));
diff --git a/gensrc/thrift/Exprs.thrift b/gensrc/thrift/Exprs.thrift
index 50c9119410..c2211631e1 100644
--- a/gensrc/thrift/Exprs.thrift
+++ b/gensrc/thrift/Exprs.thrift
@@ -121,8 +121,14 @@ struct TLiteralPredicate {
2: required bool is_null
}
+enum TNullSide {
+ LEFT,
+ RIGHT
+}
+
struct TTupleIsNullPredicate {
1: required list<Types.TTupleId> tuple_ids
+ 2: optional TNullSide null_side
}
struct TSlotRef {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org