You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by pa...@apache.org on 2023/06/12 08:01:21 UTC
[doris] branch master updated: [Opt](join) short circuit probe for join node (#20585)
This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new ea264ce9de [Opt](join) short circuit probe for join node (#20585)
ea264ce9de is described below
commit ea264ce9de3329d661a1e89a8d33d4496118fb64
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Mon Jun 12 16:01:09 2023 +0800
[Opt](join) short circuit probe for join node (#20585)
Support the _short_circuit_for_probe strategy for join nodes.
---
be/src/pipeline/exec/operator.h | 3 ++-
be/src/vec/exec/join/vhash_join_node.cpp | 16 ++++++++--------
be/src/vec/exec/join/vhash_join_node.h | 11 +++++++++++
be/src/vec/exec/join/vjoin_node_base.h | 7 +++++++
4 files changed, 28 insertions(+), 9 deletions(-)
diff --git a/be/src/pipeline/exec/operator.h b/be/src/pipeline/exec/operator.h
index 2ef903b0e4..0a31435b8f 100644
--- a/be/src/pipeline/exec/operator.h
+++ b/be/src/pipeline/exec/operator.h
@@ -419,7 +419,8 @@ public:
return Status::OK();
}
node->prepare_for_next();
- node->push(state, _child_block.get(), _child_source_state == SourceState::FINISHED);
+ RETURN_IF_ERROR(node->push(state, _child_block.get(),
+ _child_source_state == SourceState::FINISHED));
}
if (!node->need_more_input_data()) {
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 1628af157e..2dfb93a98d 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -521,7 +521,7 @@ Status HashJoinNode::close(RuntimeState* state) {
bool HashJoinNode::need_more_input_data() const {
return (_probe_block.rows() == 0 || _probe_index == _probe_block.rows()) && !_probe_eos &&
- !_short_circuit_for_null_in_probe_side;
+ !_short_circuit_for_probe;
}
void HashJoinNode::prepare_for_next() {
@@ -531,9 +531,8 @@ void HashJoinNode::prepare_for_next() {
Status HashJoinNode::pull(doris::RuntimeState* state, vectorized::Block* output_block, bool* eos) {
SCOPED_TIMER(_probe_timer);
- if (_short_circuit_for_null_in_probe_side) {
- // If we use a short-circuit strategy for null value in build side (e.g. if join operator is
- // NULL_AWARE_LEFT_ANTI_JOIN), we should return empty block directly.
+ if (_short_circuit_for_probe) {
+ // If a short-circuit strategy applies, return an empty block directly.
*eos = true;
return Status::OK();
}
@@ -664,9 +663,8 @@ Status HashJoinNode::push(RuntimeState* /*state*/, vectorized::Block* input_bloc
Status HashJoinNode::get_next(RuntimeState* state, Block* output_block, bool* eos) {
SCOPED_TIMER(_runtime_profile->total_time_counter());
- if (_short_circuit_for_null_in_probe_side) {
- // If we use a short-circuit strategy for null value in build side (e.g. if join operator is
- // NULL_AWARE_LEFT_ANTI_JOIN), we should return empty block directly.
+ if (_short_circuit_for_probe) {
+ // If a short-circuit strategy applies, return an empty block directly.
*eos = true;
return Status::OK();
}
@@ -952,6 +950,8 @@ Status HashJoinNode::sink(doris::RuntimeState* state, vectorized::Block* in_bloc
if (!_build_blocks->empty() && _join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
_probe_ignore_null = true;
}
+ _init_short_circuit_for_probe();
+
return Status::OK();
}
@@ -960,7 +960,7 @@ void HashJoinNode::debug_string(int indentation_level, std::stringstream* out) c
*out << "HashJoin(need_more_input_data=" << (need_more_input_data() ? "true" : "false")
<< " _probe_block.rows()=" << _probe_block.rows() << " _probe_index=" << _probe_index
<< " _probe_eos=" << _probe_eos
- << " _short_circuit_for_null_in_probe_side=" << _short_circuit_for_null_in_probe_side;
+ << " _short_circuit_for_probe_side=" << _short_circuit_for_probe;
*out << ")\n children=(";
ExecNode::debug_string(indentation_level, out);
*out << ")";
diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
index 7430bd6ab1..286a0783a6 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -253,6 +253,17 @@ public:
bool should_build_hash_table() const { return _should_build_hash_table; }
private:
+ void _init_short_circuit_for_probe() override {
+ _short_circuit_for_probe =
+ (_short_circuit_for_null_in_probe_side &&
+ _join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) ||
+ (_build_blocks->empty() && _join_op == TJoinOp::INNER_JOIN && !_is_mark_join) ||
+ (_build_blocks->empty() && _join_op == TJoinOp::LEFT_SEMI_JOIN && !_is_mark_join) ||
+ (_build_blocks->empty() && _join_op == TJoinOp::RIGHT_OUTER_JOIN) ||
+ (_build_blocks->empty() && _join_op == TJoinOp::RIGHT_SEMI_JOIN) ||
+ (_build_blocks->empty() && _join_op == TJoinOp::RIGHT_ANTI_JOIN);
+ }
+
// probe expr
VExprContextSPtrs _probe_expr_ctxs;
// build expr
diff --git a/be/src/vec/exec/join/vjoin_node_base.h b/be/src/vec/exec/join/vjoin_node_base.h
index 757670b31e..ce644c159e 100644
--- a/be/src/vec/exec/join/vjoin_node_base.h
+++ b/be/src/vec/exec/join/vjoin_node_base.h
@@ -93,6 +93,8 @@ protected:
// Materialize build relation. For HashJoin, it will build a hash table while a list of build blocks for NLJoin.
virtual Status _materialize_build_side(RuntimeState* state) = 0;
+ virtual void _init_short_circuit_for_probe() { _short_circuit_for_probe = false; }
+
TJoinOp::type _join_op;
JoinOpVariants _join_op_variants;
@@ -113,6 +115,11 @@ protected:
const bool _short_circuit_for_null_in_build_side = false;
bool _short_circuit_for_null_in_probe_side = false;
+ // For some join cases, we can apply a short-circuit strategy on the probe side:
+ // 1. _short_circuit_for_null_in_probe_side is true and the join op is null-aware left anti join
+ // 2. the build side is empty and the join op is inner / left semi (non-mark join only), or right outer / right semi / right anti
+ bool _short_circuit_for_probe = false;
+
std::unique_ptr<RowDescriptor> _output_row_desc;
std::unique_ptr<RowDescriptor> _intermediate_row_desc;
// output expr
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org