You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by pa...@apache.org on 2023/06/12 08:01:21 UTC

[doris] branch master updated: [Opt](join) short circuit probe for join node (#20585)

This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ea264ce9de [Opt](join) short circuit probe for join node (#20585)
ea264ce9de is described below

commit ea264ce9de3329d661a1e89a8d33d4496118fb64
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Mon Jun 12 16:01:09 2023 +0800

    [Opt](join) short circuit probe for join node (#20585)
    
    Support the _short_circuit_for_probe for join node
---
 be/src/pipeline/exec/operator.h          |  3 ++-
 be/src/vec/exec/join/vhash_join_node.cpp | 16 ++++++++--------
 be/src/vec/exec/join/vhash_join_node.h   | 11 +++++++++++
 be/src/vec/exec/join/vjoin_node_base.h   |  7 +++++++
 4 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/be/src/pipeline/exec/operator.h b/be/src/pipeline/exec/operator.h
index 2ef903b0e4..0a31435b8f 100644
--- a/be/src/pipeline/exec/operator.h
+++ b/be/src/pipeline/exec/operator.h
@@ -419,7 +419,8 @@ public:
                 return Status::OK();
             }
             node->prepare_for_next();
-            node->push(state, _child_block.get(), _child_source_state == SourceState::FINISHED);
+            RETURN_IF_ERROR(node->push(state, _child_block.get(),
+                                       _child_source_state == SourceState::FINISHED));
         }
 
         if (!node->need_more_input_data()) {
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 1628af157e..2dfb93a98d 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -521,7 +521,7 @@ Status HashJoinNode::close(RuntimeState* state) {
 
 bool HashJoinNode::need_more_input_data() const {
     return (_probe_block.rows() == 0 || _probe_index == _probe_block.rows()) && !_probe_eos &&
-           !_short_circuit_for_null_in_probe_side;
+           !_short_circuit_for_probe;
 }
 
 void HashJoinNode::prepare_for_next() {
@@ -531,9 +531,8 @@ void HashJoinNode::prepare_for_next() {
 
 Status HashJoinNode::pull(doris::RuntimeState* state, vectorized::Block* output_block, bool* eos) {
     SCOPED_TIMER(_probe_timer);
-    if (_short_circuit_for_null_in_probe_side) {
-        // If we use a short-circuit strategy for null value in build side (e.g. if join operator is
-        // NULL_AWARE_LEFT_ANTI_JOIN), we should return empty block directly.
+    if (_short_circuit_for_probe) {
+        // If we use a short-circuit strategy, we should return an empty block directly.
         *eos = true;
         return Status::OK();
     }
@@ -664,9 +663,8 @@ Status HashJoinNode::push(RuntimeState* /*state*/, vectorized::Block* input_bloc
 Status HashJoinNode::get_next(RuntimeState* state, Block* output_block, bool* eos) {
     SCOPED_TIMER(_runtime_profile->total_time_counter());
 
-    if (_short_circuit_for_null_in_probe_side) {
-        // If we use a short-circuit strategy for null value in build side (e.g. if join operator is
-        // NULL_AWARE_LEFT_ANTI_JOIN), we should return empty block directly.
+    if (_short_circuit_for_probe) {
+        // If we use a short-circuit strategy, we should return an empty block directly.
         *eos = true;
         return Status::OK();
     }
@@ -952,6 +950,8 @@ Status HashJoinNode::sink(doris::RuntimeState* state, vectorized::Block* in_bloc
     if (!_build_blocks->empty() && _join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
         _probe_ignore_null = true;
     }
+    _init_short_circuit_for_probe();
+
     return Status::OK();
 }
 
@@ -960,7 +960,7 @@ void HashJoinNode::debug_string(int indentation_level, std::stringstream* out) c
     *out << "HashJoin(need_more_input_data=" << (need_more_input_data() ? "true" : "false")
          << " _probe_block.rows()=" << _probe_block.rows() << " _probe_index=" << _probe_index
          << " _probe_eos=" << _probe_eos
-         << " _short_circuit_for_null_in_probe_side=" << _short_circuit_for_null_in_probe_side;
+         << " _short_circuit_for_probe_side=" << _short_circuit_for_probe;
     *out << ")\n children=(";
     ExecNode::debug_string(indentation_level, out);
     *out << ")";
diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
index 7430bd6ab1..286a0783a6 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -253,6 +253,17 @@ public:
     bool should_build_hash_table() const { return _should_build_hash_table; }
 
 private:
+    void _init_short_circuit_for_probe() override {
+        _short_circuit_for_probe =
+                (_short_circuit_for_null_in_probe_side &&
+                 _join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) ||
+                (_build_blocks->empty() && _join_op == TJoinOp::INNER_JOIN && !_is_mark_join) ||
+                (_build_blocks->empty() && _join_op == TJoinOp::LEFT_SEMI_JOIN && !_is_mark_join) ||
+                (_build_blocks->empty() && _join_op == TJoinOp::RIGHT_OUTER_JOIN) ||
+                (_build_blocks->empty() && _join_op == TJoinOp::RIGHT_SEMI_JOIN) ||
+                (_build_blocks->empty() && _join_op == TJoinOp::RIGHT_ANTI_JOIN);
+    }
+
     // probe expr
     VExprContextSPtrs _probe_expr_ctxs;
     // build expr
diff --git a/be/src/vec/exec/join/vjoin_node_base.h b/be/src/vec/exec/join/vjoin_node_base.h
index 757670b31e..ce644c159e 100644
--- a/be/src/vec/exec/join/vjoin_node_base.h
+++ b/be/src/vec/exec/join/vjoin_node_base.h
@@ -93,6 +93,8 @@ protected:
     // Materialize build relation. For HashJoin, it will build a hash table while a list of build blocks for NLJoin.
     virtual Status _materialize_build_side(RuntimeState* state) = 0;
 
+    virtual void _init_short_circuit_for_probe() { _short_circuit_for_probe = false; }
+
     TJoinOp::type _join_op;
     JoinOpVariants _join_op_variants;
 
@@ -113,6 +115,11 @@ protected:
     const bool _short_circuit_for_null_in_build_side = false;
     bool _short_circuit_for_null_in_probe_side = false;
 
+    // For some join cases, we can apply a short-circuit strategy:
+    // 1. _short_circuit_for_null_in_probe_side = true and the join op is NULL_AWARE_LEFT_ANTI_JOIN
+    // 2. the build side is empty and the join op is: inner join/left semi (non-mark join only)/right outer join/right semi/right anti
+    bool _short_circuit_for_probe = false;
+
     std::unique_ptr<RowDescriptor> _output_row_desc;
     std::unique_ptr<RowDescriptor> _intermediate_row_desc;
     // output expr


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org