You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2023/06/09 00:59:58 UTC

[doris] branch master updated: [refactor](profile) refactor the join profile when its shared hash table (#20391)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new b60860c5e5 [refactor](profile) refactor the join profile when its shared hash table (#20391)
b60860c5e5 is described below

commit b60860c5e532ae54b6965736d4c4b8602aa99f29
Author: zhangstar333 <87...@users.noreply.github.com>
AuthorDate: Fri Jun 9 08:59:49 2023 +0800

    [refactor](profile) refactor the join profile when its shared hash table (#20391)
    
    In a join node that is a broadcast join with a shared hash table,
    some counters/timers about building the hash table are useless,
    so we add those counters/timers to a faker profile, and they will not be displayed in the web profile.
---
 be/src/exec/exec_node.h                  |  8 +++++
 be/src/vec/exec/join/vhash_join_node.cpp | 62 ++++++++++++++++++--------------
 2 files changed, 43 insertions(+), 27 deletions(-)

diff --git a/be/src/exec/exec_node.h b/be/src/exec/exec_node.h
index 7f709de0e6..a2abd0eacb 100644
--- a/be/src/exec/exec_node.h
+++ b/be/src/exec/exec_node.h
@@ -227,6 +227,7 @@ public:
     void reached_limit(vectorized::Block* block, bool* eos);
     const std::vector<TupleId>& get_tuple_ids() const { return _tuple_ids; }
 
+    RuntimeProfile* faker_runtime_profile() const { return _faker_runtime_profile.get(); }
     RuntimeProfile* runtime_profile() const { return _runtime_profile.get(); }
     RuntimeProfile::Counter* memory_used_counter() const { return _memory_used_counter; }
 
@@ -289,6 +290,13 @@ protected:
     //
     OpentelemetrySpan _span;
 
+    // NOTICE: a faker profile is added because some profile records are sometimes useless
+    // and we want to hide those counters and timers. E.g. in a join node that is a
+    // broadcast join with a shared hash table, some counters/timers about building the hash
+    // table are useless, so they are added to the faker profile and not displayed in the web profile.
+    std::unique_ptr<RuntimeProfile> _faker_runtime_profile =
+            std::make_unique<RuntimeProfile>("faker profile");
+
     // Execution options that are determined at runtime.  This is added to the
     // runtime profile at close().  Examples for options logged here would be
     // "Codegen Enabled"
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 670953252d..c2b9d915ec 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -28,6 +28,7 @@
 #include <boost/iterator/iterator_facade.hpp>
 #include <functional>
 #include <map>
+#include <memory>
 #include <new>
 #include <ostream>
 #include <type_traits>
@@ -421,9 +422,30 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) {
 
 Status HashJoinNode::prepare(RuntimeState* state) {
     RETURN_IF_ERROR(VJoinNodeBase::prepare(state));
+    _should_build_hash_table = true;
+    if (_is_broadcast_join) {
+        runtime_profile()->add_info_string("BroadcastJoin", "true");
+        if (state->enable_share_hash_table_for_broadcast_join()) {
+            runtime_profile()->add_info_string("ShareHashTableEnabled", "true");
+            _shared_hashtable_controller =
+                    state->get_query_ctx()->get_shared_hash_table_controller();
+            _shared_hash_table_context = _shared_hashtable_controller->get_context(id());
+            _should_build_hash_table = _shared_hashtable_controller->should_build_hash_table(
+                    state->fragment_instance_id(), id());
+        } else {
+            runtime_profile()->add_info_string("ShareHashTableEnabled", "false");
+        }
+    }
+
+    // Some build-phase profile records are useless when the hash table is shared, so add them to the faker profile.
+    RuntimeProfile* memory_usage = nullptr;
+    if (_should_build_hash_table) {
+        memory_usage = runtime_profile()->create_child("PeakMemoryUsage", true, true);
+        runtime_profile()->add_child(memory_usage, false, nullptr);
+    } else {
+        memory_usage = faker_runtime_profile();
+    }
 
-    auto* memory_usage = runtime_profile()->create_child("PeakMemoryUsage", true, true);
-    runtime_profile()->add_child(memory_usage, false, nullptr);
     _build_blocks_memory_usage = ADD_COUNTER(memory_usage, "BuildBlocks", TUnit::BYTES);
     _hash_table_memory_usage = ADD_COUNTER(memory_usage, "HashTable", TUnit::BYTES);
     _build_arena_memory_usage =
@@ -436,16 +458,17 @@ Status HashJoinNode::prepare(RuntimeState* state) {
     runtime_profile()->add_child(_build_phase_profile, false, nullptr);
     _build_get_next_timer = ADD_TIMER(_build_phase_profile, "BuildGetNextTime");
     _build_timer = ADD_TIMER(_build_phase_profile, "BuildTime");
-    _build_table_timer = ADD_TIMER(_build_phase_profile, "BuildTableTime");
-    _build_side_merge_block_timer = ADD_TIMER(_build_phase_profile, "BuildSideMergeBlockTime");
-    _build_table_insert_timer = ADD_TIMER(_build_phase_profile, "BuildTableInsertTime");
-    _build_expr_call_timer = ADD_TIMER(_build_phase_profile, "BuildExprCallTime");
-    _build_table_expanse_timer = ADD_TIMER(_build_phase_profile, "BuildTableExpanseTime");
-    _build_table_convert_timer =
-            ADD_TIMER(_build_phase_profile, "BuildTableConvertToPartitionedTime");
-    _build_rows_counter = ADD_COUNTER(_build_phase_profile, "BuildRows", TUnit::UNIT);
-    _build_side_compute_hash_timer = ADD_TIMER(_build_phase_profile, "BuildSideHashComputingTime");
-    _build_runtime_filter_timer = ADD_TIMER(_build_phase_profile, "BuildRuntimeFilterTime");
+
+    auto record_profile = _should_build_hash_table ? _build_phase_profile : faker_runtime_profile();
+    _build_table_timer = ADD_TIMER(record_profile, "BuildTableTime");
+    _build_side_merge_block_timer = ADD_TIMER(record_profile, "BuildSideMergeBlockTime");
+    _build_table_insert_timer = ADD_TIMER(record_profile, "BuildTableInsertTime");
+    _build_expr_call_timer = ADD_TIMER(record_profile, "BuildExprCallTime");
+    _build_table_expanse_timer = ADD_TIMER(record_profile, "BuildTableExpanseTime");
+    _build_table_convert_timer = ADD_TIMER(record_profile, "BuildTableConvertToPartitionedTime");
+    _build_rows_counter = ADD_COUNTER(record_profile, "BuildRows", TUnit::UNIT);
+    _build_side_compute_hash_timer = ADD_TIMER(record_profile, "BuildSideHashComputingTime");
+    _build_runtime_filter_timer = ADD_TIMER(record_profile, "BuildRuntimeFilterTime");
 
     // Probe phase
     auto probe_phase_profile = runtime_profile()->create_child("ProbePhase", true, true);
@@ -466,21 +489,6 @@ Status HashJoinNode::prepare(RuntimeState* state) {
     _build_buckets_counter = ADD_COUNTER(runtime_profile(), "BuildBuckets", TUnit::UNIT);
     _build_buckets_fill_counter = ADD_COUNTER(runtime_profile(), "FilledBuckets", TUnit::UNIT);
 
-    _should_build_hash_table = true;
-    if (_is_broadcast_join) {
-        runtime_profile()->add_info_string("BroadcastJoin", "true");
-        if (state->enable_share_hash_table_for_broadcast_join()) {
-            runtime_profile()->add_info_string("ShareHashTableEnabled", "true");
-            _shared_hashtable_controller =
-                    state->get_query_ctx()->get_shared_hash_table_controller();
-            _shared_hash_table_context = _shared_hashtable_controller->get_context(id());
-            _should_build_hash_table = _shared_hashtable_controller->should_build_hash_table(
-                    state->fragment_instance_id(), id());
-        } else {
-            runtime_profile()->add_info_string("ShareHashTableEnabled", "false");
-        }
-    }
-
     RETURN_IF_ERROR(VExpr::prepare(_build_expr_ctxs, state, child(1)->row_desc()));
     RETURN_IF_ERROR(VExpr::prepare(_probe_expr_ctxs, state, child(0)->row_desc()));
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org