You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2023/06/09 00:59:58 UTC
[doris] branch master updated: [refactor](profile) refactor the join profile when its shared hash table (#20391)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new b60860c5e5 [refactor](profile) refactor the join profile when its shared hash table (#20391)
b60860c5e5 is described below
commit b60860c5e532ae54b6965736d4c4b8602aa99f29
Author: zhangstar333 <87...@users.noreply.github.com>
AuthorDate: Fri Jun 9 08:59:49 2023 +0800
[refactor](profile) refactor the join profile when its shared hash table (#20391)
In a join node, if it is a broadcast join
with a shared hash table, some counters/timers about building the hash table are useless,
so we add those counters/timers to a faker profile, and they will not be displayed in the web profile.
---
be/src/exec/exec_node.h | 8 +++++
be/src/vec/exec/join/vhash_join_node.cpp | 62 ++++++++++++++++++--------------
2 files changed, 43 insertions(+), 27 deletions(-)
diff --git a/be/src/exec/exec_node.h b/be/src/exec/exec_node.h
index 7f709de0e6..a2abd0eacb 100644
--- a/be/src/exec/exec_node.h
+++ b/be/src/exec/exec_node.h
@@ -227,6 +227,7 @@ public:
void reached_limit(vectorized::Block* block, bool* eos);
const std::vector<TupleId>& get_tuple_ids() const { return _tuple_ids; }
+ RuntimeProfile* faker_runtime_profile() const { return _faker_runtime_profile.get(); }
RuntimeProfile* runtime_profile() const { return _runtime_profile.get(); }
RuntimeProfile::Counter* memory_used_counter() const { return _memory_used_counter; }
@@ -289,6 +290,13 @@ protected:
//
OpentelemetrySpan _span;
+ //NOTICE: a faker profile is added here because some profile records are sometimes useless
+ //and we want to remove those counters and timers, e.g. in a join node, if it is a broadcast join
+ //with a shared hash table, some counters/timers about building the hash table are useless,
+ //so we add those counters/timers to the faker profile, and they will not be displayed in the web profile.
+ std::unique_ptr<RuntimeProfile> _faker_runtime_profile =
+ std::make_unique<RuntimeProfile>("faker profile");
+
// Execution options that are determined at runtime. This is added to the
// runtime profile at close(). Examples for options logged here would be
// "Codegen Enabled"
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 670953252d..c2b9d915ec 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -28,6 +28,7 @@
#include <boost/iterator/iterator_facade.hpp>
#include <functional>
#include <map>
+#include <memory>
#include <new>
#include <ostream>
#include <type_traits>
@@ -421,9 +422,30 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) {
Status HashJoinNode::prepare(RuntimeState* state) {
RETURN_IF_ERROR(VJoinNodeBase::prepare(state));
+ _should_build_hash_table = true;
+ if (_is_broadcast_join) {
+ runtime_profile()->add_info_string("BroadcastJoin", "true");
+ if (state->enable_share_hash_table_for_broadcast_join()) {
+ runtime_profile()->add_info_string("ShareHashTableEnabled", "true");
+ _shared_hashtable_controller =
+ state->get_query_ctx()->get_shared_hash_table_controller();
+ _shared_hash_table_context = _shared_hashtable_controller->get_context(id());
+ _should_build_hash_table = _shared_hashtable_controller->should_build_hash_table(
+ state->fragment_instance_id(), id());
+ } else {
+ runtime_profile()->add_info_string("ShareHashTableEnabled", "false");
+ }
+ }
+
+ //some profile records of the build phase are useless when the hash table is shared, so add them to the faker profile
+ RuntimeProfile* memory_usage = nullptr;
+ if (_should_build_hash_table) {
+ memory_usage = runtime_profile()->create_child("PeakMemoryUsage", true, true);
+ runtime_profile()->add_child(memory_usage, false, nullptr);
+ } else {
+ memory_usage = faker_runtime_profile();
+ }
- auto* memory_usage = runtime_profile()->create_child("PeakMemoryUsage", true, true);
- runtime_profile()->add_child(memory_usage, false, nullptr);
_build_blocks_memory_usage = ADD_COUNTER(memory_usage, "BuildBlocks", TUnit::BYTES);
_hash_table_memory_usage = ADD_COUNTER(memory_usage, "HashTable", TUnit::BYTES);
_build_arena_memory_usage =
@@ -436,16 +458,17 @@ Status HashJoinNode::prepare(RuntimeState* state) {
runtime_profile()->add_child(_build_phase_profile, false, nullptr);
_build_get_next_timer = ADD_TIMER(_build_phase_profile, "BuildGetNextTime");
_build_timer = ADD_TIMER(_build_phase_profile, "BuildTime");
- _build_table_timer = ADD_TIMER(_build_phase_profile, "BuildTableTime");
- _build_side_merge_block_timer = ADD_TIMER(_build_phase_profile, "BuildSideMergeBlockTime");
- _build_table_insert_timer = ADD_TIMER(_build_phase_profile, "BuildTableInsertTime");
- _build_expr_call_timer = ADD_TIMER(_build_phase_profile, "BuildExprCallTime");
- _build_table_expanse_timer = ADD_TIMER(_build_phase_profile, "BuildTableExpanseTime");
- _build_table_convert_timer =
- ADD_TIMER(_build_phase_profile, "BuildTableConvertToPartitionedTime");
- _build_rows_counter = ADD_COUNTER(_build_phase_profile, "BuildRows", TUnit::UNIT);
- _build_side_compute_hash_timer = ADD_TIMER(_build_phase_profile, "BuildSideHashComputingTime");
- _build_runtime_filter_timer = ADD_TIMER(_build_phase_profile, "BuildRuntimeFilterTime");
+
+ auto record_profile = _should_build_hash_table ? _build_phase_profile : faker_runtime_profile();
+ _build_table_timer = ADD_TIMER(record_profile, "BuildTableTime");
+ _build_side_merge_block_timer = ADD_TIMER(record_profile, "BuildSideMergeBlockTime");
+ _build_table_insert_timer = ADD_TIMER(record_profile, "BuildTableInsertTime");
+ _build_expr_call_timer = ADD_TIMER(record_profile, "BuildExprCallTime");
+ _build_table_expanse_timer = ADD_TIMER(record_profile, "BuildTableExpanseTime");
+ _build_table_convert_timer = ADD_TIMER(record_profile, "BuildTableConvertToPartitionedTime");
+ _build_rows_counter = ADD_COUNTER(record_profile, "BuildRows", TUnit::UNIT);
+ _build_side_compute_hash_timer = ADD_TIMER(record_profile, "BuildSideHashComputingTime");
+ _build_runtime_filter_timer = ADD_TIMER(record_profile, "BuildRuntimeFilterTime");
// Probe phase
auto probe_phase_profile = runtime_profile()->create_child("ProbePhase", true, true);
@@ -466,21 +489,6 @@ Status HashJoinNode::prepare(RuntimeState* state) {
_build_buckets_counter = ADD_COUNTER(runtime_profile(), "BuildBuckets", TUnit::UNIT);
_build_buckets_fill_counter = ADD_COUNTER(runtime_profile(), "FilledBuckets", TUnit::UNIT);
- _should_build_hash_table = true;
- if (_is_broadcast_join) {
- runtime_profile()->add_info_string("BroadcastJoin", "true");
- if (state->enable_share_hash_table_for_broadcast_join()) {
- runtime_profile()->add_info_string("ShareHashTableEnabled", "true");
- _shared_hashtable_controller =
- state->get_query_ctx()->get_shared_hash_table_controller();
- _shared_hash_table_context = _shared_hashtable_controller->get_context(id());
- _should_build_hash_table = _shared_hashtable_controller->should_build_hash_table(
- state->fragment_instance_id(), id());
- } else {
- runtime_profile()->add_info_string("ShareHashTableEnabled", "false");
- }
- }
-
RETURN_IF_ERROR(VExpr::prepare(_build_expr_ctxs, state, child(1)->row_desc()));
RETURN_IF_ERROR(VExpr::prepare(_probe_expr_ctxs, state, child(0)->row_desc()));
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org