You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by kx...@apache.org on 2023/07/06 12:01:17 UTC
[doris] 12/20: [profile](join) add collisions into profile (#21510)
This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
commit 2a2d828daa7f25de27c282ff4485835e5cd6fb63
Author: Gabriel <ga...@gmail.com>
AuthorDate: Thu Jul 6 14:30:10 2023 +0800
[profile](join) add collisions into profile (#21510)
---
be/src/vec/common/hash_table/hash_table.h | 10 +---------
be/src/vec/common/hash_table/partitioned_hash_table.h | 8 ++++++++
be/src/vec/exec/join/process_hash_table_probe_impl.h | 1 +
be/src/vec/exec/join/vhash_join_node.cpp | 4 ++++
be/src/vec/exec/join/vhash_join_node.h | 2 ++
5 files changed, 16 insertions(+), 9 deletions(-)
diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h
index 0b9d6bc3e1..a460f0bc2a 100644
--- a/be/src/vec/common/hash_table/hash_table.h
+++ b/be/src/vec/common/hash_table/hash_table.h
@@ -462,9 +462,7 @@ protected:
//factor that will trigger growing the hash table on insert.
static constexpr float MAX_BUCKET_OCCUPANCY_FRACTION = 0.5f;
-#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
mutable size_t collisions = 0;
-#endif
void set_partitioned_threshold(int threshold) { _partitioned_threshold = threshold; }
@@ -479,9 +477,7 @@ protected:
while (!buf[place_value].is_zero(*this) &&
!buf[place_value].key_equals(x, hash_value, *this)) {
place_value = grower.next(place_value);
-#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
++collisions;
-#endif
}
return place_value;
@@ -503,9 +499,7 @@ protected:
size_t ALWAYS_INLINE find_empty_cell(size_t place_value) const {
while (!buf[place_value].is_zero(*this)) {
place_value = grower.next(place_value);
-#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
++collisions;
-#endif
}
return place_value;
@@ -1090,9 +1084,7 @@ public:
bool add_elem_size_overflow(size_t add_size) const {
return grower.overflow(add_size + m_size);
}
-#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
- size_t getCollisions() const { return collisions; }
-#endif
+ int64_t get_collisions() const { return static_cast<int64_t>(collisions); } // size_t member, reported as int64_t
private:
/// Increase the size of the buffer.
diff --git a/be/src/vec/common/hash_table/partitioned_hash_table.h b/be/src/vec/common/hash_table/partitioned_hash_table.h
index c7cdbf684d..9990c4491e 100644
--- a/be/src/vec/common/hash_table/partitioned_hash_table.h
+++ b/be/src/vec/common/hash_table/partitioned_hash_table.h
@@ -153,6 +153,14 @@ public:
}
}
+ int64_t get_collisions() const { // sum of collisions over the level-0 table and every level-1 sub table
+ int64_t collisions = level0_sub_table.get_collisions(); // signed accumulator, same type as the addends and the return value
+ for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; i++) {
+ collisions += level1_sub_tables[i].get_collisions();
+ }
+ return collisions;
+ }
+
size_t get_buffer_size_in_bytes() const {
if (_is_partitioned) {
size_t buff_size = 0;
diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h b/be/src/vec/exec/join/process_hash_table_probe_impl.h
index 5923dbf1c5..341adcbfc5 100644
--- a/be/src/vec/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h
@@ -473,6 +473,7 @@ Status ProcessHashTableProbe<JoinOpType>::do_process_with_other_join_conjuncts(
auto& probe_row_match_iter =
std::get<ForwardIterator<Mapped>>(_join_node->_probe_row_match_iter);
if (probe_row_match_iter.ok()) {
+ SCOPED_TIMER(_search_hashtable_timer);
auto origin_offset = current_offset;
for (; probe_row_match_iter.ok() && current_offset < _batch_size;
++probe_row_match_iter) {
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 33a4e6e35e..05976acadc 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -120,6 +120,8 @@ struct ProcessHashTableBuild {
int64_t bucket_bytes = hash_table_ctx.hash_table.get_buffer_size_in_bytes();
COUNTER_SET(_join_node->_hash_table_memory_usage, bucket_bytes);
COUNTER_SET(_join_node->_build_buckets_counter, bucket_size);
+ COUNTER_SET(_join_node->_build_collisions_counter,
+ hash_table_ctx.hash_table.get_collisions());
COUNTER_SET(_join_node->_build_buckets_fill_counter, filled_bucket_size);
auto hash_table_buckets = hash_table_ctx.hash_table.get_buffer_sizes_in_cells();
@@ -476,6 +478,8 @@ Status HashJoinNode::prepare(RuntimeState* state) {
_build_buckets_counter = ADD_COUNTER(runtime_profile(), "BuildBuckets", TUnit::UNIT);
_build_buckets_fill_counter = ADD_COUNTER(runtime_profile(), "FilledBuckets", TUnit::UNIT);
+ _build_collisions_counter = ADD_COUNTER(runtime_profile(), "BuildCollisions", TUnit::UNIT);
+
RETURN_IF_ERROR(VExpr::prepare(_build_expr_ctxs, state, child(1)->row_desc()));
RETURN_IF_ERROR(VExpr::prepare(_probe_expr_ctxs, state, child(0)->row_desc()));
diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
index 671d0d4170..3d85d6b227 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -308,6 +308,8 @@ private:
RuntimeProfile::Counter* _build_side_merge_block_timer;
RuntimeProfile::Counter* _build_runtime_filter_timer;
+ RuntimeProfile::Counter* _build_collisions_counter;
+
RuntimeProfile::Counter* _open_timer;
RuntimeProfile::Counter* _allocate_resource_timer;
RuntimeProfile::Counter* _process_other_join_conjunct_timer;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org