You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by kx...@apache.org on 2023/07/06 12:01:17 UTC

[doris] 12/20: [profile](join) add collisions into profile (#21510)

This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 2a2d828daa7f25de27c282ff4485835e5cd6fb63
Author: Gabriel <ga...@gmail.com>
AuthorDate: Thu Jul 6 14:30:10 2023 +0800

     [profile](join) add collisions into profile (#21510)
---
 be/src/vec/common/hash_table/hash_table.h             | 10 +---------
 be/src/vec/common/hash_table/partitioned_hash_table.h |  8 ++++++++
 be/src/vec/exec/join/process_hash_table_probe_impl.h  |  1 +
 be/src/vec/exec/join/vhash_join_node.cpp              |  4 ++++
 be/src/vec/exec/join/vhash_join_node.h                |  2 ++
 5 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h
index 0b9d6bc3e1..a460f0bc2a 100644
--- a/be/src/vec/common/hash_table/hash_table.h
+++ b/be/src/vec/common/hash_table/hash_table.h
@@ -462,9 +462,7 @@ protected:
     //factor that will trigger growing the hash table on insert.
     static constexpr float MAX_BUCKET_OCCUPANCY_FRACTION = 0.5f;
 
-#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
     mutable size_t collisions = 0;
-#endif
 
     void set_partitioned_threshold(int threshold) { _partitioned_threshold = threshold; }
 
@@ -479,9 +477,7 @@ protected:
         while (!buf[place_value].is_zero(*this) &&
                !buf[place_value].key_equals(x, hash_value, *this)) {
             place_value = grower.next(place_value);
-#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
             ++collisions;
-#endif
         }
 
         return place_value;
@@ -503,9 +499,7 @@ protected:
     size_t ALWAYS_INLINE find_empty_cell(size_t place_value) const {
         while (!buf[place_value].is_zero(*this)) {
             place_value = grower.next(place_value);
-#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
             ++collisions;
-#endif
         }
 
         return place_value;
@@ -1090,9 +1084,7 @@ public:
     bool add_elem_size_overflow(size_t add_size) const {
         return grower.overflow(add_size + m_size);
     }
-#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
-    size_t getCollisions() const { return collisions; }
-#endif
+    int64_t get_collisions() const { return collisions; } // occupied cells skipped while probing
 
 private:
     /// Increase the size of the buffer.
diff --git a/be/src/vec/common/hash_table/partitioned_hash_table.h b/be/src/vec/common/hash_table/partitioned_hash_table.h
index c7cdbf684d..9990c4491e 100644
--- a/be/src/vec/common/hash_table/partitioned_hash_table.h
+++ b/be/src/vec/common/hash_table/partitioned_hash_table.h
@@ -153,6 +153,14 @@ public:
         }
     }
 
+    int64_t get_collisions() const { // total collisions over level-0 and all level-1 sub-tables
+        int64_t total = level0_sub_table.get_collisions(); // signed, matching the return type
+        for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; i++) {
+            total += level1_sub_tables[i].get_collisions();
+        }
+        return total;
+    }
+
     size_t get_buffer_size_in_bytes() const {
         if (_is_partitioned) {
             size_t buff_size = 0;
diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h b/be/src/vec/exec/join/process_hash_table_probe_impl.h
index 5923dbf1c5..341adcbfc5 100644
--- a/be/src/vec/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h
@@ -473,6 +473,7 @@ Status ProcessHashTableProbe<JoinOpType>::do_process_with_other_join_conjuncts(
         auto& probe_row_match_iter =
                 std::get<ForwardIterator<Mapped>>(_join_node->_probe_row_match_iter);
         if (probe_row_match_iter.ok()) {
+            SCOPED_TIMER(_search_hashtable_timer);
             auto origin_offset = current_offset;
             for (; probe_row_match_iter.ok() && current_offset < _batch_size;
                  ++probe_row_match_iter) {
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 33a4e6e35e..05976acadc 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -120,6 +120,8 @@ struct ProcessHashTableBuild {
             int64_t bucket_bytes = hash_table_ctx.hash_table.get_buffer_size_in_bytes();
             COUNTER_SET(_join_node->_hash_table_memory_usage, bucket_bytes);
             COUNTER_SET(_join_node->_build_buckets_counter, bucket_size);
+            COUNTER_SET(_join_node->_build_collisions_counter,
+                        hash_table_ctx.hash_table.get_collisions());
             COUNTER_SET(_join_node->_build_buckets_fill_counter, filled_bucket_size);
 
             auto hash_table_buckets = hash_table_ctx.hash_table.get_buffer_sizes_in_cells();
@@ -476,6 +478,8 @@ Status HashJoinNode::prepare(RuntimeState* state) {
     _build_buckets_counter = ADD_COUNTER(runtime_profile(), "BuildBuckets", TUnit::UNIT);
     _build_buckets_fill_counter = ADD_COUNTER(runtime_profile(), "FilledBuckets", TUnit::UNIT);
 
+    _build_collisions_counter = ADD_COUNTER(runtime_profile(), "BuildCollisions", TUnit::UNIT);
+
     RETURN_IF_ERROR(VExpr::prepare(_build_expr_ctxs, state, child(1)->row_desc()));
     RETURN_IF_ERROR(VExpr::prepare(_probe_expr_ctxs, state, child(0)->row_desc()));
 
diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
index 671d0d4170..3d85d6b227 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -308,6 +308,8 @@ private:
     RuntimeProfile::Counter* _build_side_merge_block_timer;
     RuntimeProfile::Counter* _build_runtime_filter_timer;
 
+    RuntimeProfile::Counter* _build_collisions_counter;
+
     RuntimeProfile::Counter* _open_timer;
     RuntimeProfile::Counter* _allocate_resource_timer;
     RuntimeProfile::Counter* _process_other_join_conjunct_timer;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org