You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ga...@apache.org on 2022/10/13 08:05:08 UTC

[doris] branch master updated: [Improvement](join) compute hash values by vectorized way (#13335)

This is an automated email from the ASF dual-hosted git repository.

gabriellee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new baf2689610 [Improvement](join) compute hash values by vectorized way (#13335)
baf2689610 is described below

commit baf2689610f09577c2552983bf0d09266e5623e8
Author: Gabriel <ga...@gmail.com>
AuthorDate: Thu Oct 13 16:04:58 2022 +0800

    [Improvement](join) compute hash values by vectorized way (#13335)
---
 be/src/vec/common/columns_hashing.h       |  6 ++---
 be/src/vec/common/columns_hashing_impl.h  |  5 ++++
 be/src/vec/common/hash_table/hash_table.h | 15 +++++++++++
 be/src/vec/exec/join/vhash_join_node.cpp  | 42 +++++++++++++++++++++++++------
 be/src/vec/exec/join/vhash_join_node.h    | 12 +++++++++
 5 files changed, 70 insertions(+), 10 deletions(-)

diff --git a/be/src/vec/common/columns_hashing.h b/be/src/vec/common/columns_hashing.h
index cc3e634d67..dfecead77e 100644
--- a/be/src/vec/common/columns_hashing.h
+++ b/be/src/vec/common/columns_hashing.h
@@ -131,9 +131,6 @@ struct HashMethodSerialized
 
     void set_serialized_keys(const StringRef* keys_) { keys = keys_; }
 
-protected:
-    friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
-
     ALWAYS_INLINE KeyHolderType get_key_holder(size_t row, Arena& pool) const {
         if constexpr (keys_pre_serialized) {
             return KeyHolderType {keys[row], pool};
@@ -142,6 +139,9 @@ protected:
                     serialize_keys_to_pool_contiguous(row, keys_size, key_columns, pool), pool};
         }
     }
+
+protected:
+    friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
 };
 
 template <typename HashMethod>
diff --git a/be/src/vec/common/columns_hashing_impl.h b/be/src/vec/common/columns_hashing_impl.h
index e2f03f26c6..13cc375c82 100644
--- a/be/src/vec/common/columns_hashing_impl.h
+++ b/be/src/vec/common/columns_hashing_impl.h
@@ -186,6 +186,11 @@ public:
         data.template prefetch<READ>(key_holder);
     }
 
+    template <bool READ, typename Data> // READ is forwarded to data unchanged
+    ALWAYS_INLINE void prefetch_by_hash(Data& data, size_t hash_value) {
+        data.template prefetch_by_hash<READ>(hash_value); // uses the given hash; no key access
+    }
+
 protected:
     Cache cache;
 
diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h
index e588ed1b8a..8a10634304 100644
--- a/be/src/vec/common/hash_table/hash_table.h
+++ b/be/src/vec/common/hash_table/hash_table.h
@@ -908,6 +908,15 @@ public:
         __builtin_prefetch(&buf[place_value]);
     }
 
+    template <bool READ>
+    void ALWAYS_INLINE prefetch_by_hash(size_t hash_value) {
+        // __builtin_prefetch takes two optional arguments after the address:
+        // 'rw': 1 means the memory access is a write, 0 (passed when READ is true) a read.
+        // 'locality': 0-3. 0 means no temporal locality, 3 means high; 1 is passed here.
+        auto place_value = grower.place(hash_value); // index into buf derived from the hash
+        __builtin_prefetch(&buf[place_value], READ ? 0 : 1, 1);
+    }
+
     template <bool READ, typename KeyHolder>
     void ALWAYS_INLINE prefetch(KeyHolder& key_holder) {
         // Two optional arguments:
@@ -967,6 +976,12 @@ public:
             emplace_non_zero(key_holder, it, inserted, hash_value);
     }
 
+    template <typename KeyHolder> // argument-order adapter: hash_value comes before inserted
+    void ALWAYS_INLINE emplace(KeyHolder&& key_holder, LookupResult& it, size_t hash_value,
+                               bool& inserted) {
+        emplace(key_holder, it, inserted, hash_value); // callee order: inserted, hash_value
+    }
+
     template <typename KeyHolder, typename Func>
     void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, Func&& f) {
         const auto& key = key_holder_get_key(key_holder);
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index dd77bc2a18..7134ec464d 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -45,7 +45,8 @@ struct ProcessHashTableBuild {
               _build_raw_ptrs(build_raw_ptrs),
               _join_node(join_node),
               _batch_size(batch_size),
-              _offset(offset) {}
+              _offset(offset),
+              _build_side_compute_hash_timer(join_node->_build_side_compute_hash_timer) {}
 
     template <bool ignore_null, bool build_unique, bool has_runtime_filter>
     void run(HashTableContext& hash_table_ctx, ConstNullMapPtr null_map) {
@@ -75,6 +76,26 @@ struct ProcessHashTableBuild {
             inserted_rows.reserve(_batch_size);
         }
 
+        _build_side_hash_values.resize(_rows);
+        auto& arena = _join_node->_arena;
+        {
+            SCOPED_TIMER(_build_side_compute_hash_timer);
+            for (size_t k = 0; k < _rows; ++k) {
+                if constexpr (ignore_null) {
+                    if ((*null_map)[k]) {
+                        continue;
+                    }
+                }
+                if constexpr (IsSerializedHashTableContextTraits<KeyGetter>::value) {
+                    _build_side_hash_values[k] =
+                            hash_table_ctx.hash_table.hash(key_getter.get_key_holder(k, arena).key);
+                } else {
+                    _build_side_hash_values[k] =
+                            hash_table_ctx.hash_table.hash(key_getter.get_key_holder(k, arena));
+                }
+            }
+        }
+
         for (size_t k = 0; k < _rows; ++k) {
             if constexpr (ignore_null) {
                 if ((*null_map)[k]) {
@@ -82,11 +103,11 @@ struct ProcessHashTableBuild {
                 }
             }
 
-            auto emplace_result =
-                    key_getter.emplace_key(hash_table_ctx.hash_table, k, _join_node->_arena);
+            auto emplace_result = key_getter.emplace_key(hash_table_ctx.hash_table,
+                                                         _build_side_hash_values[k], k, arena);
             if (k + PREFETCH_STEP < _rows) {
-                key_getter.template prefetch<false>(hash_table_ctx.hash_table, k + PREFETCH_STEP,
-                                                    _join_node->_arena);
+                key_getter.template prefetch_by_hash<false>(
+                        hash_table_ctx.hash_table, _build_side_hash_values[k + PREFETCH_STEP]);
             }
 
             if (emplace_result.is_inserted()) {
@@ -128,6 +149,9 @@ private:
     HashJoinNode* _join_node;
     int _batch_size;
     uint8_t _offset;
+
+    ProfileCounter* _build_side_compute_hash_timer;
+    std::vector<size_t> _build_side_hash_values;
 };
 
 template <class HashTableContext>
@@ -326,7 +350,6 @@ struct ProcessHashTableProbe {
                                                                           _arena)) {nullptr, false}
                                            : key_getter.find_key(hash_table_ctx.hash_table,
                                                                  _probe_index, _arena);
-                // prefetch is more useful while matching to multiple rows
                 if (_probe_index + PREFETCH_STEP < _probe_rows)
                     key_getter.template prefetch<true>(hash_table_ctx.hash_table,
                                                        _probe_index + PREFETCH_STEP, _arena);
@@ -445,7 +468,9 @@ struct ProcessHashTableProbe {
                             ? decltype(key_getter.find_key(hash_table_ctx.hash_table, _probe_index,
                                                            _arena)) {nullptr, false}
                             : key_getter.find_key(hash_table_ctx.hash_table, _probe_index, _arena);
-
+            if (_probe_index + PREFETCH_STEP < _probe_rows)
+                key_getter.template prefetch<true>(hash_table_ctx.hash_table,
+                                                   _probe_index + PREFETCH_STEP, _arena);
             if (find_result.is_found()) {
                 auto& mapped = find_result.get_mapped();
                 auto origin_offset = current_offset;
@@ -867,10 +892,12 @@ Status HashJoinNode::prepare(RuntimeState* state) {
     runtime_profile()->add_child(build_phase_profile, false, nullptr);
     _build_timer = ADD_TIMER(build_phase_profile, "BuildTime");
     _build_table_timer = ADD_TIMER(build_phase_profile, "BuildTableTime");
+    _build_side_merge_block_timer = ADD_TIMER(build_phase_profile, "BuildSideMergeBlockTime");
     _build_table_insert_timer = ADD_TIMER(build_phase_profile, "BuildTableInsertTime");
     _build_expr_call_timer = ADD_TIMER(build_phase_profile, "BuildExprCallTime");
     _build_table_expanse_timer = ADD_TIMER(build_phase_profile, "BuildTableExpanseTime");
     _build_rows_counter = ADD_COUNTER(build_phase_profile, "BuildRows", TUnit::UNIT);
+    _build_side_compute_hash_timer = ADD_TIMER(build_phase_profile, "BuildSideHashComputingTime");
 
     // Probe phase
     auto probe_phase_profile = runtime_profile()->create_child("ProbePhase", true, true);
@@ -1146,6 +1173,7 @@ Status HashJoinNode::_hash_table_build(RuntimeState* state) {
         _mem_used += block.allocated_bytes();
 
         if (block.rows() != 0) {
+            SCOPED_TIMER(_build_side_merge_block_timer);
             mutable_block.merge(block);
         }
 
diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
index 48cb54e67a..923999626d 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -50,6 +50,16 @@ struct SerializedHashTableContext {
     }
 };
 
+template <typename HashMethod>
+struct IsSerializedHashTableContextTraits {
+    constexpr static bool value = false; // default: HashMethod is not HashMethodSerialized
+};
+
+template <typename Value, typename Mapped>
+struct IsSerializedHashTableContextTraits<ColumnsHashing::HashMethodSerialized<Value, Mapped>> {
+    constexpr static bool value = true; // specialization for HashMethodSerialized<Value, Mapped>
+};
+
 // T should be UInt32 UInt64 UInt128
 template <class T>
 struct PrimaryTypeHashTableContext {
@@ -203,6 +213,8 @@ private:
     RuntimeProfile::Counter* _search_hashtable_timer;
     RuntimeProfile::Counter* _build_side_output_timer;
     RuntimeProfile::Counter* _probe_side_output_timer;
+    RuntimeProfile::Counter* _build_side_compute_hash_timer;
+    RuntimeProfile::Counter* _build_side_merge_block_timer;
 
     RuntimeProfile::Counter* _join_filter_timer;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org