You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ga...@apache.org on 2022/10/13 08:05:08 UTC
[doris] branch master updated: [Improvement](join) compute hash values by vectorized way (#13335)
This is an automated email from the ASF dual-hosted git repository.
gabriellee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new baf2689610 [Improvement](join) compute hash values by vectorized way (#13335)
baf2689610 is described below
commit baf2689610f09577c2552983bf0d09266e5623e8
Author: Gabriel <ga...@gmail.com>
AuthorDate: Thu Oct 13 16:04:58 2022 +0800
[Improvement](join) compute hash values by vectorized way (#13335)
---
be/src/vec/common/columns_hashing.h | 6 ++---
be/src/vec/common/columns_hashing_impl.h | 5 ++++
be/src/vec/common/hash_table/hash_table.h | 15 +++++++++++
be/src/vec/exec/join/vhash_join_node.cpp | 42 +++++++++++++++++++++++++------
be/src/vec/exec/join/vhash_join_node.h | 12 +++++++++
5 files changed, 70 insertions(+), 10 deletions(-)
diff --git a/be/src/vec/common/columns_hashing.h b/be/src/vec/common/columns_hashing.h
index cc3e634d67..dfecead77e 100644
--- a/be/src/vec/common/columns_hashing.h
+++ b/be/src/vec/common/columns_hashing.h
@@ -131,9 +131,6 @@ struct HashMethodSerialized
void set_serialized_keys(const StringRef* keys_) { keys = keys_; }
-protected:
- friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
-
ALWAYS_INLINE KeyHolderType get_key_holder(size_t row, Arena& pool) const {
if constexpr (keys_pre_serialized) {
return KeyHolderType {keys[row], pool};
@@ -142,6 +139,9 @@ protected:
serialize_keys_to_pool_contiguous(row, keys_size, key_columns, pool), pool};
}
}
+
+protected:
+ friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
};
template <typename HashMethod>
diff --git a/be/src/vec/common/columns_hashing_impl.h b/be/src/vec/common/columns_hashing_impl.h
index e2f03f26c6..13cc375c82 100644
--- a/be/src/vec/common/columns_hashing_impl.h
+++ b/be/src/vec/common/columns_hashing_impl.h
@@ -186,6 +186,11 @@ public:
data.template prefetch<READ>(key_holder);
}
+ template <bool READ, typename Data> // READ is passed through unchanged as the template argument of data.prefetch_by_hash.
+ ALWAYS_INLINE void prefetch_by_hash(Data& data, size_t hash_value) { // Prefetch variant taking a hash value the caller already has, instead of a key holder.
+ data.template prefetch_by_hash<READ>(hash_value); // Pure forwarding: all the work is done by Data::prefetch_by_hash.
+ }
+
protected:
Cache cache;
diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h
index e588ed1b8a..8a10634304 100644
--- a/be/src/vec/common/hash_table/hash_table.h
+++ b/be/src/vec/common/hash_table/hash_table.h
@@ -908,6 +908,15 @@ public:
__builtin_prefetch(&buf[place_value]);
}
+ template <bool READ>
+ void ALWAYS_INLINE prefetch_by_hash(size_t hash_value) {
+ // __builtin_prefetch takes two optional arguments after the address:
+ // 'rw': 0 means the memory access is a read, 1 means it is a write (READ selects 0, otherwise 1 is passed).
+ // 'locality': 0-3. 0 means no temporal locality. 3 means high temporal locality. 1 is passed here.
+ auto place_value = grower.place(hash_value); // Map the supplied hash value to an index into buf.
+ __builtin_prefetch(&buf[place_value], READ ? 0 : 1, 1);
+ }
+
template <bool READ, typename KeyHolder>
void ALWAYS_INLINE prefetch(KeyHolder& key_holder) {
// Two optional arguments:
@@ -967,6 +976,12 @@ public:
emplace_non_zero(key_holder, it, inserted, hash_value);
}
+ template <typename KeyHolder>
+ void ALWAYS_INLINE emplace(KeyHolder&& key_holder, LookupResult& it, size_t hash_value, // Overload with hash_value placed before inserted.
+ bool& inserted) {
+ emplace(key_holder, it, inserted, hash_value); // Argument order is swapped here; presumably this selects the existing (inserted, hash_value) overload, which is not visible in this hunk - confirm.
+ }
+
template <typename KeyHolder, typename Func>
void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, Func&& f) {
const auto& key = key_holder_get_key(key_holder);
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index dd77bc2a18..7134ec464d 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -45,7 +45,8 @@ struct ProcessHashTableBuild {
_build_raw_ptrs(build_raw_ptrs),
_join_node(join_node),
_batch_size(batch_size),
- _offset(offset) {}
+ _offset(offset),
+ _build_side_compute_hash_timer(join_node->_build_side_compute_hash_timer) {}
template <bool ignore_null, bool build_unique, bool has_runtime_filter>
void run(HashTableContext& hash_table_ctx, ConstNullMapPtr null_map) {
@@ -75,6 +76,26 @@ struct ProcessHashTableBuild {
inserted_rows.reserve(_batch_size);
}
+ _build_side_hash_values.resize(_rows);
+ auto& arena = _join_node->_arena;
+ {
+ SCOPED_TIMER(_build_side_compute_hash_timer);
+ for (size_t k = 0; k < _rows; ++k) {
+ if constexpr (ignore_null) {
+ if ((*null_map)[k]) {
+ continue;
+ }
+ }
+ if constexpr (IsSerializedHashTableContextTraits<KeyGetter>::value) {
+ _build_side_hash_values[k] =
+ hash_table_ctx.hash_table.hash(key_getter.get_key_holder(k, arena).key);
+ } else {
+ _build_side_hash_values[k] =
+ hash_table_ctx.hash_table.hash(key_getter.get_key_holder(k, arena));
+ }
+ }
+ }
+
for (size_t k = 0; k < _rows; ++k) {
if constexpr (ignore_null) {
if ((*null_map)[k]) {
@@ -82,11 +103,11 @@ struct ProcessHashTableBuild {
}
}
- auto emplace_result =
- key_getter.emplace_key(hash_table_ctx.hash_table, k, _join_node->_arena);
+ auto emplace_result = key_getter.emplace_key(hash_table_ctx.hash_table,
+ _build_side_hash_values[k], k, arena);
if (k + PREFETCH_STEP < _rows) {
- key_getter.template prefetch<false>(hash_table_ctx.hash_table, k + PREFETCH_STEP,
- _join_node->_arena);
+ key_getter.template prefetch_by_hash<false>(
+ hash_table_ctx.hash_table, _build_side_hash_values[k + PREFETCH_STEP]);
}
if (emplace_result.is_inserted()) {
@@ -128,6 +149,9 @@ private:
HashJoinNode* _join_node;
int _batch_size;
uint8_t _offset;
+
+ ProfileCounter* _build_side_compute_hash_timer;
+ std::vector<size_t> _build_side_hash_values;
};
template <class HashTableContext>
@@ -326,7 +350,6 @@ struct ProcessHashTableProbe {
_arena)) {nullptr, false}
: key_getter.find_key(hash_table_ctx.hash_table,
_probe_index, _arena);
- // prefetch is more useful while matching to multiple rows
if (_probe_index + PREFETCH_STEP < _probe_rows)
key_getter.template prefetch<true>(hash_table_ctx.hash_table,
_probe_index + PREFETCH_STEP, _arena);
@@ -445,7 +468,9 @@ struct ProcessHashTableProbe {
? decltype(key_getter.find_key(hash_table_ctx.hash_table, _probe_index,
_arena)) {nullptr, false}
: key_getter.find_key(hash_table_ctx.hash_table, _probe_index, _arena);
-
+ if (_probe_index + PREFETCH_STEP < _probe_rows)
+ key_getter.template prefetch<true>(hash_table_ctx.hash_table,
+ _probe_index + PREFETCH_STEP, _arena);
if (find_result.is_found()) {
auto& mapped = find_result.get_mapped();
auto origin_offset = current_offset;
@@ -867,10 +892,12 @@ Status HashJoinNode::prepare(RuntimeState* state) {
runtime_profile()->add_child(build_phase_profile, false, nullptr);
_build_timer = ADD_TIMER(build_phase_profile, "BuildTime");
_build_table_timer = ADD_TIMER(build_phase_profile, "BuildTableTime");
+ _build_side_merge_block_timer = ADD_TIMER(build_phase_profile, "BuildSideMergeBlockTime");
_build_table_insert_timer = ADD_TIMER(build_phase_profile, "BuildTableInsertTime");
_build_expr_call_timer = ADD_TIMER(build_phase_profile, "BuildExprCallTime");
_build_table_expanse_timer = ADD_TIMER(build_phase_profile, "BuildTableExpanseTime");
_build_rows_counter = ADD_COUNTER(build_phase_profile, "BuildRows", TUnit::UNIT);
+ _build_side_compute_hash_timer = ADD_TIMER(build_phase_profile, "BuildSideHashComputingTime");
// Probe phase
auto probe_phase_profile = runtime_profile()->create_child("ProbePhase", true, true);
@@ -1146,6 +1173,7 @@ Status HashJoinNode::_hash_table_build(RuntimeState* state) {
_mem_used += block.allocated_bytes();
if (block.rows() != 0) {
+ SCOPED_TIMER(_build_side_merge_block_timer);
mutable_block.merge(block);
}
diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
index 48cb54e67a..923999626d 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -50,6 +50,16 @@ struct SerializedHashTableContext {
}
};
+template <typename HashMethod> // Primary template: compile-time flag telling whether HashMethod is a serialized-key hash method.
+struct IsSerializedHashTableContextTraits {
+ constexpr static bool value = false; // Default for every HashMethod not matched by the specialization below.
+};
+
+template <typename Value, typename Mapped> // Specialization for ColumnsHashing::HashMethodSerialized<Value, Mapped>.
+struct IsSerializedHashTableContextTraits<ColumnsHashing::HashMethodSerialized<Value, Mapped>> {
+ constexpr static bool value = true; // Checked with `if constexpr` in the build-side hash loop to hash get_key_holder(...).key rather than the key holder itself.
+};
+
// T should be UInt32 UInt64 UInt128
template <class T>
struct PrimaryTypeHashTableContext {
@@ -203,6 +213,8 @@ private:
RuntimeProfile::Counter* _search_hashtable_timer;
RuntimeProfile::Counter* _build_side_output_timer;
RuntimeProfile::Counter* _probe_side_output_timer;
+ RuntimeProfile::Counter* _build_side_compute_hash_timer;
+ RuntimeProfile::Counter* _build_side_merge_block_timer;
RuntimeProfile::Counter* _join_filter_timer;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org