You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/05/24 03:28:16 UTC
[incubator-doris] 03/04: [vec][opt] opt hash join build resize hash table before insert data (#9735)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit 2265374d28c2411079e1b3a844b86e5859e41de1
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Mon May 23 15:13:57 2022 +0800
[vec][opt] opt hash join build resize hash table before insert data (#9735)
Co-authored-by: lihaopeng <li...@baidu.com>
---
be/src/vec/common/hash_table/hash_table.h | 6 ++++++
be/src/vec/exec/join/vhash_join_node.cpp | 10 +++++++---
2 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h
index f0a94b77f3..c55d806699 100644
--- a/be/src/vec/common/hash_table/hash_table.h
+++ b/be/src/vec/common/hash_table/hash_table.h
@@ -731,6 +731,12 @@ protected:
}
public:
+ void expanse_for_add_elem(size_t num_elem) { // Pre-size the table ahead of adding num_elem elements.
+ if (add_elem_size_overflow(num_elem)) { // Resize only when the overflow check for num_elem fires.
+ resize(grower.buf_size() + num_elem); // Requested size: current grower.buf_size() plus num_elem.
+ }
+ }
+
/// Insert a value. In the case of any more complex values, it is better to use the `emplace` function.
std::pair<LookupResult, bool> ALWAYS_INLINE insert(const value_type& x) {
std::pair<LookupResult, bool> res;
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 7ec9f23439..8a8691a51d 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -67,6 +67,10 @@ struct ProcessHashTableBuild {
KeyGetter key_getter(_build_raw_ptrs, _join_node->_build_key_sz, nullptr);
SCOPED_TIMER(_join_node->_build_table_insert_timer);
+ // Only when !build_unique do we need to expand the hash table before inserting data.
+ if constexpr (!build_unique) {
+ hash_table_ctx.hash_table.expanse_for_add_elem(_rows);
+ }
hash_table_ctx.hash_table.reset_resize_timer();
vector<int>& inserted_rows = _join_node->_inserted_rows[&_acquired_block];
@@ -980,8 +984,8 @@ Status HashJoinNode::_hash_table_build(RuntimeState* state) {
if (block.rows() != 0) { mutable_block.merge(block); }
// make one block for each 4 gigabytes
- constexpr static auto BUILD_BLOCK_MAX_SIZE = 4 * 1024UL * 1024UL * 1024UL;
- if (_mem_used - last_mem_used > BUILD_BLOCK_MAX_SIZE) {
+ constexpr static auto BUILD_BLOCK_MAX_SIZE = 4 * 1024UL * 1024UL * 1024UL;
+ if (UNLIKELY(_mem_used - last_mem_used > BUILD_BLOCK_MAX_SIZE)) {
_build_blocks.emplace_back(mutable_block.to_block());
// TODO: Rethink whether we should do the processing after we receive all build blocks;
// which is better?
@@ -1099,7 +1103,7 @@ Status HashJoinNode::extract_probe_join_column(Block& block, NullMap& null_map,
Status HashJoinNode::_process_build_block(RuntimeState* state, Block& block, uint8_t offset) {
SCOPED_TIMER(_build_table_timer);
size_t rows = block.rows();
- if (rows == 0) {
+ if (UNLIKELY(rows == 0)) {
return Status::OK();
}
COUNTER_UPDATE(_build_rows_counter, rows);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org