You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/07/05 11:14:47 UTC

[doris] branch master updated: [bugfix] fix bug of vhash join build (#10614)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 3e87960202 [bugfix] fix bug of vhash join build (#10614)
3e87960202 is described below

commit 3e879602028eaf08040fb7a3e58672ca65cbc489
Author: TengJianPing <18...@users.noreply.github.com>
AuthorDate: Tue Jul 5 19:14:42 2022 +0800

    [bugfix] fix bug of vhash join build (#10614)
    
    * [bugfix] fix bug of vhash join build
    
    * format code
---
 be/src/vec/exec/join/vhash_join_node.cpp | 24 +++++++++++++++++++-----
 be/src/vec/exec/join/vhash_join_node.h   |  2 ++
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index f1b8f5b197..9c04fc9046 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -18,6 +18,7 @@
 #include "vec/exec/join/vhash_join_node.h"
 
 #include "gen_cpp/PlanNodes_types.h"
+#include "gutil/strings/substitute.h"
 #include "runtime/mem_tracker.h"
 #include "runtime/runtime_filter_mgr.h"
 #include "util/defer_op.h"
@@ -686,7 +687,7 @@ HashJoinNode::HashJoinNode(ObjectPool* pool, const TPlanNode& tnode, const Descr
     // avoid vector expand change block address.
     // one block can store 4g data, _build_blocks can store 128*4g data.
     // if probe data bigger than 512g, runtime filter maybe will core dump when insert data.
-    _build_blocks.reserve(128);
+    _build_blocks.reserve(_MAX_BUILD_BLOCK_COUNT);
 }
 
 HashJoinNode::~HashJoinNode() = default;
@@ -1023,6 +1024,9 @@ Status HashJoinNode::_hash_table_build(RuntimeState* state) {
     int64_t last_mem_used = 0;
     bool eos = false;
 
+    // make one block for each 4 gigabytes
+    constexpr static auto BUILD_BLOCK_MAX_SIZE = 4 * 1024UL * 1024UL * 1024UL;
+
     Block block;
     while (!eos) {
         block.clear_column_data();
@@ -1036,9 +1040,12 @@ Status HashJoinNode::_hash_table_build(RuntimeState* state) {
             mutable_block.merge(block);
         }
 
-        // make one block for each 4 gigabytes
-        constexpr static auto BUILD_BLOCK_MAX_SIZE = 4 * 1024UL * 1024UL * 1024UL;
         if (UNLIKELY(_mem_used - last_mem_used > BUILD_BLOCK_MAX_SIZE)) {
+            if (_build_blocks.size() == _MAX_BUILD_BLOCK_COUNT) {
+                return Status::NotSupported(
+                        strings::Substitute("data size of right table in hash join > $0",
+                                            BUILD_BLOCK_MAX_SIZE * _MAX_BUILD_BLOCK_COUNT));
+            }
             _build_blocks.emplace_back(mutable_block.to_block());
             // TODO:: Rethink may we should do the proess after we recevie all build blocks ?
             // which is better.
@@ -1050,8 +1057,15 @@ Status HashJoinNode::_hash_table_build(RuntimeState* state) {
         }
     }
 
-    _build_blocks.emplace_back(mutable_block.to_block());
-    RETURN_IF_ERROR(_process_build_block(state, _build_blocks[index], index));
+    if (!mutable_block.empty()) {
+        if (_build_blocks.size() == _MAX_BUILD_BLOCK_COUNT) {
+            return Status::NotSupported(
+                    strings::Substitute("data size of right table in hash join > $0",
+                                        BUILD_BLOCK_MAX_SIZE * _MAX_BUILD_BLOCK_COUNT));
+        }
+        _build_blocks.emplace_back(mutable_block.to_block());
+        RETURN_IF_ERROR(_process_build_block(state, _build_blocks[index], index));
+    }
 
     return std::visit(
             [&](auto&& arg) -> Status {
diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
index 175134b522..9ea79344f4 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -252,6 +252,8 @@ private:
 
     void _hash_table_init();
 
+    static const int _MAX_BUILD_BLOCK_COUNT = 128;
+
     template <class HashTableContext>
     friend struct ProcessHashTableBuild;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org