You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2023/06/07 02:08:42 UTC

[doris] branch branch-1.2-lts updated: [fix](hashtable) Check query cancel status during build hash table #19970 (#20512)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new 2fd0bf4f41 [fix](hashtable) Check query cancel status during build hash table #19970 (#20512)
2fd0bf4f41 is described below

commit 2fd0bf4f415a15acf95f1d63c6bc0a1034e9e5c4
Author: Xinyi Zou <zo...@gmail.com>
AuthorDate: Wed Jun 7 10:08:35 2023 +0800

    [fix](hashtable) Check query cancel status during build hash table #19970 (#20512)
    
    should cancel query during hash table build stage if the query is cancelled.
    Issue Number: close #xxx
    
    pick #19970
---
 be/src/vec/exec/join/vhash_join_node.cpp | 13 +++++++++++--
 be/src/vec/exec/vset_operation_node.cpp  | 18 ++++++++++++------
 be/src/vec/exec/vset_operation_node.h    |  2 +-
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 587b1d65b9..3efea5d8c6 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -55,7 +55,8 @@ Overload(Callables&&... callables) -> Overload<Callables...>;
 template <class HashTableContext>
 struct ProcessHashTableBuild {
     ProcessHashTableBuild(int rows, Block& acquired_block, ColumnRawPtrs& build_raw_ptrs,
-                          HashJoinNode* join_node, int batch_size, uint8_t offset)
+                          HashJoinNode* join_node, int batch_size, uint8_t offset,
+                          RuntimeState* state)
             : _rows(rows),
               _skip_rows(0),
               _acquired_block(acquired_block),
@@ -63,6 +64,7 @@ struct ProcessHashTableBuild {
               _join_node(join_node),
               _batch_size(batch_size),
               _offset(offset),
+              _state(state),
               _build_side_compute_hash_timer(join_node->_build_side_compute_hash_timer) {}
 
     template <bool ignore_null, bool short_circuit_for_null>
@@ -121,6 +123,9 @@ struct ProcessHashTableBuild {
             }
 
             for (size_t k = 0; k < _rows; ++k) {
+                if (k % 65536 == 0) {
+                    RETURN_IF_CANCELLED(_state);
+                }
                 if constexpr (ignore_null) {
                     if ((*null_map)[k]) {
                         continue;
@@ -148,6 +153,9 @@ struct ProcessHashTableBuild {
         bool build_unique = _join_node->_build_unique;
 #define EMPLACE_IMPL(stmt)                                                                  \
     for (size_t k = 0; k < _rows; ++k) {                                                    \
+        if (k % 65536 == 0) {                                                               \
+            RETURN_IF_CANCELLED(_state);                                                    \
+        }                                                                                   \
         if constexpr (ignore_null) {                                                        \
             if ((*null_map)[k]) {                                                           \
                 continue;                                                                   \
@@ -208,6 +216,7 @@ private:
     HashJoinNode* _join_node;
     int _batch_size;
     uint8_t _offset;
+    RuntimeState* _state;
 
     ProfileCounter* _build_side_compute_hash_timer;
     std::vector<size_t> _build_side_hash_values;
@@ -911,7 +920,7 @@ Status HashJoinNode::_process_build_block(RuntimeState* state, Block& block, uin
                         auto short_circuit_for_null_in_build_side) -> Status {
                         using HashTableCtxType = std::decay_t<decltype(arg)>;
                         ProcessHashTableBuild<HashTableCtxType> hash_table_build_process(
-                                rows, block, raw_ptrs, this, state->batch_size(), offset);
+                                rows, block, raw_ptrs, this, state->batch_size(), offset, state);
                         return hash_table_build_process
                                 .template run<has_null_value, short_circuit_for_null_in_build_side>(
                                         arg,
diff --git a/be/src/vec/exec/vset_operation_node.cpp b/be/src/vec/exec/vset_operation_node.cpp
index 8ce115813c..418aa06d85 100644
--- a/be/src/vec/exec/vset_operation_node.cpp
+++ b/be/src/vec/exec/vset_operation_node.cpp
@@ -26,12 +26,14 @@ namespace vectorized {
 template <class HashTableContext>
 struct HashTableBuild {
     HashTableBuild(int rows, Block& acquired_block, ColumnRawPtrs& build_raw_ptrs,
-                   VSetOperationNode* operation_node, uint8_t offset)
+                   VSetOperationNode* operation_node, uint8_t offset,
+                   RuntimeState* state)
             : _rows(rows),
               _offset(offset),
               _acquired_block(acquired_block),
               _build_raw_ptrs(build_raw_ptrs),
-              _operation_node(operation_node) {}
+              _operation_node(operation_node),
+              _state(state) {}
 
     Status operator()(HashTableContext& hash_table_ctx) {
         using KeyGetter = typename HashTableContext::State;
@@ -51,6 +53,9 @@ struct HashTableBuild {
         }
 
         for (size_t k = 0; k < _rows; ++k) {
+            if (k % 65536 == 0) {
+                RETURN_IF_CANCELLED(_state);
+            }
             auto emplace_result = key_getter.emplace_key(hash_table_ctx.hash_table, k,
                                                          *(_operation_node->_arena));
 
@@ -72,6 +77,7 @@ private:
     Block& _acquired_block;
     ColumnRawPtrs& _build_raw_ptrs;
     VSetOperationNode* _operation_node;
+    RuntimeState* _state;
 };
 
 VSetOperationNode::VSetOperationNode(ObjectPool* pool, const TPlanNode& tnode,
@@ -271,7 +277,7 @@ Status VSetOperationNode::hash_table_build(RuntimeState* state) {
             _build_blocks.emplace_back(mutable_block.to_block());
             // TODO:: Rethink may we should do the proess after we recevie all build blocks ?
             // which is better.
-            RETURN_IF_ERROR(process_build_block(_build_blocks[index], index));
+            RETURN_IF_ERROR(process_build_block(_build_blocks[index], index, state));
             mutable_block = MutableBlock();
             ++index;
             last_mem_used = _mem_used;
@@ -280,11 +286,11 @@ Status VSetOperationNode::hash_table_build(RuntimeState* state) {
 
     _build_blocks.emplace_back(mutable_block.to_block());
     child(0)->close(state);
-    RETURN_IF_ERROR(process_build_block(_build_blocks[index], index));
+    RETURN_IF_ERROR(process_build_block(_build_blocks[index], index, state));
     return Status::OK();
 }
 
-Status VSetOperationNode::process_build_block(Block& block, uint8_t offset) {
+Status VSetOperationNode::process_build_block(Block& block, uint8_t offset, RuntimeState* state) {
     size_t rows = block.rows();
     if (rows == 0) {
         return Status::OK();
@@ -299,7 +305,7 @@ Status VSetOperationNode::process_build_block(Block& block, uint8_t offset) {
                 using HashTableCtxType = std::decay_t<decltype(arg)>;
                 if constexpr (!std::is_same_v<HashTableCtxType, std::monostate>) {
                     HashTableBuild<HashTableCtxType> hash_table_build_process(rows, block, raw_ptrs,
-                                                                              this, offset);
+                                                                              this, offset, state);
                     hash_table_build_process(arg);
                 } else {
                     LOG(FATAL) << "FATAL: uninited hash table";
diff --git a/be/src/vec/exec/vset_operation_node.h b/be/src/vec/exec/vset_operation_node.h
index 902f19efa4..d21231bf6f 100644
--- a/be/src/vec/exec/vset_operation_node.h
+++ b/be/src/vec/exec/vset_operation_node.h
@@ -49,7 +49,7 @@ protected:
     //It's time to abstract out the same methods and provide them directly to others;
     void hash_table_init();
     Status hash_table_build(RuntimeState* state);
-    Status process_build_block(Block& block, uint8_t offset);
+    Status process_build_block(Block& block, uint8_t offset, RuntimeState* state);
     Status extract_build_column(Block& block, ColumnRawPtrs& raw_ptrs);
     Status extract_probe_column(Block& block, ColumnRawPtrs& raw_ptrs, int child_id);
     template <bool keep_matched>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org