You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/06/17 00:44:00 UTC

[incubator-doris] branch master updated: [fix](vectorized) intersect operator takes too long time to execute (#10183)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 1cca319d18 [fix](vectorized) intersect operator takes too long time to execute (#10183)
1cca319d18 is described below

commit 1cca319d18385c5c3ba413af5507d240fe771d95
Author: starocean999 <40...@users.noreply.github.com>
AuthorDate: Fri Jun 17 08:43:53 2022 +0800

    [fix](vectorized) intersect operator takes too long time to execute (#10183)
    
    * fix intersect operator takes too long time to execute
    
    * modify code based on review comments
---
 be/src/vec/common/hash_table/hash_table.h |  6 +++++-
 be/src/vec/exec/vset_operation_node.h     | 20 ++++++++++++++++----
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h
index 166859d077..94adb772bd 100644
--- a/be/src/vec/common/hash_table/hash_table.h
+++ b/be/src/vec/common/hash_table/hash_table.h
@@ -886,8 +886,12 @@ public:
     }
 
     void delete_zero_key(Key key) {
-        if (Cell::is_zero(key, *this)) this->clear_get_has_zero();
+        if (this->get_has_zero() && Cell::is_zero(key, *this)) {
+            --m_size;
+            this->clear_get_has_zero();
+        }
     }
+
     void clear() {
         destroy_elements();
         this->clear_get_has_zero();
diff --git a/be/src/vec/exec/vset_operation_node.h b/be/src/vec/exec/vset_operation_node.h
index 4b9034a81e..9b770838b5 100644
--- a/be/src/vec/exec/vset_operation_node.h
+++ b/be/src/vec/exec/vset_operation_node.h
@@ -114,23 +114,35 @@ void VSetOperationNode::refresh_hash_table() {
 
                     arg.init_once();
                     auto& iter = arg.iter;
-                    for (; iter != arg.hash_table.end(); ++iter) {
+                    auto iter_end = arg.hash_table.end();
+                    while (iter != iter_end) {
                         auto& mapped = iter->get_second();
                         auto it = mapped.begin();
 
                         if constexpr (keep_matched) { //intersected
                             if (it->visited) {
                                 it->visited = false;
-                                if (is_need_shrink)
+                                if (is_need_shrink) {
                                     tmp_hash_table.hash_table.insert(iter->get_value());
+                                }
+                                ++iter;
                             } else {
-                                arg.hash_table.delete_zero_key(iter->get_first());
-                                iter->set_zero();
+                                if (!is_need_shrink) {
+                                    arg.hash_table.delete_zero_key(iter->get_first());
+                                    // the ++iter would check if the current key is zero. if it does, the iterator will be moved to the container's head.
+                                    // so we do ++iter before set_zero to make the iterator move to next valid key correctly.
+                                    auto iter_prev = iter;
+                                    ++iter;
+                                    iter_prev->set_zero();
+                                } else {
+                                    ++iter;
+                                }
                             }
                         } else { //except
                             if (!it->visited && is_need_shrink) {
                                 tmp_hash_table.hash_table.insert(iter->get_value());
                             }
+                            ++iter;
                         }
                     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org