You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/06/17 04:22:17 UTC

[incubator-doris] 03/04: [fix](vectorized) intersect operator takes too long time to execute (#10183)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

commit 334338417218b7eab8d8fa24d30318b44c13aca0
Author: starocean999 <40...@users.noreply.github.com>
AuthorDate: Fri Jun 17 08:43:53 2022 +0800

    [fix](vectorized) intersect operator takes too long time to execute (#10183)
    
    * fix intersect operator takes too long time to execute
    
    * modify code based on review comments
---
 be/src/vec/common/hash_table/hash_table.h |  7 +++++--
 be/src/vec/exec/vset_operation_node.h     | 20 ++++++++++++++++----
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h
index c55d806699..920c819694 100644
--- a/be/src/vec/common/hash_table/hash_table.h
+++ b/be/src/vec/common/hash_table/hash_table.h
@@ -887,9 +887,12 @@ public:
     }
 
     void delete_zero_key(Key key) {
-        if (Cell::is_zero(key, *this))
-             this->clear_get_has_zero();
+        if (this->get_has_zero() && Cell::is_zero(key, *this)) {
+            --m_size;
+            this->clear_get_has_zero();
+        }
     }
+
     void clear() {
         destroy_elements();
         this->clear_get_has_zero();
diff --git a/be/src/vec/exec/vset_operation_node.h b/be/src/vec/exec/vset_operation_node.h
index 1f8519c955..ba4eba3013 100644
--- a/be/src/vec/exec/vset_operation_node.h
+++ b/be/src/vec/exec/vset_operation_node.h
@@ -112,23 +112,35 @@ void VSetOperationNode::refresh_hash_table() {
 
                     arg.init_once();
                     auto& iter = arg.iter;
-                    for (; iter != arg.hash_table.end(); ++iter) {
+                    auto iter_end = arg.hash_table.end();
+                    while (iter != iter_end) {
                         auto& mapped = iter->get_second();
                         auto it = mapped.begin();
 
                         if constexpr (keep_matched) { //intersected
                             if (it->visited) {
                                 it->visited = false;
-                                if (is_need_shrink)
+                                if (is_need_shrink) {
                                     tmp_hash_table.hash_table.insert(iter->get_value());
+                                }
+                                ++iter;
                             } else {
-                                arg.hash_table.delete_zero_key(iter->get_first());
-                                iter->set_zero();
+                                if (!is_need_shrink) {
+                                    arg.hash_table.delete_zero_key(iter->get_first());
+                                    // ++iter checks whether the current key is zero; if it is, the iterator is moved to the container's head.
+                                    // So we do ++iter before set_zero, so that the iterator advances to the next valid key correctly.
+                                    auto iter_prev = iter;
+                                    ++iter;
+                                    iter_prev->set_zero();
+                                } else {
+                                    ++iter;
+                                }
                             }
                         } else { //except
                             if (!it->visited && is_need_shrink) {
                                 tmp_hash_table.hash_table.insert(iter->get_value());
                             }
+                            ++iter;
                         }
                     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org