You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/06/17 04:22:17 UTC
[incubator-doris] 03/04: [fix](vectorized) intersect operator takes too long time to execute (#10183)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit 334338417218b7eab8d8fa24d30318b44c13aca0
Author: starocean999 <40...@users.noreply.github.com>
AuthorDate: Fri Jun 17 08:43:53 2022 +0800
[fix](vectorized) intersect operator takes too long time to execute (#10183)
* fix intersect operator taking too long to execute
* modify code based on review comments
---
be/src/vec/common/hash_table/hash_table.h | 7 +++++--
be/src/vec/exec/vset_operation_node.h | 20 ++++++++++++++++----
2 files changed, 21 insertions(+), 6 deletions(-)
diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h
index c55d806699..920c819694 100644
--- a/be/src/vec/common/hash_table/hash_table.h
+++ b/be/src/vec/common/hash_table/hash_table.h
@@ -887,9 +887,12 @@ public:
}
void delete_zero_key(Key key) {
- if (Cell::is_zero(key, *this))
- this->clear_get_has_zero();
+ if (this->get_has_zero() && Cell::is_zero(key, *this)) {
+ --m_size;
+ this->clear_get_has_zero();
+ }
}
+
void clear() {
destroy_elements();
this->clear_get_has_zero();
diff --git a/be/src/vec/exec/vset_operation_node.h b/be/src/vec/exec/vset_operation_node.h
index 1f8519c955..ba4eba3013 100644
--- a/be/src/vec/exec/vset_operation_node.h
+++ b/be/src/vec/exec/vset_operation_node.h
@@ -112,23 +112,35 @@ void VSetOperationNode::refresh_hash_table() {
arg.init_once();
auto& iter = arg.iter;
- for (; iter != arg.hash_table.end(); ++iter) {
+ auto iter_end = arg.hash_table.end();
+ while (iter != iter_end) {
auto& mapped = iter->get_second();
auto it = mapped.begin();
if constexpr (keep_matched) { //intersected
if (it->visited) {
it->visited = false;
- if (is_need_shrink)
+ if (is_need_shrink) {
tmp_hash_table.hash_table.insert(iter->get_value());
+ }
+ ++iter;
} else {
- arg.hash_table.delete_zero_key(iter->get_first());
- iter->set_zero();
+ if (!is_need_shrink) {
+ arg.hash_table.delete_zero_key(iter->get_first());
+ // ++iter checks whether the current key is zero; if it is, the iterator is moved to the container's head,
+ // so we do ++iter before set_zero to make the iterator move to the next valid key correctly.
+ auto iter_prev = iter;
+ ++iter;
+ iter_prev->set_zero();
+ } else {
+ ++iter;
+ }
}
} else { //except
if (!it->visited && is_need_shrink) {
tmp_hash_table.hash_table.insert(iter->get_value());
}
+ ++iter;
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org