You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text copy.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/06/08 16:25:36 UTC
[incubator-doris] branch master updated: [enhance] improve dict in-predicate evaluate (#10009)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9c52b4a508 [enhance] improve dict in-predicate evaluate (#10009)
9c52b4a508 is described below
commit 9c52b4a508f7c4865ae077e286ea0b7794efa03b
Author: minghong <mi...@163.com>
AuthorDate: Thu Jun 9 00:25:30 2022 +0800
[enhance] improve dict in-predicate evaluate (#10009)
---
be/src/olap/in_list_predicate.cpp | 13 ++++++++-----
be/src/vec/columns/column_dictionary.h | 17 +++++++++--------
2 files changed, 17 insertions(+), 13 deletions(-)
diff --git a/be/src/olap/in_list_predicate.cpp b/be/src/olap/in_list_predicate.cpp
index b33ef09c49..82c8241368 100644
--- a/be/src/olap/in_list_predicate.cpp
+++ b/be/src/olap/in_list_predicate.cpp
@@ -132,13 +132,14 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
auto* nested_col_ptr = vectorized::check_and_get_column< \
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col); \
auto& data_array = nested_col_ptr->get_data(); \
- auto dict_codes = nested_col_ptr->find_codes(_values); \
+ std::vector<bool> selected; \
+ nested_col_ptr->find_codes(_values, selected); \
for (uint16_t i = 0; i < *size; i++) { \
uint16_t idx = sel[i]; \
sel[new_size] = idx; \
const auto& cell_value = data_array[idx]; \
- bool ret = !null_bitmap[idx] && \
- (dict_codes.find(cell_value) OP dict_codes.end()); \
+ DCHECK(cell_value < selected.size()); \
+ bool ret = !null_bitmap[idx] && (selected[cell_value] OP false); \
new_size += _opposite ? !ret : ret; \
} \
} \
@@ -161,12 +162,14 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>( \
column); \
auto& data_array = dict_col.get_data(); \
- auto dict_codes = dict_col.find_codes(_values); \
+ std::vector<bool> selected; \
+ dict_col.find_codes(_values, selected); \
for (uint16_t i = 0; i < *size; i++) { \
uint16_t idx = sel[i]; \
sel[new_size] = idx; \
const auto& cell_value = data_array[idx]; \
- auto result = (dict_codes.find(cell_value) OP dict_codes.end()); \
+ DCHECK(cell_value < selected.size()); \
+ auto result = (selected[cell_value] OP false); \
new_size += _opposite ? !result : result; \
} \
} \
diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h
index 532d7eb6a7..29db3a334c 100644
--- a/be/src/vec/columns/column_dictionary.h
+++ b/be/src/vec/columns/column_dictionary.h
@@ -258,9 +258,9 @@ public:
uint32_t get_hash_value(uint32_t idx) const { return _dict.get_hash_value(_codes[idx]); }
- phmap::flat_hash_set<int32_t> find_codes(
- const phmap::flat_hash_set<StringValue>& values) const {
- return _dict.find_codes(values);
+ void find_codes(const phmap::flat_hash_set<StringValue>& values,
+ std::vector<bool>& selected) const {
+ return _dict.find_codes(values, selected);
}
bool is_dict_sorted() const { return _dict_sorted; }
@@ -362,16 +362,17 @@ public:
return greater ? bound - greater + eq : bound - eq;
}
- phmap::flat_hash_set<int32_t> find_codes(
- const phmap::flat_hash_set<StringValue>& values) const {
- phmap::flat_hash_set<int32_t> code_set;
+ void find_codes(const phmap::flat_hash_set<StringValue>& values,
+ std::vector<bool>& selected) const {
+ size_t dict_word_num = _dict_data.size();
+ selected.resize(dict_word_num);
+ selected.assign(dict_word_num, false);
for (const auto& value : values) {
auto it = _inverted_index.find(value);
if (it != _inverted_index.end()) {
- code_set.insert(it->second);
+ selected[it->second] = true;
}
}
- return code_set;
}
void clear() {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org