You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/06/08 16:25:36 UTC

[incubator-doris] branch master updated: [enhance] improve dict in-predicate evaluate (#10009)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 9c52b4a508 [enhance] improve dict in-predicate evaluate (#10009)
9c52b4a508 is described below

commit 9c52b4a508f7c4865ae077e286ea0b7794efa03b
Author: minghong <mi...@163.com>
AuthorDate: Thu Jun 9 00:25:30 2022 +0800

    [enhance] improve dict in-predicate evaluate (#10009)
---
 be/src/olap/in_list_predicate.cpp      | 13 ++++++++-----
 be/src/vec/columns/column_dictionary.h | 17 +++++++++--------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/be/src/olap/in_list_predicate.cpp b/be/src/olap/in_list_predicate.cpp
index b33ef09c49..82c8241368 100644
--- a/be/src/olap/in_list_predicate.cpp
+++ b/be/src/olap/in_list_predicate.cpp
@@ -132,13 +132,14 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
                     auto* nested_col_ptr = vectorized::check_and_get_column<                     \
                             vectorized::ColumnDictionary<vectorized::Int32>>(nested_col);        \
                     auto& data_array = nested_col_ptr->get_data();                               \
-                    auto dict_codes = nested_col_ptr->find_codes(_values);                       \
+                    std::vector<bool> selected;                                                  \
+                    nested_col_ptr->find_codes(_values, selected);                               \
                     for (uint16_t i = 0; i < *size; i++) {                                       \
                         uint16_t idx = sel[i];                                                   \
                         sel[new_size] = idx;                                                     \
                         const auto& cell_value = data_array[idx];                                \
-                        bool ret = !null_bitmap[idx] &&                                          \
-                                   (dict_codes.find(cell_value) OP dict_codes.end());            \
+                        DCHECK(cell_value < selected.size());                                    \
+                        bool ret = !null_bitmap[idx] && (selected[cell_value] OP false);         \
                         new_size += _opposite ? !ret : ret;                                      \
                     }                                                                            \
                 }                                                                                \
@@ -161,12 +162,14 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
                         reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(      \
                                 column);                                                         \
                 auto& data_array = dict_col.get_data();                                          \
-                auto dict_codes = dict_col.find_codes(_values);                                  \
+                std::vector<bool> selected;                                                      \
+                dict_col.find_codes(_values, selected);                                          \
                 for (uint16_t i = 0; i < *size; i++) {                                           \
                     uint16_t idx = sel[i];                                                       \
                     sel[new_size] = idx;                                                         \
                     const auto& cell_value = data_array[idx];                                    \
-                    auto result = (dict_codes.find(cell_value) OP dict_codes.end());             \
+                    DCHECK(cell_value < selected.size());                                        \
+                    auto result = (selected[cell_value] OP false);                               \
                     new_size += _opposite ? !result : result;                                    \
                 }                                                                                \
             }                                                                                    \
diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h
index 532d7eb6a7..29db3a334c 100644
--- a/be/src/vec/columns/column_dictionary.h
+++ b/be/src/vec/columns/column_dictionary.h
@@ -258,9 +258,9 @@ public:
 
     uint32_t get_hash_value(uint32_t idx) const { return _dict.get_hash_value(_codes[idx]); }
 
-    phmap::flat_hash_set<int32_t> find_codes(
-            const phmap::flat_hash_set<StringValue>& values) const {
-        return _dict.find_codes(values);
+    void find_codes(const phmap::flat_hash_set<StringValue>& values,
+                    std::vector<bool>& selected) const {
+        return _dict.find_codes(values, selected);
     }
 
     bool is_dict_sorted() const { return _dict_sorted; }
@@ -362,16 +362,17 @@ public:
             return greater ? bound - greater + eq : bound - eq;
         }
 
-        phmap::flat_hash_set<int32_t> find_codes(
-                const phmap::flat_hash_set<StringValue>& values) const {
-            phmap::flat_hash_set<int32_t> code_set;
+        void find_codes(const phmap::flat_hash_set<StringValue>& values,
+                        std::vector<bool>& selected) const {
+            size_t dict_word_num = _dict_data.size();
+            selected.resize(dict_word_num);
+            selected.assign(dict_word_num, false);
             for (const auto& value : values) {
                 auto it = _inverted_index.find(value);
                 if (it != _inverted_index.end()) {
-                    code_set.insert(it->second);
+                    selected[it->second] = true;
                 }
             }
-            return code_set;
         }
 
         void clear() {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org