You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/05/18 08:34:43 UTC

[incubator-doris] 08/09: [fix](storage) low_cardinality_optimize core dump when is null predicate (#9586)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

commit 0562bab929f2d2cb2a4dbec44f8e89560867bbaa
Author: ZenoYang <co...@qq.com>
AuthorDate: Wed May 18 14:57:13 2022 +0800

    [fix](storage) low_cardinality_optimize core dump when is null predicate (#9586)
    
    Issue Number: close #9555
    Make the last value of the dictionary null, when ColumnDict inserts a null value,
    add the encoding corresponding to the last value of the dictionary·
---
 be/src/olap/comparison_predicate.cpp   |  7 +++++--
 be/src/vec/columns/column_dictionary.h | 30 ++++++++++++++----------------
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/be/src/olap/comparison_predicate.cpp b/be/src/olap/comparison_predicate.cpp
index 45a89f92ad..363c75e5b8 100644
--- a/be/src/olap/comparison_predicate.cpp
+++ b/be/src/olap/comparison_predicate.cpp
@@ -147,6 +147,9 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterPredicate, >)
 COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
 
 // todo(zeno) define interface in IColumn to simplify code
+// If 1 OP 0 returns true, it means the predicate is > or >=
+// If 1 OP 1 returns true, it means the predicate is >= or <=
+// by this way, avoid redundant code
 #define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP, IS_RANGE)                                       \
     template <class T>                                                                             \
     void CLASS<T>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const {    \
@@ -164,7 +167,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
                             vectorized::ColumnDictionary<vectorized::Int32>>(nested_col);          \
                     auto& data_array = nested_col_ptr->get_data();                                 \
                     auto dict_code =                                                               \
-                            IS_RANGE ? nested_col_ptr->find_code_by_bound(_value, 0 OP 1, 1 OP 1)  \
+                            IS_RANGE ? nested_col_ptr->find_code_by_bound(_value, 1 OP 0, 1 OP 1)  \
                                      : nested_col_ptr->find_code(_value);                          \
                     for (uint16_t i = 0; i < *size; i++) {                                         \
                         uint16_t idx = sel[i];                                                     \
@@ -192,7 +195,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
                 auto& dict_col =                                                                   \
                         reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(column);\
                 auto& data_array = dict_col.get_data();                                            \
-                auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 0 OP 1, 1 OP 1)    \
+                auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 1 OP 0, 1 OP 1)    \
                                           : dict_col.find_code(_value);                            \
                 for (uint16_t i = 0; i < *size; ++i) {                                             \
                     uint16_t idx = sel[i];                                                         \
diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h
index 7d7117aee9..cc27ca1cdb 100644
--- a/be/src/vec/columns/column_dictionary.h
+++ b/be/src/vec/columns/column_dictionary.h
@@ -97,12 +97,10 @@ public:
     }
 
     void insert_data(const char* pos, size_t /*length*/) override {
-        _codes.push_back(unaligned_load<T>(pos));
+        LOG(FATAL) << "insert_data not supported in ColumnDictionary";
     }
 
-    void insert_data(const T value) { _codes.push_back(value); }
-
-    void insert_default() override { _codes.push_back(T()); }
+    void insert_default() override { _codes.push_back(_dict.get_null_code()); }
 
     void clear() override {
         _codes.clear();
@@ -218,13 +216,12 @@ public:
     void insert_many_dict_data(const int32_t* data_array, size_t start_index,
                                const StringRef* dict_array, size_t data_num,
                                uint32_t dict_num) override {
-        if (!is_dict_inited()) {
+        if (_dict.empty()) {
             _dict.reserve(dict_num);
             for (uint32_t i = 0; i < dict_num; ++i) {
                 auto value = StringValue(dict_array[i].data, dict_array[i].size);
                 _dict.insert_value(value);
             }
-            _dict_inited = true;
         }
 
         char* end_ptr = (char*)_codes.get_end_ptr();
@@ -266,8 +263,6 @@ public:
         return _dict.find_codes(values);
     }
 
-    bool is_dict_inited() const { return _dict_inited; }
-
     bool is_dict_sorted() const { return _dict_sorted; }
 
     bool is_dict_code_converted() const { return _dict_code_converted; }
@@ -304,13 +299,17 @@ public:
             if (it != _inverted_index.end()) {
                 return it->second;
             }
-            return -1;
+            return -2; // -1 is null code
+        }
+
+        T get_null_code() { return -1; }
+
+        inline StringValue& get_value(T code) {
+            return code >= _dict_data.size() ? _null_value : _dict_data[code];
         }
 
-        inline StringValue& get_value(T code) { return _dict_data[code]; }
-        
         inline void generate_hash_values() {
-            if (_hash_values.size() == 0) {
+            if (_hash_values.empty()) {
                 _hash_values.resize(_dict_data.size());
                 for (size_t i = 0; i < _dict_data.size(); i++) {
                     auto& sv = _dict_data[i];
@@ -387,7 +386,10 @@ public:
 
         size_t byte_size() { return _dict_data.size() * sizeof(_dict_data[0]); }
 
+        bool empty() { return _dict_data.empty(); }
+
     private:
+        StringValue _null_value = StringValue();
         StringValue::Comparator _comparator;
         // dict code -> dict value
         DictContainer _dict_data;
@@ -405,16 +407,12 @@ public:
 
 private:
     size_t _reserve_size;
-    bool _dict_inited = false;
     bool _dict_sorted = false;
     bool _dict_code_converted = false;
     Dictionary _dict;
     Container _codes;
 };
 
-template class ColumnDictionary<uint8_t>;
-template class ColumnDictionary<uint16_t>;
-template class ColumnDictionary<uint32_t>;
 template class ColumnDictionary<int32_t>;
 
 using ColumnDictI32 = vectorized::ColumnDictionary<doris::vectorized::Int32>;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org