You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/05/18 08:34:43 UTC
[incubator-doris] 08/09: [fix](storage) low_cardinality_optimize core dump when is null predicate (#9586)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit 0562bab929f2d2cb2a4dbec44f8e89560867bbaa
Author: ZenoYang <co...@qq.com>
AuthorDate: Wed May 18 14:57:13 2022 +0800
[fix](storage) low_cardinality_optimize core dump when is null predicate (#9586)
Issue Number: close #9555
Make the last value of the dictionary null; when ColumnDict inserts a null value,
add the encoding corresponding to the last value of the dictionary.
---
be/src/olap/comparison_predicate.cpp | 7 +++++--
be/src/vec/columns/column_dictionary.h | 30 ++++++++++++++----------------
2 files changed, 19 insertions(+), 18 deletions(-)
diff --git a/be/src/olap/comparison_predicate.cpp b/be/src/olap/comparison_predicate.cpp
index 45a89f92ad..363c75e5b8 100644
--- a/be/src/olap/comparison_predicate.cpp
+++ b/be/src/olap/comparison_predicate.cpp
@@ -147,6 +147,9 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterPredicate, >)
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
// todo(zeno) define interface in IColumn to simplify code
+// If 1 OP 0 returns true, it means the predicate is > or >=
+// If 1 OP 1 returns true, it means the predicate is >= or <=
+// by this way, avoid redundant code
#define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP, IS_RANGE) \
template <class T> \
void CLASS<T>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const { \
@@ -164,7 +167,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col); \
auto& data_array = nested_col_ptr->get_data(); \
auto dict_code = \
- IS_RANGE ? nested_col_ptr->find_code_by_bound(_value, 0 OP 1, 1 OP 1) \
+ IS_RANGE ? nested_col_ptr->find_code_by_bound(_value, 1 OP 0, 1 OP 1) \
: nested_col_ptr->find_code(_value); \
for (uint16_t i = 0; i < *size; i++) { \
uint16_t idx = sel[i]; \
@@ -192,7 +195,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
auto& dict_col = \
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(column);\
auto& data_array = dict_col.get_data(); \
- auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 0 OP 1, 1 OP 1) \
+ auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 1 OP 0, 1 OP 1) \
: dict_col.find_code(_value); \
for (uint16_t i = 0; i < *size; ++i) { \
uint16_t idx = sel[i]; \
diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h
index 7d7117aee9..cc27ca1cdb 100644
--- a/be/src/vec/columns/column_dictionary.h
+++ b/be/src/vec/columns/column_dictionary.h
@@ -97,12 +97,10 @@ public:
}
void insert_data(const char* pos, size_t /*length*/) override {
- _codes.push_back(unaligned_load<T>(pos));
+ LOG(FATAL) << "insert_data not supported in ColumnDictionary";
}
- void insert_data(const T value) { _codes.push_back(value); }
-
- void insert_default() override { _codes.push_back(T()); }
+ void insert_default() override { _codes.push_back(_dict.get_null_code()); }
void clear() override {
_codes.clear();
@@ -218,13 +216,12 @@ public:
void insert_many_dict_data(const int32_t* data_array, size_t start_index,
const StringRef* dict_array, size_t data_num,
uint32_t dict_num) override {
- if (!is_dict_inited()) {
+ if (_dict.empty()) {
_dict.reserve(dict_num);
for (uint32_t i = 0; i < dict_num; ++i) {
auto value = StringValue(dict_array[i].data, dict_array[i].size);
_dict.insert_value(value);
}
- _dict_inited = true;
}
char* end_ptr = (char*)_codes.get_end_ptr();
@@ -266,8 +263,6 @@ public:
return _dict.find_codes(values);
}
- bool is_dict_inited() const { return _dict_inited; }
-
bool is_dict_sorted() const { return _dict_sorted; }
bool is_dict_code_converted() const { return _dict_code_converted; }
@@ -304,13 +299,17 @@ public:
if (it != _inverted_index.end()) {
return it->second;
}
- return -1;
+ return -2; // -1 is null code
+ }
+
+ T get_null_code() { return -1; }
+
+ inline StringValue& get_value(T code) {
+ return code >= _dict_data.size() ? _null_value : _dict_data[code];
}
- inline StringValue& get_value(T code) { return _dict_data[code]; }
-
inline void generate_hash_values() {
- if (_hash_values.size() == 0) {
+ if (_hash_values.empty()) {
_hash_values.resize(_dict_data.size());
for (size_t i = 0; i < _dict_data.size(); i++) {
auto& sv = _dict_data[i];
@@ -387,7 +386,10 @@ public:
size_t byte_size() { return _dict_data.size() * sizeof(_dict_data[0]); }
+ bool empty() { return _dict_data.empty(); }
+
private:
+ StringValue _null_value = StringValue();
StringValue::Comparator _comparator;
// dict code -> dict value
DictContainer _dict_data;
@@ -405,16 +407,12 @@ public:
private:
size_t _reserve_size;
- bool _dict_inited = false;
bool _dict_sorted = false;
bool _dict_code_converted = false;
Dictionary _dict;
Container _codes;
};
-template class ColumnDictionary<uint8_t>;
-template class ColumnDictionary<uint16_t>;
-template class ColumnDictionary<uint32_t>;
template class ColumnDictionary<int32_t>;
using ColumnDictI32 = vectorized::ColumnDictionary<doris::vectorized::Int32>;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org