You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/06/23 04:29:18 UTC
[doris] branch master updated: [optimize](storage)optimize date in storage layer (#8967)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new d73f170eeb [optimize](storage)optimize date in storage layer (#8967)
d73f170eeb is described below
commit d73f170eeb5a9cc7194d5eb55df33eb7ccacbba7
Author: wangbo <wa...@apache.org>
AuthorDate: Thu Jun 23 12:29:10 2022 +0800
[optimize](storage)optimize date in storage layer (#8967)
* opt date in storage
* code style
Co-authored-by: Wang Bo <wa...@meituan.com>
---
be/src/olap/comparison_predicate.cpp | 45 ++++++++++++++++++----
be/src/olap/in_list_predicate.h | 32 ++++++++++++++-
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 2 +-
be/src/olap/schema.cpp | 2 +-
be/src/olap/uint24.h | 2 +
be/src/vec/columns/column_vector.h | 19 +++++----
be/src/vec/columns/predicate_column.h | 36 +++++++++++++++++
be/src/vec/runtime/vdatetime_value.h | 13 +++++++
8 files changed, 131 insertions(+), 20 deletions(-)
diff --git a/be/src/olap/comparison_predicate.cpp b/be/src/olap/comparison_predicate.cpp
index fc50c354fd..91ef9f7156 100644
--- a/be/src/olap/comparison_predicate.cpp
+++ b/be/src/olap/comparison_predicate.cpp
@@ -224,6 +224,9 @@ COMPARISON_PRED_COLUMN_EVALUATE(LessEqualPredicate, <=, true)
COMPARISON_PRED_COLUMN_EVALUATE(GreaterPredicate, >, true)
COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=, true)
+// TODO(wb): DATE values are stored in a uint32_t predicate column but are still evaluated
+// through Predicate<uint24_t>. This keeps compatibility with the row-version predicates.
+// Switch DATE to Predicate<uint32_t> once the row-version code path is removed.
#define COMPARISON_PRED_COLUMN_EVALUATE_VEC(CLASS, OP) \
template <class T> \
void CLASS<T>::evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const { \
@@ -236,15 +239,40 @@ COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=, true)
auto& null_bitmap = reinterpret_cast<const vectorized::ColumnVector<uint8_t>&>( \
*(nullable_column->get_null_map_column_ptr())) \
.get_data(); \
- for (uint16_t i = 0; i < size; i++) { \
- flags[i] = (data_array[i] OP _value) && (!null_bitmap[i]); \
+ if constexpr (std::is_same_v<T, uint24_t>) { \
+ auto& predicate_column = \
+ reinterpret_cast<const vectorized::PredicateColumnType<uint32_t>&>( \
+ nullable_column->get_nested_column()); \
+ uint32_t int32_val = 0; \
+ char* int32_val_ptr = (char*)&int32_val; \
+ memory_copy(int32_val_ptr, _value.get_data(), sizeof(uint24_t)); \
+ auto& data_array_uint32_t = predicate_column.get_data(); \
+ for (uint16_t i = 0; i < size; i++) { \
+ flags[i] = (data_array_uint32_t[i] OP int32_val) && (!null_bitmap[i]); \
+ } \
+ } else { \
+ for (uint16_t i = 0; i < size; i++) { \
+ flags[i] = (data_array[i] OP _value) && (!null_bitmap[i]); \
+ } \
} \
} else { \
- auto& predicate_column = \
- reinterpret_cast<vectorized::PredicateColumnType<T>&>(column); \
- auto& data_array = predicate_column.get_data(); \
- for (uint16_t i = 0; i < size; i++) { \
- flags[i] = data_array[i] OP _value; \
+ if constexpr (std::is_same_v<T, uint24_t>) { \
+ auto& predicate_column = \
+ reinterpret_cast<vectorized::PredicateColumnType<uint32_t>&>(column); \
+ uint32_t int32_val = 0; \
+ char* int32_val_ptr = (char*)&int32_val; \
+ memory_copy(int32_val_ptr, _value.get_data(), sizeof(uint24_t)); \
+ auto& data_array = predicate_column.get_data(); \
+ for (uint16_t i = 0; i < size; i++) { \
+ flags[i] = data_array[i] OP int32_val; \
+ } \
+ } else { \
+ auto& predicate_column = \
+ reinterpret_cast<vectorized::PredicateColumnType<T>&>(column); \
+ auto& data_array = predicate_column.get_data(); \
+ for (uint16_t i = 0; i < size; i++) { \
+ flags[i] = data_array[i] OP _value; \
+ } \
} \
} \
if (_opposite) { \
@@ -502,6 +530,7 @@ COMPARISON_PRED_BITMAP_EVALUATE(GreaterEqualPredicate, >=)
template CLASS<decimal12_t>::CLASS(uint32_t column_id, const decimal12_t& value, \
bool opposite); \
template CLASS<uint24_t>::CLASS(uint32_t column_id, const uint24_t& value, bool opposite); \
+ template CLASS<uint32_t>::CLASS(uint32_t column_id, const uint32_t& value, bool opposite); \
template CLASS<uint64_t>::CLASS(uint32_t column_id, const uint64_t& value, bool opposite); \
template CLASS<bool>::CLASS(uint32_t column_id, const bool& value, bool opposite);
@@ -663,6 +692,8 @@ COMPARISON_PRED_COLUMN_EVALUATE_DECLARATION(GreaterEqualPredicate)
bool* flags) const; \
template void CLASS<uint24_t>::evaluate_vec(vectorized::IColumn& column, uint16_t size, \
bool* flags) const; \
+ template void CLASS<uint32_t>::evaluate_vec(vectorized::IColumn& column, uint16_t size, \
+ bool* flags) const; \
template void CLASS<uint64_t>::evaluate_vec(vectorized::IColumn& column, uint16_t size, \
bool* flags) const; \
template void CLASS<bool>::evaluate_vec(vectorized::IColumn& column, uint16_t size, \
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index e39686abd4..04ec211568 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -290,7 +290,37 @@ private:
uint16_t* sel, uint16_t size) const {
uint16_t new_size = 0;
- if (column->is_column_dictionary()) {
+ if constexpr (std::is_same_v<T, uint24_t>) {
+ auto* nested_col_ptr =
+ vectorized::check_and_get_column<vectorized::PredicateColumnType<uint32_t>>(
+ column);
+ auto& data_array = nested_col_ptr->get_data();
+
+ uint24_t tmp_uint24_value;
+ for (uint16_t i = 0; i < size; i++) {
+ uint16_t idx = sel[i];
+ if constexpr (is_nullable) {
+ if ((*null_map)[idx]) {
+ if constexpr (is_opposite) {
+ sel[new_size++] = idx;
+ }
+ continue;
+ }
+ }
+
+ memcpy((char*)(&tmp_uint24_value), (char*)(&(data_array[idx])), sizeof(uint24_t));
+ if constexpr (!is_opposite) {
+ if (_operator(_values.find(tmp_uint24_value), _values.end())) {
+ sel[new_size++] = idx;
+ }
+ } else {
+ if (!_operator(_values.find(tmp_uint24_value), _values.end())) {
+ sel[new_size++] = idx;
+ }
+ }
+ }
+
+ } else if (column->is_column_dictionary()) {
if constexpr (std::is_same_v<T, StringValue>) {
auto* nested_col_ptr = vectorized::check_and_get_column<
vectorized::ColumnDictionary<vectorized::Int32>>(column);
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 14e5b92935..b3945e11ec 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -661,7 +661,7 @@ void SegmentIterator::_vec_init_lazy_materialization() {
predicate->type() == PredicateType::IN_LIST ||
predicate->type() == PredicateType::NOT_IN_LIST ||
predicate->type() == PredicateType::IS_NULL ||
- predicate->type() == PredicateType::IS_NOT_NULL || type == OLAP_FIELD_TYPE_DATE ||
+ predicate->type() == PredicateType::IS_NOT_NULL ||
type == OLAP_FIELD_TYPE_DECIMAL) {
short_cir_pred_col_id_set.insert(cid);
_short_cir_eval_predicate.push_back(predicate);
diff --git a/be/src/olap/schema.cpp b/be/src/olap/schema.cpp
index 8c218ab8ed..a5e7896147 100644
--- a/be/src/olap/schema.cpp
+++ b/be/src/olap/schema.cpp
@@ -151,7 +151,7 @@ vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(FieldType type)
return doris::vectorized::PredicateColumnType<doris::vectorized::Int128>::create();
case OLAP_FIELD_TYPE_DATE:
- return doris::vectorized::PredicateColumnType<uint24_t>::create();
+ return doris::vectorized::PredicateColumnType<uint32_t>::create();
case OLAP_FIELD_TYPE_DATETIME:
return doris::vectorized::PredicateColumnType<uint64_t>::create();
diff --git a/be/src/olap/uint24.h b/be/src/olap/uint24.h
index 1605d893c9..f56ca7ddc6 100644
--- a/be/src/olap/uint24.h
+++ b/be/src/olap/uint24.h
@@ -140,6 +140,8 @@ public:
return std::string(buf);
}
+ const uint8_t* get_data() const { return data; }
+
private:
uint8_t data[3];
} __attribute__((packed));
diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h
index e57ffe4a9c..1d42455c76 100644
--- a/be/src/vec/columns/column_vector.h
+++ b/be/src/vec/columns/column_vector.h
@@ -169,17 +169,16 @@ public:
}
void insert_date_column(const char* data_ptr, size_t num) {
- size_t value_size = sizeof(uint24_t);
+ size_t input_value_size = sizeof(uint24_t);
+
for (int i = 0; i < num; i++) {
- const char* cur_ptr = data_ptr + value_size * i;
- uint64_t value = 0;
- value = *(unsigned char*)(cur_ptr + 2);
- value <<= 8;
- value |= *(unsigned char*)(cur_ptr + 1);
- value <<= 8;
- value |= *(unsigned char*)(cur_ptr);
- vectorized::VecDateTimeValue date = VecDateTimeValue::create_from_olap_date(value);
- this->insert_data(reinterpret_cast<char*>(&date), 0);
+ uint64_t val = 0;
+ memcpy((char*)(&val), data_ptr, input_value_size);
+ data_ptr += input_value_size;
+
+ VecDateTimeValue date;
+ date.set_olap_date(val);
+ data.push_back_without_reserve(unaligned_load<Int64>(reinterpret_cast<char*>(&date)));
}
}
diff --git a/be/src/vec/columns/predicate_column.h b/be/src/vec/columns/predicate_column.h
index eec3f1def7..27b1b7e5e0 100644
--- a/be/src/vec/columns/predicate_column.h
+++ b/be/src/vec/columns/predicate_column.h
@@ -63,6 +63,20 @@ private:
}
}
+ void insert_date32_to_res_column(const uint16_t* sel, size_t sel_size,
+ vectorized::ColumnVector<Int64>* res_ptr) {
+ res_ptr->reserve(sel_size);
+ auto& res_data = res_ptr->get_data();
+
+ for (size_t i = 0; i < sel_size; i++) {
+ uint64_t val = data[sel[i]];
+ VecDateTimeValue date;
+ date.set_olap_date(val);
+ res_data.push_back_without_reserve(
+ unaligned_load<Int64>(reinterpret_cast<char*>(&date)));
+ }
+ }
+
void insert_datetime_to_res_column(const uint16_t* sel, size_t sel_size,
vectorized::ColumnVector<Int64>* res_ptr) {
for (size_t i = 0; i < sel_size; i++) {
@@ -205,6 +219,21 @@ public:
}
}
+ void insert_many_date(const char* data_ptr, size_t num) {
+ size_t intput_type_size = sizeof(uint24_t);
+ size_t res_type_size = sizeof(uint32_t);
+ char* input_data_ptr = const_cast<char*>(data_ptr);
+
+ char* res_ptr = (char*)data.get_end_ptr();
+ memset(res_ptr, 0, res_type_size * num);
+ for (int i = 0; i < num; i++) {
+ memcpy(res_ptr, input_data_ptr, intput_type_size);
+ res_ptr += res_type_size;
+ input_data_ptr += intput_type_size;
+ }
+ data.set_end_ptr(res_ptr);
+ }
+
void insert_many_fix_len_data(const char* data_ptr, size_t num) override {
if constexpr (std::is_same_v<T, decimal12_t>) {
insert_many_in_copy_way(data_ptr, num);
@@ -212,6 +241,10 @@ public:
insert_many_in_copy_way(data_ptr, num);
} else if constexpr (std::is_same_v<T, StringValue>) {
// here is unreachable, just for compilation to be able to pass
+ } else if constexpr (std::is_same_v<
+ T,
+ uint32_t>) { // todo(wb) a trick type judge here,need refactor
+ insert_many_date(data_ptr, num);
} else {
insert_many_default_type(data_ptr, num);
}
@@ -405,6 +438,9 @@ public:
} else if constexpr (std::is_same_v<T, uint24_t>) {
insert_date_to_res_column(sel, sel_size,
reinterpret_cast<vectorized::ColumnVector<Int64>*>(col_ptr));
+ } else if constexpr (std::is_same_v<T, uint32_t>) { // a trick type judge, need refactor it.
+ insert_date32_to_res_column(
+ sel, sel_size, reinterpret_cast<vectorized::ColumnVector<Int64>*>(col_ptr));
} else if constexpr (std::is_same_v<T, doris::vectorized::Int128>) {
insert_default_value_res_column(
sel, sel_size,
diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h
index 126b36f5b4..4c1f4cc137 100644
--- a/be/src/vec/runtime/vdatetime_value.h
+++ b/be/src/vec/runtime/vdatetime_value.h
@@ -223,6 +223,19 @@ public:
return check_range_and_set_time(year, month, day, hour, minute, second, _type);
}
+ // NOTE(wb): this method does not validate the decoded year/month/day values.
+ void inline set_olap_date(uint64_t olap_date_val) {
+ _neg = 0;
+ _type = TIME_DATE;
+
+ _day = olap_date_val & 0x1f;
+ _month = (olap_date_val >> 5) & 0x0f;
+ _year = olap_date_val >> 9;
+ _hour = 0;
+ _minute = 0;
+ _second = 0;
+ }
+
uint64_t to_olap_date() const {
uint64_t val;
val = _year;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org