You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/04/18 11:34:11 UTC
[incubator-doris] branch master updated: [fix](storage)(vectorized) query get wrong result when read datetime type column (#8872)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 681f960257 [fix](storage)(vectorized) query get wrong result when read datetime type column (#8872)
681f960257 is described below
commit 681f9602575492fd2a2c81bff5a13ddf61401ab0
Author: Pxl <95...@qq.com>
AuthorDate: Mon Apr 18 19:34:06 2022 +0800
[fix](storage)(vectorized) query get wrong result when read datetime type column (#8872)
---
be/src/olap/row_block2.cpp | 25 ++--
.../aggregate_function_window_funnel.h | 6 +-
be/src/vec/columns/column_vector.h | 9 +-
be/src/vec/columns/predicate_column.h | 8 +-
be/src/vec/exec/volap_scanner.cpp | 135 ---------------------
be/src/vec/exec/volap_scanner.h | 2 -
be/src/vec/functions/function_rpc.cpp | 14 ++-
be/src/vec/functions/function_timestamp.cpp | 5 +-
be/src/vec/runtime/vdatetime_value.h | 28 ++++-
9 files changed, 64 insertions(+), 168 deletions(-)
diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp
index 4f2feec37b..8beca02192 100644
--- a/be/src/olap/row_block2.cpp
+++ b/be/src/olap/row_block2.cpp
@@ -236,11 +236,12 @@ Status RowBlockV2::_copy_data_to_column(int cid,
value |= *(unsigned char*)(ptr + 1);
value <<= 8;
value |= *(unsigned char*)(ptr);
- vectorized::VecDateTimeValue date;
- date.from_olap_date(value);
+ vectorized::VecDateTimeValue date =
+ vectorized::VecDateTimeValue::create_from_olap_date(value);
(column_int)->insert_data(reinterpret_cast<char*>(&date), 0);
- } else
+ } else {
column_int->insert_default();
+ }
}
break;
}
@@ -253,9 +254,9 @@ Status RowBlockV2::_copy_data_to_column(int cid,
auto ptr = reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx));
uint64_t value = *reinterpret_cast<const uint64_t*>(ptr);
- vectorized::VecDateTimeValue data;
- data.from_olap_datetime(value);
- (column_int)->insert_data(reinterpret_cast<char*>(&data), 0);
+ vectorized::VecDateTimeValue datetime =
+ vectorized::VecDateTimeValue::create_from_olap_datetime(value);
+ (column_int)->insert_data(reinterpret_cast<char*>(&datetime), 0);
} else {
column_int->insert_default();
}
@@ -498,11 +499,12 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, size_t
value |= *(unsigned char*)(ptr + 1);
value <<= 8;
value |= *(unsigned char*)(ptr);
- vectorized::VecDateTimeValue date;
- date.from_olap_date(value);
+ vectorized::VecDateTimeValue date =
+ vectorized::VecDateTimeValue::create_from_olap_date(value);
(column_int)->insert_data(reinterpret_cast<char*>(&date), 0);
- } else
+ } else {
column_int->insert_default();
+ }
}
break;
}
@@ -515,8 +517,9 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, size_t
auto ptr = reinterpret_cast<const char*>(batch->cell_ptr(row_idx));
uint64_t value = *reinterpret_cast<const uint64_t*>(ptr);
- vectorized::VecDateTimeValue data(value);
- (column_int)->insert_data(reinterpret_cast<char*>(&data), 0);
+ vectorized::VecDateTimeValue datetime =
+ vectorized::VecDateTimeValue::create_from_olap_datetime(value);
+ (column_int)->insert_data(reinterpret_cast<char*>(&datetime), 0);
} else {
column_int->insert_default();
}
diff --git a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h
index f4364eebb4..8f42a3398c 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h
@@ -132,7 +132,8 @@ struct WindowFunnelState {
write_var_int(events.size(), out);
for (int64_t i = 0; i < events.size(); i++) {
- int64_t timestamp = events[i].first;
+ int64_t timestamp =
+ binary_cast<vectorized::VecDateTimeValue, vectorized::Int64>(events[i].first);
int event_idx = events[i].second;
write_var_int(timestamp, out);
write_var_int(event_idx, out);
@@ -152,7 +153,8 @@ struct WindowFunnelState {
read_var_int(timestamp, in);
read_var_int(event_idx, in);
- VecDateTimeValue time_value(timestamp);
+ VecDateTimeValue time_value =
+ binary_cast<vectorized::Int64, vectorized::VecDateTimeValue>(timestamp);
add(time_value, (int)event_idx, max_event_level, window);
}
}
diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h
index 216e8fd0ed..e57ffe4a9c 100644
--- a/be/src/vec/columns/column_vector.h
+++ b/be/src/vec/columns/column_vector.h
@@ -21,6 +21,7 @@
#pragma once
#include <cmath>
+#include <type_traits>
#include "olap/uint24.h"
#include "vec/columns/column.h"
@@ -177,8 +178,7 @@ public:
value |= *(unsigned char*)(cur_ptr + 1);
value <<= 8;
value |= *(unsigned char*)(cur_ptr);
- vectorized::VecDateTimeValue date;
- date.from_olap_date(value);
+ vectorized::VecDateTimeValue date = VecDateTimeValue::create_from_olap_date(value);
this->insert_data(reinterpret_cast<char*>(&date), 0);
}
}
@@ -188,8 +188,9 @@ public:
for (int i = 0; i < num; i++) {
const char* cur_ptr = data_ptr + value_size * i;
uint64_t value = *reinterpret_cast<const uint64_t*>(cur_ptr);
- vectorized::VecDateTimeValue date(value);
- this->insert_data(reinterpret_cast<char*>(&date), 0);
+ vectorized::VecDateTimeValue datetime =
+ VecDateTimeValue::create_from_olap_datetime(value);
+ this->insert_data(reinterpret_cast<char*>(&datetime), 0);
}
}
diff --git a/be/src/vec/columns/predicate_column.h b/be/src/vec/columns/predicate_column.h
index f41c239b5e..7db73b9d0a 100644
--- a/be/src/vec/columns/predicate_column.h
+++ b/be/src/vec/columns/predicate_column.h
@@ -58,8 +58,7 @@ private:
void insert_date_to_res_column(const uint16_t* sel, size_t sel_size,
vectorized::ColumnVector<Int64>* res_ptr) {
for (size_t i = 0; i < sel_size; i++) {
- VecDateTimeValue date;
- date.from_olap_date(get_date_at(sel[i]));
+ VecDateTimeValue date = VecDateTimeValue::create_from_olap_date(get_date_at(sel[i]));
res_ptr->insert_data(reinterpret_cast<char*>(&date), 0);
}
}
@@ -68,8 +67,9 @@ private:
vectorized::ColumnVector<Int64>* res_ptr) {
for (size_t i = 0; i < sel_size; i++) {
uint64_t value = data[sel[i]];
- vectorized::VecDateTimeValue date(value);
- res_ptr->insert_data(reinterpret_cast<char*>(&date), 0);
+ vectorized::VecDateTimeValue datetime =
+ VecDateTimeValue::create_from_olap_datetime(value);
+ res_ptr->insert_data(reinterpret_cast<char*>(&datetime), 0);
}
}
diff --git a/be/src/vec/exec/volap_scanner.cpp b/be/src/vec/exec/volap_scanner.cpp
index c29d1aa60e..410fa8f6b6 100644
--- a/be/src/vec/exec/volap_scanner.cpp
+++ b/be/src/vec/exec/volap_scanner.cpp
@@ -83,139 +83,4 @@ Status VOlapScanner::get_block(RuntimeState* state, vectorized::Block* block, bo
void VOlapScanner::set_tablet_reader() {
_tablet_reader = std::make_unique<BlockReader>();
}
-
-void VOlapScanner::_convert_row_to_block(std::vector<vectorized::MutableColumnPtr>* columns) {
- size_t slots_size = _query_slots.size();
- for (int i = 0; i < slots_size; ++i) {
- SlotDescriptor* slot_desc = _query_slots[i];
- auto cid = _return_columns[i];
-
- auto* column_ptr = (*columns)[i].get();
- if (slot_desc->is_nullable()) {
- auto* nullable_column = reinterpret_cast<ColumnNullable*>((*columns)[i].get());
- if (_read_row_cursor.is_null(cid)) {
- nullable_column->insert_data(nullptr, 0);
- continue;
- } else {
- nullable_column->get_null_map_data().push_back(0);
- column_ptr = &nullable_column->get_nested_column();
- }
- }
-
- char* ptr = (char*)_read_row_cursor.cell_ptr(cid);
- switch (slot_desc->type().type) {
- case TYPE_BOOLEAN: {
- assert_cast<ColumnVector<UInt8>*>(column_ptr)->insert_data(ptr, 0);
- break;
- }
- case TYPE_TINYINT: {
- assert_cast<ColumnVector<Int8>*>(column_ptr)->insert_data(ptr, 0);
- break;
- }
- case TYPE_SMALLINT: {
- assert_cast<ColumnVector<Int16>*>(column_ptr)->insert_data(ptr, 0);
- break;
- }
- case TYPE_INT: {
- assert_cast<ColumnVector<Int32>*>(column_ptr)->insert_data(ptr, 0);
- break;
- }
- case TYPE_BIGINT: {
- assert_cast<ColumnVector<Int64>*>(column_ptr)->insert_data(ptr, 0);
- break;
- }
- case TYPE_LARGEINT: {
- assert_cast<ColumnVector<Int128>*>(column_ptr)->insert_data(ptr, 0);
- break;
- }
- case TYPE_FLOAT: {
- assert_cast<ColumnVector<Float32>*>(column_ptr)->insert_data(ptr, 0);
- break;
- }
- case TYPE_DOUBLE: {
- assert_cast<ColumnVector<Float64>*>(column_ptr)->insert_data(ptr, 0);
- break;
- }
- case TYPE_CHAR: {
- Slice* slice = reinterpret_cast<Slice*>(ptr);
- assert_cast<ColumnString*>(column_ptr)
- ->insert_data(slice->data, strnlen(slice->data, slice->size));
- break;
- }
- case TYPE_VARCHAR:
- case TYPE_STRING: {
- Slice* slice = reinterpret_cast<Slice*>(ptr);
- assert_cast<ColumnString*>(column_ptr)->insert_data(slice->data, slice->size);
- break;
- }
- case TYPE_OBJECT: {
- Slice* slice = reinterpret_cast<Slice*>(ptr);
- // insert_default()
- auto* target_column = assert_cast<ColumnBitmap*>(column_ptr);
-
- target_column->insert_default();
- BitmapValue* pvalue = nullptr;
- int pos = target_column->size() - 1;
- pvalue = &target_column->get_element(pos);
-
- if (slice->size != 0) {
- BitmapValue value;
- value.deserialize(slice->data);
- *pvalue = std::move(value);
- } else {
- *pvalue = std::move(*reinterpret_cast<BitmapValue*>(slice->data));
- }
- break;
- }
- case TYPE_HLL: {
- Slice* slice = reinterpret_cast<Slice*>(ptr);
- auto* target_column = assert_cast<ColumnHLL*>(column_ptr);
-
- target_column->insert_default();
- HyperLogLog* pvalue = nullptr;
- int pos = target_column->size() - 1;
- pvalue = &target_column->get_element(pos);
- if (slice->size != 0) {
- HyperLogLog value;
- value.deserialize(*slice);
- *pvalue = std::move(value);
- } else {
- *pvalue = std::move(*reinterpret_cast<HyperLogLog*>(slice->data));
- }
- break;
- }
- case TYPE_DECIMALV2: {
- int64_t int_value = *(int64_t*)(ptr);
- int32_t frac_value = *(int32_t*)(ptr + sizeof(int64_t));
- DecimalV2Value data(int_value, frac_value);
- assert_cast<ColumnDecimal<Decimal128>*>(column_ptr)
- ->insert_data(reinterpret_cast<char*>(&data), 0);
- break;
- }
- case TYPE_DATETIME: {
- uint64_t value = *reinterpret_cast<uint64_t*>(ptr);
- VecDateTimeValue data(value);
- assert_cast<ColumnVector<Int64>*>(column_ptr)
- ->insert_data(reinterpret_cast<char*>(&data), 0);
- break;
- }
- case TYPE_DATE: {
- uint64_t value = 0;
- value = *(unsigned char*)(ptr + 2);
- value <<= 8;
- value |= *(unsigned char*)(ptr + 1);
- value <<= 8;
- value |= *(unsigned char*)(ptr);
- VecDateTimeValue date;
- date.from_olap_date(value);
- assert_cast<ColumnVector<Int64>*>(column_ptr)
- ->insert_data(reinterpret_cast<char*>(&date), 0);
- break;
- }
- default: {
- break;
- }
- }
- }
-}
} // namespace doris::vectorized
diff --git a/be/src/vec/exec/volap_scanner.h b/be/src/vec/exec/volap_scanner.h
index b6ef7e32ff..8c1ccca99e 100644
--- a/be/src/vec/exec/volap_scanner.h
+++ b/be/src/vec/exec/volap_scanner.h
@@ -52,8 +52,6 @@ protected:
virtual void set_tablet_reader() override;
private:
- // TODO: Remove this function after we finish reader vec
- void _convert_row_to_block(std::vector<vectorized::MutableColumnPtr>* columns);
VExprContext* _vconjunct_ctx = nullptr;
bool _need_to_close = false;
};
diff --git a/be/src/vec/functions/function_rpc.cpp b/be/src/vec/functions/function_rpc.cpp
index 9208ae2295..9b2e11d08a 100644
--- a/be/src/vec/functions/function_rpc.cpp
+++ b/be/src/vec/functions/function_rpc.cpp
@@ -231,13 +231,16 @@ void convert_col_to_pvalue(const ColumnPtr& column, const DataTypePtr& data_type
PDateTime* date_time = arg->add_datetime_value();
if constexpr (nullable) {
if (!column->is_null_at(row_num)) {
- VecDateTimeValue v = VecDateTimeValue(column->get_int(row_num));
+ VecDateTimeValue v =
+ binary_cast<vectorized::Int64, vectorized::VecDateTimeValue>(
+ column->get_int(row_num));
date_time->set_day(v.day());
date_time->set_month(v.month());
date_time->set_year(v.year());
}
} else {
- VecDateTimeValue v = VecDateTimeValue(column->get_int(row_num));
+ VecDateTimeValue v = binary_cast<vectorized::Int64, vectorized::VecDateTimeValue>(
+ column->get_int(row_num));
date_time->set_day(v.day());
date_time->set_month(v.month());
date_time->set_year(v.year());
@@ -252,7 +255,9 @@ void convert_col_to_pvalue(const ColumnPtr& column, const DataTypePtr& data_type
PDateTime* date_time = arg->add_datetime_value();
if constexpr (nullable) {
if (!column->is_null_at(row_num)) {
- VecDateTimeValue v = VecDateTimeValue(column->get_int(row_num));
+ VecDateTimeValue v =
+ binary_cast<vectorized::Int64, vectorized::VecDateTimeValue>(
+ column->get_int(row_num));
date_time->set_day(v.day());
date_time->set_month(v.month());
date_time->set_year(v.year());
@@ -261,7 +266,8 @@ void convert_col_to_pvalue(const ColumnPtr& column, const DataTypePtr& data_type
date_time->set_second(v.second());
}
} else {
- VecDateTimeValue v = VecDateTimeValue(column->get_int(row_num));
+ VecDateTimeValue v = binary_cast<vectorized::Int64, vectorized::VecDateTimeValue>(
+ column->get_int(row_num));
date_time->set_day(v.day());
date_time->set_month(v.month());
date_time->set_year(v.year());
diff --git a/be/src/vec/functions/function_timestamp.cpp b/be/src/vec/functions/function_timestamp.cpp
index 45dc20a43f..22f159bd8e 100644
--- a/be/src/vec/functions/function_timestamp.cpp
+++ b/be/src/vec/functions/function_timestamp.cpp
@@ -89,8 +89,9 @@ struct MakeDateImpl {
auto& res_val = *reinterpret_cast<VecDateTimeValue*>(&res[i]);
- VecDateTimeValue ts_value {l * 10000000000 + 101000000};
- ts_value.set_type(TIME_DATE);
+ VecDateTimeValue ts_value = VecDateTimeValue();
+ ts_value.set_time(l, 1, 1, 0, 0, 0);
+
DateTimeVal ts_val;
ts_value.to_datetime_val(&ts_val);
if (ts_val.is_null) {
diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h
index dd2e189493..728556186c 100644
--- a/be/src/vec/runtime/vdatetime_value.h
+++ b/be/src/vec/runtime/vdatetime_value.h
@@ -134,10 +134,10 @@ constexpr size_t max_char_length(const char* const* name, size_t end) {
static constexpr const char* s_month_name[] = {
"", "January", "February", "March", "April", "May", "June",
- "July", "August", "September", "October", "November", "December", NULL};
+ "July", "August", "September", "October", "November", "December", nullptr};
static constexpr const char* s_day_name[] = {"Monday", "Tuesday", "Wednesday", "Thursday",
- "Friday", "Saturday", "Sunday", NULL};
+ "Friday", "Saturday", "Sunday", nullptr};
static constexpr size_t MAX_DAY_NAME_LEN = max_char_length(s_day_name, std::size(s_day_name));
static constexpr size_t MAX_MONTH_NAME_LEN = max_char_length(s_month_name, std::size(s_month_name));
@@ -157,7 +157,27 @@ public:
_month(0), // so this is a difference between Vectorization mode and Rowbatch mode with DateTimeValue;
_year(0) {} // before int128 16 bytes ---> after int64 8 bytes
- explicit VecDateTimeValue(int64_t t) { from_date_int64(t); }
+ // The data format of DATE/DATETIME differs between the storage layer and the execution layer,
+ // so we must use a different creator depending on where the value comes from.
+ // Use create_from_olap_xxx only to convert binary data scanned from the storage engine into typed data.
+ // In all other cases, just use binary_cast<vectorized::Int64, vectorized::VecDateTimeValue>.
+
+ // olap storage layer date data format:
+ // 64-bit binary data [year(remaining bits), month(4 bits), day(5 bits)]
+ // execute layer date/datetime and olap storage layer datetime data format:
+ // 8-byte integer data [year(remaining digits), month(2 digits), day(2 digits), hour(2 digits), minute(2 digits), second(2 digits)]
+
+ static VecDateTimeValue create_from_olap_date(uint64_t value) {
+ VecDateTimeValue date;
+ date.from_olap_date(value);
+ return date;
+ }
+
+ static VecDateTimeValue create_from_olap_datetime(uint64_t value) {
+ VecDateTimeValue datetime;
+ datetime.from_olap_datetime(value);
+ return datetime;
+ }
void set_time(uint32_t year, uint32_t month, uint32_t day, uint32_t hour, uint32_t minute,
uint32_t second);
@@ -594,7 +614,7 @@ private:
char* to_date_buffer(char* to) const;
char* to_time_buffer(char* to) const;
- // Used to convert to uint64_t
+ // Used to convert to int64_t
int64_t to_datetime_int64() const;
int64_t to_date_int64() const;
int64_t to_time_int64() const;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org