You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/04/18 11:34:11 UTC

[incubator-doris] branch master updated: [fix](storage)(vectorized) query get wrong result when read datetime type column (#8872)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 681f960257 [fix](storage)(vectorized) query get wrong result when read datetime type column (#8872)
681f960257 is described below

commit 681f9602575492fd2a2c81bff5a13ddf61401ab0
Author: Pxl <95...@qq.com>
AuthorDate: Mon Apr 18 19:34:06 2022 +0800

    [fix](storage)(vectorized) query get wrong result when read datetime type column (#8872)
---
 be/src/olap/row_block2.cpp                         |  25 ++--
 .../aggregate_function_window_funnel.h             |   6 +-
 be/src/vec/columns/column_vector.h                 |   9 +-
 be/src/vec/columns/predicate_column.h              |   8 +-
 be/src/vec/exec/volap_scanner.cpp                  | 135 ---------------------
 be/src/vec/exec/volap_scanner.h                    |   2 -
 be/src/vec/functions/function_rpc.cpp              |  14 ++-
 be/src/vec/functions/function_timestamp.cpp        |   5 +-
 be/src/vec/runtime/vdatetime_value.h               |  28 ++++-
 9 files changed, 64 insertions(+), 168 deletions(-)

diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp
index 4f2feec37b..8beca02192 100644
--- a/be/src/olap/row_block2.cpp
+++ b/be/src/olap/row_block2.cpp
@@ -236,11 +236,12 @@ Status RowBlockV2::_copy_data_to_column(int cid,
                 value |= *(unsigned char*)(ptr + 1);
                 value <<= 8;
                 value |= *(unsigned char*)(ptr);
-                vectorized::VecDateTimeValue date;
-                date.from_olap_date(value);
+                vectorized::VecDateTimeValue date =
+                        vectorized::VecDateTimeValue::create_from_olap_date(value);
                 (column_int)->insert_data(reinterpret_cast<char*>(&date), 0);
-            } else
+            } else {
                 column_int->insert_default();
+            }
         }
         break;
     }
@@ -253,9 +254,9 @@ Status RowBlockV2::_copy_data_to_column(int cid,
                 auto ptr = reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx));
 
                 uint64_t value = *reinterpret_cast<const uint64_t*>(ptr);
-                vectorized::VecDateTimeValue data;
-                data.from_olap_datetime(value);
-                (column_int)->insert_data(reinterpret_cast<char*>(&data), 0);
+                vectorized::VecDateTimeValue datetime =
+                        vectorized::VecDateTimeValue::create_from_olap_datetime(value);
+                (column_int)->insert_data(reinterpret_cast<char*>(&datetime), 0);
             } else {
                 column_int->insert_default();
             }
@@ -498,11 +499,12 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, size_t
                 value |= *(unsigned char*)(ptr + 1);
                 value <<= 8;
                 value |= *(unsigned char*)(ptr);
-                vectorized::VecDateTimeValue date;
-                date.from_olap_date(value);
+                vectorized::VecDateTimeValue date =
+                        vectorized::VecDateTimeValue::create_from_olap_date(value);
                 (column_int)->insert_data(reinterpret_cast<char*>(&date), 0);
-            } else
+            } else {
                 column_int->insert_default();
+            }
         }
         break;
     }
@@ -515,8 +517,9 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, size_t
                 auto ptr = reinterpret_cast<const char*>(batch->cell_ptr(row_idx));
 
                 uint64_t value = *reinterpret_cast<const uint64_t*>(ptr);
-                vectorized::VecDateTimeValue data(value);
-                (column_int)->insert_data(reinterpret_cast<char*>(&data), 0);
+                vectorized::VecDateTimeValue datetime =
+                        vectorized::VecDateTimeValue::create_from_olap_datetime(value);
+                (column_int)->insert_data(reinterpret_cast<char*>(&datetime), 0);
             } else {
                 column_int->insert_default();
             }
diff --git a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h
index f4364eebb4..8f42a3398c 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h
@@ -132,7 +132,8 @@ struct WindowFunnelState {
         write_var_int(events.size(), out);
 
         for (int64_t i = 0; i < events.size(); i++) {
-            int64_t timestamp = events[i].first;
+            int64_t timestamp =
+                    binary_cast<vectorized::VecDateTimeValue, vectorized::Int64>(events[i].first);
             int event_idx = events[i].second;
             write_var_int(timestamp, out);
             write_var_int(event_idx, out);
@@ -152,7 +153,8 @@ struct WindowFunnelState {
 
             read_var_int(timestamp, in);
             read_var_int(event_idx, in);
-            VecDateTimeValue time_value(timestamp);
+            VecDateTimeValue time_value =
+                    binary_cast<vectorized::Int64, vectorized::VecDateTimeValue>(timestamp);
             add(time_value, (int)event_idx, max_event_level, window);
         }
     }
diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h
index 216e8fd0ed..e57ffe4a9c 100644
--- a/be/src/vec/columns/column_vector.h
+++ b/be/src/vec/columns/column_vector.h
@@ -21,6 +21,7 @@
 #pragma once
 
 #include <cmath>
+#include <type_traits>
 
 #include "olap/uint24.h"
 #include "vec/columns/column.h"
@@ -177,8 +178,7 @@ public:
             value |= *(unsigned char*)(cur_ptr + 1);
             value <<= 8;
             value |= *(unsigned char*)(cur_ptr);
-            vectorized::VecDateTimeValue date;
-            date.from_olap_date(value);
+            vectorized::VecDateTimeValue date = VecDateTimeValue::create_from_olap_date(value);
             this->insert_data(reinterpret_cast<char*>(&date), 0);
         }
     }
@@ -188,8 +188,9 @@ public:
         for (int i = 0; i < num; i++) {
             const char* cur_ptr = data_ptr + value_size * i;
             uint64_t value = *reinterpret_cast<const uint64_t*>(cur_ptr);
-            vectorized::VecDateTimeValue date(value);
-            this->insert_data(reinterpret_cast<char*>(&date), 0);
+            vectorized::VecDateTimeValue datetime =
+                    VecDateTimeValue::create_from_olap_datetime(value);
+            this->insert_data(reinterpret_cast<char*>(&datetime), 0);
         }
     }
 
diff --git a/be/src/vec/columns/predicate_column.h b/be/src/vec/columns/predicate_column.h
index f41c239b5e..7db73b9d0a 100644
--- a/be/src/vec/columns/predicate_column.h
+++ b/be/src/vec/columns/predicate_column.h
@@ -58,8 +58,7 @@ private:
     void insert_date_to_res_column(const uint16_t* sel, size_t sel_size,
                                    vectorized::ColumnVector<Int64>* res_ptr) {
         for (size_t i = 0; i < sel_size; i++) {
-            VecDateTimeValue date;
-            date.from_olap_date(get_date_at(sel[i]));
+            VecDateTimeValue date = VecDateTimeValue::create_from_olap_date(get_date_at(sel[i]));
             res_ptr->insert_data(reinterpret_cast<char*>(&date), 0);
         }
     }
@@ -68,8 +67,9 @@ private:
                                        vectorized::ColumnVector<Int64>* res_ptr) {
         for (size_t i = 0; i < sel_size; i++) {
             uint64_t value = data[sel[i]];
-            vectorized::VecDateTimeValue date(value);
-            res_ptr->insert_data(reinterpret_cast<char*>(&date), 0);
+            vectorized::VecDateTimeValue datetime =
+                    VecDateTimeValue::create_from_olap_datetime(value);
+            res_ptr->insert_data(reinterpret_cast<char*>(&datetime), 0);
         }
     }
 
diff --git a/be/src/vec/exec/volap_scanner.cpp b/be/src/vec/exec/volap_scanner.cpp
index c29d1aa60e..410fa8f6b6 100644
--- a/be/src/vec/exec/volap_scanner.cpp
+++ b/be/src/vec/exec/volap_scanner.cpp
@@ -83,139 +83,4 @@ Status VOlapScanner::get_block(RuntimeState* state, vectorized::Block* block, bo
 void VOlapScanner::set_tablet_reader() {
     _tablet_reader = std::make_unique<BlockReader>();
 }
-
-void VOlapScanner::_convert_row_to_block(std::vector<vectorized::MutableColumnPtr>* columns) {
-    size_t slots_size = _query_slots.size();
-    for (int i = 0; i < slots_size; ++i) {
-        SlotDescriptor* slot_desc = _query_slots[i];
-        auto cid = _return_columns[i];
-
-        auto* column_ptr = (*columns)[i].get();
-        if (slot_desc->is_nullable()) {
-            auto* nullable_column = reinterpret_cast<ColumnNullable*>((*columns)[i].get());
-            if (_read_row_cursor.is_null(cid)) {
-                nullable_column->insert_data(nullptr, 0);
-                continue;
-            } else {
-                nullable_column->get_null_map_data().push_back(0);
-                column_ptr = &nullable_column->get_nested_column();
-            }
-        }
-
-        char* ptr = (char*)_read_row_cursor.cell_ptr(cid);
-        switch (slot_desc->type().type) {
-        case TYPE_BOOLEAN: {
-            assert_cast<ColumnVector<UInt8>*>(column_ptr)->insert_data(ptr, 0);
-            break;
-        }
-        case TYPE_TINYINT: {
-            assert_cast<ColumnVector<Int8>*>(column_ptr)->insert_data(ptr, 0);
-            break;
-        }
-        case TYPE_SMALLINT: {
-            assert_cast<ColumnVector<Int16>*>(column_ptr)->insert_data(ptr, 0);
-            break;
-        }
-        case TYPE_INT: {
-            assert_cast<ColumnVector<Int32>*>(column_ptr)->insert_data(ptr, 0);
-            break;
-        }
-        case TYPE_BIGINT: {
-            assert_cast<ColumnVector<Int64>*>(column_ptr)->insert_data(ptr, 0);
-            break;
-        }
-        case TYPE_LARGEINT: {
-            assert_cast<ColumnVector<Int128>*>(column_ptr)->insert_data(ptr, 0);
-            break;
-        }
-        case TYPE_FLOAT: {
-            assert_cast<ColumnVector<Float32>*>(column_ptr)->insert_data(ptr, 0);
-            break;
-        }
-        case TYPE_DOUBLE: {
-            assert_cast<ColumnVector<Float64>*>(column_ptr)->insert_data(ptr, 0);
-            break;
-        }
-        case TYPE_CHAR: {
-            Slice* slice = reinterpret_cast<Slice*>(ptr);
-            assert_cast<ColumnString*>(column_ptr)
-                    ->insert_data(slice->data, strnlen(slice->data, slice->size));
-            break;
-        }
-        case TYPE_VARCHAR:
-        case TYPE_STRING: {
-            Slice* slice = reinterpret_cast<Slice*>(ptr);
-            assert_cast<ColumnString*>(column_ptr)->insert_data(slice->data, slice->size);
-            break;
-        }
-        case TYPE_OBJECT: {
-            Slice* slice = reinterpret_cast<Slice*>(ptr);
-            // insert_default()
-            auto* target_column = assert_cast<ColumnBitmap*>(column_ptr);
-
-            target_column->insert_default();
-            BitmapValue* pvalue = nullptr;
-            int pos = target_column->size() - 1;
-            pvalue = &target_column->get_element(pos);
-
-            if (slice->size != 0) {
-                BitmapValue value;
-                value.deserialize(slice->data);
-                *pvalue = std::move(value);
-            } else {
-                *pvalue = std::move(*reinterpret_cast<BitmapValue*>(slice->data));
-            }
-            break;
-        }
-        case TYPE_HLL: {
-            Slice* slice = reinterpret_cast<Slice*>(ptr);
-            auto* target_column = assert_cast<ColumnHLL*>(column_ptr);
-
-            target_column->insert_default();
-            HyperLogLog* pvalue = nullptr;
-            int pos = target_column->size() - 1;
-            pvalue = &target_column->get_element(pos);
-            if (slice->size != 0) {
-                HyperLogLog value;
-                value.deserialize(*slice);
-                *pvalue = std::move(value);
-            } else {
-                *pvalue = std::move(*reinterpret_cast<HyperLogLog*>(slice->data));
-            }
-            break;
-        }
-        case TYPE_DECIMALV2: {
-            int64_t int_value = *(int64_t*)(ptr);
-            int32_t frac_value = *(int32_t*)(ptr + sizeof(int64_t));
-            DecimalV2Value data(int_value, frac_value);
-            assert_cast<ColumnDecimal<Decimal128>*>(column_ptr)
-                    ->insert_data(reinterpret_cast<char*>(&data), 0);
-            break;
-        }
-        case TYPE_DATETIME: {
-            uint64_t value = *reinterpret_cast<uint64_t*>(ptr);
-            VecDateTimeValue data(value);
-            assert_cast<ColumnVector<Int64>*>(column_ptr)
-                    ->insert_data(reinterpret_cast<char*>(&data), 0);
-            break;
-        }
-        case TYPE_DATE: {
-            uint64_t value = 0;
-            value = *(unsigned char*)(ptr + 2);
-            value <<= 8;
-            value |= *(unsigned char*)(ptr + 1);
-            value <<= 8;
-            value |= *(unsigned char*)(ptr);
-            VecDateTimeValue date;
-            date.from_olap_date(value);
-            assert_cast<ColumnVector<Int64>*>(column_ptr)
-                    ->insert_data(reinterpret_cast<char*>(&date), 0);
-            break;
-        }
-        default: {
-            break;
-        }
-        }
-    }
-}
 } // namespace doris::vectorized
diff --git a/be/src/vec/exec/volap_scanner.h b/be/src/vec/exec/volap_scanner.h
index b6ef7e32ff..8c1ccca99e 100644
--- a/be/src/vec/exec/volap_scanner.h
+++ b/be/src/vec/exec/volap_scanner.h
@@ -52,8 +52,6 @@ protected:
     virtual void set_tablet_reader() override;
 
 private:
-    // TODO: Remove this function after we finish reader vec
-    void _convert_row_to_block(std::vector<vectorized::MutableColumnPtr>* columns);
     VExprContext* _vconjunct_ctx = nullptr;
     bool _need_to_close = false;
 };
diff --git a/be/src/vec/functions/function_rpc.cpp b/be/src/vec/functions/function_rpc.cpp
index 9208ae2295..9b2e11d08a 100644
--- a/be/src/vec/functions/function_rpc.cpp
+++ b/be/src/vec/functions/function_rpc.cpp
@@ -231,13 +231,16 @@ void convert_col_to_pvalue(const ColumnPtr& column, const DataTypePtr& data_type
             PDateTime* date_time = arg->add_datetime_value();
             if constexpr (nullable) {
                 if (!column->is_null_at(row_num)) {
-                    VecDateTimeValue v = VecDateTimeValue(column->get_int(row_num));
+                    VecDateTimeValue v =
+                            binary_cast<vectorized::Int64, vectorized::VecDateTimeValue>(
+                                    column->get_int(row_num));
                     date_time->set_day(v.day());
                     date_time->set_month(v.month());
                     date_time->set_year(v.year());
                 }
             } else {
-                VecDateTimeValue v = VecDateTimeValue(column->get_int(row_num));
+                VecDateTimeValue v = binary_cast<vectorized::Int64, vectorized::VecDateTimeValue>(
+                        column->get_int(row_num));
                 date_time->set_day(v.day());
                 date_time->set_month(v.month());
                 date_time->set_year(v.year());
@@ -252,7 +255,9 @@ void convert_col_to_pvalue(const ColumnPtr& column, const DataTypePtr& data_type
             PDateTime* date_time = arg->add_datetime_value();
             if constexpr (nullable) {
                 if (!column->is_null_at(row_num)) {
-                    VecDateTimeValue v = VecDateTimeValue(column->get_int(row_num));
+                    VecDateTimeValue v =
+                            binary_cast<vectorized::Int64, vectorized::VecDateTimeValue>(
+                                    column->get_int(row_num));
                     date_time->set_day(v.day());
                     date_time->set_month(v.month());
                     date_time->set_year(v.year());
@@ -261,7 +266,8 @@ void convert_col_to_pvalue(const ColumnPtr& column, const DataTypePtr& data_type
                     date_time->set_second(v.second());
                 }
             } else {
-                VecDateTimeValue v = VecDateTimeValue(column->get_int(row_num));
+                VecDateTimeValue v = binary_cast<vectorized::Int64, vectorized::VecDateTimeValue>(
+                        column->get_int(row_num));
                 date_time->set_day(v.day());
                 date_time->set_month(v.month());
                 date_time->set_year(v.year());
diff --git a/be/src/vec/functions/function_timestamp.cpp b/be/src/vec/functions/function_timestamp.cpp
index 45dc20a43f..22f159bd8e 100644
--- a/be/src/vec/functions/function_timestamp.cpp
+++ b/be/src/vec/functions/function_timestamp.cpp
@@ -89,8 +89,9 @@ struct MakeDateImpl {
 
             auto& res_val = *reinterpret_cast<VecDateTimeValue*>(&res[i]);
 
-            VecDateTimeValue ts_value {l * 10000000000 + 101000000};
-            ts_value.set_type(TIME_DATE);
+            VecDateTimeValue ts_value = VecDateTimeValue();
+            ts_value.set_time(l, 1, 1, 0, 0, 0);
+
             DateTimeVal ts_val;
             ts_value.to_datetime_val(&ts_val);
             if (ts_val.is_null) {
diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h
index dd2e189493..728556186c 100644
--- a/be/src/vec/runtime/vdatetime_value.h
+++ b/be/src/vec/runtime/vdatetime_value.h
@@ -134,10 +134,10 @@ constexpr size_t max_char_length(const char* const* name, size_t end) {
 
 static constexpr const char* s_month_name[] = {
         "",     "January", "February",  "March",   "April",    "May",      "June",
-        "July", "August",  "September", "October", "November", "December", NULL};
+        "July", "August",  "September", "October", "November", "December", nullptr};
 
 static constexpr const char* s_day_name[] = {"Monday", "Tuesday",  "Wednesday", "Thursday",
-                                             "Friday", "Saturday", "Sunday",    NULL};
+                                             "Friday", "Saturday", "Sunday",    nullptr};
 
 static constexpr size_t MAX_DAY_NAME_LEN = max_char_length(s_day_name, std::size(s_day_name));
 static constexpr size_t MAX_MONTH_NAME_LEN = max_char_length(s_month_name, std::size(s_month_name));
@@ -157,7 +157,27 @@ public:
               _month(0), // so this is a difference between Vectorization mode and Rowbatch mode with DateTimeValue;
               _year(0) {} // before int128  16 bytes  --->  after int64 8 bytes
 
-    explicit VecDateTimeValue(int64_t t) { from_date_int64(t); }
+    // The data format of DATE/DATETIME differs between the storage layer and the execution layer,
+    // so we should use different creators to build a value from raw data.
+    // Use create_from_olap_xxx only when converting binary data scanned from the storage engine into typed data.
+    // In all other cases, just use binary_cast<vectorized::Int64, vectorized::VecDateTimeValue>.
+
+    // olap storage layer date data format:
+    // 64-bit binary data [year(remaining bits), month(4 bits), day(5 bits)]
+    // execution layer date/datetime and olap storage layer datetime data format:
+    // 8-byte integer data [year(remaining digits), month(2 digits), day(2 digits), hour(2 digits), minute(2 digits), second(2 digits)]
+
+    static VecDateTimeValue create_from_olap_date(uint64_t value) {
+        VecDateTimeValue date;
+        date.from_olap_date(value);
+        return date;
+    }
+
+    static VecDateTimeValue create_from_olap_datetime(uint64_t value) {
+        VecDateTimeValue datetime;
+        datetime.from_olap_datetime(value);
+        return datetime;
+    }
 
     void set_time(uint32_t year, uint32_t month, uint32_t day, uint32_t hour, uint32_t minute,
                   uint32_t second);
@@ -594,7 +614,7 @@ private:
     char* to_date_buffer(char* to) const;
     char* to_time_buffer(char* to) const;
 
-    // Used to convert to uint64_t
+    // Used to convert to int64_t
     int64_t to_datetime_int64() const;
     int64_t to_date_int64() const;
     int64_t to_time_int64() const;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org