You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by kx...@apache.org on 2023/07/30 13:20:48 UTC

[doris] branch branch-2.0 updated: [Opt](parquet) opt the performance of date conversion (#22360)

This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 8d5d49362d [Opt](parquet) opt the performance of date conversion (#22360)
8d5d49362d is described below

commit 8d5d49362df9b8445b090d095172f49abb217177
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Sun Jul 30 15:54:13 2023 +0800

    [Opt](parquet) opt the performance of date conversion (#22360)
    
    before:
    ```
    mysql>  select count(l_commitdate) from lineitem;
    +---------------------+
    | count(l_commitdate) |
    +---------------------+
    |           600037902 |
    +---------------------+
    1 row in set (1.61 sec)
    ```
    
    after:
    ```
    mysql>  select count(l_commitdate) from lineitem;
    +---------------------+
    | count(l_commitdate) |
    +---------------------+
    |           600037902 |
    +---------------------+
    1 row in set (0.86 sec)
    ```
---
 be/src/vec/exec/format/parquet/decoder.cpp                  | 6 ++++++
 be/src/vec/exec/format/parquet/decoder.h                    | 1 +
 be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp  | 7 ++++---
 be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp | 6 ++++--
 be/src/vec/runtime/vdatetime_value.h                        | 5 ++---
 5 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/decoder.cpp b/be/src/vec/exec/format/parquet/decoder.cpp
index a1fc3706fd..539fc04a10 100644
--- a/be/src/vec/exec/format/parquet/decoder.cpp
+++ b/be/src/vec/exec/format/parquet/decoder.cpp
@@ -177,5 +177,11 @@ void Decoder::init(FieldSchema* field_schema, cctz::time_zone* ctz) {
             _decode_params->scale_to_nano_factor = 1000;
         }
     }
+
+    if (_decode_params->ctz) {
+        VecDateTimeValue t;
+        t.from_unixtime(0, *_decode_params->ctz);
+        _decode_params->offset_days = doris::calc_daynr(t.year(), t.month(), t.day());
+    }
 }
 } // namespace doris::vectorized
diff --git a/be/src/vec/exec/format/parquet/decoder.h b/be/src/vec/exec/format/parquet/decoder.h
index aacb3730ad..6c1030818c 100644
--- a/be/src/vec/exec/format/parquet/decoder.h
+++ b/be/src/vec/exec/format/parquet/decoder.h
@@ -71,6 +71,7 @@ struct DecodeParams {
     static const cctz::time_zone utc0;
     // schema.logicalType.TIMESTAMP.isAdjustedToUTC == true, we should set the time zone
     cctz::time_zone* ctz = nullptr;
+    size_t offset_days = 0;
     int64_t second_mask = 1;
     int64_t scale_to_nano_factor = 1;
     DecimalScaleParams decimal_scale;
diff --git a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
index 887797636e..817b5e7f96 100644
--- a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
+++ b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
@@ -216,15 +216,16 @@ protected:
         size_t data_index = column_data.size();
         column_data.resize(data_index + select_vector.num_values() - select_vector.num_filtered());
         size_t dict_index = 0;
+
         ColumnSelectVector::DataReadType read_type;
         while (size_t run_length = select_vector.get_next_run<has_filter>(&read_type)) {
             switch (read_type) {
             case ColumnSelectVector::CONTENT: {
                 for (size_t i = 0; i < run_length; ++i) {
-                    int64_t date_value = _dict_items[_indexes[dict_index++]];
+                    int64_t date_value =
+                            _dict_items[_indexes[dict_index++]] + _decode_params->offset_days;
                     auto& v = reinterpret_cast<CppType&>(column_data[data_index++]);
-                    v.from_unixtime(date_value * 24 * 60 * 60,
-                                    *_decode_params->ctz); // day to seconds
+                    v.get_date_from_daynr(date_value);
                     if constexpr (std::is_same_v<CppType, VecDateTimeValue>) {
                         // we should cast to date if using date v1.
                         v.cast_to_date();
diff --git a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
index 72d362fe61..940e70db79 100644
--- a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
+++ b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
@@ -248,14 +248,16 @@ Status FixLengthPlainDecoder::_decode_date(MutableColumnPtr& doris_column,
     size_t data_index = column_data.size();
     column_data.resize(data_index + select_vector.num_values() - select_vector.num_filtered());
     ColumnSelectVector::DataReadType read_type;
+
     while (size_t run_length = select_vector.get_next_run<has_filter>(&read_type)) {
         switch (read_type) {
         case ColumnSelectVector::CONTENT: {
             for (size_t i = 0; i < run_length; ++i) {
                 char* buf_start = _data->data + _offset;
-                int64_t date_value = static_cast<int64_t>(*reinterpret_cast<int32_t*>(buf_start));
+                int64_t date_value = static_cast<int64_t>(*reinterpret_cast<int32_t*>(buf_start)) +
+                                     _decode_params->offset_days;
                 auto& v = reinterpret_cast<CppType&>(column_data[data_index++]);
-                v.from_unixtime(date_value * 24 * 60 * 60, *_decode_params->ctz); // day to seconds
+                v.get_date_from_daynr(date_value);
                 if constexpr (std::is_same_v<CppType, VecDateTimeValue>) {
                     // we should cast to date if using date v1.
                     v.cast_to_date();
diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h
index 891d6fb8b1..581e0a45ce 100644
--- a/be/src/vec/runtime/vdatetime_value.h
+++ b/be/src/vec/runtime/vdatetime_value.h
@@ -655,6 +655,8 @@ public:
         _type = TIME_DATETIME;
     }
 
+    bool get_date_from_daynr(uint64_t);
+
 private:
     // Used to make sure sizeof VecDateTimeValue
     friend class UnusedClass;
@@ -685,9 +687,6 @@ private:
     static uint8_t calc_week(const VecDateTimeValue& value, uint8_t mode, uint32_t* year,
                              bool disable_lut = false);
 
-    // This is private function which modify date but modify `_type`
-    bool get_date_from_daynr(uint64_t);
-
     // Helper to set max, min, zero
     void set_zero(int type);
     void set_max_time(bool neg);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org