You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by kx...@apache.org on 2023/07/30 13:20:48 UTC
[doris] branch branch-2.0 updated: [Opt](parquet) opt the performance of date convertion (#22360)
This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 8d5d49362d [Opt](parquet) opt the performance of date convertion (#22360)
8d5d49362d is described below
commit 8d5d49362df9b8445b090d095172f49abb217177
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Sun Jul 30 15:54:13 2023 +0800
[Opt](parquet) opt the performance of date convertion (#22360)
before:
```
mysql> select count(l_commitdate) from lineitem;
+---------------------+
| count(l_commitdate) |
+---------------------+
| 600037902 |
+---------------------+
1 row in set (1.61 sec)
```
after:
```
mysql> select count(l_commitdate) from lineitem;
+---------------------+
| count(l_commitdate) |
+---------------------+
| 600037902 |
+---------------------+
1 row in set (0.86 sec)
```
---
be/src/vec/exec/format/parquet/decoder.cpp | 6 ++++++
be/src/vec/exec/format/parquet/decoder.h | 1 +
be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp | 7 ++++---
be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp | 6 ++++--
be/src/vec/runtime/vdatetime_value.h | 5 ++---
5 files changed, 17 insertions(+), 8 deletions(-)
diff --git a/be/src/vec/exec/format/parquet/decoder.cpp b/be/src/vec/exec/format/parquet/decoder.cpp
index a1fc3706fd..539fc04a10 100644
--- a/be/src/vec/exec/format/parquet/decoder.cpp
+++ b/be/src/vec/exec/format/parquet/decoder.cpp
@@ -177,5 +177,11 @@ void Decoder::init(FieldSchema* field_schema, cctz::time_zone* ctz) {
_decode_params->scale_to_nano_factor = 1000;
}
}
+
+ if (_decode_params->ctz) {
+ VecDateTimeValue t;
+ t.from_unixtime(0, *_decode_params->ctz);
+ _decode_params->offset_days = doris::calc_daynr(t.year(), t.month(), t.day());
+ }
}
} // namespace doris::vectorized
diff --git a/be/src/vec/exec/format/parquet/decoder.h b/be/src/vec/exec/format/parquet/decoder.h
index aacb3730ad..6c1030818c 100644
--- a/be/src/vec/exec/format/parquet/decoder.h
+++ b/be/src/vec/exec/format/parquet/decoder.h
@@ -71,6 +71,7 @@ struct DecodeParams {
static const cctz::time_zone utc0;
// schema.logicalType.TIMESTAMP.isAdjustedToUTC == true, we should set the time zone
cctz::time_zone* ctz = nullptr;
+ size_t offset_days = 0;
int64_t second_mask = 1;
int64_t scale_to_nano_factor = 1;
DecimalScaleParams decimal_scale;
diff --git a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
index 887797636e..817b5e7f96 100644
--- a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
+++ b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
@@ -216,15 +216,16 @@ protected:
size_t data_index = column_data.size();
column_data.resize(data_index + select_vector.num_values() - select_vector.num_filtered());
size_t dict_index = 0;
+
ColumnSelectVector::DataReadType read_type;
while (size_t run_length = select_vector.get_next_run<has_filter>(&read_type)) {
switch (read_type) {
case ColumnSelectVector::CONTENT: {
for (size_t i = 0; i < run_length; ++i) {
- int64_t date_value = _dict_items[_indexes[dict_index++]];
+ int64_t date_value =
+ _dict_items[_indexes[dict_index++]] + _decode_params->offset_days;
auto& v = reinterpret_cast<CppType&>(column_data[data_index++]);
- v.from_unixtime(date_value * 24 * 60 * 60,
- *_decode_params->ctz); // day to seconds
+ v.get_date_from_daynr(date_value);
if constexpr (std::is_same_v<CppType, VecDateTimeValue>) {
// we should cast to date if using date v1.
v.cast_to_date();
diff --git a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
index 72d362fe61..940e70db79 100644
--- a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
+++ b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
@@ -248,14 +248,16 @@ Status FixLengthPlainDecoder::_decode_date(MutableColumnPtr& doris_column,
size_t data_index = column_data.size();
column_data.resize(data_index + select_vector.num_values() - select_vector.num_filtered());
ColumnSelectVector::DataReadType read_type;
+
while (size_t run_length = select_vector.get_next_run<has_filter>(&read_type)) {
switch (read_type) {
case ColumnSelectVector::CONTENT: {
for (size_t i = 0; i < run_length; ++i) {
char* buf_start = _data->data + _offset;
- int64_t date_value = static_cast<int64_t>(*reinterpret_cast<int32_t*>(buf_start));
+ int64_t date_value = static_cast<int64_t>(*reinterpret_cast<int32_t*>(buf_start)) +
+ _decode_params->offset_days;
auto& v = reinterpret_cast<CppType&>(column_data[data_index++]);
- v.from_unixtime(date_value * 24 * 60 * 60, *_decode_params->ctz); // day to seconds
+ v.get_date_from_daynr(date_value);
if constexpr (std::is_same_v<CppType, VecDateTimeValue>) {
// we should cast to date if using date v1.
v.cast_to_date();
diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h
index 891d6fb8b1..581e0a45ce 100644
--- a/be/src/vec/runtime/vdatetime_value.h
+++ b/be/src/vec/runtime/vdatetime_value.h
@@ -655,6 +655,8 @@ public:
_type = TIME_DATETIME;
}
+ bool get_date_from_daynr(uint64_t);
+
private:
// Used to make sure sizeof VecDateTimeValue
friend class UnusedClass;
@@ -685,9 +687,6 @@ private:
static uint8_t calc_week(const VecDateTimeValue& value, uint8_t mode, uint32_t* year,
bool disable_lut = false);
- // This is private function which modify date but modify `_type`
- bool get_date_from_daynr(uint64_t);
-
// Helper to set max, min, zero
void set_zero(int type);
void set_max_time(bool neg);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org