You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2023/06/26 01:24:00 UTC
[doris] branch branch-1.2-lts updated: [performace](colddata) opt cold data read performance (#21143)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
new b1f4d2817f [performace](colddata) opt cold data read performance (#21143)
b1f4d2817f is described below
commit b1f4d2817f572e24f0805f8e999efa4390838543
Author: yiguolei <67...@qq.com>
AuthorDate: Mon Jun 26 09:23:54 2023 +0800
[performace](colddata) opt cold data read performance (#21143)
cherry-pick #21141
---------
Co-authored-by: yiguolei <yi...@gmail.com>
---
be/src/olap/olap_common.h | 2 ++
be/src/olap/rowset/segment_v2/column_reader.cpp | 8 +++++++-
be/src/olap/rowset/segment_v2/column_reader.h | 1 +
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 23 +++++++++++++++++++++-
be/src/vec/exec/scan/new_olap_scan_node.cpp | 4 ++++
be/src/vec/exec/scan/new_olap_scan_node.h | 2 ++
be/src/vec/exec/scan/new_olap_scanner.cpp | 4 ++++
7 files changed, 42 insertions(+), 2 deletions(-)
diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index 5c6647ef60..a749715505 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -294,6 +294,8 @@ struct OlapReaderStatistics {
int64_t block_init_ns = 0;
int64_t block_init_seek_num = 0;
int64_t block_init_seek_ns = 0;
+ int64_t block_init_get_row_range_by_keys_ns = 0;
+ int64_t block_init_get_row_range_by_conditions_ns = 0;
int64_t first_read_ns = 0;
int64_t block_first_read_seek_num = 0;
int64_t block_first_read_seek_ns = 0;
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp
index b2c2b70974..8527a91c7d 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -641,7 +641,13 @@ Status FileColumnIterator::init(const ColumnIteratorOptions& opts) {
if (config::enable_low_cardinality_optimize &&
_reader->encoding_info()->encoding() == DICT_ENCODING) {
auto dict_encoding_type = _reader->get_dict_encoding_type();
- if (dict_encoding_type == ColumnReader::UNKNOWN_DICT_ENCODING) {
+ // Check the all-dict-encoding flag only when the column is a predicate column, because
+ // only then can we rewrite the predicate to accelerate the query. For a non-predicate
+ // column the check is useless and it hurts cold reads (first-time reads), because it
+ // will load the column's ordinal index and zonemap index and maybe other indices.
+ // That has a bad impact on primary key queries. For example: select * from table
+ // where pk = 1, and the table has 2000 columns.
+ if (dict_encoding_type == ColumnReader::UNKNOWN_DICT_ENCODING && opts.is_predicate_column) {
seek_to_ordinal(_reader->num_rows() - 1);
_is_all_dict_encoding = _page.is_dict_encoding;
_reader->set_dict_encoding_type(_is_all_dict_encoding
diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h
index f59eef221a..cdebf63b88 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -77,6 +77,7 @@ struct ColumnIteratorOptions {
// INDEX_PAGE including index_page, dict_page and short_key_page
PageTypePB type;
IOContext io_ctx;
+ bool is_predicate_column = false;
void sanity_check() const {
CHECK_NOTNULL(file_reader);
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 9fded3c31d..94fc3dfa5d 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -194,9 +194,13 @@ Status SegmentIterator::_init(bool is_vec) {
RETURN_IF_ERROR(_init_bitmap_index_iterators());
// z-order can not use prefix index
if (_segment->_tablet_schema->sort_type() != SortType::ZORDER) {
+ SCOPED_RAW_TIMER(&_opts.stats->block_init_get_row_range_by_keys_ns);
RETURN_IF_ERROR(_get_row_ranges_by_keys());
}
- RETURN_IF_ERROR(_get_row_ranges_by_column_conditions());
+ {
+ SCOPED_RAW_TIMER(&_opts.stats->block_init_get_row_range_by_conditions_ns);
+ RETURN_IF_ERROR(_get_row_ranges_by_column_conditions());
+ }
if (is_vec) {
_vec_init_lazy_materialization();
_vec_init_char_column_id();
@@ -391,6 +395,22 @@ Status SegmentIterator::_init_return_column_iterators() {
if (_cur_rowid >= num_rows()) {
return Status::OK();
}
+ std::set<ColumnId> del_cond_id_set;
+ _opts.delete_condition_predicates->get_all_column_ids(del_cond_id_set);
+ std::vector<bool> tmp_is_pred_column;
+ tmp_is_pred_column.resize(_schema.columns().size(), false);
+ if (!_col_predicates.empty() || !del_cond_id_set.empty()) {
+ for (auto predicate : _col_predicates) {
+ auto cid = predicate->column_id();
+ tmp_is_pred_column[cid] = true;
+ }
+ // handle delete_condition
+ if (!del_cond_id_set.empty()) {
+ for (auto cid : del_cond_id_set) {
+ tmp_is_pred_column[cid] = true;
+ }
+ }
+ }
for (auto cid : _schema.column_ids()) {
int32_t unique_id = _opts.tablet_schema->column(cid).unique_id();
if (_column_iterators.count(unique_id) < 1) {
@@ -401,6 +421,7 @@ Status SegmentIterator::_init_return_column_iterators() {
iter_opts.use_page_cache = _opts.use_page_cache;
iter_opts.file_reader = _file_reader.get();
iter_opts.io_ctx = _opts.io_ctx;
+ iter_opts.is_predicate_column = tmp_is_pred_column[cid];
RETURN_IF_ERROR(_column_iterators[unique_id]->init(iter_opts));
}
}
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp b/be/src/vec/exec/scan/new_olap_scan_node.cpp
index 56a320f088..b68ffb0474 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.cpp
+++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp
@@ -71,6 +71,10 @@ Status NewOlapScanNode::_init_profile() {
_raw_rows_counter = ADD_COUNTER(_segment_profile, "RawRowsRead", TUnit::UNIT);
_block_convert_timer = ADD_TIMER(_scanner_profile, "BlockConvertTime");
_block_init_timer = ADD_TIMER(_segment_profile, "BlockInitTime");
+ _block_init_get_row_range_by_keys_timer =
+ ADD_TIMER(_segment_profile, "BlockInitGetRowRangeByKeysTime");
+ _block_init_get_row_range_by_conditions_timer =
+ ADD_TIMER(_segment_profile, "BlockInitGetRowRangeByConditionsTime");
_block_init_seek_timer = ADD_TIMER(_segment_profile, "BlockInitSeekTime");
_block_init_seek_counter = ADD_COUNTER(_segment_profile, "BlockInitSeekCount", TUnit::UNIT);
_block_conditions_filtered_timer = ADD_TIMER(_segment_profile, "BlockConditionsFilteredTime");
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.h b/be/src/vec/exec/scan/new_olap_scan_node.h
index 4e2869c945..d85087df80 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.h
+++ b/be/src/vec/exec/scan/new_olap_scan_node.h
@@ -98,6 +98,8 @@ private:
// Add more detail seek timer and counter profile
// Read process is split into 3 stages: init, first read, lazy read
RuntimeProfile::Counter* _block_init_timer = nullptr;
+ RuntimeProfile::Counter* _block_init_get_row_range_by_keys_timer = nullptr;
+ RuntimeProfile::Counter* _block_init_get_row_range_by_conditions_timer = nullptr;
RuntimeProfile::Counter* _block_init_seek_timer = nullptr;
RuntimeProfile::Counter* _block_init_seek_counter = nullptr;
RuntimeProfile::Counter* _block_conditions_filtered_timer = nullptr;
diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp
index a1af97a062..a702b2b6e5 100644
--- a/be/src/vec/exec/scan/new_olap_scanner.cpp
+++ b/be/src/vec/exec/scan/new_olap_scanner.cpp
@@ -382,6 +382,10 @@ void NewOlapScanner::_update_counters_before_close() {
COUNTER_UPDATE(olap_parent->_vec_cond_timer, stats.vec_cond_ns);
COUNTER_UPDATE(olap_parent->_short_cond_timer, stats.short_cond_ns);
COUNTER_UPDATE(olap_parent->_block_init_timer, stats.block_init_ns);
+ COUNTER_UPDATE(olap_parent->_block_init_get_row_range_by_keys_timer,
+ stats.block_init_get_row_range_by_keys_ns);
+ COUNTER_UPDATE(olap_parent->_block_init_get_row_range_by_conditions_timer,
+ stats.block_init_get_row_range_by_conditions_ns);
COUNTER_UPDATE(olap_parent->_block_init_seek_timer, stats.block_init_seek_ns);
COUNTER_UPDATE(olap_parent->_block_init_seek_counter, stats.block_init_seek_num);
COUNTER_UPDATE(olap_parent->_block_conditions_filtered_timer,
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org