You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/06/23 07:32:49 UTC

[doris] branch master updated: [profile] add more detail profile in segment iterator (#10352)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 3370c10528 [profile] add more detail profile in segment iterator (#10352)
3370c10528 is described below

commit 3370c105286ac9f2d590d0bf43f811a5cb52171e
Author: yiguolei <67...@qq.com>
AuthorDate: Thu Jun 23 15:32:43 2022 +0800

    [profile] add more detail profile in segment iterator (#10352)
---
 be/src/olap/olap_common.h                          | 24 ++++++++++++++---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 31 +++++++++++++++-------
 be/src/vec/exec/volap_scan_node.cpp                | 10 +++++++
 be/src/vec/exec/volap_scan_node.h                  | 22 +++++++++++----
 be/src/vec/exec/volap_scanner.cpp                  |  9 ++++++-
 5 files changed, 78 insertions(+), 18 deletions(-)

diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index 304157e4e4..7396a3cbf1 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -257,11 +257,31 @@ struct OlapReaderStatistics {
     // total read bytes in memory
     int64_t bytes_read = 0;
 
+    int64_t block_fetch_ns = 0; // time of rowset reader's `next_batch()` call
     int64_t block_load_ns = 0;
     int64_t blocks_load = 0;
-    int64_t block_fetch_ns = 0; // time of rowset reader's `next_batch()` call
+    // Not used any more, will be removed after non-vectorized code is removed
     int64_t block_seek_num = 0;
+    // Not used any more, will be removed after non-vectorized code is removed
     int64_t block_seek_ns = 0;
+
+    // block_load_ns
+    //      block_init_ns
+    //          block_init_seek_ns
+    //      first_read_ns
+    //          block_first_read_seek_ns
+    //      lazy_read_ns
+    //          block_lazy_read_seek_ns
+    int64_t block_init_ns = 0;
+    int64_t block_init_seek_num = 0;
+    int64_t block_init_seek_ns = 0;
+    int64_t first_read_ns = 0;
+    int64_t block_first_read_seek_num = 0;
+    int64_t block_first_read_seek_ns = 0;
+    int64_t lazy_read_ns = 0;
+    int64_t block_lazy_read_seek_num = 0;
+    int64_t block_lazy_read_seek_ns = 0;
+
     int64_t block_convert_ns = 0;
 
     int64_t raw_rows_read = 0;
@@ -270,8 +290,6 @@ struct OlapReaderStatistics {
     int64_t rows_vec_del_cond_filtered = 0;
     int64_t vec_cond_ns = 0;
     int64_t short_cond_ns = 0;
-    int64_t first_read_ns = 0;
-    int64_t lazy_read_ns = 0;
     int64_t output_col_ns = 0;
 
     int64_t rows_key_range_filtered = 0;
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index b3945e11ec..19a04c5576 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -131,10 +131,13 @@ Status SegmentIterator::init(const StorageReadOptions& opts) {
     if (!opts.column_predicates.empty()) {
         _col_predicates = opts.column_predicates;
     }
+    // Read options will not change, so that just reserve here
+    _block_rowids.reserve(_opts.block_row_max);
     return Status::OK();
 }
 
 Status SegmentIterator::_init(bool is_vec) {
+    SCOPED_RAW_TIMER(&_opts.stats->block_init_ns);
     DorisMetrics::instance()->segment_read_total->increment(1);
     // get file handle from file descriptor of segment
     fs::BlockManager* block_mgr = fs::fs_util::block_manager(_segment->_path_desc);
@@ -445,7 +448,11 @@ Status SegmentIterator::_lookup_ordinal(const RowCursor& key, bool is_include, r
 
 // seek to the row and load that row to _key_cursor
 Status SegmentIterator::_seek_and_peek(rowid_t rowid) {
-    RETURN_IF_ERROR(_seek_columns(_seek_schema->column_ids(), rowid));
+    {
+        _opts.stats->block_init_seek_num += 1;
+        SCOPED_RAW_TIMER(&_opts.stats->block_init_seek_ns);
+        RETURN_IF_ERROR(_seek_columns(_seek_schema->column_ids(), rowid));
+    }
     size_t num_rows = 1;
     // please note that usually RowBlockV2.clear() is called to free MemPool memory before reading the next block,
     // but here since there won't be too many keys to seek, we don't call RowBlockV2.clear() so that we can use
@@ -484,8 +491,6 @@ void SegmentIterator::_init_lazy_materialization() {
 }
 
 Status SegmentIterator::_seek_columns(const std::vector<ColumnId>& column_ids, rowid_t pos) {
-    _opts.stats->block_seek_num += 1;
-    SCOPED_RAW_TIMER(&_opts.stats->block_seek_ns);
     for (auto cid : column_ids) {
         RETURN_IF_ERROR(_column_iterators[cid]->seek_to_ordinal(pos));
     }
@@ -508,15 +513,11 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
     SCOPED_RAW_TIMER(&_opts.stats->block_load_ns);
     if (UNLIKELY(!_inited)) {
         RETURN_IF_ERROR(_init());
-        if (_lazy_materialization_read) {
-            _block_rowids.reserve(block->capacity());
-        }
         _inited = true;
     }
 
     uint32_t nrows_read = 0;
     uint32_t nrows_read_limit = block->capacity();
-    _block_rowids.resize(nrows_read_limit);
     const auto& read_columns =
             _lazy_materialization_read ? _predicate_columns : block->schema()->column_ids();
 
@@ -534,6 +535,8 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
             }
             if (_cur_rowid == 0 || _cur_rowid != range_from) {
                 _cur_rowid = range_from;
+                _opts.stats->block_first_read_seek_num += 1;
+                SCOPED_RAW_TIMER(&_opts.stats->block_first_read_seek_ns);
                 RETURN_IF_ERROR(_seek_columns(read_columns, _cur_rowid));
             }
             size_t rows_to_read = range_to - range_from;
@@ -597,7 +600,11 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
                 ++j;
             }
             uint16_t range_size = j - i;
-            RETURN_IF_ERROR(_seek_columns(_non_predicate_columns, _block_rowids[sv[i]]));
+            {
+                _opts.stats->block_lazy_read_seek_num += 1;
+                SCOPED_RAW_TIMER(&_opts.stats->block_lazy_read_seek_ns);
+                RETURN_IF_ERROR(_seek_columns(_non_predicate_columns, _block_rowids[sv[i]]));
+            }
             RETURN_IF_ERROR(_read_columns(_non_predicate_columns, block, sv[i], range_size));
             i += range_size;
         }
@@ -839,6 +846,8 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32
         }
         if (_cur_rowid == 0 || _cur_rowid != range_from) {
             _cur_rowid = range_from;
+            _opts.stats->block_first_read_seek_num += 1;
+            SCOPED_RAW_TIMER(&_opts.stats->block_first_read_seek_ns);
             RETURN_IF_ERROR(_seek_columns(_first_read_column_ids, _cur_rowid));
         }
         size_t rows_to_read = range_to - range_from;
@@ -956,7 +965,11 @@ void SegmentIterator::_read_columns_by_rowids(std::vector<ColumnId>& read_column
             end_idx++;
         }
         size_t range = end_idx - start_idx;
-        _seek_columns(read_column_ids, rowid_vector[sel_rowid_idx[start_idx]]);
+        {
+            _opts.stats->block_lazy_read_seek_num += 1;
+            SCOPED_RAW_TIMER(&_opts.stats->block_lazy_read_seek_ns);
+            _seek_columns(read_column_ids, rowid_vector[sel_rowid_idx[start_idx]]);
+        }
         _read_columns(read_column_ids, *mutable_columns, range);
         start_idx += range;
     }
diff --git a/be/src/vec/exec/volap_scan_node.cpp b/be/src/vec/exec/volap_scan_node.cpp
index 1fdd398b22..aef88e4e0f 100644
--- a/be/src/vec/exec/volap_scan_node.cpp
+++ b/be/src/vec/exec/volap_scan_node.cpp
@@ -119,14 +119,24 @@ void VOlapScanNode::_init_counter(RuntimeState* state) {
     _block_fetch_timer = ADD_TIMER(_scanner_profile, "BlockFetchTime");
     _raw_rows_counter = ADD_COUNTER(_segment_profile, "RawRowsRead", TUnit::UNIT);
     _block_convert_timer = ADD_TIMER(_scanner_profile, "BlockConvertTime");
+    // Will be delete after non-vectorized code is removed
     _block_seek_timer = ADD_TIMER(_segment_profile, "BlockSeekTime");
     _block_seek_counter = ADD_COUNTER(_segment_profile, "BlockSeekCount", TUnit::UNIT);
+    _block_init_timer = ADD_TIMER(_segment_profile, "BlockInitTime");
+    _block_init_seek_timer = ADD_TIMER(_segment_profile, "BlockInitSeekTime");
+    _block_init_seek_counter = ADD_COUNTER(_segment_profile, "BlockInitSeekCount", TUnit::UNIT);
 
     _rows_vec_cond_counter = ADD_COUNTER(_segment_profile, "RowsVectorPredFiltered", TUnit::UNIT);
     _vec_cond_timer = ADD_TIMER(_segment_profile, "VectorPredEvalTime");
     _short_cond_timer = ADD_TIMER(_segment_profile, "ShortPredEvalTime");
     _first_read_timer = ADD_TIMER(_segment_profile, "FirstReadTime");
+    _first_read_seek_timer = ADD_TIMER(_segment_profile, "FirstReadSeekTime");
+    _first_read_seek_counter = ADD_COUNTER(_segment_profile, "FirstReadSeekCount", TUnit::UNIT);
+
     _lazy_read_timer = ADD_TIMER(_segment_profile, "LazyReadTime");
+    _lazy_read_seek_timer = ADD_TIMER(_segment_profile, "LazyReadSeekTime");
+    _lazy_read_seek_counter = ADD_COUNTER(_segment_profile, "LazyReadSeekCount", TUnit::UNIT);
+
     _output_col_timer = ADD_TIMER(_segment_profile, "OutputColumnTime");
 
     _stats_filtered_counter = ADD_COUNTER(_segment_profile, "RowsStatsFiltered", TUnit::UNIT);
diff --git a/be/src/vec/exec/volap_scan_node.h b/be/src/vec/exec/volap_scan_node.h
index 6e8e4a0fde..23ce669cf1 100644
--- a/be/src/vec/exec/volap_scan_node.h
+++ b/be/src/vec/exec/volap_scan_node.h
@@ -244,8 +244,6 @@ private:
     RuntimeProfile::Counter* _rows_vec_cond_counter = nullptr;
     RuntimeProfile::Counter* _vec_cond_timer = nullptr;
     RuntimeProfile::Counter* _short_cond_timer = nullptr;
-    RuntimeProfile::Counter* _first_read_timer = nullptr;
-    RuntimeProfile::Counter* _lazy_read_timer = nullptr;
     RuntimeProfile::Counter* _output_col_timer = nullptr;
 
     RuntimeProfile::Counter* _stats_filtered_counter = nullptr;
@@ -254,12 +252,26 @@ private:
     RuntimeProfile::Counter* _conditions_filtered_counter = nullptr;
     RuntimeProfile::Counter* _key_range_filtered_counter = nullptr;
 
+    RuntimeProfile::Counter* _block_fetch_timer = nullptr;
+    RuntimeProfile::Counter* _block_load_timer = nullptr;
+    RuntimeProfile::Counter* _block_load_counter = nullptr;
+    // Not used any more, will be removed after non-vectorized code is removed
     RuntimeProfile::Counter* _block_seek_timer = nullptr;
+    // Not used any more, will be removed after non-vectorized code is removed
     RuntimeProfile::Counter* _block_seek_counter = nullptr;
+    // Add more detail seek timer and counter profile
+    // Read process is split into 3 stages: init, first read, lazy read
+    RuntimeProfile::Counter* _block_init_timer = nullptr;
+    RuntimeProfile::Counter* _block_init_seek_timer = nullptr;
+    RuntimeProfile::Counter* _block_init_seek_counter = nullptr;
+    RuntimeProfile::Counter* _first_read_timer = nullptr;
+    RuntimeProfile::Counter* _first_read_seek_timer = nullptr;
+    RuntimeProfile::Counter* _first_read_seek_counter = nullptr;
+    RuntimeProfile::Counter* _lazy_read_timer = nullptr;
+    RuntimeProfile::Counter* _lazy_read_seek_timer = nullptr;
+    RuntimeProfile::Counter* _lazy_read_seek_counter = nullptr;
+
     RuntimeProfile::Counter* _block_convert_timer = nullptr;
-    RuntimeProfile::Counter* _block_load_timer = nullptr;
-    RuntimeProfile::Counter* _block_load_counter = nullptr;
-    RuntimeProfile::Counter* _block_fetch_timer = nullptr;
 
     RuntimeProfile::Counter* _index_load_timer = nullptr;
 
diff --git a/be/src/vec/exec/volap_scanner.cpp b/be/src/vec/exec/volap_scanner.cpp
index 6c942dfd0d..3ef387b75b 100644
--- a/be/src/vec/exec/volap_scanner.cpp
+++ b/be/src/vec/exec/volap_scanner.cpp
@@ -359,6 +359,7 @@ void VOlapScanner::update_counter() {
     COUNTER_UPDATE(_parent->_block_load_counter, stats.blocks_load);
     COUNTER_UPDATE(_parent->_block_fetch_timer, stats.block_fetch_ns);
     COUNTER_UPDATE(_parent->_block_seek_timer, stats.block_seek_ns);
+    COUNTER_UPDATE(_parent->_block_seek_counter, stats.block_seek_num);
     COUNTER_UPDATE(_parent->_block_convert_timer, stats.block_convert_ns);
 
     COUNTER_UPDATE(_parent->_raw_rows_counter, stats.raw_rows_read);
@@ -367,8 +368,15 @@ void VOlapScanner::update_counter() {
     // COUNTER_UPDATE(_parent->_filtered_rows_counter, stats.num_rows_filtered);
     COUNTER_UPDATE(_parent->_vec_cond_timer, stats.vec_cond_ns);
     COUNTER_UPDATE(_parent->_short_cond_timer, stats.short_cond_ns);
+    COUNTER_UPDATE(_parent->_block_init_timer, stats.block_init_ns);
+    COUNTER_UPDATE(_parent->_block_init_seek_timer, stats.block_init_seek_ns);
+    COUNTER_UPDATE(_parent->_block_init_seek_counter, stats.block_init_seek_num);
     COUNTER_UPDATE(_parent->_first_read_timer, stats.first_read_ns);
+    COUNTER_UPDATE(_parent->_first_read_seek_timer, stats.block_first_read_seek_ns);
+    COUNTER_UPDATE(_parent->_first_read_seek_counter, stats.block_first_read_seek_num);
     COUNTER_UPDATE(_parent->_lazy_read_timer, stats.lazy_read_ns);
+    COUNTER_UPDATE(_parent->_lazy_read_seek_timer, stats.block_lazy_read_seek_ns);
+    COUNTER_UPDATE(_parent->_lazy_read_seek_counter, stats.block_lazy_read_seek_num);
     COUNTER_UPDATE(_parent->_output_col_timer, stats.output_col_ns);
     COUNTER_UPDATE(_parent->_rows_vec_cond_counter, stats.rows_vec_cond_filtered);
 
@@ -392,7 +400,6 @@ void VOlapScanner::update_counter() {
 
     COUNTER_UPDATE(_parent->_bitmap_index_filter_counter, stats.rows_bitmap_index_filtered);
     COUNTER_UPDATE(_parent->_bitmap_index_filter_timer, stats.bitmap_index_filter_timer);
-    COUNTER_UPDATE(_parent->_block_seek_counter, stats.block_seek_num);
 
     COUNTER_UPDATE(_parent->_filtered_segment_counter, stats.filtered_segment_number);
     COUNTER_UPDATE(_parent->_total_segment_counter, stats.total_segment_number);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org