You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2019/10/22 01:43:28 UTC

[incubator-doris] branch master updated: add profile for segment v2 (#2015)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 9c2d149  add profile for segment v2 (#2015)
9c2d149 is described below

commit 9c2d149c3654686ab45abad1c72ffcc8d3ba5d1b
Author: kangpinghuang <ka...@126.com>
AuthorDate: Tue Oct 22 09:43:16 2019 +0800

    add profile for segment v2 (#2015)
---
 be/src/exec/olap_scan_node.cpp                     |  3 ++
 be/src/exec/olap_scan_node.h                       |  7 +++
 be/src/exec/olap_scanner.cpp                       |  3 ++
 be/src/olap/olap_common.h                          |  4 ++
 be/src/olap/rowset/beta_rowset_reader.cpp          | 45 ++++++++++-------
 be/src/olap/rowset/segment_v2/column_reader.cpp    | 58 +++++++++++++++-------
 be/src/olap/rowset/segment_v2/column_reader.h      | 27 ++++++----
 be/src/olap/rowset/segment_v2/segment.cpp          |  3 +-
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 36 +++++++++-----
 be/src/olap/rowset/segment_v2/segment_iterator.h   |  2 +
 .../segment_v2/column_reader_writer_test.cpp       |  6 ++-
 be/test/olap/rowset/segment_v2/segment_test.cpp    | 20 +++++++-
 12 files changed, 151 insertions(+), 63 deletions(-)

diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp
index 1310224..c039c3e 100644
--- a/be/src/exec/olap_scan_node.cpp
+++ b/be/src/exec/olap_scan_node.cpp
@@ -139,6 +139,9 @@ void OlapScanNode::_init_counter(RuntimeState* state) {
     _index_load_timer = ADD_TIMER(_runtime_profile, "IndexLoadTime");
 
     _scan_timer = ADD_TIMER(_runtime_profile, "ScanTime");
+
+    _total_pages_num_counter = ADD_COUNTER(_runtime_profile, "TotalPagesNum", TUnit::UNIT);
+    _cached_pages_num_counter = ADD_COUNTER(_runtime_profile, "CachedPagesNum", TUnit::UNIT);
 }
 
 Status OlapScanNode::prepare(RuntimeState* state) {
diff --git a/be/src/exec/olap_scan_node.h b/be/src/exec/olap_scan_node.h
index fa280b4..d8c3716 100644
--- a/be/src/exec/olap_scan_node.h
+++ b/be/src/exec/olap_scan_node.h
@@ -266,6 +266,13 @@ private:
     RuntimeProfile::Counter* _block_fetch_timer = nullptr;
 
     RuntimeProfile::Counter* _index_load_timer = nullptr;
+
+    // total pages read
+    // used by segment v2
+    RuntimeProfile::Counter* _total_pages_num_counter = nullptr;
+    // page read from cache
+    // used by segment v2
+    RuntimeProfile::Counter* _cached_pages_num_counter = nullptr;
 };
 
 } // namespace doris
diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp
index 79a41a9..b859e6f 100644
--- a/be/src/exec/olap_scanner.cpp
+++ b/be/src/exec/olap_scanner.cpp
@@ -479,6 +479,9 @@ void OlapScanner::update_counter() {
 
     COUNTER_UPDATE(_parent->_index_load_timer, _reader->stats().index_load_ns);
 
+    COUNTER_UPDATE(_parent->_total_pages_num_counter, _reader->stats().total_pages_num);
+    COUNTER_UPDATE(_parent->_cached_pages_num_counter, _reader->stats().cached_pages_num);
+
     DorisMetrics::query_scan_bytes.increment(_reader->stats().compressed_bytes_read);
     DorisMetrics::query_scan_rows.increment(_reader->stats().raw_rows_read);
 
diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index a7197be..34203f2 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -229,6 +229,7 @@ struct OlapReaderStatistics {
     int64_t decompress_ns = 0;
     int64_t uncompressed_bytes_read = 0;
 
+    // total read bytes in memory
     int64_t bytes_read = 0;
 
     int64_t block_load_ns = 0;
@@ -246,6 +247,9 @@ struct OlapReaderStatistics {
     int64_t rows_del_filtered = 0;
 
     int64_t index_load_ns = 0;
+
+    int64_t total_pages_num = 0;
+    int64_t cached_pages_num = 0;
 };
 
 typedef uint32_t ColumnId;
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp
index 3d6910a..e2620dc 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -108,32 +108,39 @@ OLAPStatus BetaRowsetReader::init(RowsetReaderContext* read_context) {
 OLAPStatus BetaRowsetReader::next_block(RowBlock** block) {
     // read next input block
     _input_block->clear();
-    auto s = _iterator->next_batch(_input_block.get());
-    if (!s.ok()) {
-        if (s.is_end_of_file()) {
-            *block = nullptr;
-            return OLAP_ERR_DATA_EOF;
+    {
+        SCOPED_RAW_TIMER(&_context->stats->block_fetch_ns);
+        auto s = _iterator->next_batch(_input_block.get());
+        if (!s.ok()) {
+            if (s.is_end_of_file()) {
+                *block = nullptr;
+                return OLAP_ERR_DATA_EOF;
+            }
+            LOG(WARNING) << "failed to read next block: " << s.to_string();
+            return OLAP_ERR_ROWSET_READ_FAILED;
         }
-        LOG(WARNING) << "failed to read next block: " << s.to_string();
-        return OLAP_ERR_ROWSET_READ_FAILED;
     }
+
     // convert to output block
     _output_block->clear();
     size_t rows_read = 0;
     uint16_t* selection_vector = _input_block->selection_vector();
-    for (size_t i = 0; i < _input_block->selected_size(); ++i) {
-        uint16_t row_idx = selection_vector[i];
-        // deep copy row from input block to output block because
-        // RowBlock use MemPool and RowBlockV2 use Arena
-        // TODO(hkp): unify RowBlockV2 to use MemPool to boost performance
-        _output_block->get_row(row_idx, _row.get());
-        // convert return_columns to seek_columns
-        s = _input_block->deep_copy_to_row_cursor(row_idx, _row.get(), _output_block->mem_pool());
-        if (!s.ok()) {
-            LOG(WARNING) << "failed to copy row: " << s.to_string();
-            return OLAP_ERR_ROWSET_READ_FAILED;
+    {
+        SCOPED_RAW_TIMER(&_context->stats->block_convert_ns);
+        for (uint16_t i = 0; i < _input_block->selected_size(); ++i) {
+            uint16_t row_idx = selection_vector[i];
+            // deep copy row from input block to output block because
+            // RowBlock use MemPool and RowBlockV2 use Arena
+            // TODO(hkp): unify RowBlockV2 to use MemPool to boost performance
+            _output_block->get_row(row_idx, _row.get());
+            // convert return_columns to seek_columns
+            auto s = _input_block->deep_copy_to_row_cursor(row_idx, _row.get(), _output_block->mem_pool());
+            if (!s.ok()) {
+                LOG(WARNING) << "failed to copy row: " << s.to_string();
+                return OLAP_ERR_ROWSET_READ_FAILED;
+            }
+            ++rows_read;
         }
-        ++rows_read;
     }
     _output_block->set_pos(0);
     _output_block->set_limit(rows_read);
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp
index feb9680..0752ca9 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -110,13 +110,15 @@ Status ColumnReader::new_iterator(ColumnIterator** iterator) {
     return Status::OK();
 }
 
-Status ColumnReader::read_page(const PagePointer& pp, PageHandle* handle) {
+Status ColumnReader::read_page(const PagePointer& pp, OlapReaderStatistics* stats, PageHandle* handle) {
+    stats->total_pages_num++;
     auto cache = StoragePageCache::instance();
     PageCacheHandle cache_handle;
     StoragePageCache::CacheKey cache_key(_file->file_name(), pp.offset);
     if (cache->lookup(cache_key, &cache_handle)) {
         // we find page in cache, use it
         *handle = PageHandle(std::move(cache_handle));
+        stats->cached_pages_num++;
         return Status::OK();
     }
     // Now we read this from file.
@@ -129,7 +131,11 @@ Status ColumnReader::read_page(const PagePointer& pp, PageHandle* handle) {
     // this buffer will assigned uncompressed page, and origin content will be freed.
     std::unique_ptr<uint8_t[]> page(new uint8_t[page_size]);
     Slice page_slice(page.get(), page_size);
-    RETURN_IF_ERROR(_file->read_at(pp.offset, page_slice));
+    {
+        SCOPED_RAW_TIMER(&stats->io_ns);
+        RETURN_IF_ERROR(_file->read_at(pp.offset, page_slice));
+        stats->compressed_bytes_read += page_size;
+    }
 
     size_t data_size = page_size - 4;
     if (_opts.verify_checksum) {
@@ -148,7 +154,10 @@ Status ColumnReader::read_page(const PagePointer& pp, PageHandle* handle) {
         PageDecompressor decompressor(page_slice, _compress_codec);
 
         Slice uncompressed_page;
-        RETURN_IF_ERROR(decompressor.decompress_to(&uncompressed_page));
+        {
+            SCOPED_RAW_TIMER(&stats->decompress_ns);
+            RETURN_IF_ERROR(decompressor.decompress_to(&uncompressed_page));
+        }
 
         // If decompressor create new heap memory for uncompressed data,
         // assign this uncompressed page to page and page slice
@@ -156,6 +165,7 @@ Status ColumnReader::read_page(const PagePointer& pp, PageHandle* handle) {
             page.reset((uint8_t*)uncompressed_page.data);
         }
         page_slice = uncompressed_page;
+        stats->uncompressed_bytes_read += page_slice.size;
     }
     // insert this into cache and return the cache handle
     cache->insert(cache_key, page_slice, &cache_handle);
@@ -166,10 +176,10 @@ Status ColumnReader::read_page(const PagePointer& pp, PageHandle* handle) {
 }
 
 void ColumnReader::get_row_ranges_by_zone_map(CondColumn* cond_column,
-        const std::vector<CondColumn*>& delete_conditions,
+        const std::vector<CondColumn*>& delete_conditions, OlapReaderStatistics* stats,
         RowRanges* row_ranges) {
     std::vector<uint32_t> page_indexes;
-    _get_filtered_pages(cond_column, delete_conditions, &page_indexes);
+    _get_filtered_pages(cond_column, stats, delete_conditions, &page_indexes);
     _calculate_row_ranges(page_indexes, row_ranges);
 }
 
@@ -177,7 +187,7 @@ PagePointer ColumnReader::get_dict_page_pointer() const {
     return _meta.dict_page();
 }
 
-void ColumnReader::_get_filtered_pages(CondColumn* cond_column,
+void ColumnReader::_get_filtered_pages(CondColumn* cond_column, OlapReaderStatistics* stats,
         const std::vector<CondColumn*>& delete_conditions, std::vector<uint32_t>* page_indexes) {
     FieldType type = _type_info->type();
     const std::vector<ZoneMapPB>& zone_maps = _column_zone_map->get_column_zone_map();
@@ -200,20 +210,25 @@ void ColumnReader::_get_filtered_pages(CondColumn* cond_column,
                 max_value->set_null();
             }
         }
-        bool should_read = false;
         if (cond_column == nullptr || cond_column->eval({min_value.get(), max_value.get()})) {
-            should_read = true;
-        }
-        if (should_read) {
+            bool should_read = true;
             for (auto& col_cond : delete_conditions) {
                 if (col_cond->del_eval({min_value.get(), max_value.get()}) == DEL_SATISFIED) {
                     should_read = false;
+                    rowid_t page_first_id = _ordinal_index->get_first_row_id(i);
+                    rowid_t page_last_id = _ordinal_index->get_last_row_id(i);
+                    stats->rows_del_filtered += page_last_id - page_first_id + 1;
                     break;
                 }
             }
-        }
-        if (should_read) {
-            page_indexes->push_back(i);
+            if (should_read) {
+                page_indexes->push_back(i);
+            }
+        } else {
+            // page filtered by zone map
+            rowid_t page_first_id = _ordinal_index->get_first_row_id(i);
+            rowid_t page_last_id = _ordinal_index->get_last_row_id(i);
+            stats->rows_stats_filtered += page_last_id - page_first_id + 1;
         }
     }
 }
@@ -231,7 +246,8 @@ void ColumnReader::_calculate_row_ranges(const std::vector<uint32_t>& page_index
 Status ColumnReader::_init_ordinal_index() {
     PagePointer pp = _meta.ordinal_index_page();
     PageHandle ph;
-    RETURN_IF_ERROR(read_page(pp, &ph));
+    OlapReaderStatistics stats;
+    RETURN_IF_ERROR(read_page(pp, &stats, &ph));
 
     _ordinal_index.reset(new OrdinalPageIndex(ph.data(), _num_rows));
     RETURN_IF_ERROR(_ordinal_index->load());
@@ -244,7 +260,9 @@ Status ColumnReader::_init_column_zone_map() {
     if (_meta.has_zone_map_page()) {
         PagePointer pp = _meta.zone_map_page();
         PageHandle ph;
-        RETURN_IF_ERROR(read_page(pp, &ph));
+        // tmp statistics
+        OlapReaderStatistics stats;
+        RETURN_IF_ERROR(read_page(pp, &stats, &ph));
         _column_zone_map.reset(new ColumnZoneMap(ph.data()));
         RETURN_IF_ERROR(_column_zone_map->load());
     } else {
@@ -382,6 +400,9 @@ Status FileColumnIterator::next_batch(size_t* n, ColumnBlock* dst) {
         remaining -= nrows_in_page;
     }
     *n -= remaining;
+    // TODO(hkp): for string type, the bytes_read should be passed to page decoder
+    // bytes_read = data size + null bitmap size
+    _opts.stats->bytes_read += *n * dst->type_info()->size() + BitmapSize(dst->nrows());
     return Status::OK();
 }
 
@@ -402,7 +423,7 @@ Status FileColumnIterator::_load_next_page(bool* eos) {
 // it ready to read
 Status FileColumnIterator::_read_page(const OrdinalPageIndexIterator& iter, ParsedPage* page) {
     page->page_pointer = iter.page();
-    RETURN_IF_ERROR(_reader->read_page(page->page_pointer, &page->page_handle));
+    RETURN_IF_ERROR(_reader->read_page(page->page_pointer, _opts.stats, &page->page_handle));
     // TODO(zc): read page from file
     Slice data = page->page_handle.data();
 
@@ -444,7 +465,7 @@ Status FileColumnIterator::_read_page(const OrdinalPageIndexIterator& iter, Pars
         if (binary_dict_page_decoder->is_dict_encoding()) {
             if (_dict_decoder == nullptr) {
                 PagePointer pp = _reader->get_dict_page_pointer();
-                RETURN_IF_ERROR(_reader->read_page(pp, &_dict_page_handle));
+                RETURN_IF_ERROR(_reader->read_page(pp, _opts.stats, &_dict_page_handle));
 
                 _dict_decoder.reset(new BinaryPlainPageDecoder(_dict_page_handle.data()));
                 RETURN_IF_ERROR(_dict_decoder->init());
@@ -458,7 +479,8 @@ Status FileColumnIterator::_read_page(const OrdinalPageIndexIterator& iter, Pars
     return Status::OK();
 }
 
-Status DefaultValueColumnIterator::init() {
+Status DefaultValueColumnIterator::init(const ColumnIteratorOptions& opts) {
+    _opts = opts;
     // be consistent with segment v1
     if (_default_value == "NULL" && _is_nullable) {
         _is_default_value_null = true;
diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h
index 50ac177..f4d9c77 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -52,6 +52,11 @@ struct ColumnReaderOptions {
     bool verify_checksum = true;
 };
 
+struct ColumnIteratorOptions {
+    // reader statistics
+    OlapReaderStatistics* stats = nullptr;
+};
+
 // There will be concurrent users to read the same column. So
 // we should do our best to reduce resource usage through share
 // same information, such as OrdinalPageIndex and Page data.
@@ -73,7 +78,7 @@ public:
     Status seek_at_or_before(rowid_t rowid, OrdinalPageIndexIterator* iter);
 
     // read a page from file into a page handle
-    Status read_page(const PagePointer& pp, PageHandle* handle);
+    Status read_page(const PagePointer& pp, OlapReaderStatistics* stats, PageHandle* handle);
 
     bool is_nullable() const { return _meta.is_nullable(); }
     const EncodingInfo* encoding_info() const { return _encoding_info; }
@@ -84,8 +89,8 @@ public:
     // get row ranges with zone map
     // cond_column is user's query predicate
     // delete_conditions is a vector of delete predicate of different version
-    void get_row_ranges_by_zone_map(CondColumn* cond_column,
-            const std::vector<CondColumn*>& delete_conditions, RowRanges* row_ranges);
+    void get_row_ranges_by_zone_map(CondColumn* cond_column, const std::vector<CondColumn*>& delete_conditions,
+            OlapReaderStatistics* stats, RowRanges* row_ranges);
 
     PagePointer get_dict_page_pointer() const;
 
@@ -96,7 +101,7 @@ private:
 
     Status _init_column_zone_map();
 
-    void _get_filtered_pages(CondColumn* cond_column,
+    void _get_filtered_pages(CondColumn* cond_column, OlapReaderStatistics* stats,
             const std::vector<CondColumn*>& delete_conditions, std::vector<uint32_t>* page_indexes);
 
     void _calculate_row_ranges(const std::vector<uint32_t>& page_indexes, RowRanges* row_ranges);
@@ -125,7 +130,10 @@ public:
     ColumnIterator() { }
     virtual ~ColumnIterator() { }
 
-    virtual Status init() { return Status::OK(); }
+    virtual Status init(const ColumnIteratorOptions& opts) {
+        _opts = opts;
+        return Status::OK();
+    }
 
     // Seek to the first entry in the column.
     virtual Status seek_to_first() = 0;
@@ -161,13 +169,10 @@ public:
     // release next_batch related resource
     Status finish_batch();
 #endif
+protected:
+    ColumnIteratorOptions _opts;
 };
 
-#if 0
-class DefaultValueIterator : public ColumnIterator {
-};
-#endif
-
 // This iterator is used to read column data from file
 class FileColumnIterator : public ColumnIterator {
 public:
@@ -220,7 +225,7 @@ public:
           _is_default_value_null(false),
           _value_size(0) { }
 
-    Status init() override;
+    Status init(const ColumnIteratorOptions& opts) override;
 
     Status seek_to_first() override {
         _current_rowid = 0;
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp
index 040be8c..39028d4 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -215,7 +215,8 @@ Status Segment::new_column_iterator(uint32_t cid, ColumnIterator** iter) {
         std::unique_ptr<DefaultValueColumnIterator> default_value_iter(
                 new DefaultValueColumnIterator(tablet_column.default_value(),
                 tablet_column.is_nullable(), tablet_column.type()));
-        RETURN_IF_ERROR(default_value_iter->init());
+        ColumnIteratorOptions iter_opts;
+        RETURN_IF_ERROR(default_value_iter->init(iter_opts));
         *iter = default_value_iter.release();
         return Status::OK();
     }
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 0a50d05..ec5caa6 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -121,6 +121,9 @@ Status SegmentIterator::_prepare_seek(const StorageReadOptions::KeyRange& key_ra
     for (auto cid : _seek_schema->column_ids()) {
         if (_column_iterators[cid] == nullptr) {
             RETURN_IF_ERROR(_segment->new_column_iterator(cid, &_column_iterators[cid]));
+            ColumnIteratorOptions iter_opts;
+            iter_opts.stats = _opts.stats;
+            RETURN_IF_ERROR(_column_iterators[cid]->init(iter_opts));
         }
     }
 
@@ -168,7 +171,7 @@ Status SegmentIterator::_get_row_ranges_from_zone_map(RowRanges* zone_map_row_ra
         // get row ranges by zone map of this column
         RowRanges column_zone_map_row_ranges;
         _segment->_column_readers[cid]->get_row_ranges_by_zone_map(_opts.conditions->get_column(cid),
-                column_delete_conditions[cid], &column_zone_map_row_ranges);
+                column_delete_conditions[cid], _opts.stats, &column_zone_map_row_ranges);
         // intersection different columns's row ranges to get final row ranges by zone map
         RowRanges::ranges_intersection(origin_row_ranges, column_zone_map_row_ranges, &origin_row_ranges);
     }
@@ -184,10 +187,12 @@ Status SegmentIterator::_init_column_iterators() {
     for (auto cid : _schema.column_ids()) {
         if (_column_iterators[cid] == nullptr) {
             RETURN_IF_ERROR(_segment->new_column_iterator(cid, &_column_iterators[cid]));
+            ColumnIteratorOptions iter_opts;
+            iter_opts.stats = _opts.stats;
+            RETURN_IF_ERROR(_column_iterators[cid]->init(iter_opts));
         }
-
-        _column_iterators[cid]->seek_to_ordinal(_cur_rowid);
     }
+    RETURN_IF_ERROR(_seek_columns(_schema.column_ids(), _cur_rowid));
     return Status::OK();
 }
 
@@ -267,9 +272,7 @@ Status SegmentIterator::_lookup_ordinal(const RowCursor& key, bool is_include,
 
 // seek to the row and load that row to _key_cursor
 Status SegmentIterator::_seek_and_peek(rowid_t rowid) {
-    for (auto cid : _seek_schema->column_ids()) {
-        _column_iterators[cid]->seek_to_ordinal(rowid);
-    }
+    RETURN_IF_ERROR(_seek_columns(_seek_schema->column_ids(), rowid));
     size_t num_rows = 1;
     // please note that usually RowBlockV2.clear() is called to free MemPool memory before reading the next block,
     // but here since there won't be too many keys to seek, we don't call RowBlockV2.clear() so that we can use
@@ -301,7 +304,16 @@ Status SegmentIterator::_next_batch(RowBlockV2* block, size_t* rows_read) {
     return Status::OK();
 }
 
+Status SegmentIterator::_seek_columns(const std::vector<ColumnId>& column_ids, rowid_t pos) {
+    SCOPED_RAW_TIMER(&_opts.stats->block_seek_ns);
+    for (auto cid : column_ids) {
+        RETURN_IF_ERROR(_column_iterators[cid]->seek_to_ordinal(pos));
+    }
+    return Status::OK();
+}
+
 Status SegmentIterator::next_batch(RowBlockV2* block) {
+    SCOPED_RAW_TIMER(&_opts.stats->block_load_ns);
     if (UNLIKELY(!_inited)) {
         RETURN_IF_ERROR(_init());
         _inited = true;
@@ -327,9 +339,7 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
                 continue;
             }
             _cur_rowid = _row_ranges.get_range_from(_cur_range_id);
-            for (auto cid : block->schema()->column_ids()) {
-                RETURN_IF_ERROR(_column_iterators[cid]->seek_to_ordinal(_cur_rowid));
-            }
+            RETURN_IF_ERROR(_seek_columns(block->schema()->column_ids(), _cur_rowid));
             break;
         }
     }
@@ -342,9 +352,7 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
     block->set_selected_size(rows_to_read);
     // update raw_rows_read counter
     // judge nullptr for unit test case
-    if (_opts.stats != nullptr) {
-        _opts.stats->raw_rows_read += block->num_rows();
-    }
+    _opts.stats->raw_rows_read += block->num_rows();
     if (block->num_rows() == 0) {
         return Status::EndOfFile("no more data in segment");
     }
@@ -354,12 +362,16 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
     if (_opts.column_predicates != nullptr) {
         // init selection position index
         uint16_t selected_size = block->selected_size();
+        uint16_t original_size = selected_size;
+        SCOPED_RAW_TIMER(&_opts.stats->vec_cond_ns);
         for (auto column_predicate : *_opts.column_predicates) {
             auto column_block = block->column_block(column_predicate->column_id());
             column_predicate->evaluate(&column_block, block->selection_vector(), &selected_size);
         }
         block->set_selected_size(selected_size);
+        _opts.stats->rows_vec_cond_filtered += original_size - selected_size;
     }
+    ++_opts.stats->blocks_load;
     return Status::OK();
 }
 
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h
index c2b4149..f3bae81 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -71,6 +71,8 @@ private:
     uint32_t segment_id() const { return _segment->id(); }
     uint32_t num_rows() const { return _segment->num_rows(); }
 
+    Status _seek_columns(const std::vector<ColumnId>& column_ids, rowid_t pos);
+
 private:
     std::shared_ptr<Segment> _segment;
     // TODO(zc): rethink if we need copy it
diff --git a/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp b/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp
index 3c4fb63..becc17d 100644
--- a/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp
+++ b/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp
@@ -117,7 +117,11 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows, s
         ColumnIterator* iter = nullptr;
         st = reader.new_iterator(&iter);
         ASSERT_TRUE(st.ok());
-
+        ColumnIteratorOptions iter_opts;
+        OlapReaderStatistics stats;
+        iter_opts.stats = &stats;
+        st = iter->init(iter_opts);
+        ASSERT_TRUE(st.ok());
         // sequence read
         {
             st = iter->seek_to_first();
diff --git a/be/test/olap/rowset/segment_v2/segment_test.cpp b/be/test/olap/rowset/segment_v2/segment_test.cpp
index ec33b98..7ee2469 100644
--- a/be/test/olap/rowset/segment_v2/segment_test.cpp
+++ b/be/test/olap/rowset/segment_v2/segment_test.cpp
@@ -98,9 +98,11 @@ TEST_F(SegmentReaderWriterTest, normal) {
         ASSERT_TRUE(st.ok());
         ASSERT_EQ(4096, segment->num_rows());
         Schema schema(*tablet_schema);
+        OlapReaderStatistics stats;
         // scan all rows
         {
             StorageReadOptions read_opts;
+            read_opts.stats = &stats;
             std::unique_ptr<RowwiseIterator> iter;
             segment->new_iterator(schema, read_opts, &iter);
 
@@ -155,6 +157,7 @@ TEST_F(SegmentReaderWriterTest, normal) {
             }
 
             StorageReadOptions read_opts;
+            read_opts.stats = &stats;
             read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(), true);
             std::unique_ptr<RowwiseIterator> iter;
             segment->new_iterator(schema, read_opts, &iter);
@@ -180,6 +183,7 @@ TEST_F(SegmentReaderWriterTest, normal) {
             }
 
             StorageReadOptions read_opts;
+            read_opts.stats = &stats;
             read_opts.key_ranges.emplace_back(lower_bound.get(), false, nullptr, false);
             std::unique_ptr<RowwiseIterator> iter;
             segment->new_iterator(schema, read_opts, &iter);
@@ -209,6 +213,7 @@ TEST_F(SegmentReaderWriterTest, normal) {
             }
 
             StorageReadOptions read_opts;
+            read_opts.stats = &stats;
             read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(), false);
             std::unique_ptr<RowwiseIterator> iter;
             segment->new_iterator(schema, read_opts, &iter);
@@ -282,6 +287,7 @@ TEST_F(SegmentReaderWriterTest, TestZoneMap) {
         ASSERT_TRUE(st.ok());
         ASSERT_EQ(64 * 1024, segment->num_rows());
         Schema schema(*tablet_schema);
+        OlapReaderStatistics stats;
         // test empty segment iterator
         {
             // the first two page will be read by this condition
@@ -295,6 +301,7 @@ TEST_F(SegmentReaderWriterTest, TestZoneMap) {
             conditions->append_condition(condition);
 
             StorageReadOptions read_opts;
+            read_opts.stats = &stats;
             read_opts.conditions = conditions.get();
 
             std::unique_ptr<RowwiseIterator> iter;
@@ -318,6 +325,7 @@ TEST_F(SegmentReaderWriterTest, TestZoneMap) {
             conditions->append_condition(condition);
 
             StorageReadOptions read_opts;
+            read_opts.stats = &stats;
             read_opts.conditions = conditions.get();
 
             std::unique_ptr<RowwiseIterator> iter;
@@ -376,6 +384,7 @@ TEST_F(SegmentReaderWriterTest, TestZoneMap) {
             delete_conditions->append_condition(delete_condition);
 
             StorageReadOptions read_opts;
+            read_opts.stats = &stats;
             read_opts.conditions = conditions.get();
             read_opts.delete_conditions.push_back(delete_conditions.get());
 
@@ -546,9 +555,11 @@ TEST_F(SegmentReaderWriterTest, TestDefaultValueColumn) {
         ASSERT_TRUE(st.ok());
         ASSERT_EQ(4096, segment->num_rows());
         Schema schema(*new_tablet_schema_1);
+        OlapReaderStatistics stats;
         // scan all rows
         {
             StorageReadOptions read_opts;
+            read_opts.stats = &stats;
             std::unique_ptr<RowwiseIterator> iter;
             segment->new_iterator(schema, read_opts, &iter);
 
@@ -601,9 +612,11 @@ TEST_F(SegmentReaderWriterTest, TestDefaultValueColumn) {
         ASSERT_TRUE(st.ok());
         ASSERT_EQ(4096, segment->num_rows());
         Schema schema(*new_tablet_schema_1);
+        OlapReaderStatistics stats;
         // scan all rows
         {
             StorageReadOptions read_opts;
+            read_opts.stats = &stats;
             std::unique_ptr<RowwiseIterator> iter;
             segment->new_iterator(schema, read_opts, &iter);
 
@@ -696,10 +709,11 @@ TEST_F(SegmentReaderWriterTest, TestStringDict) {
         ASSERT_TRUE(st.ok());
         ASSERT_EQ(4096, segment->num_rows());
         Schema schema(*tablet_schema);
-
+        OlapReaderStatistics stats;
         // scan all rows
         {
             StorageReadOptions read_opts;
+            read_opts.stats = &stats;
             std::unique_ptr<RowwiseIterator> iter;
             segment->new_iterator(schema, read_opts, &iter);
 
@@ -745,6 +759,7 @@ TEST_F(SegmentReaderWriterTest, TestStringDict) {
             }
 
             StorageReadOptions read_opts;
+            read_opts.stats = &stats;
             read_opts.key_ranges.emplace_back(lower_bound.get(), false, nullptr, false);
             std::unique_ptr<RowwiseIterator> iter;
             segment->new_iterator(schema, read_opts, &iter);
@@ -775,6 +790,7 @@ TEST_F(SegmentReaderWriterTest, TestStringDict) {
             }
 
             StorageReadOptions read_opts;
+            read_opts.stats = &stats;
             read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(), false);
             std::unique_ptr<RowwiseIterator> iter;
             segment->new_iterator(schema, read_opts, &iter);
@@ -797,6 +813,7 @@ TEST_F(SegmentReaderWriterTest, TestStringDict) {
             conditions->append_condition(condition);
 
             StorageReadOptions read_opts;
+            read_opts.stats = &stats;
             read_opts.conditions = conditions.get();
 
             std::unique_ptr<RowwiseIterator> iter;
@@ -847,6 +864,7 @@ TEST_F(SegmentReaderWriterTest, TestStringDict) {
             conditions->append_condition(condition);
 
             StorageReadOptions read_opts;
+            read_opts.stats = &stats;
             read_opts.conditions = conditions.get();
 
             std::unique_ptr<RowwiseIterator> iter;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org