You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2019/10/22 01:43:28 UTC
[incubator-doris] branch master updated: add profile for segment v2
(#2015)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9c2d149 add profile for segment v2 (#2015)
9c2d149 is described below
commit 9c2d149c3654686ab45abad1c72ffcc8d3ba5d1b
Author: kangpinghuang <ka...@126.com>
AuthorDate: Tue Oct 22 09:43:16 2019 +0800
add profile for segment v2 (#2015)
---
be/src/exec/olap_scan_node.cpp | 3 ++
be/src/exec/olap_scan_node.h | 7 +++
be/src/exec/olap_scanner.cpp | 3 ++
be/src/olap/olap_common.h | 4 ++
be/src/olap/rowset/beta_rowset_reader.cpp | 45 ++++++++++-------
be/src/olap/rowset/segment_v2/column_reader.cpp | 58 +++++++++++++++-------
be/src/olap/rowset/segment_v2/column_reader.h | 27 ++++++----
be/src/olap/rowset/segment_v2/segment.cpp | 3 +-
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 36 +++++++++-----
be/src/olap/rowset/segment_v2/segment_iterator.h | 2 +
.../segment_v2/column_reader_writer_test.cpp | 6 ++-
be/test/olap/rowset/segment_v2/segment_test.cpp | 20 +++++++-
12 files changed, 151 insertions(+), 63 deletions(-)
diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp
index 1310224..c039c3e 100644
--- a/be/src/exec/olap_scan_node.cpp
+++ b/be/src/exec/olap_scan_node.cpp
@@ -139,6 +139,9 @@ void OlapScanNode::_init_counter(RuntimeState* state) {
_index_load_timer = ADD_TIMER(_runtime_profile, "IndexLoadTime");
_scan_timer = ADD_TIMER(_runtime_profile, "ScanTime");
+
+ _total_pages_num_counter = ADD_COUNTER(_runtime_profile, "TotalPagesNum", TUnit::UNIT);
+ _cached_pages_num_counter = ADD_COUNTER(_runtime_profile, "CachedPagesNum", TUnit::UNIT);
}
Status OlapScanNode::prepare(RuntimeState* state) {
diff --git a/be/src/exec/olap_scan_node.h b/be/src/exec/olap_scan_node.h
index fa280b4..d8c3716 100644
--- a/be/src/exec/olap_scan_node.h
+++ b/be/src/exec/olap_scan_node.h
@@ -266,6 +266,13 @@ private:
RuntimeProfile::Counter* _block_fetch_timer = nullptr;
RuntimeProfile::Counter* _index_load_timer = nullptr;
+
+ // total pages read
+ // used by segment v2
+ RuntimeProfile::Counter* _total_pages_num_counter = nullptr;
+ // page read from cache
+ // used by segment v2
+ RuntimeProfile::Counter* _cached_pages_num_counter = nullptr;
};
} // namespace doris
diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp
index 79a41a9..b859e6f 100644
--- a/be/src/exec/olap_scanner.cpp
+++ b/be/src/exec/olap_scanner.cpp
@@ -479,6 +479,9 @@ void OlapScanner::update_counter() {
COUNTER_UPDATE(_parent->_index_load_timer, _reader->stats().index_load_ns);
+ COUNTER_UPDATE(_parent->_total_pages_num_counter, _reader->stats().total_pages_num);
+ COUNTER_UPDATE(_parent->_cached_pages_num_counter, _reader->stats().cached_pages_num);
+
DorisMetrics::query_scan_bytes.increment(_reader->stats().compressed_bytes_read);
DorisMetrics::query_scan_rows.increment(_reader->stats().raw_rows_read);
diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index a7197be..34203f2 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -229,6 +229,7 @@ struct OlapReaderStatistics {
int64_t decompress_ns = 0;
int64_t uncompressed_bytes_read = 0;
+ // total read bytes in memory
int64_t bytes_read = 0;
int64_t block_load_ns = 0;
@@ -246,6 +247,9 @@ struct OlapReaderStatistics {
int64_t rows_del_filtered = 0;
int64_t index_load_ns = 0;
+
+ int64_t total_pages_num = 0;
+ int64_t cached_pages_num = 0;
};
typedef uint32_t ColumnId;
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp
index 3d6910a..e2620dc 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -108,32 +108,39 @@ OLAPStatus BetaRowsetReader::init(RowsetReaderContext* read_context) {
OLAPStatus BetaRowsetReader::next_block(RowBlock** block) {
// read next input block
_input_block->clear();
- auto s = _iterator->next_batch(_input_block.get());
- if (!s.ok()) {
- if (s.is_end_of_file()) {
- *block = nullptr;
- return OLAP_ERR_DATA_EOF;
+ {
+ SCOPED_RAW_TIMER(&_context->stats->block_fetch_ns);
+ auto s = _iterator->next_batch(_input_block.get());
+ if (!s.ok()) {
+ if (s.is_end_of_file()) {
+ *block = nullptr;
+ return OLAP_ERR_DATA_EOF;
+ }
+ LOG(WARNING) << "failed to read next block: " << s.to_string();
+ return OLAP_ERR_ROWSET_READ_FAILED;
}
- LOG(WARNING) << "failed to read next block: " << s.to_string();
- return OLAP_ERR_ROWSET_READ_FAILED;
}
+
// convert to output block
_output_block->clear();
size_t rows_read = 0;
uint16_t* selection_vector = _input_block->selection_vector();
- for (size_t i = 0; i < _input_block->selected_size(); ++i) {
- uint16_t row_idx = selection_vector[i];
- // deep copy row from input block to output block because
- // RowBlock use MemPool and RowBlockV2 use Arena
- // TODO(hkp): unify RowBlockV2 to use MemPool to boost performance
- _output_block->get_row(row_idx, _row.get());
- // convert return_columns to seek_columns
- s = _input_block->deep_copy_to_row_cursor(row_idx, _row.get(), _output_block->mem_pool());
- if (!s.ok()) {
- LOG(WARNING) << "failed to copy row: " << s.to_string();
- return OLAP_ERR_ROWSET_READ_FAILED;
+ {
+ SCOPED_RAW_TIMER(&_context->stats->block_convert_ns);
+ for (uint16_t i = 0; i < _input_block->selected_size(); ++i) {
+ uint16_t row_idx = selection_vector[i];
+ // deep copy row from input block to output block because
+ // RowBlock use MemPool and RowBlockV2 use Arena
+ // TODO(hkp): unify RowBlockV2 to use MemPool to boost performance
+ _output_block->get_row(row_idx, _row.get());
+ // convert return_columns to seek_columns
+ auto s = _input_block->deep_copy_to_row_cursor(row_idx, _row.get(), _output_block->mem_pool());
+ if (!s.ok()) {
+ LOG(WARNING) << "failed to copy row: " << s.to_string();
+ return OLAP_ERR_ROWSET_READ_FAILED;
+ }
+ ++rows_read;
}
- ++rows_read;
}
_output_block->set_pos(0);
_output_block->set_limit(rows_read);
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp
index feb9680..0752ca9 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -110,13 +110,15 @@ Status ColumnReader::new_iterator(ColumnIterator** iterator) {
return Status::OK();
}
-Status ColumnReader::read_page(const PagePointer& pp, PageHandle* handle) {
+Status ColumnReader::read_page(const PagePointer& pp, OlapReaderStatistics* stats, PageHandle* handle) {
+ stats->total_pages_num++;
auto cache = StoragePageCache::instance();
PageCacheHandle cache_handle;
StoragePageCache::CacheKey cache_key(_file->file_name(), pp.offset);
if (cache->lookup(cache_key, &cache_handle)) {
// we find page in cache, use it
*handle = PageHandle(std::move(cache_handle));
+ stats->cached_pages_num++;
return Status::OK();
}
// Now we read this from file.
@@ -129,7 +131,11 @@ Status ColumnReader::read_page(const PagePointer& pp, PageHandle* handle) {
// this buffer will assigned uncompressed page, and origin content will be freed.
std::unique_ptr<uint8_t[]> page(new uint8_t[page_size]);
Slice page_slice(page.get(), page_size);
- RETURN_IF_ERROR(_file->read_at(pp.offset, page_slice));
+ {
+ SCOPED_RAW_TIMER(&stats->io_ns);
+ RETURN_IF_ERROR(_file->read_at(pp.offset, page_slice));
+ stats->compressed_bytes_read += page_size;
+ }
size_t data_size = page_size - 4;
if (_opts.verify_checksum) {
@@ -148,7 +154,10 @@ Status ColumnReader::read_page(const PagePointer& pp, PageHandle* handle) {
PageDecompressor decompressor(page_slice, _compress_codec);
Slice uncompressed_page;
- RETURN_IF_ERROR(decompressor.decompress_to(&uncompressed_page));
+ {
+ SCOPED_RAW_TIMER(&stats->decompress_ns);
+ RETURN_IF_ERROR(decompressor.decompress_to(&uncompressed_page));
+ }
// If decompressor create new heap memory for uncompressed data,
// assign this uncompressed page to page and page slice
@@ -156,6 +165,7 @@ Status ColumnReader::read_page(const PagePointer& pp, PageHandle* handle) {
page.reset((uint8_t*)uncompressed_page.data);
}
page_slice = uncompressed_page;
+ stats->uncompressed_bytes_read += page_slice.size;
}
// insert this into cache and return the cache handle
cache->insert(cache_key, page_slice, &cache_handle);
@@ -166,10 +176,10 @@ Status ColumnReader::read_page(const PagePointer& pp, PageHandle* handle) {
}
void ColumnReader::get_row_ranges_by_zone_map(CondColumn* cond_column,
- const std::vector<CondColumn*>& delete_conditions,
+ const std::vector<CondColumn*>& delete_conditions, OlapReaderStatistics* stats,
RowRanges* row_ranges) {
std::vector<uint32_t> page_indexes;
- _get_filtered_pages(cond_column, delete_conditions, &page_indexes);
+ _get_filtered_pages(cond_column, stats, delete_conditions, &page_indexes);
_calculate_row_ranges(page_indexes, row_ranges);
}
@@ -177,7 +187,7 @@ PagePointer ColumnReader::get_dict_page_pointer() const {
return _meta.dict_page();
}
-void ColumnReader::_get_filtered_pages(CondColumn* cond_column,
+void ColumnReader::_get_filtered_pages(CondColumn* cond_column, OlapReaderStatistics* stats,
const std::vector<CondColumn*>& delete_conditions, std::vector<uint32_t>* page_indexes) {
FieldType type = _type_info->type();
const std::vector<ZoneMapPB>& zone_maps = _column_zone_map->get_column_zone_map();
@@ -200,20 +210,25 @@ void ColumnReader::_get_filtered_pages(CondColumn* cond_column,
max_value->set_null();
}
}
- bool should_read = false;
if (cond_column == nullptr || cond_column->eval({min_value.get(), max_value.get()})) {
- should_read = true;
- }
- if (should_read) {
+ bool should_read = true;
for (auto& col_cond : delete_conditions) {
if (col_cond->del_eval({min_value.get(), max_value.get()}) == DEL_SATISFIED) {
should_read = false;
+ rowid_t page_first_id = _ordinal_index->get_first_row_id(i);
+ rowid_t page_last_id = _ordinal_index->get_last_row_id(i);
+ stats->rows_del_filtered += page_last_id - page_first_id + 1; // accumulate across pages ("=+" would overwrite)
break;
}
}
- }
- if (should_read) {
- page_indexes->push_back(i);
+ if (should_read) {
+ page_indexes->push_back(i);
+ }
+ } else {
+ // page filtered by zone map
+ rowid_t page_first_id = _ordinal_index->get_first_row_id(i);
+ rowid_t page_last_id = _ordinal_index->get_last_row_id(i);
+ stats->rows_stats_filtered += page_last_id - page_first_id + 1;
}
}
}
@@ -231,7 +246,8 @@ void ColumnReader::_calculate_row_ranges(const std::vector<uint32_t>& page_index
Status ColumnReader::_init_ordinal_index() {
PagePointer pp = _meta.ordinal_index_page();
PageHandle ph;
- RETURN_IF_ERROR(read_page(pp, &ph));
+ OlapReaderStatistics stats;
+ RETURN_IF_ERROR(read_page(pp, &stats, &ph));
_ordinal_index.reset(new OrdinalPageIndex(ph.data(), _num_rows));
RETURN_IF_ERROR(_ordinal_index->load());
@@ -244,7 +260,9 @@ Status ColumnReader::_init_column_zone_map() {
if (_meta.has_zone_map_page()) {
PagePointer pp = _meta.zone_map_page();
PageHandle ph;
- RETURN_IF_ERROR(read_page(pp, &ph));
+ // tmp statistics
+ OlapReaderStatistics stats;
+ RETURN_IF_ERROR(read_page(pp, &stats, &ph));
_column_zone_map.reset(new ColumnZoneMap(ph.data()));
RETURN_IF_ERROR(_column_zone_map->load());
} else {
@@ -382,6 +400,9 @@ Status FileColumnIterator::next_batch(size_t* n, ColumnBlock* dst) {
remaining -= nrows_in_page;
}
*n -= remaining;
+ // TODO(hkp): for string type, the bytes_read should be passed to page decoder
+ // bytes_read = data size + null bitmap size
+ _opts.stats->bytes_read += *n * dst->type_info()->size() + BitmapSize(dst->nrows());
return Status::OK();
}
@@ -402,7 +423,7 @@ Status FileColumnIterator::_load_next_page(bool* eos) {
// it ready to read
Status FileColumnIterator::_read_page(const OrdinalPageIndexIterator& iter, ParsedPage* page) {
page->page_pointer = iter.page();
- RETURN_IF_ERROR(_reader->read_page(page->page_pointer, &page->page_handle));
+ RETURN_IF_ERROR(_reader->read_page(page->page_pointer, _opts.stats, &page->page_handle));
// TODO(zc): read page from file
Slice data = page->page_handle.data();
@@ -444,7 +465,7 @@ Status FileColumnIterator::_read_page(const OrdinalPageIndexIterator& iter, Pars
if (binary_dict_page_decoder->is_dict_encoding()) {
if (_dict_decoder == nullptr) {
PagePointer pp = _reader->get_dict_page_pointer();
- RETURN_IF_ERROR(_reader->read_page(pp, &_dict_page_handle));
+ RETURN_IF_ERROR(_reader->read_page(pp, _opts.stats, &_dict_page_handle));
_dict_decoder.reset(new BinaryPlainPageDecoder(_dict_page_handle.data()));
RETURN_IF_ERROR(_dict_decoder->init());
@@ -458,7 +479,8 @@ Status FileColumnIterator::_read_page(const OrdinalPageIndexIterator& iter, Pars
return Status::OK();
}
-Status DefaultValueColumnIterator::init() {
+Status DefaultValueColumnIterator::init(const ColumnIteratorOptions& opts) {
+ _opts = opts;
// be consistent with segment v1
if (_default_value == "NULL" && _is_nullable) {
_is_default_value_null = true;
diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h
index 50ac177..f4d9c77 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -52,6 +52,11 @@ struct ColumnReaderOptions {
bool verify_checksum = true;
};
+struct ColumnIteratorOptions {
+ // reader statistics
+ OlapReaderStatistics* stats = nullptr;
+};
+
// There will be concurrent users to read the same column. So
// we should do our best to reduce resource usage through share
// same information, such as OrdinalPageIndex and Page data.
@@ -73,7 +78,7 @@ public:
Status seek_at_or_before(rowid_t rowid, OrdinalPageIndexIterator* iter);
// read a page from file into a page handle
- Status read_page(const PagePointer& pp, PageHandle* handle);
+ Status read_page(const PagePointer& pp, OlapReaderStatistics* stats, PageHandle* handle);
bool is_nullable() const { return _meta.is_nullable(); }
const EncodingInfo* encoding_info() const { return _encoding_info; }
@@ -84,8 +89,8 @@ public:
// get row ranges with zone map
// cond_column is user's query predicate
// delete_conditions is a vector of delete predicate of different version
- void get_row_ranges_by_zone_map(CondColumn* cond_column,
- const std::vector<CondColumn*>& delete_conditions, RowRanges* row_ranges);
+ void get_row_ranges_by_zone_map(CondColumn* cond_column, const std::vector<CondColumn*>& delete_conditions,
+ OlapReaderStatistics* stats, RowRanges* row_ranges);
PagePointer get_dict_page_pointer() const;
@@ -96,7 +101,7 @@ private:
Status _init_column_zone_map();
- void _get_filtered_pages(CondColumn* cond_column,
+ void _get_filtered_pages(CondColumn* cond_column, OlapReaderStatistics* stats,
const std::vector<CondColumn*>& delete_conditions, std::vector<uint32_t>* page_indexes);
void _calculate_row_ranges(const std::vector<uint32_t>& page_indexes, RowRanges* row_ranges);
@@ -125,7 +130,10 @@ public:
ColumnIterator() { }
virtual ~ColumnIterator() { }
- virtual Status init() { return Status::OK(); }
+ virtual Status init(const ColumnIteratorOptions& opts) {
+ _opts = opts;
+ return Status::OK();
+ }
// Seek to the first entry in the column.
virtual Status seek_to_first() = 0;
@@ -161,13 +169,10 @@ public:
// release next_batch related resource
Status finish_batch();
#endif
+protected:
+ ColumnIteratorOptions _opts;
};
-#if 0
-class DefaultValueIterator : public ColumnIterator {
-};
-#endif
-
// This iterator is used to read column data from file
class FileColumnIterator : public ColumnIterator {
public:
@@ -220,7 +225,7 @@ public:
_is_default_value_null(false),
_value_size(0) { }
- Status init() override;
+ Status init(const ColumnIteratorOptions& opts) override;
Status seek_to_first() override {
_current_rowid = 0;
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp
index 040be8c..39028d4 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -215,7 +215,8 @@ Status Segment::new_column_iterator(uint32_t cid, ColumnIterator** iter) {
std::unique_ptr<DefaultValueColumnIterator> default_value_iter(
new DefaultValueColumnIterator(tablet_column.default_value(),
tablet_column.is_nullable(), tablet_column.type()));
- RETURN_IF_ERROR(default_value_iter->init());
+ ColumnIteratorOptions iter_opts;
+ RETURN_IF_ERROR(default_value_iter->init(iter_opts));
*iter = default_value_iter.release();
return Status::OK();
}
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 0a50d05..ec5caa6 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -121,6 +121,9 @@ Status SegmentIterator::_prepare_seek(const StorageReadOptions::KeyRange& key_ra
for (auto cid : _seek_schema->column_ids()) {
if (_column_iterators[cid] == nullptr) {
RETURN_IF_ERROR(_segment->new_column_iterator(cid, &_column_iterators[cid]));
+ ColumnIteratorOptions iter_opts;
+ iter_opts.stats = _opts.stats;
+ RETURN_IF_ERROR(_column_iterators[cid]->init(iter_opts));
}
}
@@ -168,7 +171,7 @@ Status SegmentIterator::_get_row_ranges_from_zone_map(RowRanges* zone_map_row_ra
// get row ranges by zone map of this column
RowRanges column_zone_map_row_ranges;
_segment->_column_readers[cid]->get_row_ranges_by_zone_map(_opts.conditions->get_column(cid),
- column_delete_conditions[cid], &column_zone_map_row_ranges);
+ column_delete_conditions[cid], _opts.stats, &column_zone_map_row_ranges);
// intersection different columns's row ranges to get final row ranges by zone map
RowRanges::ranges_intersection(origin_row_ranges, column_zone_map_row_ranges, &origin_row_ranges);
}
@@ -184,10 +187,12 @@ Status SegmentIterator::_init_column_iterators() {
for (auto cid : _schema.column_ids()) {
if (_column_iterators[cid] == nullptr) {
RETURN_IF_ERROR(_segment->new_column_iterator(cid, &_column_iterators[cid]));
+ ColumnIteratorOptions iter_opts;
+ iter_opts.stats = _opts.stats;
+ RETURN_IF_ERROR(_column_iterators[cid]->init(iter_opts));
}
-
- _column_iterators[cid]->seek_to_ordinal(_cur_rowid);
}
+ _seek_columns(_schema.column_ids(), _cur_rowid);
return Status::OK();
}
@@ -267,9 +272,7 @@ Status SegmentIterator::_lookup_ordinal(const RowCursor& key, bool is_include,
// seek to the row and load that row to _key_cursor
Status SegmentIterator::_seek_and_peek(rowid_t rowid) {
- for (auto cid : _seek_schema->column_ids()) {
- _column_iterators[cid]->seek_to_ordinal(rowid);
- }
+ RETURN_IF_ERROR(_seek_columns(_seek_schema->column_ids(), rowid)); // propagate seek failure instead of reading from a bad position
size_t num_rows = 1;
// please note that usually RowBlockV2.clear() is called to free MemPool memory before reading the next block,
// but here since there won't be too many keys to seek, we don't call RowBlockV2.clear() so that we can use
@@ -301,7 +304,16 @@ Status SegmentIterator::_next_batch(RowBlockV2* block, size_t* rows_read) {
return Status::OK();
}
+Status SegmentIterator::_seek_columns(const std::vector<ColumnId>& column_ids, rowid_t pos) {
+ SCOPED_RAW_TIMER(&_opts.stats->block_seek_ns);
+ for (auto cid : column_ids) {
+ RETURN_IF_ERROR(_column_iterators[cid]->seek_to_ordinal(pos));
+ }
+ return Status::OK();
+}
+
Status SegmentIterator::next_batch(RowBlockV2* block) {
+ SCOPED_RAW_TIMER(&_opts.stats->block_load_ns);
if (UNLIKELY(!_inited)) {
RETURN_IF_ERROR(_init());
_inited = true;
@@ -327,9 +339,7 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
continue;
}
_cur_rowid = _row_ranges.get_range_from(_cur_range_id);
- for (auto cid : block->schema()->column_ids()) {
- RETURN_IF_ERROR(_column_iterators[cid]->seek_to_ordinal(_cur_rowid));
- }
+ RETURN_IF_ERROR(_seek_columns(block->schema()->column_ids(), _cur_rowid)); // keep the error propagation the removed loop had
break;
}
}
@@ -342,9 +352,7 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
block->set_selected_size(rows_to_read);
// update raw_rows_read counter
// judge nullptr for unit test case
- if (_opts.stats != nullptr) {
- _opts.stats->raw_rows_read += block->num_rows();
- }
+ _opts.stats->raw_rows_read += block->num_rows();
if (block->num_rows() == 0) {
return Status::EndOfFile("no more data in segment");
}
@@ -354,12 +362,16 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
if (_opts.column_predicates != nullptr) {
// init selection position index
uint16_t selected_size = block->selected_size();
+ uint16_t original_size = selected_size;
+ SCOPED_RAW_TIMER(&_opts.stats->vec_cond_ns);
for (auto column_predicate : *_opts.column_predicates) {
auto column_block = block->column_block(column_predicate->column_id());
column_predicate->evaluate(&column_block, block->selection_vector(), &selected_size);
}
block->set_selected_size(selected_size);
+ _opts.stats->rows_vec_cond_filtered += original_size - selected_size;
}
+ ++_opts.stats->blocks_load;
return Status::OK();
}
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h
index c2b4149..f3bae81 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -71,6 +71,8 @@ private:
uint32_t segment_id() const { return _segment->id(); }
uint32_t num_rows() const { return _segment->num_rows(); }
+ Status _seek_columns(const std::vector<ColumnId>& column_ids, rowid_t pos);
+
private:
std::shared_ptr<Segment> _segment;
// TODO(zc): rethink if we need copy it
diff --git a/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp b/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp
index 3c4fb63..becc17d 100644
--- a/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp
+++ b/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp
@@ -117,7 +117,11 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows, s
ColumnIterator* iter = nullptr;
st = reader.new_iterator(&iter);
ASSERT_TRUE(st.ok());
-
+ ColumnIteratorOptions iter_opts;
+ OlapReaderStatistics stats;
+ iter_opts.stats = &stats;
+ st = iter->init(iter_opts);
+ ASSERT_TRUE(st.ok());
// sequence read
{
st = iter->seek_to_first();
diff --git a/be/test/olap/rowset/segment_v2/segment_test.cpp b/be/test/olap/rowset/segment_v2/segment_test.cpp
index ec33b98..7ee2469 100644
--- a/be/test/olap/rowset/segment_v2/segment_test.cpp
+++ b/be/test/olap/rowset/segment_v2/segment_test.cpp
@@ -98,9 +98,11 @@ TEST_F(SegmentReaderWriterTest, normal) {
ASSERT_TRUE(st.ok());
ASSERT_EQ(4096, segment->num_rows());
Schema schema(*tablet_schema);
+ OlapReaderStatistics stats;
// scan all rows
{
StorageReadOptions read_opts;
+ read_opts.stats = &stats;
std::unique_ptr<RowwiseIterator> iter;
segment->new_iterator(schema, read_opts, &iter);
@@ -155,6 +157,7 @@ TEST_F(SegmentReaderWriterTest, normal) {
}
StorageReadOptions read_opts;
+ read_opts.stats = &stats;
read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(), true);
std::unique_ptr<RowwiseIterator> iter;
segment->new_iterator(schema, read_opts, &iter);
@@ -180,6 +183,7 @@ TEST_F(SegmentReaderWriterTest, normal) {
}
StorageReadOptions read_opts;
+ read_opts.stats = &stats;
read_opts.key_ranges.emplace_back(lower_bound.get(), false, nullptr, false);
std::unique_ptr<RowwiseIterator> iter;
segment->new_iterator(schema, read_opts, &iter);
@@ -209,6 +213,7 @@ TEST_F(SegmentReaderWriterTest, normal) {
}
StorageReadOptions read_opts;
+ read_opts.stats = &stats;
read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(), false);
std::unique_ptr<RowwiseIterator> iter;
segment->new_iterator(schema, read_opts, &iter);
@@ -282,6 +287,7 @@ TEST_F(SegmentReaderWriterTest, TestZoneMap) {
ASSERT_TRUE(st.ok());
ASSERT_EQ(64 * 1024, segment->num_rows());
Schema schema(*tablet_schema);
+ OlapReaderStatistics stats;
// test empty segment iterator
{
// the first two page will be read by this condition
@@ -295,6 +301,7 @@ TEST_F(SegmentReaderWriterTest, TestZoneMap) {
conditions->append_condition(condition);
StorageReadOptions read_opts;
+ read_opts.stats = &stats;
read_opts.conditions = conditions.get();
std::unique_ptr<RowwiseIterator> iter;
@@ -318,6 +325,7 @@ TEST_F(SegmentReaderWriterTest, TestZoneMap) {
conditions->append_condition(condition);
StorageReadOptions read_opts;
+ read_opts.stats = &stats;
read_opts.conditions = conditions.get();
std::unique_ptr<RowwiseIterator> iter;
@@ -376,6 +384,7 @@ TEST_F(SegmentReaderWriterTest, TestZoneMap) {
delete_conditions->append_condition(delete_condition);
StorageReadOptions read_opts;
+ read_opts.stats = &stats;
read_opts.conditions = conditions.get();
read_opts.delete_conditions.push_back(delete_conditions.get());
@@ -546,9 +555,11 @@ TEST_F(SegmentReaderWriterTest, TestDefaultValueColumn) {
ASSERT_TRUE(st.ok());
ASSERT_EQ(4096, segment->num_rows());
Schema schema(*new_tablet_schema_1);
+ OlapReaderStatistics stats;
// scan all rows
{
StorageReadOptions read_opts;
+ read_opts.stats = &stats;
std::unique_ptr<RowwiseIterator> iter;
segment->new_iterator(schema, read_opts, &iter);
@@ -601,9 +612,11 @@ TEST_F(SegmentReaderWriterTest, TestDefaultValueColumn) {
ASSERT_TRUE(st.ok());
ASSERT_EQ(4096, segment->num_rows());
Schema schema(*new_tablet_schema_1);
+ OlapReaderStatistics stats;
// scan all rows
{
StorageReadOptions read_opts;
+ read_opts.stats = &stats;
std::unique_ptr<RowwiseIterator> iter;
segment->new_iterator(schema, read_opts, &iter);
@@ -696,10 +709,11 @@ TEST_F(SegmentReaderWriterTest, TestStringDict) {
ASSERT_TRUE(st.ok());
ASSERT_EQ(4096, segment->num_rows());
Schema schema(*tablet_schema);
-
+ OlapReaderStatistics stats;
// scan all rows
{
StorageReadOptions read_opts;
+ read_opts.stats = &stats;
std::unique_ptr<RowwiseIterator> iter;
segment->new_iterator(schema, read_opts, &iter);
@@ -745,6 +759,7 @@ TEST_F(SegmentReaderWriterTest, TestStringDict) {
}
StorageReadOptions read_opts;
+ read_opts.stats = &stats;
read_opts.key_ranges.emplace_back(lower_bound.get(), false, nullptr, false);
std::unique_ptr<RowwiseIterator> iter;
segment->new_iterator(schema, read_opts, &iter);
@@ -775,6 +790,7 @@ TEST_F(SegmentReaderWriterTest, TestStringDict) {
}
StorageReadOptions read_opts;
+ read_opts.stats = &stats;
read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(), false);
std::unique_ptr<RowwiseIterator> iter;
segment->new_iterator(schema, read_opts, &iter);
@@ -797,6 +813,7 @@ TEST_F(SegmentReaderWriterTest, TestStringDict) {
conditions->append_condition(condition);
StorageReadOptions read_opts;
+ read_opts.stats = &stats;
read_opts.conditions = conditions.get();
std::unique_ptr<RowwiseIterator> iter;
@@ -847,6 +864,7 @@ TEST_F(SegmentReaderWriterTest, TestStringDict) {
conditions->append_condition(condition);
StorageReadOptions read_opts;
+ read_opts.stats = &stats;
read_opts.conditions = conditions.get();
std::unique_ptr<RowwiseIterator> iter;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org