You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by kx...@apache.org on 2023/06/06 15:15:33 UTC
[doris] 31/36: [improvement](column reader) lazy load indices (#20456)
This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0-beta
in repository https://gitbox.apache.org/repos/asf/doris.git
commit f4d0089888fc21901725b0dbe2ed933620a84d6b
Author: TengJianPing <18...@users.noreply.github.com>
AuthorDate: Tue Jun 6 16:36:06 2023 +0800
[improvement](column reader) lazy load indices (#20456)
Currently, when reading column data, all types of indices are read even if they are not actually used. This PR implements lazy loading of indices, so each index is loaded only when it is first needed.
---
be/src/olap/rowset/segment_v2/column_reader.cpp | 42 ++++++++++++++++---------
be/src/olap/rowset/segment_v2/column_reader.h | 34 ++++++++------------
2 files changed, 40 insertions(+), 36 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 2469fa5924..609c9fdc28 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -179,7 +179,8 @@ ColumnReader::ColumnReader(const ColumnReaderOptions& opts, const ColumnMetaPB&
_opts(opts),
_num_rows(num_rows),
_file_reader(std::move(file_reader)),
- _dict_encoding_type(UNKNOWN_DICT_ENCODING) {}
+ _dict_encoding_type(UNKNOWN_DICT_ENCODING),
+ _use_index_page_cache(!config::disable_storage_page_cache) {}
ColumnReader::~ColumnReader() = default;
@@ -195,15 +196,20 @@ Status ColumnReader::init() {
switch (index_meta.type()) {
case ORDINAL_INDEX:
_ordinal_index_meta = &index_meta.ordinal_index();
+ _ordinal_index.reset(
+ new OrdinalIndexReader(_file_reader, _ordinal_index_meta, _num_rows));
break;
case ZONE_MAP_INDEX:
_zone_map_index_meta = &index_meta.zone_map_index();
+ _zone_map_index.reset(new ZoneMapIndexReader(_file_reader, _zone_map_index_meta));
break;
case BITMAP_INDEX:
_bitmap_index_meta = &index_meta.bitmap_index();
+ _bitmap_index.reset(new BitmapIndexReader(_file_reader, _bitmap_index_meta));
break;
case BLOOM_FILTER_INDEX:
_bf_index_meta = &index_meta.bloom_filter_index();
+ _bloom_filter_index.reset(new BloomFilterIndexReader(_file_reader, _bf_index_meta));
break;
default:
return Status::Corruption("Bad file {}: invalid column index type {}",
@@ -220,7 +226,7 @@ Status ColumnReader::init() {
}
Status ColumnReader::new_bitmap_index_iterator(BitmapIndexIterator** iterator) {
- RETURN_IF_ERROR(_ensure_index_loaded());
+ RETURN_IF_ERROR(_load_bitmap_index(_use_index_page_cache, _opts.kept_in_memory));
RETURN_IF_ERROR(_bitmap_index->new_iterator(iterator));
return Status::OK();
}
@@ -261,8 +267,6 @@ Status ColumnReader::read_page(const ColumnIteratorOptions& iter_opts, const Pag
Status ColumnReader::get_row_ranges_by_zone_map(
const AndBlockColumnPredicate* col_predicates,
const std::vector<const ColumnPredicate*>* delete_predicates, RowRanges* row_ranges) {
- RETURN_IF_ERROR(_ensure_index_loaded());
-
std::vector<uint32_t> page_indexes;
RETURN_IF_ERROR(_get_filtered_pages(col_predicates, delete_predicates, &page_indexes));
RETURN_IF_ERROR(_calculate_row_ranges(page_indexes, row_ranges));
@@ -374,6 +378,8 @@ Status ColumnReader::_get_filtered_pages(
const AndBlockColumnPredicate* col_predicates,
const std::vector<const ColumnPredicate*>* delete_predicates,
std::vector<uint32_t>* page_indexes) {
+ RETURN_IF_ERROR(_load_zone_map_index(_use_index_page_cache, _opts.kept_in_memory));
+
FieldType type = _type_info->type();
const std::vector<ZoneMapPB>& zone_maps = _zone_map_index->page_zone_maps();
int32_t page_size = _zone_map_index->num_pages();
@@ -412,6 +418,7 @@ Status ColumnReader::_get_filtered_pages(
Status ColumnReader::_calculate_row_ranges(const std::vector<uint32_t>& page_indexes,
RowRanges* row_ranges) {
row_ranges->clear();
+ RETURN_IF_ERROR(_load_ordinal_index(_use_index_page_cache, _opts.kept_in_memory));
for (auto i : page_indexes) {
ordinal_t page_first_id = _ordinal_index->get_first_ordinal(i);
ordinal_t page_last_id = _ordinal_index->get_last_ordinal(i);
@@ -423,7 +430,8 @@ Status ColumnReader::_calculate_row_ranges(const std::vector<uint32_t>& page_ind
Status ColumnReader::get_row_ranges_by_bloom_filter(const AndBlockColumnPredicate* col_predicates,
RowRanges* row_ranges) {
- RETURN_IF_ERROR(_ensure_index_loaded());
+ RETURN_IF_ERROR(_load_ordinal_index(_use_index_page_cache, _opts.kept_in_memory));
+ RETURN_IF_ERROR(_load_bloom_filter_index(_use_index_page_cache, _opts.kept_in_memory));
RowRanges bf_row_ranges;
std::unique_ptr<BloomFilterIndexIterator> bf_iter;
RETURN_IF_ERROR(_bloom_filter_index->new_iterator(&bf_iter));
@@ -455,22 +463,25 @@ Status ColumnReader::get_row_ranges_by_bloom_filter(const AndBlockColumnPredicat
Status ColumnReader::_load_ordinal_index(bool use_page_cache, bool kept_in_memory) {
DCHECK(_ordinal_index_meta != nullptr);
- _ordinal_index.reset(new OrdinalIndexReader(_file_reader, _ordinal_index_meta, _num_rows));
- return _ordinal_index->load(use_page_cache, kept_in_memory);
+ return _load_ordinal_index_once.call([this, use_page_cache, kept_in_memory] {
+ return _ordinal_index->load(use_page_cache, kept_in_memory);
+ });
}
Status ColumnReader::_load_zone_map_index(bool use_page_cache, bool kept_in_memory) {
if (_zone_map_index_meta != nullptr) {
- _zone_map_index.reset(new ZoneMapIndexReader(_file_reader, _zone_map_index_meta));
- return _zone_map_index->load(use_page_cache, kept_in_memory);
+ return _load_zone_map_index_once.call([this, use_page_cache, kept_in_memory] {
+ return _zone_map_index->load(use_page_cache, kept_in_memory);
+ });
}
return Status::OK();
}
Status ColumnReader::_load_bitmap_index(bool use_page_cache, bool kept_in_memory) {
if (_bitmap_index_meta != nullptr) {
- _bitmap_index.reset(new BitmapIndexReader(_file_reader, _bitmap_index_meta));
- return _bitmap_index->load(use_page_cache, kept_in_memory);
+ return _load_bitmap_index_once.call([this, use_page_cache, kept_in_memory] {
+ return _bitmap_index->load(use_page_cache, kept_in_memory);
+ });
}
return Status::OK();
}
@@ -513,14 +524,15 @@ Status ColumnReader::_load_inverted_index_index(const TabletIndex* index_meta) {
Status ColumnReader::_load_bloom_filter_index(bool use_page_cache, bool kept_in_memory) {
if (_bf_index_meta != nullptr) {
- _bloom_filter_index.reset(new BloomFilterIndexReader(_file_reader, _bf_index_meta));
- return _bloom_filter_index->load(use_page_cache, kept_in_memory);
+ return _load_bloom_filter_index_once.call([this, use_page_cache, kept_in_memory] {
+ return _bloom_filter_index->load(use_page_cache, kept_in_memory);
+ });
}
return Status::OK();
}
Status ColumnReader::seek_to_first(OrdinalPageIndexIterator* iter) {
- RETURN_IF_ERROR(_ensure_index_loaded());
+ RETURN_IF_ERROR(_load_ordinal_index(_use_index_page_cache, _opts.kept_in_memory));
*iter = _ordinal_index->begin();
if (!iter->valid()) {
return Status::NotFound("Failed to seek to first rowid");
@@ -529,7 +541,7 @@ Status ColumnReader::seek_to_first(OrdinalPageIndexIterator* iter) {
}
Status ColumnReader::seek_at_or_before(ordinal_t ordinal, OrdinalPageIndexIterator* iter) {
- RETURN_IF_ERROR(_ensure_index_loaded());
+ RETURN_IF_ERROR(_load_ordinal_index(_use_index_page_cache, _opts.kept_in_memory));
*iter = _ordinal_index->seek_at_or_before(ordinal);
if (!iter->valid()) {
return Status::NotFound("Failed to seek to ordinal {}, ", ordinal);
diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h
index 1fe87acb16..18f5aad760 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -173,26 +173,13 @@ public:
DictEncodingType get_dict_encoding_type() { return _dict_encoding_type; }
- void disable_index_meta_cache() { _index_meta_use_page_cache = false; }
+ void disable_index_meta_cache() { _use_index_page_cache = false; }
private:
ColumnReader(const ColumnReaderOptions& opts, const ColumnMetaPB& meta, uint64_t num_rows,
io::FileReaderSPtr file_reader);
Status init();
- // Read and load necessary column indexes into memory if it hasn't been loaded.
- // May be called multiple times, subsequent calls will no op.
- Status _ensure_index_loaded() {
- return _load_index_once.call([this] {
- bool use_page_cache = !config::disable_storage_page_cache && _index_meta_use_page_cache;
- RETURN_IF_ERROR(_load_zone_map_index(use_page_cache, _opts.kept_in_memory));
- RETURN_IF_ERROR(_load_ordinal_index(use_page_cache, _opts.kept_in_memory));
- RETURN_IF_ERROR(_load_bitmap_index(use_page_cache, _opts.kept_in_memory));
- RETURN_IF_ERROR(_load_bloom_filter_index(use_page_cache, _opts.kept_in_memory));
- return Status::OK();
- });
- }
-
// Read column inverted indexes into memory
// May be called multiple times, subsequent calls will no op.
Status _ensure_inverted_index_loaded(const TabletIndex* index_meta) {
@@ -201,11 +188,11 @@ private:
return Status::OK();
}
- Status _load_zone_map_index(bool use_page_cache, bool kept_in_memory);
- Status _load_ordinal_index(bool use_page_cache, bool kept_in_memory);
- Status _load_bitmap_index(bool use_page_cache, bool kept_in_memory);
- Status _load_inverted_index_index(const TabletIndex* index_meta);
- Status _load_bloom_filter_index(bool use_page_cache, bool kept_in_memory);
+ [[nodiscard]] Status _load_zone_map_index(bool use_page_cache, bool kept_in_memory);
+ [[nodiscard]] Status _load_ordinal_index(bool use_page_cache, bool kept_in_memory);
+ [[nodiscard]] Status _load_bitmap_index(bool use_page_cache, bool kept_in_memory);
+ [[nodiscard]] Status _load_inverted_index_index(const TabletIndex* index_meta);
+ [[nodiscard]] Status _load_bloom_filter_index(bool use_page_cache, bool kept_in_memory);
bool _zone_map_match_condition(const ZoneMapPB& zone_map, WrapperField* min_value_container,
WrapperField* max_value_container,
@@ -237,20 +224,25 @@ private:
const EncodingInfo* _encoding_info =
nullptr; // initialized in init(), used for create PageDecoder
+ bool _use_index_page_cache;
+
// meta for various column indexes (null if the index is absent)
- bool _index_meta_use_page_cache = true;
const ZoneMapIndexPB* _zone_map_index_meta = nullptr;
const OrdinalIndexPB* _ordinal_index_meta = nullptr;
const BitmapIndexPB* _bitmap_index_meta = nullptr;
const BloomFilterIndexPB* _bf_index_meta = nullptr;
- DorisCallOnce<Status> _load_index_once;
mutable std::mutex _load_index_lock;
std::unique_ptr<ZoneMapIndexReader> _zone_map_index;
std::unique_ptr<OrdinalIndexReader> _ordinal_index;
std::unique_ptr<BitmapIndexReader> _bitmap_index;
std::unique_ptr<InvertedIndexReader> _inverted_index;
std::unique_ptr<BloomFilterIndexReader> _bloom_filter_index;
+ DorisCallOnce<Status> _load_zone_map_index_once;
+ DorisCallOnce<Status> _load_ordinal_index_once;
+ DorisCallOnce<Status> _load_bitmap_index_once;
+ DorisCallOnce<Status> _load_bloom_filter_index_once;
+ DorisCallOnce<Status> _load_inverted_index_once;
std::vector<std::unique_ptr<ColumnReader>> _sub_readers;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org