You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by da...@apache.org on 2023/01/20 08:38:33 UTC
[doris] branch master updated: [improvement](vertical compaction) cache segment in vertical compaction (#16101)
This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 171404228f [improvement](vertical compaction) cache segment in vertical compaction (#16101)
171404228f is described below
commit 171404228f7b80d161c2aad6a3224234512c3c5a
Author: yixiutt <10...@users.noreply.github.com>
AuthorDate: Fri Jan 20 16:38:23 2023 +0800
[improvement](vertical compaction) cache segment in vertical compaction (#16101)
1. In vertical compaction, segments are loaded for every column group, so
we should cache the segment pointers to avoid too much repeated I/O.
2. Fix the vertical compaction data size bug.
---
be/src/olap/rowset/beta_rowset_reader.cpp | 10 ++++++----
be/src/olap/rowset/beta_rowset_reader.h | 3 ++-
be/src/olap/rowset/rowset_reader.h | 3 ++-
be/src/olap/rowset/segment_v2/segment_writer.cpp | 3 +++
be/src/vec/olap/vertical_block_reader.cpp | 4 +++-
5 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp
index 6612fff758..3b45b3cf3f 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -50,7 +50,8 @@ bool BetaRowsetReader::update_profile(RuntimeProfile* profile) {
}
Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context,
- std::vector<RowwiseIterator*>* out_iters) {
+ std::vector<RowwiseIterator*>* out_iters,
+ bool use_cache) {
RETURN_NOT_OK(_rowset->load());
_context = read_context;
if (_context->stats != nullptr) {
@@ -163,9 +164,10 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
_read_options.runtime_state = read_context->runtime_state;
// load segments
- RETURN_NOT_OK(SegmentLoader::instance()->load_segments(
- _rowset, &_segment_cache_handle,
- read_context->reader_type == ReaderType::READER_QUERY));
+ // use_cache is true when doing vertical compaction
+ bool should_use_cache = use_cache || read_context->reader_type == ReaderType::READER_QUERY;
+ RETURN_NOT_OK(SegmentLoader::instance()->load_segments(_rowset, &_segment_cache_handle,
+ should_use_cache));
// create iterator for each segment
std::vector<std::unique_ptr<RowwiseIterator>> seg_iterators;
diff --git a/be/src/olap/rowset/beta_rowset_reader.h b/be/src/olap/rowset/beta_rowset_reader.h
index 889a7c2742..dea6814558 100644
--- a/be/src/olap/rowset/beta_rowset_reader.h
+++ b/be/src/olap/rowset/beta_rowset_reader.h
@@ -34,7 +34,8 @@ public:
Status init(RowsetReaderContext* read_context) override;
Status get_segment_iterators(RowsetReaderContext* read_context,
- std::vector<RowwiseIterator*>* out_iters) override;
+ std::vector<RowwiseIterator*>* out_iters,
+ bool use_cache = false) override;
void reset_read_options() override;
Status next_block(vectorized::Block* block) override;
Status next_block_view(vectorized::BlockView* block_view) override;
diff --git a/be/src/olap/rowset/rowset_reader.h b/be/src/olap/rowset/rowset_reader.h
index d1601d379a..4186088fe5 100644
--- a/be/src/olap/rowset/rowset_reader.h
+++ b/be/src/olap/rowset/rowset_reader.h
@@ -44,7 +44,8 @@ public:
virtual Status init(RowsetReaderContext* read_context) = 0;
virtual Status get_segment_iterators(RowsetReaderContext* read_context,
- std::vector<RowwiseIterator*>* out_iters) = 0;
+ std::vector<RowwiseIterator*>* out_iters,
+ bool use_cache = false) = 0;
virtual void reset_read_options() = 0;
virtual Status next_block(vectorized::Block* block) = 0;
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index ec62749c5c..c3e066db72 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -433,6 +433,9 @@ Status SegmentWriter::finalize_columns_index(uint64_t* index_size) {
Status SegmentWriter::finalize_footer(uint64_t* segment_file_size) {
RETURN_IF_ERROR(_write_footer());
+ // finish
+ RETURN_IF_ERROR(_file_writer->finalize());
+ *segment_file_size = _file_writer->bytes_appended();
return Status::OK();
}
diff --git a/be/src/vec/olap/vertical_block_reader.cpp b/be/src/vec/olap/vertical_block_reader.cpp
index dc7d3b58c6..897d001c0d 100644
--- a/be/src/vec/olap/vertical_block_reader.cpp
+++ b/be/src/vec/olap/vertical_block_reader.cpp
@@ -55,7 +55,9 @@ Status VerticalBlockReader::_get_segment_iterators(const ReaderParams& read_para
_reader_context.is_vertical_compaction = true;
for (auto& rs_reader : rs_readers) {
// segment iterator will be inited here
- RETURN_NOT_OK(rs_reader->get_segment_iterators(&_reader_context, segment_iters));
+ // In vertical compaction, every column group loads the segments, so we should cache
+ // the segments to avoid too many S3 HEAD requests
+ RETURN_NOT_OK(rs_reader->get_segment_iterators(&_reader_context, segment_iters, true));
// if segments overlapping, all segment iterator should be inited in
// heap merge iterator. If segments are non-overlapping, only the first segment of this
// rowset will be inited and push to heap, other segment will be inited later when current
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org