You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by da...@apache.org on 2023/01/20 08:38:33 UTC

[doris] branch master updated: [improvement](vertical compaction) cache segment in vertical compaction (#16101)

This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 171404228f [improvement](vertical compaction) cache segment in vertical compaction (#16101)
171404228f is described below

commit 171404228f7b80d161c2aad6a3224234512c3c5a
Author: yixiutt <10...@users.noreply.github.com>
AuthorDate: Fri Jan 20 16:38:23 2023 +0800

    [improvement](vertical compaction) cache segment in vertical compaction (#16101)
    
    1. In vertical compaction, segments are loaded once for every column group, so
    we should cache the segment pointers to avoid too much repeated I/O.
    2. Fix the vertical compaction data size bug.
---
 be/src/olap/rowset/beta_rowset_reader.cpp        | 10 ++++++----
 be/src/olap/rowset/beta_rowset_reader.h          |  3 ++-
 be/src/olap/rowset/rowset_reader.h               |  3 ++-
 be/src/olap/rowset/segment_v2/segment_writer.cpp |  3 +++
 be/src/vec/olap/vertical_block_reader.cpp        |  4 +++-
 5 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp
index 6612fff758..3b45b3cf3f 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -50,7 +50,8 @@ bool BetaRowsetReader::update_profile(RuntimeProfile* profile) {
 }
 
 Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context,
-                                               std::vector<RowwiseIterator*>* out_iters) {
+                                               std::vector<RowwiseIterator*>* out_iters,
+                                               bool use_cache) {
     RETURN_NOT_OK(_rowset->load());
     _context = read_context;
     if (_context->stats != nullptr) {
@@ -163,9 +164,10 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
     _read_options.runtime_state = read_context->runtime_state;
 
     // load segments
-    RETURN_NOT_OK(SegmentLoader::instance()->load_segments(
-            _rowset, &_segment_cache_handle,
-            read_context->reader_type == ReaderType::READER_QUERY));
+    // use_cache is true when doing vertical compaction
+    bool should_use_cache = use_cache || read_context->reader_type == ReaderType::READER_QUERY;
+    RETURN_NOT_OK(SegmentLoader::instance()->load_segments(_rowset, &_segment_cache_handle,
+                                                           should_use_cache));
 
     // create iterator for each segment
     std::vector<std::unique_ptr<RowwiseIterator>> seg_iterators;
diff --git a/be/src/olap/rowset/beta_rowset_reader.h b/be/src/olap/rowset/beta_rowset_reader.h
index 889a7c2742..dea6814558 100644
--- a/be/src/olap/rowset/beta_rowset_reader.h
+++ b/be/src/olap/rowset/beta_rowset_reader.h
@@ -34,7 +34,8 @@ public:
     Status init(RowsetReaderContext* read_context) override;
 
     Status get_segment_iterators(RowsetReaderContext* read_context,
-                                 std::vector<RowwiseIterator*>* out_iters) override;
+                                 std::vector<RowwiseIterator*>* out_iters,
+                                 bool use_cache = false) override;
     void reset_read_options() override;
     Status next_block(vectorized::Block* block) override;
     Status next_block_view(vectorized::BlockView* block_view) override;
diff --git a/be/src/olap/rowset/rowset_reader.h b/be/src/olap/rowset/rowset_reader.h
index d1601d379a..4186088fe5 100644
--- a/be/src/olap/rowset/rowset_reader.h
+++ b/be/src/olap/rowset/rowset_reader.h
@@ -44,7 +44,8 @@ public:
     virtual Status init(RowsetReaderContext* read_context) = 0;
 
     virtual Status get_segment_iterators(RowsetReaderContext* read_context,
-                                         std::vector<RowwiseIterator*>* out_iters) = 0;
+                                         std::vector<RowwiseIterator*>* out_iters,
+                                         bool use_cache = false) = 0;
     virtual void reset_read_options() = 0;
 
     virtual Status next_block(vectorized::Block* block) = 0;
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index ec62749c5c..c3e066db72 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -433,6 +433,9 @@ Status SegmentWriter::finalize_columns_index(uint64_t* index_size) {
 
 Status SegmentWriter::finalize_footer(uint64_t* segment_file_size) {
     RETURN_IF_ERROR(_write_footer());
+    // finish
+    RETURN_IF_ERROR(_file_writer->finalize());
+    *segment_file_size = _file_writer->bytes_appended();
     return Status::OK();
 }
 
diff --git a/be/src/vec/olap/vertical_block_reader.cpp b/be/src/vec/olap/vertical_block_reader.cpp
index dc7d3b58c6..897d001c0d 100644
--- a/be/src/vec/olap/vertical_block_reader.cpp
+++ b/be/src/vec/olap/vertical_block_reader.cpp
@@ -55,7 +55,9 @@ Status VerticalBlockReader::_get_segment_iterators(const ReaderParams& read_para
     _reader_context.is_vertical_compaction = true;
     for (auto& rs_reader : rs_readers) {
         // segment iterator will be inited here
-        RETURN_NOT_OK(rs_reader->get_segment_iterators(&_reader_context, segment_iters));
+        // In vertical compaction, every column group loads the segments, so we should
+        // cache them to avoid too many S3 HEAD requests
+        RETURN_NOT_OK(rs_reader->get_segment_iterators(&_reader_context, segment_iters, true));
         // if segments overlapping, all segment iterator should be inited in
         // heap merge iterator. If segments are none overlapping, only first segment of this
         // rowset will be inited and push to heap, other segment will be inited later when current


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org