You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by da...@apache.org on 2023/01/16 13:50:27 UTC

[doris] branch master updated: [refactor] refactor segment writer (#15705)

This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 65a4c8b163 [refactor] refactor segment writer (#15705)
65a4c8b163 is described below

commit 65a4c8b163a598894c1d1a313e85cfe928208586
Author: zbtzbtzbt <bi...@163.com>
AuthorDate: Mon Jan 16 21:50:21 2023 +0800

    [refactor] refactor segment writer (#15705)
    
    Co-authored-by: zhoubintao <12...@qq.com>
---
 be/src/olap/rowset/segment_v2/segment_writer.cpp   | 41 +++++++++++++---------
 be/src/olap/rowset/segment_v2/segment_writer.h     |  5 +--
 be/src/olap/rowset/vertical_beta_rowset_writer.cpp |  3 +-
 3 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index df7f26c34f..7f07ca5b27 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -184,8 +184,7 @@ Status SegmentWriter::init(const std::vector<uint32_t>& col_ids, bool has_key) {
 
 Status SegmentWriter::append_block(const vectorized::Block* block, size_t row_pos,
                                    size_t num_rows) {
-    assert(block && num_rows > 0 && row_pos + num_rows <= block->rows() &&
-           block->columns() == _column_writers.size());
+    assert(block->columns() == _column_writers.size());
     _olap_data_convertor->set_source_content(block, row_pos, num_rows);
 
     // find all row pos for short key indexes
@@ -369,7 +368,7 @@ uint64_t SegmentWriter::estimate_segment_size() {
     return size;
 }
 
-Status SegmentWriter::finalize_columns(uint64_t* index_size) {
+Status SegmentWriter::finalize_columns_data() {
     if (_has_key) {
         _row_count = _num_rows_written;
     } else {
@@ -381,33 +380,36 @@ Status SegmentWriter::finalize_columns(uint64_t* index_size) {
         RETURN_IF_ERROR(column_writer->finish());
     }
     RETURN_IF_ERROR(_write_data());
-    uint64_t index_offset = _file_writer->bytes_appended();
+
+    return Status::OK();
+}
+
+Status SegmentWriter::finalize_columns_index(uint64_t* index_size) {
+    uint64_t index_start = _file_writer->bytes_appended();
     RETURN_IF_ERROR(_write_ordinal_index());
     RETURN_IF_ERROR(_write_zone_map());
     RETURN_IF_ERROR(_write_bitmap_index());
     RETURN_IF_ERROR(_write_inverted_index());
     RETURN_IF_ERROR(_write_bloom_filter_index());
 
-    *index_size = _file_writer->bytes_appended() - index_offset;
+    *index_size = _file_writer->bytes_appended() - index_start;
     if (_has_key) {
         if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) {
             RETURN_IF_ERROR(_write_primary_key_index());
         } else {
             RETURN_IF_ERROR(_write_short_key_index());
         }
-        *index_size = _file_writer->bytes_appended() - index_offset;
+        *index_size = _file_writer->bytes_appended() - index_start;
     }
+
     // reset all column writers and data_conveter
-    _reset_column_writers();
-    _column_ids.clear();
-    _olap_data_convertor.reset();
+    clear();
+
     return Status::OK();
 }
 
 Status SegmentWriter::finalize_footer(uint64_t* segment_file_size) {
     RETURN_IF_ERROR(_write_footer());
-    RETURN_IF_ERROR(_file_writer->finalize());
-    *segment_file_size = _file_writer->bytes_appended();
     return Status::OK();
 }
 
@@ -416,19 +418,26 @@ Status SegmentWriter::finalize(uint64_t* segment_file_size, uint64_t* index_size
     if (_data_dir != nullptr && _data_dir->reach_capacity_limit((int64_t)estimate_segment_size())) {
         return Status::InternalError("disk {} exceed capacity limit.", _data_dir->path_hash());
     }
-
-    RETURN_IF_ERROR(finalize_columns(index_size));
-
-    // writer footer
+    // write data
+    RETURN_IF_ERROR(finalize_columns_data());
+    // write index
+    RETURN_IF_ERROR(finalize_columns_index(index_size));
+    // write footer
     RETURN_IF_ERROR(finalize_footer(segment_file_size));
+    // finish
+    RETURN_IF_ERROR(_file_writer->finalize());
+    *segment_file_size = _file_writer->bytes_appended();
+
     return Status::OK();
 }
 
-void SegmentWriter::_reset_column_writers() {
+void SegmentWriter::clear() {
     for (auto& column_writer : _column_writers) {
         column_writer.reset();
     }
     _column_writers.clear();
+    _column_ids.clear();
+    _olap_data_convertor.reset();
 }
 
 // write column data to file one by one
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h
index 6e18a0735b..19135a866d 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/segment_writer.h
@@ -91,7 +91,8 @@ public:
 
     uint32_t get_segment_id() { return _segment_id; }
 
-    Status finalize_columns(uint64_t* index_size);
+    Status finalize_columns_data();
+    Status finalize_columns_index(uint64_t* index_size);
     Status finalize_footer(uint64_t* segment_file_size);
 
     static void init_column_meta(ColumnMetaPB* meta, uint32_t column_id, const TabletColumn& column,
@@ -124,7 +125,7 @@ private:
     void set_min_key(const Slice& key);
     void set_max_key(const Slice& key);
 
-    void _reset_column_writers();
+    void clear();
 
 private:
     uint32_t _segment_id;
diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
index ef7b2c4254..8c8714982b 100644
--- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
@@ -100,7 +100,8 @@ Status VerticalBetaRowsetWriter::_flush_columns(
         std::unique_ptr<segment_v2::SegmentWriter>* segment_writer, bool is_key) {
     uint64_t index_size = 0;
     VLOG_NOTICE << "flush columns index: " << _cur_writer_idx;
-    RETURN_IF_ERROR((*segment_writer)->finalize_columns(&index_size));
+    RETURN_IF_ERROR((*segment_writer)->finalize_columns_data());
+    RETURN_IF_ERROR((*segment_writer)->finalize_columns_index(&index_size));
     if (is_key) {
         // record segment key bound
         KeyBoundsPB key_bounds;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org