You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by da...@apache.org on 2023/01/16 13:50:27 UTC
[doris] branch master updated: [refactor] refactor segment writer (#15705)
This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 65a4c8b163 [refactor] refactor segment writer (#15705)
65a4c8b163 is described below
commit 65a4c8b163a598894c1d1a313e85cfe928208586
Author: zbtzbtzbt <bi...@163.com>
AuthorDate: Mon Jan 16 21:50:21 2023 +0800
[refactor] refactor segment writer (#15705)
Co-authored-by: zhoubintao <12...@qq.com>
---
be/src/olap/rowset/segment_v2/segment_writer.cpp | 41 +++++++++++++---------
be/src/olap/rowset/segment_v2/segment_writer.h | 5 +--
be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 3 +-
3 files changed, 30 insertions(+), 19 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index df7f26c34f..7f07ca5b27 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -184,8 +184,7 @@ Status SegmentWriter::init(const std::vector<uint32_t>& col_ids, bool has_key) {
Status SegmentWriter::append_block(const vectorized::Block* block, size_t row_pos,
size_t num_rows) {
- assert(block && num_rows > 0 && row_pos + num_rows <= block->rows() &&
- block->columns() == _column_writers.size());
+ assert(block->columns() == _column_writers.size());
_olap_data_convertor->set_source_content(block, row_pos, num_rows);
// find all row pos for short key indexes
@@ -369,7 +368,7 @@ uint64_t SegmentWriter::estimate_segment_size() {
return size;
}
-Status SegmentWriter::finalize_columns(uint64_t* index_size) {
+Status SegmentWriter::finalize_columns_data() {
if (_has_key) {
_row_count = _num_rows_written;
} else {
@@ -381,33 +380,36 @@ Status SegmentWriter::finalize_columns(uint64_t* index_size) {
RETURN_IF_ERROR(column_writer->finish());
}
RETURN_IF_ERROR(_write_data());
- uint64_t index_offset = _file_writer->bytes_appended();
+
+ return Status::OK();
+}
+
+Status SegmentWriter::finalize_columns_index(uint64_t* index_size) {
+ uint64_t index_start = _file_writer->bytes_appended();
RETURN_IF_ERROR(_write_ordinal_index());
RETURN_IF_ERROR(_write_zone_map());
RETURN_IF_ERROR(_write_bitmap_index());
RETURN_IF_ERROR(_write_inverted_index());
RETURN_IF_ERROR(_write_bloom_filter_index());
- *index_size = _file_writer->bytes_appended() - index_offset;
+ *index_size = _file_writer->bytes_appended() - index_start;
if (_has_key) {
if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) {
RETURN_IF_ERROR(_write_primary_key_index());
} else {
RETURN_IF_ERROR(_write_short_key_index());
}
- *index_size = _file_writer->bytes_appended() - index_offset;
+ *index_size = _file_writer->bytes_appended() - index_start;
}
+
// reset all column writers and data_conveter
- _reset_column_writers();
- _column_ids.clear();
- _olap_data_convertor.reset();
+ clear();
+
return Status::OK();
}
Status SegmentWriter::finalize_footer(uint64_t* segment_file_size) {
RETURN_IF_ERROR(_write_footer());
- RETURN_IF_ERROR(_file_writer->finalize());
- *segment_file_size = _file_writer->bytes_appended();
return Status::OK();
}
@@ -416,19 +418,26 @@ Status SegmentWriter::finalize(uint64_t* segment_file_size, uint64_t* index_size
if (_data_dir != nullptr && _data_dir->reach_capacity_limit((int64_t)estimate_segment_size())) {
return Status::InternalError("disk {} exceed capacity limit.", _data_dir->path_hash());
}
-
- RETURN_IF_ERROR(finalize_columns(index_size));
-
- // writer footer
+ // write data
+ RETURN_IF_ERROR(finalize_columns_data());
+ // write index
+ RETURN_IF_ERROR(finalize_columns_index(index_size));
+ // write footer
RETURN_IF_ERROR(finalize_footer(segment_file_size));
+ // finish
+ RETURN_IF_ERROR(_file_writer->finalize());
+ *segment_file_size = _file_writer->bytes_appended();
+
return Status::OK();
}
-void SegmentWriter::_reset_column_writers() {
+void SegmentWriter::clear() {
for (auto& column_writer : _column_writers) {
column_writer.reset();
}
_column_writers.clear();
+ _column_ids.clear();
+ _olap_data_convertor.reset();
}
// write column data to file one by one
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h
index 6e18a0735b..19135a866d 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/segment_writer.h
@@ -91,7 +91,8 @@ public:
uint32_t get_segment_id() { return _segment_id; }
- Status finalize_columns(uint64_t* index_size);
+ Status finalize_columns_data();
+ Status finalize_columns_index(uint64_t* index_size);
Status finalize_footer(uint64_t* segment_file_size);
static void init_column_meta(ColumnMetaPB* meta, uint32_t column_id, const TabletColumn& column,
@@ -124,7 +125,7 @@ private:
void set_min_key(const Slice& key);
void set_max_key(const Slice& key);
- void _reset_column_writers();
+ void clear();
private:
uint32_t _segment_id;
diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
index ef7b2c4254..8c8714982b 100644
--- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
@@ -100,7 +100,8 @@ Status VerticalBetaRowsetWriter::_flush_columns(
std::unique_ptr<segment_v2::SegmentWriter>* segment_writer, bool is_key) {
uint64_t index_size = 0;
VLOG_NOTICE << "flush columns index: " << _cur_writer_idx;
- RETURN_IF_ERROR((*segment_writer)->finalize_columns(&index_size));
+ RETURN_IF_ERROR((*segment_writer)->finalize_columns_data());
+ RETURN_IF_ERROR((*segment_writer)->finalize_columns_index(&index_size));
if (is_key) {
// record segment key bound
KeyBoundsPB key_bounds;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org