You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ji...@apache.org on 2023/06/19 02:26:16 UTC
[doris] branch master updated: [Feature](inverted index) add inverted index size to tablet meta (#20916)
This is an automated email from the ASF dual-hosted git repository.
jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new f44d2cf296 [Feature](inverted index) add inverted index size to tablet meta (#20916)
f44d2cf296 is described below
commit f44d2cf296a7289f7f2751256f8184de163e85ec
Author: airborne12 <ai...@gmail.com>
AuthorDate: Mon Jun 19 10:26:10 2023 +0800
[Feature](inverted index) add inverted index size to tablet meta (#20916)
1. Get the inverted index size before the segment writer's column writers are cleared, then add that size to both the total data size and the total index size.
2. Do the same in vertical compaction.
---
be/src/olap/rowset/beta_rowset_writer.cpp | 8 ++++----
be/src/olap/rowset/segment_v2/segment_writer.cpp | 4 ++--
be/src/olap/rowset/segment_v2/segment_writer.h | 4 +++-
be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 5 +++--
4 files changed, 12 insertions(+), 9 deletions(-)
diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp
index 7f330a80b7..680645d751 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -802,8 +802,8 @@ Status BetaRowsetWriter::_flush_segment_writer(std::unique_ptr<segment_v2::Segme
Statistics segstat;
segstat.row_num = row_num;
- segstat.data_size = segment_size;
- segstat.index_size = index_size;
+ segstat.data_size = segment_size + (*writer)->get_inverted_index_file_size();
+ segstat.index_size = index_size + (*writer)->get_inverted_index_file_size();
segstat.key_bounds = key_bounds;
{
std::lock_guard<std::mutex> lock(_segid_statistics_map_mutex);
@@ -844,8 +844,8 @@ Status BetaRowsetWriter::flush_segment_writer_for_segcompaction(
Statistics segstat;
segstat.row_num = row_num;
- segstat.data_size = segment_size;
- segstat.index_size = index_size;
+ segstat.data_size = segment_size + (*writer)->get_inverted_index_file_size();
+ segstat.index_size = index_size + (*writer)->get_inverted_index_file_size();
segstat.key_bounds = key_bounds;
{
std::lock_guard<std::mutex> lock(_segid_statistics_map_mutex);
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index a5c9fa147b..561da9c3c6 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -728,7 +728,7 @@ uint64_t SegmentWriter::estimate_segment_size() {
return size;
}
-size_t SegmentWriter::get_inverted_index_file_size() {
+size_t SegmentWriter::try_get_inverted_index_file_size() {
size_t total_size = 0;
for (auto& column_writer : _column_writers) {
total_size += column_writer->get_inverted_index_size();
@@ -769,7 +769,7 @@ Status SegmentWriter::finalize_columns_index(uint64_t* index_size) {
}
*index_size = _file_writer->bytes_appended() - index_start;
}
-
+ _inverted_index_file_size = try_get_inverted_index_file_size();
// reset all column writers and data_conveter
clear();
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h
index 70de2cf765..31f652190c 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/segment_writer.h
@@ -105,8 +105,9 @@ public:
int64_t max_row_to_add(size_t row_avg_size_in_bytes);
uint64_t estimate_segment_size();
- size_t get_inverted_index_file_size();
+ size_t try_get_inverted_index_file_size();
+ size_t get_inverted_index_file_size() const { return _inverted_index_file_size; }
uint32_t num_rows_written() const { return _num_rows_written; }
uint32_t row_count() const { return _row_count; }
@@ -177,6 +178,7 @@ private:
SegmentFooterPB _footer;
size_t _num_key_columns;
size_t _num_short_key_columns;
+ size_t _inverted_index_file_size;
std::unique_ptr<ShortKeyIndexBuilder> _short_key_index_builder;
std::unique_ptr<PrimaryKeyIndexBuilder> _primary_key_index_builder;
std::vector<std::unique_ptr<ColumnWriter>> _column_writers;
diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
index bf003a713c..6c7cced969 100644
--- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
@@ -137,7 +137,8 @@ Status VerticalBetaRowsetWriter::_flush_columns(
_segment_num_rows.resize(_cur_writer_idx + 1);
_segment_num_rows[_cur_writer_idx] = _segment_writers[_cur_writer_idx]->row_count();
}
- _total_index_size += static_cast<int64_t>(index_size);
+ _total_index_size +=
+ static_cast<int64_t>(index_size) + (*segment_writer)->get_inverted_index_file_size();
return Status::OK();
}
@@ -203,7 +204,7 @@ Status VerticalBetaRowsetWriter::final_flush() {
LOG(WARNING) << "Fail to finalize segment footer, " << st;
return st;
}
- _total_data_size += segment_size;
+ _total_data_size += segment_size + segment_writer->get_inverted_index_file_size();
segment_writer.reset();
}
return Status::OK();
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org