You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ji...@apache.org on 2023/06/19 02:26:16 UTC

[doris] branch master updated: [Feature](inverted index) add inverted index size to tablet meta (#20916)

This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new f44d2cf296 [Feature](inverted index) add inverted index size to tablet meta (#20916)
f44d2cf296 is described below

commit f44d2cf296a7289f7f2751256f8184de163e85ec
Author: airborne12 <ai...@gmail.com>
AuthorDate: Mon Jun 19 10:26:10 2023 +0800

    [Feature](inverted index) add inverted index size to tablet meta (#20916)
    
    1. Capture the inverted index size before the segment writer's column writers are cleared, then add that size to both the total data size and the total index size.
    2. Do the same in vertical compaction.
---
 be/src/olap/rowset/beta_rowset_writer.cpp          | 8 ++++----
 be/src/olap/rowset/segment_v2/segment_writer.cpp   | 4 ++--
 be/src/olap/rowset/segment_v2/segment_writer.h     | 4 +++-
 be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 5 +++--
 4 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp
index 7f330a80b7..680645d751 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -802,8 +802,8 @@ Status BetaRowsetWriter::_flush_segment_writer(std::unique_ptr<segment_v2::Segme
 
     Statistics segstat;
     segstat.row_num = row_num;
-    segstat.data_size = segment_size;
-    segstat.index_size = index_size;
+    segstat.data_size = segment_size + (*writer)->get_inverted_index_file_size();
+    segstat.index_size = index_size + (*writer)->get_inverted_index_file_size();
     segstat.key_bounds = key_bounds;
     {
         std::lock_guard<std::mutex> lock(_segid_statistics_map_mutex);
@@ -844,8 +844,8 @@ Status BetaRowsetWriter::flush_segment_writer_for_segcompaction(
 
     Statistics segstat;
     segstat.row_num = row_num;
-    segstat.data_size = segment_size;
-    segstat.index_size = index_size;
+    segstat.data_size = segment_size + (*writer)->get_inverted_index_file_size();
+    segstat.index_size = index_size + (*writer)->get_inverted_index_file_size();
     segstat.key_bounds = key_bounds;
     {
         std::lock_guard<std::mutex> lock(_segid_statistics_map_mutex);
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index a5c9fa147b..561da9c3c6 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -728,7 +728,7 @@ uint64_t SegmentWriter::estimate_segment_size() {
     return size;
 }
 
-size_t SegmentWriter::get_inverted_index_file_size() {
+size_t SegmentWriter::try_get_inverted_index_file_size() {
     size_t total_size = 0;
     for (auto& column_writer : _column_writers) {
         total_size += column_writer->get_inverted_index_size();
@@ -769,7 +769,7 @@ Status SegmentWriter::finalize_columns_index(uint64_t* index_size) {
         }
         *index_size = _file_writer->bytes_appended() - index_start;
     }
-
+    _inverted_index_file_size = try_get_inverted_index_file_size();
     // reset all column writers and data_conveter
     clear();
 
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h
index 70de2cf765..31f652190c 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/segment_writer.h
@@ -105,8 +105,9 @@ public:
     int64_t max_row_to_add(size_t row_avg_size_in_bytes);
 
     uint64_t estimate_segment_size();
-    size_t get_inverted_index_file_size();
+    size_t try_get_inverted_index_file_size();
 
+    size_t get_inverted_index_file_size() const { return _inverted_index_file_size; }
     uint32_t num_rows_written() const { return _num_rows_written; }
     uint32_t row_count() const { return _row_count; }
 
@@ -177,6 +178,7 @@ private:
     SegmentFooterPB _footer;
     size_t _num_key_columns;
     size_t _num_short_key_columns;
+    size_t _inverted_index_file_size;
     std::unique_ptr<ShortKeyIndexBuilder> _short_key_index_builder;
     std::unique_ptr<PrimaryKeyIndexBuilder> _primary_key_index_builder;
     std::vector<std::unique_ptr<ColumnWriter>> _column_writers;
diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
index bf003a713c..6c7cced969 100644
--- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
@@ -137,7 +137,8 @@ Status VerticalBetaRowsetWriter::_flush_columns(
         _segment_num_rows.resize(_cur_writer_idx + 1);
         _segment_num_rows[_cur_writer_idx] = _segment_writers[_cur_writer_idx]->row_count();
     }
-    _total_index_size += static_cast<int64_t>(index_size);
+    _total_index_size +=
+            static_cast<int64_t>(index_size) + (*segment_writer)->get_inverted_index_file_size();
     return Status::OK();
 }
 
@@ -203,7 +204,7 @@ Status VerticalBetaRowsetWriter::final_flush() {
             LOG(WARNING) << "Fail to finalize segment footer, " << st;
             return st;
         }
-        _total_data_size += segment_size;
+        _total_data_size += segment_size + segment_writer->get_inverted_index_file_size();
         segment_writer.reset();
     }
     return Status::OK();


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org