You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by "zhannngchen (via GitHub)" <gi...@apache.org> on 2023/06/21 08:02:24 UTC

[GitHub] [doris] zhannngchen commented on a diff in pull request #20907: [Enhancement](Compaction) Caculate all committed rowsets delete bitmaps when do comapction

zhannngchen commented on code in PR #20907:
URL: https://github.com/apache/doris/pull/20907#discussion_r1236558263


##########
be/src/olap/compaction.cpp:
##########
@@ -529,8 +532,58 @@ Status Compaction::modify_rowsets(const Merger::Statistics* stats) {
             }
         }
 
+        // Here we will calculate all the rowsets delete bitmaps which are committed but not published to reduce the calculation pressure
+        // of publish phase.
+        // All rowsets which need to recalculate have been published so we don't need to acquire lock.
+        // Step1: collect this tablet's all committed rowsets' delete bitmaps
+        TxnManager::txn_tablet_map_t txn_tablet_map {};
+        StorageEngine::instance()->txn_manager()->get_all_tablet_txn_infos_by_tablet(
+                _tablet, txn_tablet_map);
+
+        // Step2: calculate all rowsets' delete bitmaps which are published during compaction.
+        // e.g. before compaction:
+        //       5-5 6-6 published
+        //       7-7 8-8 committed not published
+        // then 5-5 delete bitmap versions are 6, 7(dummy), 8(dummy).
+        // 6-6 delete bitmap versions are 7(dummy), 8(dummy).
+        //       when compaction:
+        //       5-5 6-6 7-7 published
+        //       8-8 committed not published
+        // then 5-5 delete bitmap versions are 6, 7, 8(dummy).
+        // 6-6 delete bitmap versions are 7, 8(dummy).
+        // 7-7 doesn't have delete bitmap.
+        // 8-8 has committed, so we want 7-7 delete bitmap version is 8(dummy).
+        // This part is to calculate 7-7's delete bitmap.
+        int64_t cur_max_version = _tablet->max_version().second;
+        for (const auto& it : txn_tablet_map) {
+            for (const auto& tablet_load_it : it.second) {
+                DeltaWriter* delta_writer =
+                        StorageEngine::instance()->txn_manager()->get_txn_tablet_delta_writer(
+                                it.first.second, _tablet->tablet_id());
+                if (!delta_writer) {
+                    continue;
+                }
+                const TabletTxnInfo& tablet_txn_info = tablet_load_it.second;
+                commit_rowset_delete_bitmap.merge(*tablet_txn_info.delete_bitmap);
+                auto beta_rowset = reinterpret_cast<BetaRowset*>(tablet_txn_info.rowset.get());
+                std::vector<segment_v2::SegmentSharedPtr> segments;
+                RETURN_IF_ERROR(beta_rowset->load_segments(&segments));
+                RETURN_IF_ERROR(_tablet->commit_phase_update_delete_bitmap(

Review Comment:
   We should not calculate delete bitmap here, which might be quite slow



##########
be/src/olap/compaction.cpp:
##########
@@ -529,8 +532,58 @@ Status Compaction::modify_rowsets(const Merger::Statistics* stats) {
             }
         }
 
+        // Here we will calculate all the rowsets delete bitmaps which are committed but not published to reduce the calculation pressure
+        // of publish phase.
+        // All rowsets which need to recalculate have been published so we don't need to acquire lock.
+        // Step1: collect this tablet's all committed rowsets' delete bitmaps
+        TxnManager::txn_tablet_map_t txn_tablet_map {};

Review Comment:
   we'd better to move this part after L589, do these works with lock, which can avoid potential consistency issues



##########
be/src/olap/compaction.cpp:
##########
@@ -529,8 +532,58 @@ Status Compaction::modify_rowsets(const Merger::Statistics* stats) {
             }
         }
 
+        // Here we will calculate all the rowsets delete bitmaps which are committed but not published to reduce the calculation pressure
+        // of publish phase.
+        // All rowsets which need to recalculate have been published so we don't need to acquire lock.
+        // Step1: collect this tablet's all committed rowsets' delete bitmaps
+        TxnManager::txn_tablet_map_t txn_tablet_map {};
+        StorageEngine::instance()->txn_manager()->get_all_tablet_txn_infos_by_tablet(
+                _tablet, txn_tablet_map);
+
+        // Step2: calculate all rowsets' delete bitmaps which are published during compaction.
+        // e.g. before compaction:
+        //       5-5 6-6 published
+        //       7-7 8-8 committed not published
+        // then 5-5 delete bitmap versions are 6, 7(dummy), 8(dummy).
+        // 6-6 delete bitmap versions are 7(dummy), 8(dummy).
+        //       when compaction:
+        //       5-5 6-6 7-7 published
+        //       8-8 committed not published
+        // then 5-5 delete bitmap versions are 6, 7, 8(dummy).
+        // 6-6 delete bitmap versions are 7, 8(dummy).
+        // 7-7 doesn't have delete bitmap.
+        // 8-8 has committed, so we want 7-7 delete bitmap version is 8(dummy).
+        // This part is to calculate 7-7's delete bitmap.
+        int64_t cur_max_version = _tablet->max_version().second;
+        for (const auto& it : txn_tablet_map) {
+            for (const auto& tablet_load_it : it.second) {
+                DeltaWriter* delta_writer =
+                        StorageEngine::instance()->txn_manager()->get_txn_tablet_delta_writer(
+                                it.first.second, _tablet->tablet_id());
+                if (!delta_writer) {
+                    continue;
+                }
+                const TabletTxnInfo& tablet_txn_info = tablet_load_it.second;
+                commit_rowset_delete_bitmap.merge(*tablet_txn_info.delete_bitmap);
+                auto beta_rowset = reinterpret_cast<BetaRowset*>(tablet_txn_info.rowset.get());
+                std::vector<segment_v2::SegmentSharedPtr> segments;
+                RETURN_IF_ERROR(beta_rowset->load_segments(&segments));
+                RETURN_IF_ERROR(_tablet->commit_phase_update_delete_bitmap(
+                        tablet_txn_info.rowset, tablet_txn_info.rowset_ids,
+                        tablet_txn_info.delete_bitmap, cur_max_version, segments, it.first.second,
+                        delta_writer->get_rowset_writer()));
+                // Step3: write back updated delete bitmap and tablet info.
+                StorageEngine::instance()->txn_manager()->set_txn_related_delete_bitmap(
+                        it.first.first, it.first.second, _tablet->tablet_id(),
+                        _tablet->schema_hash(), _tablet->tablet_uid(), true,
+                        tablet_txn_info.delete_bitmap, _tablet->all_rs_id(cur_max_version));

Review Comment:
   1. you should update the converted delete bitmap to txn_info
   2. add the compacted rowset's id to txn_info.rowset_ids



##########
be/src/olap/compaction.cpp:
##########
@@ -540,7 +593,7 @@ Status Compaction::modify_rowsets(const Merger::Statistics* stats) {
             // incremental data.
             _tablet->calc_compaction_output_rowset_delete_bitmap(
                     _input_rowsets, _rowid_conversion, version.second, UINT64_MAX, &missed_rows,
-                    &location_map, &output_rowset_delete_bitmap);
+                    &location_map, &output_rowset_delete_bitmap, commit_rowset_delete_bitmap);

Review Comment:
   you should not merge these un-published bitmap to Tablet's bitmap



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org