You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/07/30 11:50:17 UTC

[doris] branch master updated: [feature-wip](unique-key-merge-on-write) Add support for tablet migration, DSIP-018[5/3] (#11283)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 9333e79ae0 [feature-wip](unique-key-merge-on-write) Add support for tablet migration, DSIP-018[5/3] (#11283)
9333e79ae0 is described below

commit 9333e79ae0841992edf30e839483de598f304f46
Author: zhannngchen <48...@users.noreply.github.com>
AuthorDate: Sat Jul 30 19:50:11 2022 +0800

    [feature-wip](unique-key-merge-on-write) Add support for tablet migration, DSIP-018[5/3] (#11283)
---
 be/src/olap/rowset/beta_rowset_writer.cpp          |  2 +
 be/src/olap/snapshot_manager.cpp                   | 55 ++++++++++++++++-
 be/src/olap/tablet_meta.cpp                        | 35 +++++++++++
 be/src/olap/tablet_meta.h                          |  9 ++-
 be/src/olap/task/engine_storage_migration_task.cpp | 12 ++--
 be/src/olap/task/engine_storage_migration_task.h   |  8 +--
 be/test/olap/tablet_meta_test.cpp                  | 70 ++++++++++++++++++++++
 7 files changed, 180 insertions(+), 11 deletions(-)

diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp
index 228fcee95b..491eed055d 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -161,6 +161,8 @@ Status BetaRowsetWriter::add_rowset(RowsetSharedPtr rowset) {
     _total_data_size += rowset->rowset_meta()->data_disk_size();
     _total_index_size += rowset->rowset_meta()->index_disk_size();
     _num_segment += rowset->num_segments();
+    // append key_bounds to current rowset
+    rowset->get_segments_key_bounds(&_segments_encoded_key_bounds);
     // TODO update zonemap
     if (rowset->rowset_meta()->has_delete_predicate()) {
         _rowset_meta->set_delete_predicate(rowset->rowset_meta()->delete_predicate());
diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp
index 390bd66206..3d0e885b20 100644
--- a/be/src/olap/snapshot_manager.cpp
+++ b/be/src/olap/snapshot_manager.cpp
@@ -152,6 +152,7 @@ Status SnapshotManager::convert_rowset_ids(const std::string& clone_dir, int64_t
     tablet_schema.init_from_pb(new_tablet_meta_pb.schema());
 
     std::unordered_map<Version, RowsetMetaPB*, HashOfVersion> rs_version_map;
+    std::unordered_map<RowsetId, RowsetId, HashOfRowsetId> rowset_id_mapping;
     for (auto& visible_rowset : cloned_tablet_meta_pb.rs_metas()) {
         RowsetMetaPB* rowset_meta = new_tablet_meta_pb.add_rs_metas();
 
@@ -160,6 +161,13 @@ Status SnapshotManager::convert_rowset_ids(const std::string& clone_dir, int64_t
             RowsetId rowset_id = StorageEngine::instance()->next_rowset_id();
             RETURN_NOT_OK(_rename_rowset_id(visible_rowset, clone_dir, tablet_schema, rowset_id,
                                             rowset_meta));
+            RowsetId src_rs_id;
+            if (visible_rowset.rowset_id() > 0) {
+                src_rs_id.init(visible_rowset.rowset_id());
+            } else {
+                src_rs_id.init(visible_rowset.rowset_id_v2());
+            }
+            rowset_id_mapping[src_rs_id] = rowset_id;
         } else {
             // remote rowset
             *rowset_meta = visible_rowset;
@@ -184,6 +192,13 @@ Status SnapshotManager::convert_rowset_ids(const std::string& clone_dir, int64_t
             RowsetId rowset_id = StorageEngine::instance()->next_rowset_id();
             RETURN_NOT_OK(_rename_rowset_id(stale_rowset, clone_dir, tablet_schema, rowset_id,
                                             rowset_meta));
+            RowsetId src_rs_id;
+            if (stale_rowset.rowset_id() > 0) {
+                src_rs_id.init(stale_rowset.rowset_id());
+            } else {
+                src_rs_id.init(stale_rowset.rowset_id_v2());
+            }
+            rowset_id_mapping[src_rs_id] = rowset_id;
         } else {
             // remote rowset
             *rowset_meta = stale_rowset;
@@ -193,6 +208,21 @@ Status SnapshotManager::convert_rowset_ids(const std::string& clone_dir, int64_t
         rowset_meta->set_tablet_schema_hash(schema_hash);
     }
 
+    if (!rowset_id_mapping.empty() && cloned_tablet_meta_pb.has_delete_bitmap()) {
+        auto& cloned_del_bitmap_pb = cloned_tablet_meta_pb.delete_bitmap();
+        DeleteBitmapPB* new_del_bitmap_pb = new_tablet_meta_pb.mutable_delete_bitmap();
+        int rst_ids_size = cloned_del_bitmap_pb.rowset_ids_size();
+        for (size_t i = 0; i < rst_ids_size; ++i) {
+            RowsetId rst_id;
+            rst_id.init(cloned_del_bitmap_pb.rowset_ids(i));
+            // This should never happen: if a rowset id in the delete bitmap cannot be
+            // converted, the data might become inconsistent.
+            CHECK(rowset_id_mapping.find(rst_id) != rowset_id_mapping.end())
+                    << "can't find rowset_id " << rst_id.to_string() << " in convert_rowset_ids";
+            new_del_bitmap_pb->set_rowset_ids(i, rowset_id_mapping[rst_id].to_string());
+        }
+    }
+
     res = TabletMeta::save(cloned_meta_file, new_tablet_meta_pb);
     if (!res.ok()) {
         LOG(WARNING) << "fail to save converted tablet meta to dir='" << clone_dir;
@@ -356,6 +386,7 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet
             break;
         }
         std::vector<RowsetSharedPtr> consistent_rowsets;
+        DeleteBitmap delete_bitmap_snapshot(new_tablet_meta->tablet_id());
 
         /// If set missing_version, try to get all missing version.
         /// If some of them not exist in tablet, we will fall back to
@@ -381,14 +412,21 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet
                         break;
                     }
                 }
+
+                // Take a full snapshot, will revise according to missed rowset later.
+                if (ref_tablet->keys_type() == UNIQUE_KEYS &&
+                    ref_tablet->enable_unique_key_merge_on_write()) {
+                    delete_bitmap_snapshot = ref_tablet->tablet_meta()->delete_bitmap().snapshot(
+                            ref_tablet->max_version().second);
+                }
             }
 
+            int64_t version = -1;
             if (!res.ok() || !request.__isset.missing_version) {
                 /// not all missing versions are found, fall back to full snapshot.
                 res = Status::OK();         // reset res
                 consistent_rowsets.clear(); // reset vector
 
-                std::shared_lock rdlock(ref_tablet->get_header_lock());
                 // get latest version
                 const RowsetSharedPtr last_version = ref_tablet->rowset_with_max_version();
                 if (last_version == nullptr) {
@@ -398,7 +436,7 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet
                     break;
                 }
                 // get snapshot version, use request.version if specified
-                int32_t version = last_version->end_version();
+                version = last_version->end_version();
                 if (request.__isset.version) {
                     if (last_version->end_version() < request.version) {
                         LOG(WARNING) << "invalid make snapshot request. "
@@ -420,12 +458,21 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet
                 }
                 *allow_incremental_clone = false;
             } else {
+                version = ref_tablet->max_version().second;
                 *allow_incremental_clone = true;
             }
 
             // copy the tablet meta to new_tablet_meta inside header lock
             CHECK(res.ok()) << res;
             ref_tablet->generate_tablet_meta_copy_unlocked(new_tablet_meta);
+            // The delete bitmap update and the add_inc_rowset operation are not atomic, so the
+            // delete bitmap may contain entries generated by rowsets that are not visible yet;
+            // get rid of these useless bitmaps when taking the snapshot.
+            if (ref_tablet->keys_type() == UNIQUE_KEYS &&
+                ref_tablet->enable_unique_key_merge_on_write()) {
+                delete_bitmap_snapshot =
+                        ref_tablet->tablet_meta()->delete_bitmap().snapshot(version);
+            }
         }
         {
             std::unique_lock wlock(ref_tablet->get_header_lock());
@@ -459,6 +506,10 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet
         // Clear it for safety reason.
         // Whether it is incremental or full snapshot, rowset information is stored in rs_meta.
         new_tablet_meta->revise_rs_metas(std::move(rs_metas));
+        if (ref_tablet->keys_type() == UNIQUE_KEYS &&
+            ref_tablet->enable_unique_key_merge_on_write()) {
+            new_tablet_meta->revise_delete_bitmap_unlocked(delete_bitmap_snapshot);
+        }
 
         if (snapshot_version == g_Types_constants.TSNAPSHOT_REQ_VERSION2) {
             res = new_tablet_meta->save(header_path);
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index bc88710854..2b6ccc40b5 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -644,6 +644,27 @@ void TabletMeta::revise_rs_metas(std::vector<RowsetMetaSharedPtr>&& rs_metas) {
     _stale_rs_metas.clear();
 }
 
+// Rebuilds _delete_bitmap from `delete_bitmap`, taking only the key range of each rowset
+// currently listed in _rs_metas and _stale_rs_metas. Call this after revise_rs_metas, whose
+// new rs_metas might be a subset of the original tablet's rowsets.
+//
+// The delete bitmap is protected by Tablet::_meta_lock, so TabletMeta's _meta_lock is not
+// acquired here.
+void TabletMeta::revise_delete_bitmap_unlocked(const DeleteBitmap& delete_bitmap) {
+    _delete_bitmap = std::make_unique<DeleteBitmap>(tablet_id());
+    // Bind by const reference: copying each shared_ptr would only churn its refcount.
+    for (const auto& rs : _rs_metas) {
+        DeleteBitmap rs_bm(tablet_id());
+        delete_bitmap.subset({rs->rowset_id(), 0, 0}, {rs->rowset_id(), UINT32_MAX, INT64_MAX},
+                             &rs_bm);
+        _delete_bitmap->merge(rs_bm);
+    }
+    for (const auto& rs : _stale_rs_metas) {
+        DeleteBitmap rs_bm(tablet_id());
+        delete_bitmap.subset({rs->rowset_id(), 0, 0}, {rs->rowset_id(), UINT32_MAX, INT64_MAX},
+                             &rs_bm);
+        _delete_bitmap->merge(rs_bm);
+    }
+}
+
 void TabletMeta::delete_stale_rs_meta_by_version(const Version& version) {
     auto it = _stale_rs_metas.begin();
     while (it != _stale_rs_metas.end()) {
@@ -792,6 +813,20 @@ DeleteBitmap DeleteBitmap::snapshot() const {
     return DeleteBitmap(*this);
 }
 
+DeleteBitmap DeleteBitmap::snapshot(Version version) const {
+    // Copy the whole bitmap, then prune entries whose version exceeds `version`.
+    DeleteBitmap pruned = this->snapshot();
+    auto& entries = pruned.delete_bitmap;
+    for (auto cur = entries.begin(); cur != entries.end();) {
+        if (std::get<2>(cur->first) <= version) {
+            ++cur;
+            continue;
+        }
+        cur = entries.erase(cur);
+    }
+    return pruned;
+}
+
 void DeleteBitmap::add(const BitmapKey& bmk, uint32_t row_id) {
     std::lock_guard l(lock);
     delete_bitmap[bmk].add(row_id);
diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h
index 34a42ed510..ec3ad1f60f 100644
--- a/be/src/olap/tablet_meta.h
+++ b/be/src/olap/tablet_meta.h
@@ -159,6 +159,7 @@ public:
                          const std::vector<RowsetMetaSharedPtr>& to_delete,
                          bool same_version = false);
     void revise_rs_metas(std::vector<RowsetMetaSharedPtr>&& rs_metas);
+    void revise_delete_bitmap_unlocked(const DeleteBitmap& delete_bitmap);
 
     const std::vector<RowsetMetaSharedPtr>& all_stale_rs_metas() const;
     RowsetMetaSharedPtr acquire_rs_meta_by_version(const Version& version) const;
@@ -272,7 +273,7 @@ class DeleteBitmap {
 public:
     mutable std::shared_mutex lock;
     using SegmentId = uint32_t;
-    using Version = uint32_t;
+    using Version = uint64_t;
     using BitmapKey = std::tuple<RowsetId, SegmentId, Version>;
     std::map<BitmapKey, roaring::Roaring> delete_bitmap; // Ordered map
 
@@ -299,6 +300,12 @@ public:
      */
     DeleteBitmap snapshot() const;
 
+    /**
+     * Makes a snapshot of the delete bitmap that drops entries newer than the given
+     * version; the read lock is acquired temporarily in this process
+     */
+    DeleteBitmap snapshot(Version version) const;
+
     /**
      * Marks the specific row deleted
      */
diff --git a/be/src/olap/task/engine_storage_migration_task.cpp b/be/src/olap/task/engine_storage_migration_task.cpp
index ac31948439..e3737a62e4 100644
--- a/be/src/olap/task/engine_storage_migration_task.cpp
+++ b/be/src/olap/task/engine_storage_migration_task.cpp
@@ -116,7 +116,7 @@ Status EngineStorageMigrationTask::_check_running_txns_until_timeout(
 
 Status EngineStorageMigrationTask::_gen_and_write_header_to_hdr_file(
         uint64_t shard, const std::string& full_path,
-        const std::vector<RowsetSharedPtr>& consistent_rowsets) {
+        const std::vector<RowsetSharedPtr>& consistent_rowsets, int64_t end_version) {
     // need hold migration lock and push lock outside
     Status res = Status::OK();
     int64_t tablet_id = _tablet->tablet_id();
@@ -124,7 +124,7 @@ Status EngineStorageMigrationTask::_gen_and_write_header_to_hdr_file(
     TabletMetaSharedPtr new_tablet_meta(new (std::nothrow) TabletMeta());
     {
         std::shared_lock rdlock(_tablet->get_header_lock());
-        _generate_new_header(shard, consistent_rowsets, new_tablet_meta);
+        _generate_new_header(shard, consistent_rowsets, new_tablet_meta, end_version);
     }
     std::string new_meta_file = full_path + "/" + std::to_string(tablet_id) + ".hdr";
     res = new_tablet_meta->save(new_meta_file);
@@ -299,7 +299,7 @@ Status EngineStorageMigrationTask::_migrate() {
         }
 
         // generate new tablet meta and write to hdr file
-        res = _gen_and_write_header_to_hdr_file(shard, full_path, consistent_rowsets);
+        res = _gen_and_write_header_to_hdr_file(shard, full_path, consistent_rowsets, end_version);
         if (!res.ok()) {
             break;
         }
@@ -321,7 +321,7 @@ Status EngineStorageMigrationTask::_migrate() {
 // TODO(ygl): lost some information here, such as cumulative layer point
 void EngineStorageMigrationTask::_generate_new_header(
         uint64_t new_shard, const std::vector<RowsetSharedPtr>& consistent_rowsets,
-        TabletMetaSharedPtr new_tablet_meta) {
+        TabletMetaSharedPtr new_tablet_meta, int64_t end_version) {
     _tablet->generate_tablet_meta_copy_unlocked(new_tablet_meta);
 
     std::vector<RowsetMetaSharedPtr> rs_metas;
@@ -329,6 +329,10 @@ void EngineStorageMigrationTask::_generate_new_header(
         rs_metas.push_back(rs->rowset_meta());
     }
     new_tablet_meta->revise_rs_metas(std::move(rs_metas));
+    if (_tablet->keys_type() == UNIQUE_KEYS && _tablet->enable_unique_key_merge_on_write()) {
+        DeleteBitmap bm = _tablet->tablet_meta()->delete_bitmap().snapshot(end_version);
+        new_tablet_meta->revise_delete_bitmap_unlocked(bm);
+    }
     new_tablet_meta->set_shard_id(new_shard);
     // should not save new meta here, because new tablet may failed
     // should not remove the old meta here, because the new header maybe not valid
diff --git a/be/src/olap/task/engine_storage_migration_task.h b/be/src/olap/task/engine_storage_migration_task.h
index b529b78c2a..c2c9b7a774 100644
--- a/be/src/olap/task/engine_storage_migration_task.h
+++ b/be/src/olap/task/engine_storage_migration_task.h
@@ -49,14 +49,14 @@ private:
     bool _is_rowsets_size_less_than_threshold(
             const std::vector<RowsetSharedPtr>& consistent_rowsets);
 
-    Status _gen_and_write_header_to_hdr_file(
-            uint64_t shard, const std::string& full_path,
-            const std::vector<RowsetSharedPtr>& consistent_rowsets);
+    Status _gen_and_write_header_to_hdr_file(uint64_t shard, const std::string& full_path,
+                                             const std::vector<RowsetSharedPtr>& consistent_rowsets,
+                                             int64_t end_version);
     Status _reload_tablet(const std::string& full_path);
 
     void _generate_new_header(uint64_t new_shard,
                               const std::vector<RowsetSharedPtr>& consistent_rowsets,
-                              TabletMetaSharedPtr new_tablet_meta);
+                              TabletMetaSharedPtr new_tablet_meta, int64_t end_version);
 
     // TODO: hkp
     // rewrite this function
diff --git a/be/test/olap/tablet_meta_test.cpp b/be/test/olap/tablet_meta_test.cpp
index f8b83329ec..fffe32f382 100644
--- a/be/test/olap/tablet_meta_test.cpp
+++ b/be/test/olap/tablet_meta_test.cpp
@@ -21,6 +21,8 @@
 
 #include <string>
 
+#include "testutil/mock_rowset.h"
+
 namespace doris {
 
 TEST(TabletMetaTest, SaveAndParse) {
@@ -41,6 +43,54 @@ TEST(TabletMetaTest, SaveAndParse) {
     EXPECT_EQ(old_tablet_meta, new_tablet_meta);
 }
 
+TEST(TabletMetaTest, TestReviseMeta) {
+    // Declared first so it outlives the rowsets, in case they retain the raw pointer passed below.
+    TabletSchema schema;
+    TabletMeta tablet_meta;
+    std::vector<RowsetSharedPtr> src_rowsets;
+    std::vector<RowsetId> rsids;
+    // Build four single-version source rowsets (ids and versions 0..3) and register them.
+    for (int i = 0; i < 4; i++) {
+        RowsetMetaPB rs_meta_pb;
+        RowsetId rowset_id;
+        rowset_id.init(i);
+        rsids.push_back(rowset_id);
+        rs_meta_pb.set_rowset_id_v2(rowset_id.to_string());
+        rs_meta_pb.set_num_segments(2);
+        rs_meta_pb.set_start_version(i);
+        rs_meta_pb.set_end_version(i);
+        RowsetMetaSharedPtr meta_ptr = std::make_shared<RowsetMeta>();
+        meta_ptr->init_from_pb(rs_meta_pb);
+        RowsetSharedPtr rowset_ptr;
+        MockRowset::create_rowset(&schema, "", meta_ptr, &rowset_ptr, false);
+        src_rowsets.push_back(rowset_ptr);
+        tablet_meta.add_rs_meta(rowset_ptr->rowset_meta());
+    }
+    ASSERT_EQ(4, tablet_meta.all_rs_metas().size());
+
+    tablet_meta.delete_bitmap().add({rsids[0], 1, 1}, 1);
+    tablet_meta.delete_bitmap().add({rsids[1], 0, 2}, 2);
+    tablet_meta.delete_bitmap().add({rsids[2], 1, 1}, 1);
+    tablet_meta.delete_bitmap().add({rsids[3], 0, 2}, 3);
+    tablet_meta.delete_bitmap().add({rsids[3], 0, 4}, 4);
+    ASSERT_EQ(5, tablet_meta.delete_bitmap().delete_bitmap.size());
+
+    // Keep rowsets 2 and 3 only, then revise the bitmap from a snapshot with max_version=3.
+    std::vector<RowsetMetaSharedPtr> new_rowsets {src_rowsets[2]->rowset_meta(),
+                                                  src_rowsets[3]->rowset_meta()};
+    tablet_meta.revise_rs_metas(std::move(new_rowsets));
+    DeleteBitmap snap = tablet_meta.delete_bitmap().snapshot(3);
+    tablet_meta.revise_delete_bitmap_unlocked(snap);
+    ASSERT_EQ(2, tablet_meta.all_rs_metas().size());
+    ASSERT_EQ(2, tablet_meta.delete_bitmap().delete_bitmap.size());
+    // Iterate by const reference: copying an entry would also copy its roaring bitmap.
+    for (const auto& entry : tablet_meta.delete_bitmap().delete_bitmap) {
+        const RowsetId& rsid = std::get<0>(entry.first);
+        ASSERT_TRUE(rsid == rsids[2] || rsid == rsids[3]);
+        ASSERT_LE(std::get<2>(entry.first), 3u); // should not contain versions greater than 3.
+    }
+}
+
 TEST(TabletMetaTest, TestDeleteBitmap) {
     std::unique_ptr<DeleteBitmap> dbmp(new DeleteBitmap(10086));
     auto gen1 = [&dbmp](int64_t max_rst_id, uint32_t max_seg_id, uint32_t max_row) {
@@ -71,6 +121,26 @@ TEST(TabletMetaTest, TestDeleteBitmap) {
 
     ASSERT_EQ(dbmp->delete_bitmap.size(), 10 * 20 + 2);
 
+    {
+        auto snap = dbmp->snapshot(1);
+        auto it = snap.delete_bitmap.begin();
+        while (it != snap.delete_bitmap.end()) {
+            ASSERT_TRUE(std::get<2>(it->first) <= 1);
+            it++;
+        }
+        ASSERT_EQ(snap.delete_bitmap.size(), 10 * 20 + 1);
+    }
+
+    {
+        auto snap = dbmp->snapshot(0);
+        auto it = snap.delete_bitmap.begin();
+        while (it != snap.delete_bitmap.end()) {
+            ASSERT_TRUE(std::get<2>(it->first) <= 0);
+            it++;
+        }
+        ASSERT_EQ(snap.delete_bitmap.size(), 10 * 20);
+    }
+
     { // Bitmap of certain verisons only get their own row ids
         auto bm = dbmp->get({RowsetId {2, 0, 1, 1}, 1, 2});
         ASSERT_EQ(bm->cardinality(), 1);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org