You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/07/30 11:50:17 UTC
[doris] branch master updated: [feature-wip](unique-key-merge-on-write) Add support for tablet migration, DSIP-018[5/3] (#11283)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9333e79ae0 [feature-wip](unique-key-merge-on-write) Add support for tablet migration, DSIP-018[5/3] (#11283)
9333e79ae0 is described below
commit 9333e79ae0841992edf30e839483de598f304f46
Author: zhannngchen <48...@users.noreply.github.com>
AuthorDate: Sat Jul 30 19:50:11 2022 +0800
[feature-wip](unique-key-merge-on-write) Add support for tablet migration, DSIP-018[5/3] (#11283)
---
be/src/olap/rowset/beta_rowset_writer.cpp | 2 +
be/src/olap/snapshot_manager.cpp | 55 ++++++++++++++++-
be/src/olap/tablet_meta.cpp | 35 +++++++++++
be/src/olap/tablet_meta.h | 9 ++-
be/src/olap/task/engine_storage_migration_task.cpp | 12 ++--
be/src/olap/task/engine_storage_migration_task.h | 8 +--
be/test/olap/tablet_meta_test.cpp | 70 ++++++++++++++++++++++
7 files changed, 180 insertions(+), 11 deletions(-)
diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp
index 228fcee95b..491eed055d 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -161,6 +161,8 @@ Status BetaRowsetWriter::add_rowset(RowsetSharedPtr rowset) {
_total_data_size += rowset->rowset_meta()->data_disk_size();
_total_index_size += rowset->rowset_meta()->index_disk_size();
_num_segment += rowset->num_segments();
+ // append key_bounds to current rowset
+ rowset->get_segments_key_bounds(&_segments_encoded_key_bounds);
// TODO update zonemap
if (rowset->rowset_meta()->has_delete_predicate()) {
_rowset_meta->set_delete_predicate(rowset->rowset_meta()->delete_predicate());
diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp
index 390bd66206..3d0e885b20 100644
--- a/be/src/olap/snapshot_manager.cpp
+++ b/be/src/olap/snapshot_manager.cpp
@@ -152,6 +152,7 @@ Status SnapshotManager::convert_rowset_ids(const std::string& clone_dir, int64_t
tablet_schema.init_from_pb(new_tablet_meta_pb.schema());
std::unordered_map<Version, RowsetMetaPB*, HashOfVersion> rs_version_map;
+ std::unordered_map<RowsetId, RowsetId, HashOfRowsetId> rowset_id_mapping;
for (auto& visible_rowset : cloned_tablet_meta_pb.rs_metas()) {
RowsetMetaPB* rowset_meta = new_tablet_meta_pb.add_rs_metas();
@@ -160,6 +161,13 @@ Status SnapshotManager::convert_rowset_ids(const std::string& clone_dir, int64_t
RowsetId rowset_id = StorageEngine::instance()->next_rowset_id();
RETURN_NOT_OK(_rename_rowset_id(visible_rowset, clone_dir, tablet_schema, rowset_id,
rowset_meta));
+ RowsetId src_rs_id;
+ if (visible_rowset.rowset_id() > 0) {
+ src_rs_id.init(visible_rowset.rowset_id());
+ } else {
+ src_rs_id.init(visible_rowset.rowset_id_v2());
+ }
+ rowset_id_mapping[src_rs_id] = rowset_id;
} else {
// remote rowset
*rowset_meta = visible_rowset;
@@ -184,6 +192,13 @@ Status SnapshotManager::convert_rowset_ids(const std::string& clone_dir, int64_t
RowsetId rowset_id = StorageEngine::instance()->next_rowset_id();
RETURN_NOT_OK(_rename_rowset_id(stale_rowset, clone_dir, tablet_schema, rowset_id,
rowset_meta));
+ RowsetId src_rs_id;
+ if (stale_rowset.rowset_id() > 0) {
+ src_rs_id.init(stale_rowset.rowset_id());
+ } else {
+ src_rs_id.init(stale_rowset.rowset_id_v2());
+ }
+ rowset_id_mapping[src_rs_id] = rowset_id;
} else {
// remote rowset
*rowset_meta = stale_rowset;
@@ -193,6 +208,21 @@ Status SnapshotManager::convert_rowset_ids(const std::string& clone_dir, int64_t
rowset_meta->set_tablet_schema_hash(schema_hash);
}
+ if (!rowset_id_mapping.empty() && cloned_tablet_meta_pb.has_delete_bitmap()) {
+ auto& cloned_del_bitmap_pb = cloned_tablet_meta_pb.delete_bitmap();
+ DeleteBitmapPB* new_del_bitmap_pb = new_tablet_meta_pb.mutable_delete_bitmap();
+ int rst_ids_size = cloned_del_bitmap_pb.rowset_ids_size();
+ for (size_t i = 0; i < rst_ids_size; ++i) {
+ RowsetId rst_id;
+ rst_id.init(cloned_del_bitmap_pb.rowset_ids(i));
+ // This should not happen: if we can't convert some rowset id in the delete bitmap, the
+ // data might be inconsistent.
+ CHECK(rowset_id_mapping.find(rst_id) != rowset_id_mapping.end())
+ << "can't find rowset_id " << rst_id.to_string() << " in convert_rowset_ids";
+ new_del_bitmap_pb->set_rowset_ids(i, rowset_id_mapping[rst_id].to_string());
+ }
+ }
+
res = TabletMeta::save(cloned_meta_file, new_tablet_meta_pb);
if (!res.ok()) {
LOG(WARNING) << "fail to save converted tablet meta to dir='" << clone_dir;
@@ -356,6 +386,7 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet
break;
}
std::vector<RowsetSharedPtr> consistent_rowsets;
+ DeleteBitmap delete_bitmap_snapshot(new_tablet_meta->tablet_id());
/// If set missing_version, try to get all missing version.
/// If some of them not exist in tablet, we will fall back to
@@ -381,14 +412,21 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet
break;
}
}
+
+ // Take a full snapshot, will revise according to missed rowset later.
+ if (ref_tablet->keys_type() == UNIQUE_KEYS &&
+ ref_tablet->enable_unique_key_merge_on_write()) {
+ delete_bitmap_snapshot = ref_tablet->tablet_meta()->delete_bitmap().snapshot(
+ ref_tablet->max_version().second);
+ }
}
+ int64_t version = -1;
if (!res.ok() || !request.__isset.missing_version) {
/// not all missing versions are found, fall back to full snapshot.
res = Status::OK(); // reset res
consistent_rowsets.clear(); // reset vector
- std::shared_lock rdlock(ref_tablet->get_header_lock());
// get latest version
const RowsetSharedPtr last_version = ref_tablet->rowset_with_max_version();
if (last_version == nullptr) {
@@ -398,7 +436,7 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet
break;
}
// get snapshot version, use request.version if specified
- int32_t version = last_version->end_version();
+ version = last_version->end_version();
if (request.__isset.version) {
if (last_version->end_version() < request.version) {
LOG(WARNING) << "invalid make snapshot request. "
@@ -420,12 +458,21 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet
}
*allow_incremental_clone = false;
} else {
+ version = ref_tablet->max_version().second;
*allow_incremental_clone = true;
}
// copy the tablet meta to new_tablet_meta inside header lock
CHECK(res.ok()) << res;
ref_tablet->generate_tablet_meta_copy_unlocked(new_tablet_meta);
+ // The delete bitmap update operation and the add_inc_rowset operation are not atomic,
+ // so the delete bitmap may contain some data generated by invisible rowsets; we should
+ // get rid of these useless bitmaps when doing a snapshot.
+ if (ref_tablet->keys_type() == UNIQUE_KEYS &&
+ ref_tablet->enable_unique_key_merge_on_write()) {
+ delete_bitmap_snapshot =
+ ref_tablet->tablet_meta()->delete_bitmap().snapshot(version);
+ }
}
{
std::unique_lock wlock(ref_tablet->get_header_lock());
@@ -459,6 +506,10 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet
// Clear it for safety reason.
// Whether it is incremental or full snapshot, rowset information is stored in rs_meta.
new_tablet_meta->revise_rs_metas(std::move(rs_metas));
+ if (ref_tablet->keys_type() == UNIQUE_KEYS &&
+ ref_tablet->enable_unique_key_merge_on_write()) {
+ new_tablet_meta->revise_delete_bitmap_unlocked(delete_bitmap_snapshot);
+ }
if (snapshot_version == g_Types_constants.TSNAPSHOT_REQ_VERSION2) {
res = new_tablet_meta->save(header_path);
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index bc88710854..2b6ccc40b5 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -644,6 +644,27 @@ void TabletMeta::revise_rs_metas(std::vector<RowsetMetaSharedPtr>&& rs_metas) {
_stale_rs_metas.clear();
}
+// This method should be called after revise_rs_metas: the new rs_metas might be a subset
+// of the original tablet's rowsets, so the delete bitmap is rebuilt from the given snapshot, keeping only the entries whose rowset id appears in _rs_metas or _stale_rs_metas.
+//
+// Delete bitmap is protected by Tablet::_meta_lock, so we don't need to acquire the
+// TabletMeta's _meta_lock.
+void TabletMeta::revise_delete_bitmap_unlocked(const DeleteBitmap& delete_bitmap) {
+ _delete_bitmap = std::make_unique<DeleteBitmap>(tablet_id());
+ for (auto rs : _rs_metas) {
+ DeleteBitmap rs_bm(tablet_id());
+ delete_bitmap.subset({rs->rowset_id(), 0, 0}, {rs->rowset_id(), UINT32_MAX, INT64_MAX},
+ &rs_bm);
+ _delete_bitmap->merge(rs_bm);
+ }
+ for (auto rs : _stale_rs_metas) {
+ DeleteBitmap rs_bm(tablet_id());
+ delete_bitmap.subset({rs->rowset_id(), 0, 0}, {rs->rowset_id(), UINT32_MAX, INT64_MAX},
+ &rs_bm);
+ _delete_bitmap->merge(rs_bm);
+ }
+}
+
void TabletMeta::delete_stale_rs_meta_by_version(const Version& version) {
auto it = _stale_rs_metas.begin();
while (it != _stale_rs_metas.end()) {
@@ -792,6 +813,20 @@ DeleteBitmap DeleteBitmap::snapshot() const {
return DeleteBitmap(*this);
}
+DeleteBitmap DeleteBitmap::snapshot(Version version) const {
+ // Take a full snapshot first, then erase every entry whose key version (the third element of the BitmapKey tuple) is greater than the given version.
+ DeleteBitmap snapshot = this->snapshot();
+ auto it = snapshot.delete_bitmap.begin();
+ while (it != snapshot.delete_bitmap.end()) {
+ if (std::get<2>(it->first) > version) {
+ it = snapshot.delete_bitmap.erase(it);
+ } else {
+ it++;
+ }
+ }
+ return snapshot;
+}
+
void DeleteBitmap::add(const BitmapKey& bmk, uint32_t row_id) {
std::lock_guard l(lock);
delete_bitmap[bmk].add(row_id);
diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h
index 34a42ed510..ec3ad1f60f 100644
--- a/be/src/olap/tablet_meta.h
+++ b/be/src/olap/tablet_meta.h
@@ -159,6 +159,7 @@ public:
const std::vector<RowsetMetaSharedPtr>& to_delete,
bool same_version = false);
void revise_rs_metas(std::vector<RowsetMetaSharedPtr>&& rs_metas);
+ void revise_delete_bitmap_unlocked(const DeleteBitmap& delete_bitmap);
const std::vector<RowsetMetaSharedPtr>& all_stale_rs_metas() const;
RowsetMetaSharedPtr acquire_rs_meta_by_version(const Version& version) const;
@@ -272,7 +273,7 @@ class DeleteBitmap {
public:
mutable std::shared_mutex lock;
using SegmentId = uint32_t;
- using Version = uint32_t;
+ using Version = uint64_t;
using BitmapKey = std::tuple<RowsetId, SegmentId, Version>;
std::map<BitmapKey, roaring::Roaring> delete_bitmap; // Ordered map
@@ -299,6 +300,12 @@ public:
*/
DeleteBitmap snapshot() const;
+ /**
+ * Makes a snapshot of the delete bitmap at the given version; a read lock will be
+ * acquired temporarily in this process
+ */
+ DeleteBitmap snapshot(Version version) const;
+
/**
* Marks the specific row deleted
*/
diff --git a/be/src/olap/task/engine_storage_migration_task.cpp b/be/src/olap/task/engine_storage_migration_task.cpp
index ac31948439..e3737a62e4 100644
--- a/be/src/olap/task/engine_storage_migration_task.cpp
+++ b/be/src/olap/task/engine_storage_migration_task.cpp
@@ -116,7 +116,7 @@ Status EngineStorageMigrationTask::_check_running_txns_until_timeout(
Status EngineStorageMigrationTask::_gen_and_write_header_to_hdr_file(
uint64_t shard, const std::string& full_path,
- const std::vector<RowsetSharedPtr>& consistent_rowsets) {
+ const std::vector<RowsetSharedPtr>& consistent_rowsets, int64_t end_version) {
// need hold migration lock and push lock outside
Status res = Status::OK();
int64_t tablet_id = _tablet->tablet_id();
@@ -124,7 +124,7 @@ Status EngineStorageMigrationTask::_gen_and_write_header_to_hdr_file(
TabletMetaSharedPtr new_tablet_meta(new (std::nothrow) TabletMeta());
{
std::shared_lock rdlock(_tablet->get_header_lock());
- _generate_new_header(shard, consistent_rowsets, new_tablet_meta);
+ _generate_new_header(shard, consistent_rowsets, new_tablet_meta, end_version);
}
std::string new_meta_file = full_path + "/" + std::to_string(tablet_id) + ".hdr";
res = new_tablet_meta->save(new_meta_file);
@@ -299,7 +299,7 @@ Status EngineStorageMigrationTask::_migrate() {
}
// generate new tablet meta and write to hdr file
- res = _gen_and_write_header_to_hdr_file(shard, full_path, consistent_rowsets);
+ res = _gen_and_write_header_to_hdr_file(shard, full_path, consistent_rowsets, end_version);
if (!res.ok()) {
break;
}
@@ -321,7 +321,7 @@ Status EngineStorageMigrationTask::_migrate() {
// TODO(ygl): lost some information here, such as cumulative layer point
void EngineStorageMigrationTask::_generate_new_header(
uint64_t new_shard, const std::vector<RowsetSharedPtr>& consistent_rowsets,
- TabletMetaSharedPtr new_tablet_meta) {
+ TabletMetaSharedPtr new_tablet_meta, int64_t end_version) {
_tablet->generate_tablet_meta_copy_unlocked(new_tablet_meta);
std::vector<RowsetMetaSharedPtr> rs_metas;
@@ -329,6 +329,10 @@ void EngineStorageMigrationTask::_generate_new_header(
rs_metas.push_back(rs->rowset_meta());
}
new_tablet_meta->revise_rs_metas(std::move(rs_metas));
+ if (_tablet->keys_type() == UNIQUE_KEYS && _tablet->enable_unique_key_merge_on_write()) {
+ DeleteBitmap bm = _tablet->tablet_meta()->delete_bitmap().snapshot(end_version);
+ new_tablet_meta->revise_delete_bitmap_unlocked(bm);
+ }
new_tablet_meta->set_shard_id(new_shard);
// should not save new meta here, because new tablet may failed
// should not remove the old meta here, because the new header maybe not valid
diff --git a/be/src/olap/task/engine_storage_migration_task.h b/be/src/olap/task/engine_storage_migration_task.h
index b529b78c2a..c2c9b7a774 100644
--- a/be/src/olap/task/engine_storage_migration_task.h
+++ b/be/src/olap/task/engine_storage_migration_task.h
@@ -49,14 +49,14 @@ private:
bool _is_rowsets_size_less_than_threshold(
const std::vector<RowsetSharedPtr>& consistent_rowsets);
- Status _gen_and_write_header_to_hdr_file(
- uint64_t shard, const std::string& full_path,
- const std::vector<RowsetSharedPtr>& consistent_rowsets);
+ Status _gen_and_write_header_to_hdr_file(uint64_t shard, const std::string& full_path,
+ const std::vector<RowsetSharedPtr>& consistent_rowsets,
+ int64_t end_version);
Status _reload_tablet(const std::string& full_path);
void _generate_new_header(uint64_t new_shard,
const std::vector<RowsetSharedPtr>& consistent_rowsets,
- TabletMetaSharedPtr new_tablet_meta);
+ TabletMetaSharedPtr new_tablet_meta, int64_t end_version);
// TODO: hkp
// rewrite this function
diff --git a/be/test/olap/tablet_meta_test.cpp b/be/test/olap/tablet_meta_test.cpp
index f8b83329ec..fffe32f382 100644
--- a/be/test/olap/tablet_meta_test.cpp
+++ b/be/test/olap/tablet_meta_test.cpp
@@ -21,6 +21,8 @@
#include <string>
+#include "testutil/mock_rowset.h"
+
namespace doris {
TEST(TabletMetaTest, SaveAndParse) {
@@ -41,6 +43,54 @@ TEST(TabletMetaTest, SaveAndParse) {
EXPECT_EQ(old_tablet_meta, new_tablet_meta);
}
+TEST(TabletMetaTest, TestReviseMeta) {
+ TabletMeta tablet_meta;
+ std::vector<RowsetSharedPtr> src_rowsets;
+ std::vector<RowsetId> rsids;
+ // Build four source rowsets (rowset id and version both equal to i, two segments each) and register them in tablet_meta.
+ for (int i = 0; i < 4; i++) {
+ RowsetMetaPB rs_meta_pb;
+ RowsetId rowset_id;
+ rowset_id.init(i);
+ rsids.push_back(rowset_id);
+ rs_meta_pb.set_rowset_id_v2(rowset_id.to_string());
+ rs_meta_pb.set_num_segments(2);
+ rs_meta_pb.set_start_version(i);
+ rs_meta_pb.set_end_version(i);
+ RowsetMetaSharedPtr meta_ptr = std::make_shared<RowsetMeta>();
+ meta_ptr->init_from_pb(rs_meta_pb);
+ RowsetSharedPtr rowset_ptr;
+ TabletSchema schema;
+ MockRowset::create_rowset(&schema, "", meta_ptr, &rowset_ptr, false);
+ src_rowsets.push_back(rowset_ptr);
+ tablet_meta.add_rs_meta(rowset_ptr->rowset_meta());
+ }
+ ASSERT_EQ(4, tablet_meta.all_rs_metas().size());
+
+ tablet_meta.delete_bitmap().add({rsids[0], 1, 1}, 1);
+ tablet_meta.delete_bitmap().add({rsids[1], 0, 2}, 2);
+ tablet_meta.delete_bitmap().add({rsids[2], 1, 1}, 1);
+ tablet_meta.delete_bitmap().add({rsids[3], 0, 2}, 3);
+ tablet_meta.delete_bitmap().add({rsids[3], 0, 4}, 4);
+ ASSERT_EQ(5, tablet_meta.delete_bitmap().delete_bitmap.size());
+
+ std::vector<RowsetMetaSharedPtr> new_rowsets;
+ new_rowsets.push_back(src_rowsets[2]->rowset_meta());
+ new_rowsets.push_back(src_rowsets[3]->rowset_meta());
+ tablet_meta.revise_rs_metas(std::move(new_rowsets));
+ // Take a snapshot with max_version=3, which drops the {rsids[3], 0, 4} entry; revising then keeps only entries of the two remaining rowsets.
+ DeleteBitmap snap = tablet_meta.delete_bitmap().snapshot(3);
+ tablet_meta.revise_delete_bitmap_unlocked(snap);
+ ASSERT_EQ(2, tablet_meta.all_rs_metas().size());
+ ASSERT_EQ(2, tablet_meta.delete_bitmap().delete_bitmap.size());
+ for (auto entry : tablet_meta.delete_bitmap().delete_bitmap) {
+ RowsetId rsid = std::get<0>(entry.first);
+ ASSERT_TRUE(rsid == rsids[2] || rsid == rsids[3]);
+ int64_t version = std::get<2>(entry.first);
+ ASSERT_TRUE(version <= 3); // must not contain versions greater than the snapshot version 3.
+ }
+}
+
TEST(TabletMetaTest, TestDeleteBitmap) {
std::unique_ptr<DeleteBitmap> dbmp(new DeleteBitmap(10086));
auto gen1 = [&dbmp](int64_t max_rst_id, uint32_t max_seg_id, uint32_t max_row) {
@@ -71,6 +121,26 @@ TEST(TabletMetaTest, TestDeleteBitmap) {
ASSERT_EQ(dbmp->delete_bitmap.size(), 10 * 20 + 2);
+ {
+ auto snap = dbmp->snapshot(1);
+ auto it = snap.delete_bitmap.begin();
+ while (it != snap.delete_bitmap.end()) {
+ ASSERT_TRUE(std::get<2>(it->first) <= 1);
+ it++;
+ }
+ ASSERT_EQ(snap.delete_bitmap.size(), 10 * 20 + 1);
+ }
+
+ {
+ auto snap = dbmp->snapshot(0);
+ auto it = snap.delete_bitmap.begin();
+ while (it != snap.delete_bitmap.end()) {
+ ASSERT_TRUE(std::get<2>(it->first) <= 0);
+ it++;
+ }
+ ASSERT_EQ(snap.delete_bitmap.size(), 10 * 20);
+ }
+
{ // Bitmap of certain versions only get their own row ids
auto bm = dbmp->get({RowsetId {2, 0, 1, 1}, 1, 2});
ASSERT_EQ(bm->cardinality(), 1);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org