You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/07/12 08:34:49 UTC
[doris] branch master updated: [feature-wip](unique-key-merge-on-write) Add delete bitmap for DSIP-018 (#10548)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 2084d8bdf3 [feature-wip](unique-key-merge-on-write) Add delete bitmap for DSIP-018 (#10548)
2084d8bdf3 is described below
commit 2084d8bdf36e9ec6bafd060c3d615b456937db24
Author: Compilation Success <10...@users.noreply.github.com>
AuthorDate: Tue Jul 12 16:34:42 2022 +0800
[feature-wip](unique-key-merge-on-write) Add delete bitmap for DSIP-018 (#10548)
Add delete bitmap for
DSIP-018: Support Merge-On-Write implementation for UNIQUE KEY data model
---
be/src/olap/tablet_meta.cpp | 134 ++++++++++++++++++++++-
be/src/olap/tablet_meta.h | 113 +++++++++++++++++++
be/test/olap/tablet_meta_test.cpp | 79 +++++++++++++
be/test/olap/test_data/header_without_inc_rs.txt | 3 +-
gensrc/proto/olap_file.proto | 9 ++
5 files changed, 335 insertions(+), 3 deletions(-)
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index bd3161fd30..3a0490c370 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -48,7 +48,8 @@ Status TabletMeta::create(const TCreateTabletReq& request, const TabletUid& tabl
return Status::OK();
}
-TabletMeta::TabletMeta() : _tablet_uid(0, 0), _schema(new TabletSchema) {}
+TabletMeta::TabletMeta()
+ : _tablet_uid(0, 0), _schema(new TabletSchema), _delete_bitmap(new DeleteBitmap()) {}
TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id,
int64_t replica_id, int32_t schema_hash, uint64_t shard_id,
@@ -57,7 +58,7 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id
TabletUid tablet_uid, TTabletType::type tabletType,
TStorageMedium::type t_storage_medium, const std::string& storage_name,
TCompressionType::type compression_type, const std::string& storage_policy)
- : _tablet_uid(0, 0), _schema(new TabletSchema) {
+ : _tablet_uid(0, 0), _schema(new TabletSchema), _delete_bitmap(new DeleteBitmap()) {
TabletMetaPB tablet_meta_pb;
tablet_meta_pb.set_table_id(table_id);
tablet_meta_pb.set_partition_id(partition_id);
@@ -456,6 +457,23 @@ void TabletMeta::init_from_pb(const TabletMetaPB& tablet_meta_pb) {
_remote_storage_name = tablet_meta_pb.remote_storage_name();
_storage_medium = tablet_meta_pb.storage_medium();
_cooldown_resource = tablet_meta_pb.storage_policy();
+
+ if (tablet_meta_pb.has_delete_bitmap()) {
+ int rst_ids_size = tablet_meta_pb.delete_bitmap().rowset_ids_size();
+ int seg_ids_size = tablet_meta_pb.delete_bitmap().segment_ids_size();
+ int versions_size = tablet_meta_pb.delete_bitmap().versions_size();
+ int seg_maps_size = tablet_meta_pb.delete_bitmap().segment_delete_bitmaps_size();
+ CHECK(rst_ids_size == seg_ids_size && seg_ids_size == seg_maps_size &&
+ seg_maps_size == versions_size);
+ for (size_t i = 0; i < rst_ids_size; ++i) {
+ RowsetId rst_id;
+ rst_id.init(tablet_meta_pb.delete_bitmap().rowset_ids(i));
+ auto seg_id = tablet_meta_pb.delete_bitmap().segment_ids(i);
+ uint32_t ver = tablet_meta_pb.delete_bitmap().versions(i);
+ auto bitmap = tablet_meta_pb.delete_bitmap().segment_delete_bitmaps(i).data();
+ delete_bitmap().delete_bitmap[{rst_id, seg_id, ver}] = roaring::Roaring::read(bitmap);
+ }
+ }
}
void TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb) {
@@ -505,6 +523,20 @@ void TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb) {
tablet_meta_pb->set_remote_storage_name(_remote_storage_name);
tablet_meta_pb->set_storage_medium(_storage_medium);
tablet_meta_pb->set_storage_policy(_cooldown_resource);
+
+ {
+ std::shared_lock l(delete_bitmap().lock);
+ DeleteBitmapPB* delete_bitmap_pb = tablet_meta_pb->mutable_delete_bitmap();
+ for (auto& [id, bitmap] : delete_bitmap().delete_bitmap) {
+ auto& [rowset_id, segment_id, ver] = id;
+ delete_bitmap_pb->add_rowset_ids(rowset_id.to_string());
+ delete_bitmap_pb->add_segment_ids(segment_id);
+ delete_bitmap_pb->add_versions(ver);
+ std::string bitmap_data(bitmap.getSizeInBytes(), '\0');
+ bitmap.write(bitmap_data.data());
+ *(delete_bitmap_pb->add_segment_delete_bitmaps()) = std::move(bitmap_data);
+ }
+ }
}
uint32_t TabletMeta::mem_size() const {
@@ -729,4 +761,102 @@ bool operator!=(const TabletMeta& a, const TabletMeta& b) {
return !(a == b);
}
+DeleteBitmap::DeleteBitmap() {}
+
+DeleteBitmap::DeleteBitmap(const DeleteBitmap& o) {
+ delete_bitmap = o.delete_bitmap; // just copy data
+}
+
+DeleteBitmap& DeleteBitmap::operator=(const DeleteBitmap& o) {
+ delete_bitmap = o.delete_bitmap; // just copy data
+ return *this;
+}
+
+DeleteBitmap::DeleteBitmap(DeleteBitmap&& o) {
+ delete_bitmap = std::move(o.delete_bitmap);
+}
+
+DeleteBitmap& DeleteBitmap::operator=(DeleteBitmap&& o) {
+ delete_bitmap = std::move(o.delete_bitmap);
+ return *this;
+}
+
+DeleteBitmap DeleteBitmap::snapshot() const {
+ std::shared_lock l(lock);
+ return DeleteBitmap(*this);
+}
+
+void DeleteBitmap::add(const BitmapKey& bmk, uint32_t row_id) {
+ std::lock_guard l(lock);
+ delete_bitmap[bmk].add(row_id);
+}
+
+int DeleteBitmap::remove(const BitmapKey& bmk, uint32_t row_id) {
+ std::lock_guard l(lock);
+ auto it = delete_bitmap.find(bmk);
+ if (it == delete_bitmap.end()) return -1;
+ it->second.remove(row_id);
+ return 0;
+}
+
+void DeleteBitmap::remove(const BitmapKey& start, const BitmapKey& end) {
+ std::lock_guard l(lock);
+ for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end();) {
+ auto& [k, _] = *it;
+ if (k >= end) {
+ break;
+ }
+ it = delete_bitmap.erase(it);
+ }
+}
+
+bool DeleteBitmap::contains(const BitmapKey& bmk, uint32_t row_id) const {
+ std::shared_lock l(lock);
+ auto it = delete_bitmap.find(bmk);
+ return it != delete_bitmap.end() && it->second.contains(row_id);
+}
+
+int DeleteBitmap::set(const BitmapKey& bmk, const roaring::Roaring& segment_delete_bitmap) {
+ std::lock_guard l(lock);
+ auto [_, inserted] = delete_bitmap.insert_or_assign(bmk, segment_delete_bitmap);
+ return inserted;
+}
+
+int DeleteBitmap::get(const BitmapKey& bmk, roaring::Roaring* segment_delete_bitmap) const {
+ std::shared_lock l(lock);
+ auto it = delete_bitmap.find(bmk);
+ if (it == delete_bitmap.end()) return -1;
+ *segment_delete_bitmap = it->second; // copy
+ return 0;
+}
+
+const roaring::Roaring* DeleteBitmap::get(const BitmapKey& bmk) const {
+ std::shared_lock l(lock);
+ auto it = delete_bitmap.find(bmk);
+ if (it == delete_bitmap.end()) return nullptr;
+ return &(it->second); // get address
+}
+
+void DeleteBitmap::subset(const BitmapKey& start, const BitmapKey& end,
+ DeleteBitmap* subset_rowset_map) const {
+ roaring::Roaring roaring;
+ DCHECK(start < end);
+ std::shared_lock l(lock);
+ for (auto it = delete_bitmap.upper_bound(start); it != delete_bitmap.end(); ++it) {
+ auto& [k, bm] = *it;
+ if (k >= end) {
+ break;
+ }
+ subset_rowset_map->set(k, bm);
+ }
+}
+
+void DeleteBitmap::merge(const DeleteBitmap& other) {
+ std::lock_guard l(lock);
+ for (auto& i : other.delete_bitmap) {
+ auto [j, succ] = this->delete_bitmap.insert(i);
+ if (!succ) j->second |= i.second;
+ }
+}
+
} // namespace doris
diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h
index e0010d8f41..6e558d996a 100644
--- a/be/src/olap/tablet_meta.h
+++ b/be/src/olap/tablet_meta.h
@@ -67,6 +67,7 @@ class RowsetMeta;
class Rowset;
class DataDir;
class TabletMeta;
+class DeleteBitmap;
using TabletMetaSharedPtr = std::shared_ptr<TabletMeta>;
// Class encapsulates meta of tablet.
@@ -199,6 +200,8 @@ public:
_cooldown_resource = std::move(resource);
}
+ DeleteBitmap& delete_bitmap() { return *_delete_bitmap; }
+
private:
Status _save_meta(DataDir* data_dir);
void _init_column_from_tcolumn(uint32_t unique_id, const TColumn& tcolumn, ColumnPB* column);
@@ -239,9 +242,119 @@ private:
// FIXME(cyx): Currently `cooldown_resource` is equivalent to `storage_policy`.
io::ResourceId _cooldown_resource;
+ std::unique_ptr<DeleteBitmap> _delete_bitmap;
+
mutable std::shared_mutex _meta_lock;
};
+/**
+ * Wraps multiple bitmaps for recording rows (row id) that are deleted or
+ * overwritten.
+ *
+ * RowsetId and SegmentId are for locating segment, Version here is a single
+ * uint32_t means that at which "version" of the load causes the delete or
+ * overwrite.
+ *
+ * The start and end version of a load is the same, it's ok and straightforward
+ * to use a single uint32_t.
+ *
+ * e.g.
+ * There is a key "key1" in rowset id 1, version [1,1], segment id 1, row id 1.
+ * A new load also contains "key1", the rowset id 2, version [2,2], segment id 1
+ * the delete bitmap will be `{1,1,2} -> 1`, which means the "row id 1" in
+ * "rowset id 1, segment id 1" is deleted/overitten by some loads at "version 2"
+ */
+class DeleteBitmap {
+public:
+ mutable std::shared_mutex lock;
+ using SegmentId = uint32_t;
+ using Version = uint32_t;
+ using BitmapKey = std::tuple<RowsetId, SegmentId, Version>;
+ std::map<BitmapKey, roaring::Roaring> delete_bitmap; // Ordered map
+
+ DeleteBitmap();
+
+ /**
+ * Copy c-tor for making delete bitmap snapshot on read path
+ */
+ DeleteBitmap(const DeleteBitmap& r);
+ DeleteBitmap& operator=(const DeleteBitmap& r);
+ /**
+ * Move c-tor for making delete bitmap snapshot on read path
+ */
+ DeleteBitmap(DeleteBitmap&& r);
+ DeleteBitmap& operator=(DeleteBitmap&& r);
+
+ /**
+ * Makes a snapshot of delete bimap, read lock will be acquired in this
+ * process
+ */
+ DeleteBitmap snapshot() const;
+
+ /**
+ * Marks the specific row deleted
+ */
+ void add(const BitmapKey& bmk, uint32_t row_id);
+
+ /**
+ * Clears the deletetion mark specific row
+ *
+ * @return non-zero if the associated delete bimap does not exist
+ */
+ int remove(const BitmapKey& bmk, uint32_t row_id);
+
+ /**
+ * Clears bitmaps in range [lower_key, upper_key)
+ */
+ void remove(const BitmapKey& lower_key, const BitmapKey& upper_key);
+
+ /**
+ * Checks if the given row is marked deleted
+ *
+ * @return true if marked deleted
+ */
+ bool contains(const BitmapKey& bmk, uint32_t row_id) const;
+
+ /**
+ * Sets the bitmap of specific segment, it's may be insertion or replacement
+ *
+ * @return 0 if the insertion took place, 1 if the assignment took place
+ */
+ int set(const BitmapKey& bmk, const roaring::Roaring& segment_delete_bitmap);
+
+ /**
+ * Gets a copy of specific delete bmk
+ *
+ * @param segment_delete_bitmap output param
+ * @return non-zero if the associated delete bimap does not exist
+ */
+ int get(const BitmapKey& bmk, roaring::Roaring* segment_delete_bitmap) const;
+
+ /**
+ * Gets reference to a specific delete map, DO NOT use this function on a
+ * mutable DeleteBitmap object
+ * @return nullptr if the given bitmap does not exist
+ */
+ const roaring::Roaring* get(const BitmapKey& bmk) const;
+
+ /**
+ * Gets subset of delete_bitmap with given range [start, end)
+ *
+ * @parma start start
+ * @parma end end
+ * @parma subset_delete_map output param
+ */
+ void subset(const BitmapKey& start, const BitmapKey& end,
+ DeleteBitmap* subset_delete_map) const;
+
+ /**
+ * Merges the given delete bitmap into *this
+ *
+ * @param other
+ */
+ void merge(const DeleteBitmap& other);
+};
+
static const std::string SEQUENCE_COL = "__DORIS_SEQUENCE_COL__";
inline TabletUid TabletMeta::tablet_uid() const {
diff --git a/be/test/olap/tablet_meta_test.cpp b/be/test/olap/tablet_meta_test.cpp
index 67f9f95fa4..4f2b7f7afd 100644
--- a/be/test/olap/tablet_meta_test.cpp
+++ b/be/test/olap/tablet_meta_test.cpp
@@ -42,4 +42,83 @@ TEST(TabletMetaTest, SaveAndParse) {
EXPECT_EQ(old_tablet_meta, new_tablet_meta);
}
+TEST(TabletMetaTest, TestDeleteBitmap) {
+ std::unique_ptr<DeleteBitmap> dbmp(new DeleteBitmap());
+ auto gen1 = [&dbmp](int64_t max_rst_id, uint32_t max_seg_id, uint32_t max_row) {
+ for (int64_t i = 0; i < max_rst_id; ++i) {
+ for (uint32_t j = 0; j < max_seg_id; ++j) {
+ for (uint32_t k = 0; k < max_row; ++k) {
+ dbmp->add({RowsetId {2, 0, 1, i}, j, 0}, k);
+ }
+ }
+ }
+ };
+ gen1(10, 20, 1000);
+ dbmp->add({RowsetId {2, 0, 1, 2}, 2, 0}, 2); // redundant
+ {
+ roaring::Roaring d;
+ dbmp->get({RowsetId {2, 0, 1, 2}, 0, 0}, &d);
+ EXPECT_EQ(d.cardinality(), 1000);
+ d -= *dbmp->get({RowsetId {2, 0, 1, 2}, 0, 0});
+ EXPECT_EQ(d.cardinality(), 0);
+ }
+
+ // Add version 1 and 2
+ dbmp->add({RowsetId {2, 0, 1, 1}, 1, 1}, 1100);
+ dbmp->add({RowsetId {2, 0, 1, 1}, 1, 1}, 1101);
+ dbmp->add({RowsetId {2, 0, 1, 1}, 1, 1}, 1102);
+ dbmp->add({RowsetId {2, 0, 1, 1}, 1, 1}, 1103);
+ dbmp->add({RowsetId {2, 0, 1, 1}, 1, 2}, 1104);
+
+ ASSERT_EQ(dbmp->delete_bitmap.size(), 10 * 20 + 2);
+
+ { // Bitmap of certain verisons only get their own row ids
+ auto bm = dbmp->get({RowsetId {2, 0, 1, 1}, 1, 2});
+ ASSERT_EQ(bm->cardinality(), 1);
+ ASSERT_FALSE(bm->contains(999));
+ ASSERT_FALSE(bm->contains(1100));
+ ASSERT_TRUE(bm->contains(1104));
+ }
+
+ {
+ // test remove
+ // Nothing removed
+ dbmp->remove({RowsetId {2, 0, 1, 1}, 0, 0}, {RowsetId {2, 0, 1, 1}, 0, 0});
+ ASSERT_EQ(dbmp->delete_bitmap.size(), 10 * 20 + 2);
+ dbmp->remove({RowsetId {2, 0, 1, 100}, 0, 0}, {RowsetId {2, 0, 1, 100}, 50000, 0});
+ ASSERT_EQ(dbmp->delete_bitmap.size(), 10 * 20 + 2);
+
+ // Remove all seg of rowset {2,0,1,0}
+ dbmp->remove({RowsetId {2, 0, 1, 0}, 0, 0}, {RowsetId {2, 0, 1, 0}, 5000, 0});
+ ASSERT_EQ(dbmp->delete_bitmap.size(), 9 * 20 + 2);
+ // Remove all rowset {2,0,1,7} to {2,0,1,9}
+ dbmp->remove({RowsetId {2, 0, 1, 8}, 0, 0}, {RowsetId {2, 0, 1, 9}, 5000, 0});
+ ASSERT_EQ(dbmp->delete_bitmap.size(), 7 * 20 + 2);
+ }
+
+ {
+ DeleteBitmap db_upper;
+ dbmp->subset({RowsetId {2, 0, 1, 1}, 1, 0}, {RowsetId {2, 0, 1, 1}, 1000000, 0}, &db_upper);
+ roaring::Roaring d;
+ ASSERT_EQ(db_upper.get({RowsetId {2, 0, 1, 1}, 1, 1}, &d), 0);
+ ASSERT_EQ(d.cardinality(), 4);
+ ASSERT_EQ(db_upper.get({RowsetId {2, 0, 1, 1}, 1, 2}, &d), 0);
+ ASSERT_EQ(d.cardinality(), 1);
+ ASSERT_EQ(db_upper.delete_bitmap.size(), 20);
+ }
+
+ {
+ auto old_size = dbmp->delete_bitmap.size();
+ // test merge
+ DeleteBitmap other;
+ other.add({RowsetId {2, 0, 1, 1}, 1, 1}, 1100);
+ dbmp->merge(other);
+ ASSERT_EQ(dbmp->delete_bitmap.size(), old_size);
+ other.add({RowsetId {2, 0, 1, 1}, 1001, 1}, 1100);
+ other.add({RowsetId {2, 0, 1, 1}, 1002, 1}, 1100);
+ dbmp->merge(other);
+ ASSERT_EQ(dbmp->delete_bitmap.size(), old_size + 2);
+ }
+}
+
} // namespace doris
diff --git a/be/test/olap/test_data/header_without_inc_rs.txt b/be/test/olap/test_data/header_without_inc_rs.txt
index bde8d7f4d5..61fc42e419 100644
--- a/be/test/olap/test_data/header_without_inc_rs.txt
+++ b/be/test/olap/test_data/header_without_inc_rs.txt
@@ -146,5 +146,6 @@
"storage_medium": "HDD",
"remote_storage_name": "",
"replica_id": 0,
- "storage_policy": ""
+ "storage_policy": "",
+ "delete_bitmap": {}
}
diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto
index 4385d5803d..a9ba857c7e 100644
--- a/gensrc/proto/olap_file.proto
+++ b/gensrc/proto/olap_file.proto
@@ -278,6 +278,7 @@ message TabletMetaPB {
optional string remote_storage_name = 20;
optional int64 replica_id = 21 [default = 0];
optional string storage_policy = 22;
+ optional DeleteBitmapPB delete_bitmap = 23;
}
message OLAPIndexHeaderMessage {
@@ -298,3 +299,11 @@ message OLAPDataHeaderMessage {
message OLAPRawDeltaHeaderMessage {
required int32 schema_hash = 2;
}
+
+message DeleteBitmapPB {
+ repeated string rowset_ids = 1;
+ repeated uint32 segment_ids = 2;
+ repeated int64 versions = 3;
+ // Serialized roaring bitmaps indexed with {rowset_id, segment_id, version}
+ repeated bytes segment_delete_bitmaps = 4;
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org