You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2022/07/01 10:13:21 UTC

[GitHub] [doris] kpfly commented on a diff in pull request #10548: [WIP](unique-key-merge-on-write) Add delete bitmap for DSIP-018

kpfly commented on code in PR #10548:
URL: https://github.com/apache/doris/pull/10548#discussion_r911822999


##########
gensrc/proto/olap_file.proto:
##########
@@ -283,3 +284,10 @@ message OLAPDataHeaderMessage {
 message OLAPRawDeltaHeaderMessage {
     required int32 schema_hash = 2;
 }
+
+message DeleteBitmapPB {
+    repeated string rowset_ids = 1;
+    repeated uint32 segment_ids = 2;
+    repeated int64 versions = 3;
+    repeated bytes segment_delete_bitmaps = 4;

Review Comment:
   Please add some comments explaining what each of these fields means.



##########
be/src/olap/tablet_meta.h:
##########
@@ -214,9 +217,120 @@ class TabletMeta {
     RowsetTypePB _preferred_rowset_type = BETA_ROWSET;
     std::string _remote_storage_name;
     StorageMediumPB _storage_medium = StorageMediumPB::HDD;
+    std::unique_ptr<DeleteBitmap> _delete_bitmap;
     std::shared_mutex _meta_lock;
 };
 
+/**
+ * Wraps multiple bitmaps for recording rows (row id) that are deleted or
+ * overwritten.
+ *
+ * RowsetId and SegmentId locate the segment; Version here is a single
+ * uint32_t indicating the "version" of the load that caused the delete or
+ * overwrite.
+ *
+ * The start and end versions of a load are the same, so it is straightforward
+ * to use a single uint32_t.
+ *
+ * e.g.
+ * There is a key "key1" in rowset id 1, version [1,1], segment id 1, row id 1.
+ * A new load also contains "key1", the rowset id 2, version [2,2], segment id 1
+ * the delete bitmap will be `{1,1,2} -> 1`, which means the "row id 1" in
+ * "rowset id 1, segment id 1" is deleted/overwritten by some loads at "version 2"
+ */
+class DeleteBitmap {
+    public:

Review Comment:
   Please fix the indentation of the `public:` access specifier.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org