You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by da...@apache.org on 2023/06/28 03:47:14 UTC

[doris] branch master updated: [fix](partial-update) fix a coredump in commit_phase_update_delete_bitmap (#21254)

This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 21b30820fd [fix](partial-update) fix a coredump in commit_phase_update_delete_bitmap (#21254)
21b30820fd is described below

commit 21b30820fd48b5cf310ed5a7ca2a3b26d1644586
Author: zhannngchen <48...@users.noreply.github.com>
AuthorDate: Wed Jun 28 11:47:07 2023 +0800

    [fix](partial-update) fix a coredump in commit_phase_update_delete_bitmap (#21254)
---
 be/src/olap/delta_writer.cpp                     | 17 ++++++++++++++++-
 be/src/olap/rowset/segment_v2/segment_writer.cpp |  5 ++++-
 be/src/olap/tablet.cpp                           | 22 ++++++++++++++++++++++
 be/src/olap/tablet.h                             |  3 +++
 be/src/olap/txn_manager.cpp                      | 23 +----------------------
 be/src/olap/txn_manager.h                        |  4 ----
 6 files changed, 46 insertions(+), 28 deletions(-)

diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index 16fc02dbc0..712b3efc37 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -453,9 +453,24 @@ Status DeltaWriter::close_wait(const PSlaveTabletNodes& slave_tablet_nodes,
             RETURN_IF_ERROR(_tablet->calc_delete_bitmap_between_segments(_cur_rowset, segments,
                                                                          _delete_bitmap));
         }
+
+        // commit_phase_update_delete_bitmap() may generate new segments, so we create a new
+        // transient rowset writer to write them, then merge its rowset back into the original
+        // rowset. A failure to create the writer is propagated instead of leaving it null.
+        std::unique_ptr<RowsetWriter> rowset_writer;
+        RETURN_IF_ERROR(_tablet->create_transient_rowset_writer(_cur_rowset, &rowset_writer));
         RETURN_IF_ERROR(_tablet->commit_phase_update_delete_bitmap(
                 _cur_rowset, _rowset_ids, _delete_bitmap, segments, _req.txn_id,
-                _rowset_writer.get()));
+                rowset_writer.get()));
+        if (_cur_rowset->tablet_schema()->is_partial_update()) {
+            // build the transient rowset and merge its meta into the current rowset
+            RETURN_IF_ERROR(rowset_writer->flush());
+            RowsetSharedPtr transient_rowset = rowset_writer->build();
+            _cur_rowset->merge_rowset_meta(transient_rowset->rowset_meta());
+
+            // erase the segment cache because we will add a segment to the rowset
+            SegmentLoader::instance()->erase_segment(_cur_rowset->rowset_id());
+        }
     }
     Status res = _storage_engine->txn_manager()->commit_txn(_req.partition_id, _tablet, _req.txn_id,
                                                             _req.load_id, _cur_rowset, false);
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 0fac81e65e..d339b324a7 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -330,7 +330,10 @@ void SegmentWriter::_serialize_block_to_row_column(vectorized::Block& block) {
 Status SegmentWriter::append_block_with_partial_content(const vectorized::Block* block,
                                                         size_t row_pos, size_t num_rows) {
     CHECK(block->columns() > _tablet_schema->num_key_columns() &&
-          block->columns() < _tablet_schema->num_columns());
+          block->columns() < _tablet_schema->num_columns())
+            << "block columns: " << block->columns()
+            << ", num key columns: " << _tablet_schema->num_key_columns()
+            << ", total schema columns: " << _tablet_schema->num_columns();
     CHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write);
 
     // find missing column cids
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 1455d2d014..2d49643235 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -1902,6 +1902,28 @@ Status Tablet::create_rowset_writer(RowsetWriterContext& context,
     return RowsetFactory::create_rowset_writer(context, false, rowset_writer);
 }
 
+// Create a transient rowset writer that reuses rowset_ptr's rowset id and starts numbering new
+// segments after its existing ones; callers merge the resulting rowset back into rowset_ptr.
+Status Tablet::create_transient_rowset_writer(RowsetSharedPtr rowset_ptr,
+                                              std::unique_ptr<RowsetWriter>* rowset_writer) {
+    RowsetWriterContext context;
+    context.rowset_state = PREPARED;
+    context.segments_overlap = OVERLAPPING;
+    context.tablet_schema = std::make_shared<TabletSchema>();
+    context.tablet_schema->copy_from(*(rowset_ptr->tablet_schema()));
+    context.tablet_schema->set_partial_update_info(false, std::set<std::string>());
+    context.newest_write_timestamp = UnixSeconds();
+    context.tablet_id = tablet_id();
+    // ATTN: context.tablet is a shared_ptr, can't simply set its value to `this`. We should
+    // get the shared_ptr from tablet_manager.
+    context.tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id());
+    context.write_type = DataWriteType::TYPE_DIRECT;
+    RETURN_IF_ERROR(
+            create_transient_rowset_writer(context, rowset_ptr->rowset_id(), rowset_writer));
+    (*rowset_writer)->set_segment_start_id(rowset_ptr->num_segments());
+    return Status::OK();
+}
+
 Status Tablet::create_transient_rowset_writer(RowsetWriterContext& context,
                                               const RowsetId& rowset_id,
                                               std::unique_ptr<RowsetWriter>* rowset_writer) {
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index 1f55b503b8..724ab81243 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -328,6 +328,9 @@ public:
 
     Status create_rowset_writer(RowsetWriterContext& context,
                                 std::unique_ptr<RowsetWriter>* rowset_writer);
+
+    Status create_transient_rowset_writer(RowsetSharedPtr rowset_ptr,
+                                          std::unique_ptr<RowsetWriter>* rowset_writer);
     Status create_transient_rowset_writer(RowsetWriterContext& context, const RowsetId& rowset_id,
                                           std::unique_ptr<RowsetWriter>* rowset_writer);
 
diff --git a/be/src/olap/txn_manager.cpp b/be/src/olap/txn_manager.cpp
index ad799868aa..ebcb287389 100644
--- a/be/src/olap/txn_manager.cpp
+++ b/be/src/olap/txn_manager.cpp
@@ -372,7 +372,7 @@ Status TxnManager::publish_txn(OlapMeta* meta, TPartitionId partition_id,
     // update delete_bitmap
     if (tablet_txn_info.unique_key_merge_on_write) {
         std::unique_ptr<RowsetWriter> rowset_writer;
-        _create_transient_rowset_writer(tablet, rowset, &rowset_writer);
+        RETURN_IF_ERROR(tablet->create_transient_rowset_writer(rowset, &rowset_writer));
 
         int64_t t2 = MonotonicMicros();
         RETURN_IF_ERROR(tablet->update_delete_bitmap(rowset, tablet_txn_info.rowset_ids,
@@ -450,27 +450,6 @@ Status TxnManager::publish_txn(OlapMeta* meta, TPartitionId partition_id,
     return status;
 }
 
-// create a rowset writer with rowset_id and seg_id
-// after writer, merge this transient rowset with original rowset
-Status TxnManager::_create_transient_rowset_writer(std::shared_ptr<Tablet> tablet,
-                                                   RowsetSharedPtr rowset_ptr,
-                                                   std::unique_ptr<RowsetWriter>* rowset_writer) {
-    RowsetWriterContext context;
-    context.rowset_state = PREPARED;
-    context.segments_overlap = OVERLAPPING;
-    context.tablet_schema = std::make_shared<TabletSchema>();
-    context.tablet_schema->copy_from(*(rowset_ptr->tablet_schema()));
-    context.tablet_schema->set_partial_update_info(false, std::set<std::string>());
-    context.newest_write_timestamp = UnixSeconds();
-    context.tablet_id = tablet->table_id();
-    context.tablet = tablet;
-    context.write_type = DataWriteType::TYPE_DIRECT;
-    RETURN_IF_ERROR(tablet->create_transient_rowset_writer(context, rowset_ptr->rowset_id(),
-                                                           rowset_writer));
-    (*rowset_writer)->set_segment_start_id(rowset_ptr->num_segments());
-    return Status::OK();
-}
-
 // txn could be rollbacked if it does not have related rowset
 // if the txn has related rowset then could not rollback it, because it
 // may be committed in another thread and our current thread meets errors when writing to data file
diff --git a/be/src/olap/txn_manager.h b/be/src/olap/txn_manager.h
index 36be3b03f5..fcebe2d9d1 100644
--- a/be/src/olap/txn_manager.h
+++ b/be/src/olap/txn_manager.h
@@ -214,10 +214,6 @@ private:
     void _insert_txn_partition_map_unlocked(int64_t transaction_id, int64_t partition_id);
     void _clear_txn_partition_map_unlocked(int64_t transaction_id, int64_t partition_id);
 
-    Status _create_transient_rowset_writer(std::shared_ptr<Tablet> tablet,
-                                           RowsetSharedPtr rowset_ptr,
-                                           std::unique_ptr<RowsetWriter>* rowset_writer);
-
 private:
     const int32_t _txn_map_shard_size;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org