You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by mp...@apache.org on 2017/09/08 00:10:21 UTC

kudu git commit: KUDU-2134: Defer block transaction commit to the end of a tablet copy

Repository: kudu
Updated Branches:
  refs/heads/master cd6eee7a3 -> c85ac94b1


KUDU-2134: Defer block transaction commit to the end of a tablet copy

KUDU-2131 uncovered a regression that causes a tablet copy session
to expire reliably if the copied tablet is large and the tserver
already has a high number of containers. Even though the issue is
mitigated by LIFO container selection, we can completely avoid it by
deferring the block transaction commit to the end of tablet copy
session. There are mainly two reasons we may still want to do so:
 1) If DownloadWALs fails there's no reason to pay the fsync() price
    and commit all those blocks.
 2) While DownloadWALs is running the kernel has more time to eagerly
    flush the blocks, so the fsync()s at the end could be cheaper.

This patch moves the block transaction commit out from FetchAll() and
commits all downloaded blocks before replacing the superblock.

Change-Id: I4feabc08c2199aad8d08be56f09ac06924345f2b
Reviewed-on: http://gerrit.cloudera.org:8080/7966
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Reviewed-by: Dan Burkert <da...@apache.org>
Tested-by: Kudu Jenkins
Reviewed-by: Mike Percy <mp...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/c85ac94b
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/c85ac94b
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/c85ac94b

Branch: refs/heads/master
Commit: c85ac94b1c91cbe58b0bbfa4147681087eb98837
Parents: cd6eee7
Author: hahao <ha...@cloudera.com>
Authored: Thu Sep 7 14:36:03 2017 -0700
Committer: Mike Percy <mp...@apache.org>
Committed: Fri Sep 8 00:10:02 2017 +0000

----------------------------------------------------------------------
 src/kudu/tserver/tablet_copy_client-test.cc | 11 ++++----
 src/kudu/tserver/tablet_copy_client.cc      | 32 +++++++++++++----------
 src/kudu/tserver/tablet_copy_client.h       | 33 ++++++++++++------------
 3 files changed, 40 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/c85ac94b/src/kudu/tserver/tablet_copy_client-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/tablet_copy_client-test.cc b/src/kudu/tserver/tablet_copy_client-test.cc
index a22d33d..646e263 100644
--- a/src/kudu/tserver/tablet_copy_client-test.cc
+++ b/src/kudu/tserver/tablet_copy_client-test.cc
@@ -69,8 +69,6 @@ using std::vector;
 DECLARE_string(block_manager);
 
 METRIC_DECLARE_counter(block_manager_total_disk_sync);
-METRIC_DECLARE_counter(block_manager_total_writable_blocks);
-METRIC_DECLARE_gauge_uint64(log_block_manager_containers);
 
 namespace kudu {
 namespace tserver {
@@ -178,9 +176,8 @@ TEST_F(TabletCopyClientTest, TestDownloadBlock) {
 
   // Check that the client downloaded the block and verification passed.
   BlockId new_block_id;
-  fs::BlockTransaction transaction;
-  ASSERT_OK(client_->DownloadBlock(block_id, &new_block_id, &transaction));
-  ASSERT_OK(transaction.CommitCreatedBlocks());
+  ASSERT_OK(client_->DownloadBlock(block_id, &new_block_id));
+  ASSERT_OK(client_->transaction_.CommitCreatedBlocks());
 
   // Ensure it placed the block where we expected it to.
   ASSERT_OK(ReadLocalBlockFile(fs_manager_.get(), new_block_id, &scratch, &slice));
@@ -243,8 +240,9 @@ TEST_F(TabletCopyClientTest, TestVerifyData) {
 }
 
 TEST_F(TabletCopyClientTest, TestDownloadAllBlocks) {
-  // Download all the blocks.
+  // Download and commit all the blocks.
   ASSERT_OK(client_->DownloadBlocks());
+  ASSERT_OK(client_->transaction_.CommitCreatedBlocks());
 
   // Verify the disk synchronization count.
   if (FLAGS_block_manager == "log") {
@@ -337,6 +335,7 @@ TEST_P(TabletCopyClientAbortTest, TestAbort) {
   int num_blocks_downloaded = 0;
   if (download_blocks == kDownloadBlocks) {
     ASSERT_OK(client_->DownloadBlocks());
+    ASSERT_OK(client_->transaction_.CommitCreatedBlocks());
     num_blocks_downloaded = num_remote_blocks;
   }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/c85ac94b/src/kudu/tserver/tablet_copy_client.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/tablet_copy_client.cc b/src/kudu/tserver/tablet_copy_client.cc
index 506a840..3110d71 100644
--- a/src/kudu/tserver/tablet_copy_client.cc
+++ b/src/kudu/tserver/tablet_copy_client.cc
@@ -110,7 +110,6 @@ using consensus::ConsensusMetadataManager;
 using consensus::MakeOpId;
 using consensus::OpId;
 using env_util::CopyFile;
-using fs::BlockTransaction;
 using fs::CreateBlockOptions;
 using fs::WritableBlock;
 using rpc::Messenger;
@@ -369,6 +368,14 @@ Status TabletCopyClient::FetchAll(const scoped_refptr<TabletReplica>& tablet_rep
 Status TabletCopyClient::Finish() {
   CHECK(meta_);
   CHECK_EQ(kStarted, state_);
+
+  // Defer the closures of all downloaded blocks to here, but before superblock
+  // replacement for the following reasons:
+  //  1) If DownloadWALs() fails there's no reason to commit all those blocks and do sync().
+  //  2) While DownloadWALs() is running the kernel has more time to eagerly flush the blocks,
+  //     so the fsync() operations could be cheaper.
+  //  3) Downloaded blocks should be made durable before replacing superblock.
+  RETURN_NOT_OK(transaction_.CommitCreatedBlocks());
   state_ = kFinished;
 
   // Replace tablet metadata superblock. This will set the tablet metadata state
@@ -508,7 +515,6 @@ Status TabletCopyClient::DownloadBlocks() {
   // as each block downloads.
   int block_count = 0;
   LOG_WITH_PREFIX(INFO) << "Starting download of " << num_remote_blocks << " data blocks...";
-  BlockTransaction transaction;
   for (const RowSetDataPB& src_rowset : remote_superblock_->rowsets()) {
     // Create rowset.
     RowSetDataPB* dst_rowset = superblock_->add_rowsets();
@@ -529,7 +535,7 @@ Status TabletCopyClient::DownloadBlocks() {
     for (const ColumnDataPB& src_col : src_rowset.columns()) {
       BlockIdPB new_block_id;
       RETURN_NOT_OK(DownloadAndRewriteBlock(src_col.block(), num_remote_blocks,
-                                            &block_count, &new_block_id, &transaction));
+                                            &block_count, &new_block_id));
       ColumnDataPB* dst_col = dst_rowset->add_columns();
       *dst_col = src_col;
       *dst_col->mutable_block() = new_block_id;
@@ -537,7 +543,7 @@ Status TabletCopyClient::DownloadBlocks() {
     for (const DeltaDataPB& src_redo : src_rowset.redo_deltas()) {
       BlockIdPB new_block_id;
       RETURN_NOT_OK(DownloadAndRewriteBlock(src_redo.block(), num_remote_blocks,
-                                            &block_count, &new_block_id, &transaction));
+                                            &block_count, &new_block_id));
       DeltaDataPB* dst_redo = dst_rowset->add_redo_deltas();
       *dst_redo = src_redo;
       *dst_redo->mutable_block() = new_block_id;
@@ -545,7 +551,7 @@ Status TabletCopyClient::DownloadBlocks() {
     for (const DeltaDataPB& src_undo : src_rowset.undo_deltas()) {
       BlockIdPB new_block_id;
       RETURN_NOT_OK(DownloadAndRewriteBlock(src_undo.block(), num_remote_blocks,
-                                            &block_count, &new_block_id, &transaction));
+                                            &block_count, &new_block_id));
       DeltaDataPB* dst_undo = dst_rowset->add_undo_deltas();
       *dst_undo = src_undo;
       *dst_undo->mutable_block() = new_block_id;
@@ -553,18 +559,18 @@ Status TabletCopyClient::DownloadBlocks() {
     if (src_rowset.has_bloom_block()) {
       BlockIdPB new_block_id;
       RETURN_NOT_OK(DownloadAndRewriteBlock(src_rowset.bloom_block(), num_remote_blocks,
-                                            &block_count, &new_block_id, &transaction));
+                                            &block_count, &new_block_id));
       *dst_rowset->mutable_bloom_block() = new_block_id;
     }
     if (src_rowset.has_adhoc_index_block()) {
       BlockIdPB new_block_id;
       RETURN_NOT_OK(DownloadAndRewriteBlock(src_rowset.adhoc_index_block(), num_remote_blocks,
-                                            &block_count, &new_block_id, &transaction));
+                                            &block_count, &new_block_id));
       *dst_rowset->mutable_adhoc_index_block() = new_block_id;
     }
   }
 
-  return transaction.CommitCreatedBlocks();
+  return Status::OK();
 }
 
 Status TabletCopyClient::DownloadWAL(uint64_t wal_segment_seqno) {
@@ -614,14 +620,13 @@ Status TabletCopyClient::WriteConsensusMetadata() {
 Status TabletCopyClient::DownloadAndRewriteBlock(const BlockIdPB& src_block_id,
                                                  int num_blocks,
                                                  int* block_count,
-                                                 BlockIdPB* dest_block_id,
-                                                 BlockTransaction* transaction) {
+                                                 BlockIdPB* dest_block_id) {
   BlockId old_block_id(BlockId::FromPB(src_block_id));
   SetStatusMessage(Substitute("Downloading block $0 ($1/$2)",
                               old_block_id.ToString(),
                               *block_count + 1, num_blocks));
   BlockId new_block_id;
-  RETURN_NOT_OK_PREPEND(DownloadBlock(old_block_id, &new_block_id, transaction),
+  RETURN_NOT_OK_PREPEND(DownloadBlock(old_block_id, &new_block_id),
       "Unable to download block with id " + old_block_id.ToString());
 
   new_block_id.CopyToPB(dest_block_id);
@@ -630,8 +635,7 @@ Status TabletCopyClient::DownloadAndRewriteBlock(const BlockIdPB& src_block_id,
 }
 
 Status TabletCopyClient::DownloadBlock(const BlockId& old_block_id,
-                                       BlockId* new_block_id,
-                                       BlockTransaction* transaction) {
+                                       BlockId* new_block_id) {
   VLOG_WITH_PREFIX(1) << "Downloading block with block_id " << old_block_id.ToString();
 
   unique_ptr<WritableBlock> block;
@@ -647,7 +651,7 @@ Status TabletCopyClient::DownloadBlock(const BlockId& old_block_id,
 
   *new_block_id = block->id();
   RETURN_NOT_OK_PREPEND(block->Finalize(), "Unable to finalize block");
-  transaction->AddCreatedBlock(std::move(block));
+  transaction_.AddCreatedBlock(std::move(block));
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/c85ac94b/src/kudu/tserver/tablet_copy_client.h
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/tablet_copy_client.h b/src/kudu/tserver/tablet_copy_client.h
index aba7c98..bdbea1e 100644
--- a/src/kudu/tserver/tablet_copy_client.h
+++ b/src/kudu/tserver/tablet_copy_client.h
@@ -24,9 +24,10 @@
 
 #include <gtest/gtest_prod.h>
 
+#include "kudu/fs/block_manager.h"
+#include "kudu/gutil/integral_types.h"
 #include "kudu/gutil/macros.h"
 #include "kudu/gutil/ref_counted.h"
-#include "kudu/gutil/integral_types.h"
 #include "kudu/util/metrics.h"
 #include "kudu/util/status.h"
 
@@ -43,10 +44,6 @@ class ConsensusMetadataManager;
 class ConsensusStatePB;
 } // namespace consensus
 
-namespace fs {
-class BlockTransaction;
-} // namespace fs
-
 namespace rpc {
 class Messenger;
 class RpcController;
@@ -115,11 +112,13 @@ class TabletCopyClient {
 
   // Runs a "full" tablet copy, copying the physical layout of a tablet
   // from the leader of the specified consensus configuration.
+  // Adds all downloaded blocks during copying to the tablet copy's transaction,
+  // which will be closed together at Finish().
   Status FetchAll(const scoped_refptr<tablet::TabletReplica>& tablet_replica);
 
-  // After downloading all files successfully, write out the completed
-  // replacement superblock. Must be called after Start() and FetchAll().
-  // Must not be called after Abort().
+  // After downloading all files successfully, commit all downloaded blocks.
+  // Write out the completed replacement superblock.
+  // Must be called after Start() and FetchAll(). Must not be called after Abort().
   Status Finish();
 
   // Abort an in-progress transfer and immediately delete the data blocks and
@@ -165,7 +164,8 @@ class TabletCopyClient {
   // Count the number of blocks on the remote (from 'remote_superblock_').
   int CountRemoteBlocks() const;
 
-  // Download all blocks belonging to a tablet sequentially.
+  // Download all blocks belonging to a tablet sequentially. Add all
+  // downloaded blocks to the tablet copy's transaction.
   //
   // Blocks are given new IDs upon creation. On success, 'superblock_'
   // is populated to reflect the new block IDs.
@@ -173,8 +173,8 @@ class TabletCopyClient {
 
   // Download the remote block specified by 'src_block_id'. 'num_blocks' should
   // be given as the total number of blocks there are to download (for logging
-  // purposes). Add the block to the given transaction, to close blocks belonging
-  // to a transaction together when the copying is complete.
+  // purposes). Add the block to the tablet copy's transaction, to close blocks
+  // belonging to the transaction together when the copying is complete.
   //
   // On success:
   // - 'dest_block_id' is set to the new ID of the downloaded block.
@@ -182,17 +182,15 @@ class TabletCopyClient {
   Status DownloadAndRewriteBlock(const BlockIdPB& src_block_id,
                                  int num_blocks,
                                  int* block_count,
-                                 BlockIdPB* dest_block_id,
-                                 fs::BlockTransaction* transaction);
+                                 BlockIdPB* dest_block_id);
 
   // Download a single block.
   // Data block is opened with new ID. After downloading, the block is finalized
-  // and added to the given transaction.
+  // and added to the tablet copy's transaction.
   //
   // On success, 'new_block_id' is set to the new ID of the downloaded block.
   Status DownloadBlock(const BlockId& old_block_id,
-                       BlockId* new_block_id,
-                       fs::BlockTransaction* transaction);
+                       BlockId* new_block_id);
 
   // Download a single remote file. The block and WAL implementations delegate
   // to this method when downloading files.
@@ -241,6 +239,9 @@ class TabletCopyClient {
 
   TabletCopyClientMetrics* tablet_copy_metrics_;
 
+  // Block transaction for the tablet copy.
+  fs::BlockTransaction transaction_;
+
   DISALLOW_COPY_AND_ASSIGN(TabletCopyClient);
 };