You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2020/03/09 07:06:53 UTC

[kudu] branch master updated: [tserver] report the newly bootstrapped tablet after OpenTablet completes

This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new fb0411f  [tserver] report the newly bootstrapped tablet after OpenTablet completes
fb0411f is described below

commit fb0411f1d0810e896f18ebb92f7547e630bb12c0
Author: zhangyifan27 <ch...@163.com>
AuthorDate: Wed Mar 4 22:54:43 2020 +0800

    [tserver] report the newly bootstrapped tablet after OpenTablet completes
    
    A tablet may be deleted after starting successfully, but it couldn't be
    deleted if not deleted from TransitionInProgressMap.
    
    This patch moves one of MarkDirty calls from TabletReplica::Start,
    and the report of tablet state change from BOOTSTRAPPING to RUNNING
    is postponed until open tablet completes and TransitionInProgressDeleter
    destroyed.
    
    This patch also adds a regression test which would not pass without the fix.
    
    Change-Id: I591dc8daceb81f9e800098be77c928d391cdc00a
    Reviewed-on: http://gerrit.cloudera.org:8080/15307
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
 .../integration-tests/ts_tablet_manager-itest.cc   | 68 ++++++++++++++++++++++
 src/kudu/tablet/tablet_replica.cc                  |  3 -
 src/kudu/tserver/ts_tablet_manager.cc              | 30 ++++++++--
 src/kudu/tserver/ts_tablet_manager.h               |  2 +
 4 files changed, 94 insertions(+), 9 deletions(-)

diff --git a/src/kudu/integration-tests/ts_tablet_manager-itest.cc b/src/kudu/integration-tests/ts_tablet_manager-itest.cc
index d77ad1b..f4be43d 100644
--- a/src/kudu/integration-tests/ts_tablet_manager-itest.cc
+++ b/src/kudu/integration-tests/ts_tablet_manager-itest.cc
@@ -19,6 +19,7 @@
 
 #include <cstdlib>
 #include <functional>
+#include <limits>
 #include <map>
 #include <memory>
 #include <ostream>
@@ -37,6 +38,8 @@
 #include "kudu/client/shared_ptr.h"
 #include "kudu/client/write_op.h"
 #include "kudu/common/partial_row.h"
+#include "kudu/common/wire_protocol.h"
+#include "kudu/common/wire_protocol.pb.h"
 #include "kudu/consensus/consensus.pb.h"
 #include "kudu/consensus/metadata.pb.h"
 #include "kudu/consensus/quorum_util.h"
@@ -87,6 +90,7 @@ DECLARE_double(leader_failure_max_missed_heartbeat_periods);
 DECLARE_int32(heartbeat_interval_ms);
 DECLARE_int32(metrics_retirement_age_ms);
 DECLARE_int32(raft_heartbeat_interval_ms);
+DECLARE_int32(tablet_open_inject_latency_ms);
 DEFINE_int32(num_election_test_loops, 3,
              "Number of random EmulateElectionForTests() loops to execute in "
              "TestReportNewLeaderOnLeaderChange");
@@ -911,5 +915,69 @@ TEST_F(TsTabletManagerITest, TestTableStats) {
   }
 }
 
+TEST_F(TsTabletManagerITest, TestDeleteTableDuringTabletCopy) {
+  MonoDelta kTimeout = MonoDelta::FromSeconds(30);
+  const int kNumTabletServers = 3;
+  const int kNumReplicas = 3;
+  {
+    InternalMiniClusterOptions opts;
+    opts.num_tablet_servers = kNumTabletServers;
+    NO_FATALS(StartCluster(std::move(opts)));
+  }
+  string tablet_id;
+  client::sp::shared_ptr<KuduTable> table;
+  itest::TabletServerMap ts_map;
+  ASSERT_OK(CreateTabletServerMap(cluster_->master_proxy(), client_messenger_, &ts_map));
+  ValueDeleter deleter(&ts_map);
+
+  auto tserver_tablets_count = [&](int index) {
+    return cluster_->mini_tablet_server(index)->ListTablets().size();
+  };
+
+  // Inject latency to test whether we could delete copying tablet.
+  FLAGS_tablet_open_inject_latency_ms = 50;
+
+  // Create a table with one tablet.
+  unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
+  ASSERT_OK(table_creator->table_name(kTableName)
+      .schema(&schema_)
+      .set_range_partition_columns({})
+      .num_replicas(kNumReplicas)
+      .Create());
+  ASSERT_EVENTUALLY([&] {
+    for (int i = 0; i < kNumTabletServers; ++i) {
+      ASSERT_EQ(1, tserver_tablets_count(i));
+    }
+  });
+  tablet_id = cluster_->mini_tablet_server(0)->ListTablets()[0];
+  ASSERT_OK(client_->OpenTable(kTableName, &table));
+
+  // Get the leader and follower tablet servers.
+  itest::TServerDetails* leader_ts;
+  ASSERT_OK(FindTabletLeader(ts_map, tablet_id, kTimeout, &leader_ts));
+  int leader_index = cluster_->tablet_server_index_by_uuid(leader_ts->uuid());
+  int follower_index = (leader_index + 1) % kNumTabletServers;
+  MiniTabletServer* follower = cluster_->mini_tablet_server(follower_index);
+  itest::TServerDetails* follower_ts = ts_map[follower->uuid()];
+
+  // Tombstone the tablet on the follower.
+  ASSERT_OK(itest::DeleteTabletWithRetries(follower_ts, tablet_id,
+                                           tablet::TabletDataState::TABLET_DATA_TOMBSTONED,
+                                           kTimeout));
+  // Copy tablet from leader_ts to follower_ts.
+  HostPort leader_addr;
+  ASSERT_OK(HostPortFromPB(leader_ts->registration.rpc_addresses(0), &leader_addr));
+  ASSERT_OK(itest::StartTabletCopy(follower_ts, tablet_id, leader_ts->uuid(),
+                                   leader_addr, std::numeric_limits<int64_t>::max(), kTimeout));
+
+  // Delete table during tablet copying.
+  ASSERT_OK(itest::DeleteTable(cluster_->master_proxy(), table->id(), kTableName, kTimeout));
+
+  // Finally all tablets deleted.
+  ASSERT_EVENTUALLY([&] {
+    ASSERT_EQ(0, tserver_tablets_count(follower_index));
+  });
+}
+
 } // namespace tserver
 } // namespace kudu
diff --git a/src/kudu/tablet/tablet_replica.cc b/src/kudu/tablet/tablet_replica.cc
index bdc3695..41b4324 100644
--- a/src/kudu/tablet/tablet_replica.cc
+++ b/src/kudu/tablet/tablet_replica.cc
@@ -256,9 +256,6 @@ Status TabletReplica::Start(const ConsensusBootstrapInfo& bootstrap_info,
     set_state(RUNNING);
   }
 
-  // Because we changed the tablet state, we need to re-report the tablet to the master.
-  mark_dirty_clbk_.Run("Started TabletReplica");
-
   return Status::OK();
 }
 
diff --git a/src/kudu/tserver/ts_tablet_manager.cc b/src/kudu/tserver/ts_tablet_manager.cc
index b614d7e..9497e36 100644
--- a/src/kudu/tserver/ts_tablet_manager.cc
+++ b/src/kudu/tserver/ts_tablet_manager.cc
@@ -141,6 +141,10 @@ DEFINE_int32(tablet_bootstrap_inject_latency_ms, 0,
              "For use in tests only.");
 TAG_FLAG(tablet_bootstrap_inject_latency_ms, unsafe);
 
+DEFINE_int32(tablet_open_inject_latency_ms, 0,
+            "Injects latency into the tablet opening. For use in tests only.");
+TAG_FLAG(tablet_open_inject_latency_ms, unsafe);
+
 DECLARE_bool(raft_prepare_replacement_before_eviction);
 
 METRIC_DEFINE_gauge_int32(server, tablets_num_not_initialized,
@@ -1075,11 +1079,8 @@ Status TSTabletManager::OpenTabletMeta(const string& tablet_id,
   return Status::OK();
 }
 
-// Note: 'deleter' is not used in the body of OpenTablet(), but is required
-// anyway because its destructor performs cleanup that should only happen when
-// OpenTablet() completes.
 void TSTabletManager::OpenTablet(const scoped_refptr<TabletReplica>& replica,
-                                 const scoped_refptr<TransitionInProgressDeleter>& /*deleter*/) {
+                                 const scoped_refptr<TransitionInProgressDeleter>& deleter) {
   const string& tablet_id = replica->tablet_id();
   TRACE_EVENT1("tserver", "TSTabletManager::OpenTablet",
                "tablet_id", tablet_id);
@@ -1158,6 +1159,12 @@ void TSTabletManager::OpenTablet(const scoped_refptr<TabletReplica>& replica,
     replica->RegisterMaintenanceOps(server_->maintenance_manager());
   }
 
+  if (PREDICT_FALSE(FLAGS_tablet_open_inject_latency_ms > 0)) {
+    LOG(WARNING) << "Injecting " << FLAGS_tablet_open_inject_latency_ms
+                 << "ms of latency into OpenTablet";
+    SleepFor(MonoDelta::FromMilliseconds(FLAGS_tablet_open_inject_latency_ms));
+  }
+
   // Now that the tablet has successfully opened, cancel the cleanup.
   fail_tablet.cancel();
 
@@ -1169,6 +1176,9 @@ void TSTabletManager::OpenTablet(const scoped_refptr<TabletReplica>& replica,
                    << Trace::CurrentTrace()->DumpToString();
     }
   }
+
+  deleter->Destroy();
+  MarkTabletDirty(tablet_id, "Open tablet completed");
 }
 
 void TSTabletManager::Shutdown() {
@@ -1639,11 +1649,19 @@ void TSTabletManager::SetNextUpdateTimeForTests() {
 
 TransitionInProgressDeleter::TransitionInProgressDeleter(
     TransitionInProgressMap* map, RWMutex* lock, string entry)
-    : in_progress_(map), lock_(lock), entry_(std::move(entry)) {}
+    : in_progress_(map), lock_(lock), entry_(std::move(entry)), is_destroyed_(false) {}
 
-TransitionInProgressDeleter::~TransitionInProgressDeleter() {
+void TransitionInProgressDeleter::Destroy() {
+  CHECK(!is_destroyed_);
   std::lock_guard<RWMutex> lock(*lock_);
   CHECK(in_progress_->erase(entry_));
+  is_destroyed_ = true;
+}
+
+TransitionInProgressDeleter::~TransitionInProgressDeleter() {
+  if (!is_destroyed_) {
+    Destroy();
+  }
 }
 
 } // namespace tserver
diff --git a/src/kudu/tserver/ts_tablet_manager.h b/src/kudu/tserver/ts_tablet_manager.h
index 01f3518..21f94a1 100644
--- a/src/kudu/tserver/ts_tablet_manager.h
+++ b/src/kudu/tserver/ts_tablet_manager.h
@@ -415,6 +415,7 @@ class TransitionInProgressDeleter : public RefCountedThreadSafe<TransitionInProg
  public:
   TransitionInProgressDeleter(TransitionInProgressMap* map, RWMutex* lock,
                               std::string entry);
+  void Destroy();
 
  private:
   friend class RefCountedThreadSafe<TransitionInProgressDeleter>;
@@ -423,6 +424,7 @@ class TransitionInProgressDeleter : public RefCountedThreadSafe<TransitionInProg
   TransitionInProgressMap* const in_progress_;
   RWMutex* const lock_;
   const std::string entry_;
+  bool is_destroyed_;
 };
 
 } // namespace tserver