You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2016/04/28 08:40:52 UTC

incubator-kudu git commit: master: when replacing tablets, don't expose mutated state too early

Repository: incubator-kudu
Updated Branches:
  refs/heads/master c2aed1b7c -> 59ff89dbc


master: when replacing tablets, don't expose mutated state too early

This patch plugs the last "expose new in-memory state before replication
succeeds" hole, which happens when CatalogManagerBgTasks decides to replace
a tablet. A side benefit of addressing it is the removal of more
crufty-looking abort code.

Speaking of, can you spot the bug in that crufty-looking abort code? Here it
is: IsCreateTableDone() can return true for tables whose tablets were never
assigned (because all of the tservers are dead, say). It can happen because
there's a brief window of time during which a table has all of its tablets
removed, at which point the answer to the question "are all of this table's
tablets RUNNING?" is "yes".

Change-Id: I00dcd0dae925fe8258eb2d160226e07b3a3b0f05
Reviewed-on: http://gerrit.cloudera.org:8080/2879
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <to...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/59ff89db
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/59ff89db
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/59ff89db

Branch: refs/heads/master
Commit: 59ff89dbcb21700fc50bca03b7cbf932c5fa3898
Parents: c2aed1b
Author: Adar Dembo <ad...@cloudera.com>
Authored: Mon Apr 25 19:00:10 2016 -0700
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Thu Apr 28 06:40:08 2016 +0000

----------------------------------------------------------------------
 .../integration-tests/create-table-itest.cc     |  4 ++
 src/kudu/master/catalog_manager-test.cc         |  3 +-
 src/kudu/master/catalog_manager.cc              | 59 +++++++++-----------
 3 files changed, 32 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/59ff89db/src/kudu/integration-tests/create-table-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/create-table-itest.cc b/src/kudu/integration-tests/create-table-itest.cc
index 2879bf9..32e6090 100644
--- a/src/kudu/integration-tests/create-table-itest.cc
+++ b/src/kudu/integration-tests/create-table-itest.cc
@@ -261,8 +261,12 @@ TEST_F(CreateTableITest, TestCreateTableWithDeadTServers) {
   Schema schema(GetSimpleTestSchema());
   client::KuduSchema client_schema(client::KuduSchemaFromSchema(schema));
   gscoped_ptr<client::KuduTableCreator> table_creator(client_->NewTableCreator());
+
+  // Don't bother waiting for table creation to finish; it'll never happen
+  // because all of the tservers are dead.
   CHECK_OK(table_creator->table_name(kTableName)
            .schema(&client_schema)
+           .wait(false)
            .Create());
 
   // Spin off a bunch of threads that repeatedly look up random key ranges in the table.

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/59ff89db/src/kudu/master/catalog_manager-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager-test.cc b/src/kudu/master/catalog_manager-test.cc
index bd48309..0c7fb8a 100644
--- a/src/kudu/master/catalog_manager-test.cc
+++ b/src/kudu/master/catalog_manager-test.cc
@@ -50,9 +50,8 @@ TEST(TableInfoTest, TestAssignmentRanges) {
     partition->set_partition_key_start(start_key);
     partition->set_partition_key_end(end_key);
     meta_lock.mutable_data()->pb.set_state(SysTabletsEntryPB::RUNNING);
-
-    table->AddTablet(tablet);
     meta_lock.Commit();
+    table->AddTablet(tablet);
     tablets.push_back(make_scoped_refptr(tablet));
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/59ff89db/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index 006bf63..f965d7d 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -251,11 +251,12 @@ class TabletLoader : public TabletVisitor {
     // Add the tablet to the tablet manager.
     catalog_manager_->tablet_map_[tablet->tablet_id()] = tablet;
 
-    // Add the tablet to the Tablet
-    if (!l.mutable_data()->is_deleted()) {
+    // Add the tablet to the Tablet.
+    bool is_deleted = l.mutable_data()->is_deleted();
+    l.Commit();
+    if (!is_deleted) {
       table->AddTablet(tablet);
     }
-    l.Commit();
 
     LOG(INFO) << "Loaded metadata for tablet " << tablet_id
               << " (table " << table->ToString() << ")";
@@ -449,12 +450,17 @@ class ScopedTabletInfoCommitter {
     aborted_ = true;
   }
 
-  // Release all write locks, committing any mutated tablet data.
   ~ScopedTabletInfoCommitter() {
+    Commit();
+  }
+
+  // Release all write locks, committing any mutated tablet data.
+  void Commit() {
     if (PREDICT_TRUE(!aborted_ && state_ == LOCKED)) {
       for (const auto& t : tablets_) {
         t->mutable_metadata()->CommitMutation();
       }
+      state_ = UNLOCKED;
     }
   }
 
@@ -870,7 +876,6 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
     tablets.push_back(t.get());
     tablet_refs.emplace_back(std::move(t));
   }
-  table->AddTablets(tablets);
   TRACE("Created new table and tablet info");
 
   // NOTE: the table and tablets are already locked for write at this point,
@@ -903,6 +908,7 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
   for (TabletInfo *tablet : tablets) {
     tablet->mutable_metadata()->CommitMutation();
   }
+  table->AddTablets(tablets);
 
   // g. Make the new table and tablets visible in the catalog.
   {
@@ -2111,7 +2117,7 @@ class AsyncCreateReplica : public RetrySpecificTSRpcTask {
     deadline_.AddDelta(MonoDelta::FromMilliseconds(FLAGS_tablet_creation_timeout_ms));
 
     TableMetadataLock table_lock(tablet->table().get(), TableMetadataLock::READ);
-    const SysTabletsEntryPB& tablet_pb = tablet->metadata().dirty().pb;
+    const SysTabletsEntryPB& tablet_pb = tablet->metadata().state().pb;
 
     req_.set_dest_uuid(permanent_uuid);
     req_.set_table_id(tablet->table()->id());
@@ -2632,12 +2638,6 @@ void CatalogManager::HandleAssignCreatingTablet(TabletInfo* tablet,
                << "the allowed timeout. Replacing with a new tablet "
                << replacement->tablet_id();
 
-  tablet->table()->AddTablet(replacement.get());
-  {
-    boost::lock_guard<LockType> l_maps(lock_);
-    tablet_map_[replacement->tablet_id()] = replacement;
-  }
-
   // Mark old tablet as replaced.
   tablet->mutable_metadata()->mutable_dirty()->set_state(
     SysTabletsEntryPB::REPLACED,
@@ -2778,32 +2778,27 @@ Status CatalogManager::ProcessPendingAssignments(
     LOG(WARNING) << "Aborting the current task due to error: " << s.ToString();
     // If there was an error, abort any mutations started by the
     // current task.
-    vector<string> tablet_ids_to_remove;
-    for (const auto& new_tablet : unlocker_out) {
-      TableInfo* table = new_tablet->table().get();
-      TableMetadataLock l_table(table, TableMetadataLock::READ);
-      if (table->RemoveTablet(
-          new_tablet->metadata().dirty().pb.partition().partition_key_start())) {
-        VLOG(1) << "Removed tablet " << new_tablet->tablet_id() << " from "
-            "table " << l_table.data().name();
-      }
-      tablet_ids_to_remove.push_back(new_tablet->tablet_id());
-    }
-    boost::lock_guard<LockType> l(lock_);
     unlocker_out.Abort();
     unlocker_in.Abort();
-    for (const string& tablet_id_to_remove : tablet_ids_to_remove) {
-      CHECK_EQ(tablet_map_.erase(tablet_id_to_remove), 1)
-          << "Unable to erase " << tablet_id_to_remove << " from tablet map.";
-    }
     return s;
   }
 
+  // Expose tablet metadata changes before the new tablets themselves.
+  unlocker_out.Commit();
+  unlocker_in.Commit();
+  {
+    boost::lock_guard<LockType> l(lock_);
+    for (const auto& new_tablet : unlocker_out) {
+      new_tablet->table()->AddTablet(new_tablet.get());
+      tablet_map_[new_tablet->tablet_id()] = new_tablet;
+    }
+  }
+
   // Send DeleteTablet requests to tablet servers serving deleted tablets.
   // This is asynchronous / non-blocking.
   for (TabletInfo* tablet : deferred.tablets_to_update) {
-    if (tablet->metadata().dirty().is_deleted()) {
-      SendDeleteTabletRequest(tablet, tablet->metadata().dirty().pb.state_msg());
+    if (tablet->metadata().state().is_deleted()) {
+      SendDeleteTabletRequest(tablet, tablet->metadata().state().pb.state_msg());
     }
   }
   // Send the CreateTablet() requests to the servers. This is asynchronous / non-blocking.
@@ -2849,7 +2844,7 @@ Status CatalogManager::SelectReplicasForTablet(const TSDescriptorVector& ts_desc
 void CatalogManager::SendCreateTabletRequests(const vector<TabletInfo*>& tablets) {
   for (TabletInfo *tablet : tablets) {
     const consensus::RaftConfigPB& config =
-        tablet->metadata().dirty().pb.committed_consensus_state().config();
+        tablet->metadata().state().pb.committed_consensus_state().config();
     tablet->set_last_update_time(MonoTime::Now(MonoTime::FINE));
     for (const RaftPeerPB& peer : config.peers()) {
       AsyncCreateReplica* task = new AsyncCreateReplica(master_, worker_pool_.get(),
@@ -3253,7 +3248,7 @@ void TableInfo::AddTablets(const vector<TabletInfo*>& tablets) {
 void TableInfo::AddTabletUnlocked(TabletInfo* tablet) {
   TabletInfo* old = nullptr;
   if (UpdateReturnCopy(&tablet_map_,
-                       tablet->metadata().dirty().pb.partition().partition_key_start(),
+                       tablet->metadata().state().pb.partition().partition_key_start(),
                        tablet, &old)) {
     VLOG(1) << "Replaced tablet " << old->tablet_id() << " with " << tablet->tablet_id();
     // TODO: can we assert that the replaced tablet is not in Running state?