You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2016/08/01 22:11:45 UTC

[8/9] kudu git commit: master: do not delete unknown tablets

master: do not delete unknown tablets

Quoting from docs/design-docs/multi-master-1.0.md:

"The master and/or tserver must enforce that all actions take effect
iff they were sent by the master that is currently the leader.

After an exhaustive audit of all master state changes (see appendix A), it
was determined that the current protection mechanisms built into each RPC
are sufficient to provide fencing. The one exception is orphaned replica
deletion done in response to a heartbeat. To protect against that, true
orphans (i.e. tablets for which no persistent record exists) will not be
deleted at all. As the master retains deleted table/tablet metadata in
perpetuity, this should ensure that true orphans appear only under drastic
circumstances, such as a tserver that heartbeats to the wrong cluster."

The new test isn't ideal in that it must wait some time to allow the tserver
to receive an RPC from the master, but on my laptop it does fail without the
fix, and it should fail fairly often in other machines/environments too.

Change-Id: I331f2d5bb06c38daa7b09854dbb24a7881723551
Reviewed-on: http://gerrit.cloudera.org:8080/3645
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <to...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/42b65b03
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/42b65b03
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/42b65b03

Branch: refs/heads/master
Commit: 42b65b039a66b0d9ee191c8228a2463966d3b282
Parents: d648141
Author: Adar Dembo <ad...@cloudera.com>
Authored: Wed Jul 13 10:18:42 2016 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Mon Aug 1 21:17:09 2016 +0000

----------------------------------------------------------------------
 .../integration-tests/create-table-itest.cc     |  1 -
 src/kudu/integration-tests/delete_table-test.cc | 51 +++++++++++++++++++-
 src/kudu/master/catalog_manager.cc              | 43 +++++++++++------
 3 files changed, 78 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/42b65b03/src/kudu/integration-tests/create-table-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/create-table-itest.cc b/src/kudu/integration-tests/create-table-itest.cc
index 73a67cd..2164a18 100644
--- a/src/kudu/integration-tests/create-table-itest.cc
+++ b/src/kudu/integration-tests/create-table-itest.cc
@@ -35,7 +35,6 @@ using std::vector;
 
 METRIC_DECLARE_entity(server);
 METRIC_DECLARE_histogram(handler_latency_kudu_tserver_TabletServerAdminService_CreateTablet);
-METRIC_DECLARE_histogram(handler_latency_kudu_tserver_TabletServerAdminService_DeleteTablet);
 
 namespace kudu {
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/42b65b03/src/kudu/integration-tests/delete_table-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/delete_table-test.cc b/src/kudu/integration-tests/delete_table-test.cc
index d1fcbc3..6a0d349 100644
--- a/src/kudu/integration-tests/delete_table-test.cc
+++ b/src/kudu/integration-tests/delete_table-test.cc
@@ -33,6 +33,7 @@
 #include "kudu/tablet/tablet.pb.h"
 #include "kudu/tserver/tserver.pb.h"
 #include "kudu/util/curl_util.h"
+#include "kudu/util/metrics.h"
 #include "kudu/util/subprocess.h"
 
 using kudu::client::KuduClient;
@@ -55,10 +56,14 @@ using kudu::tserver::ListTabletsResponsePB;
 using kudu::tserver::TabletServerErrorPB;
 using std::numeric_limits;
 using std::string;
+using std::unique_ptr;
 using std::unordered_map;
 using std::vector;
 using strings::Substitute;
 
+METRIC_DECLARE_entity(server);
+METRIC_DECLARE_histogram(handler_latency_kudu_tserver_TabletServerAdminService_DeleteTablet);
+
 namespace kudu {
 
 class DeleteTableTest : public ExternalMiniClusterITestBase {
@@ -853,8 +858,7 @@ int PrintOpenTabletFiles(pid_t pid, const string& tablet_id) {
 TEST_F(DeleteTableTest, TestFDsNotLeakedOnTabletTombstone) {
   const MonoDelta timeout = MonoDelta::FromSeconds(30);
 
-  vector<string> ts_flags, master_flags;
-  NO_FATALS(StartCluster(ts_flags, master_flags, 1));
+  NO_FATALS(StartCluster({}, {}, 1));
 
   // Create the table.
   TestWorkload workload(cluster_.get());
@@ -884,6 +888,49 @@ TEST_F(DeleteTableTest, TestFDsNotLeakedOnTabletTombstone) {
   ASSERT_EQ(0, PrintOpenTabletFiles(ets->pid(), tablet_id));
 }
 
+TEST_F(DeleteTableTest, TestUnknownTabletsAreNotDeleted) {
+  // Speed up heartbeating so that the unknown tablet is detected faster.
+  vector<string> extra_ts_flags = { "--heartbeat_interval_ms=10" };
+
+  NO_FATALS(StartCluster(extra_ts_flags, {}, 1));
+
+  Schema schema(GetSimpleTestSchema());
+  client::KuduSchema client_schema(client::KuduSchemaFromSchema(schema));
+  unique_ptr<KuduTableCreator> creator(client_->NewTableCreator());
+  ASSERT_OK(creator->table_name("test")
+      .schema(&client_schema)
+      .set_range_partition_columns({"key"})
+      .num_replicas(1)
+      .Create());
+
+  // Delete the master's metadata and start it back up. The tablet created
+  // above is now unknown, but should not be deleted!
+  cluster_->master()->Shutdown();
+  ASSERT_OK(env_->DeleteRecursively(cluster_->master()->data_dir()));
+  ASSERT_OK(cluster_->master()->Restart());
+  SleepFor(MonoDelta::FromSeconds(2));
+  int64_t num_delete_attempts;
+  ASSERT_OK(cluster_->tablet_server(0)->GetInt64Metric(
+      &METRIC_ENTITY_server, "kudu.tabletserver",
+      &METRIC_handler_latency_kudu_tserver_TabletServerAdminService_DeleteTablet,
+      "total_count", &num_delete_attempts));
+  ASSERT_EQ(0, num_delete_attempts);
+
+  // Now restart the master with orphan deletion enabled. The tablet should get
+  // deleted.
+  cluster_->master()->Shutdown();
+  cluster_->master()->mutable_flags()->push_back(
+      "--catalog_manager_delete_orphaned_tablets");
+  ASSERT_OK(cluster_->master()->Restart());
+  SleepFor(MonoDelta::FromSeconds(2));
+  ASSERT_OK(cluster_->tablet_server(0)->GetInt64Metric(
+      &METRIC_ENTITY_server, "kudu.tabletserver",
+      &METRIC_handler_latency_kudu_tserver_TabletServerAdminService_DeleteTablet,
+      "total_count", &num_delete_attempts));
+  ASSERT_EQ(1, num_delete_attempts);
+
+}
+
 // Parameterized test case for TABLET_DATA_DELETED deletions.
 class DeleteTableDeletedParamTest : public DeleteTableTest,
                                     public ::testing::WithParamInterface<const char*> {

http://git-wip-us.apache.org/repos/asf/kudu/blob/42b65b03/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index 9cd0364..7d58038 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -91,7 +91,7 @@
 #include "kudu/util/trace.h"
 
 DEFINE_int32(master_ts_rpc_timeout_ms, 30 * 1000, // 30 sec
-             "Timeout used for the Master->TS async rpc calls.");
+             "Timeout used for the master->TS async rpc calls.");
 TAG_FLAG(master_ts_rpc_timeout_ms, advanced);
 
 DEFINE_int32(tablet_creation_timeout_ms, 30 * 1000, // 30 sec
@@ -132,7 +132,7 @@ TAG_FLAG(master_failover_catchup_timeout_ms, advanced);
 TAG_FLAG(master_failover_catchup_timeout_ms, experimental);
 
 DEFINE_bool(master_tombstone_evicted_tablet_replicas, true,
-            "Whether the Master should tombstone (delete) tablet replicas that "
+            "Whether the master should tombstone (delete) tablet replicas that "
             "are no longer part of the latest reported raft config.");
 TAG_FLAG(master_tombstone_evicted_tablet_replicas, hidden);
 
@@ -158,6 +158,14 @@ DEFINE_bool(catalog_manager_fail_ts_rpcs, false,
 TAG_FLAG(catalog_manager_fail_ts_rpcs, hidden);
 TAG_FLAG(catalog_manager_fail_ts_rpcs, runtime);
 
+DEFINE_bool(catalog_manager_delete_orphaned_tablets, false,
+            "Whether the master should delete tablets reported by tablet "
+            "servers for which there are no corresponding records in the "
+            "master's metadata. Use this option with care; it may cause "
+            "permanent tablet data loss under specific (and rare) cases of "
+            "master failures!");
+TAG_FLAG(catalog_manager_delete_orphaned_tablets, advanced);
+
 using std::pair;
 using std::shared_ptr;
 using std::string;
@@ -1793,20 +1801,27 @@ Status CatalogManager::HandleReportedTablet(TSDescriptor* ts_desc,
     tablet = FindPtrOrNull(tablet_map_, report.tablet_id());
   }
   if (!tablet) {
-    LOG(INFO) << "Got report from unknown tablet " << report.tablet_id()
-              << ": Sending delete request for this orphan tablet";
-    SendDeleteReplicaRequest(report.tablet_id(), TABLET_DATA_DELETED,
-                             boost::none, nullptr, ts_desc->permanent_uuid(),
-                             "Report from unknown tablet");
-    return Status::OK();
-  }
-  if (!tablet->table()) {
-    LOG(INFO) << "Got report from an orphaned tablet " << report.tablet_id();
-    SendDeleteReplicaRequest(report.tablet_id(), TABLET_DATA_DELETED,
-                             boost::none, nullptr, ts_desc->permanent_uuid(),
-                             "Report from an orphaned tablet");
+    // It'd be unsafe to ask the tserver to delete this tablet without first
+    // replicating something to our followers (i.e. to guarantee that we're the
+    // leader). For example, if we were a rogue master, we might be deleting a
+    // tablet created by a new master accidentally. But masters retain metadata
+    // for deleted tablets forever, so a tablet can only be truly unknown in
+    // the event of a serious misconfiguration, such as a tserver heartbeating
+    // to the wrong cluster. Therefore, it should be reasonable to ignore it
+    // and wait for an operator fix the situation.
+    if (FLAGS_catalog_manager_delete_orphaned_tablets) {
+      LOG(INFO) << "Deleting unknown tablet " << report.tablet_id();
+      SendDeleteReplicaRequest(report.tablet_id(), TABLET_DATA_DELETED,
+                               boost::none, nullptr, ts_desc->permanent_uuid(),
+                               "Report from unknown tablet");
+    } else {
+      LOG(WARNING) << "Ignoring report from unknown tablet: "
+                   << report.tablet_id();
+    }
     return Status::OK();
   }
+  DCHECK(tablet->table()); // guaranteed by TabletLoader
+
   VLOG(3) << "tablet report: " << report.ShortDebugString();
 
   // TODO: we don't actually need to do the COW here until we see we're going