You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2018/10/23 19:09:27 UTC

kudu git commit: catalog_manager: adjust maximum tablet count at table creation time

Repository: kudu
Updated Branches:
  refs/heads/master 7d65f495d -> b22138783


catalog_manager: adjust maximum tablet count at table creation time

The maximum tablet check introduced in commit 64983c454 was incomplete in
that it only considered the number of _tablets_ rather than the number of
_replicas_, even though replica creation itself places load on a tserver
during table creation. This patch clarifies the intended behavior and
incorporates the replication factor of the table into the check.

Note: users who override --max_create_tablets_per_ts are in for a rude
surprise as the maximum size of their tables will be effectively cut in
three. Given that the parameter isn't tagged as "stable" though, I think
communicating this change in behavior via release note is sufficient.

Change-Id: I00d91baddb1591476a7be27cba043e6354558208
Reviewed-on: http://gerrit.cloudera.org:8080/11716
Reviewed-by: Dan Burkert <da...@apache.org>
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <as...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b2213878
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b2213878
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b2213878

Branch: refs/heads/master
Commit: b22138783b96927b689ea14dc3c96a311e643970
Parents: 7d65f49
Author: Adar Dembo <ad...@cloudera.com>
Authored: Wed Oct 17 15:37:31 2018 -0700
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Tue Oct 23 19:09:14 2018 +0000

----------------------------------------------------------------------
 docs/known_issues.adoc                          |  4 +-
 src/kudu/client/client-test.cc                  | 10 ++++-
 .../raft_consensus_stress-itest.cc              |  8 +---
 src/kudu/master/catalog_manager.cc              | 44 +++++++++++++-------
 4 files changed, 40 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/b2213878/docs/known_issues.adoc
----------------------------------------------------------------------
diff --git a/docs/known_issues.adoc b/docs/known_issues.adoc
index 687266e..83890be 100644
--- a/docs/known_issues.adoc
+++ b/docs/known_issues.adoc
@@ -139,8 +139,8 @@
 * The maximum number of tablets per tablet server is 2000, post-replication,
   but we recommend 1000 tablets or fewer per tablet server.
 
-* Maximum number of tablets per table for each tablet server is 60, post-replication,
-  at table-creation time.
+* Maximum number of tablets per table for each tablet server is 60,
+  post-replication (assuming the default replication factor of 3), at table-creation time.
 
 == Replication and Backup Limitations
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/b2213878/src/kudu/client/client-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index cc4c9ab..efcdd68 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -4426,6 +4426,12 @@ TEST_F(ClientTest, TestCreateDuplicateTable) {
 TEST_F(ClientTest, TestCreateTableWithTooManyTablets) {
   FLAGS_max_create_tablets_per_ts = 1;
 
+  // Add two more tservers so that we can create a table with replication factor
+  // of 3 below.
+  ASSERT_OK(cluster_->AddTabletServer());
+  ASSERT_OK(cluster_->AddTabletServer());
+  ASSERT_OK(cluster_->WaitForTabletServerCount(3));
+
   KuduPartialRow* split1 = schema_.NewRow();
   ASSERT_OK(split1->SetInt32("key", 1));
 
@@ -4441,8 +4447,8 @@ TEST_F(ClientTest, TestCreateTableWithTooManyTablets) {
       .Create();
   ASSERT_TRUE(s.IsInvalidArgument());
   ASSERT_STR_CONTAINS(s.ToString(),
-                      "the requested number of tablets is over the "
-                      "maximum permitted at creation time (1)");
+                      "the requested number of tablet replicas is over the "
+                      "maximum permitted at creation time (3)");
 }
 
 // Tests for too many replicas, too few replicas, even replica count, etc.

http://git-wip-us.apache.org/repos/asf/kudu/blob/b2213878/src/kudu/integration-tests/raft_consensus_stress-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/raft_consensus_stress-itest.cc b/src/kudu/integration-tests/raft_consensus_stress-itest.cc
index 1531caa..fee4cfd 100644
--- a/src/kudu/integration-tests/raft_consensus_stress-itest.cc
+++ b/src/kudu/integration-tests/raft_consensus_stress-itest.cc
@@ -121,12 +121,6 @@ TEST_F(RaftConsensusStressITest, RemoveReplaceInCycle) {
   const MonoDelta kTimeout = MonoDelta::FromSeconds(60 * kBuildCfgFactor);
   const MonoDelta kShortTimeout = MonoDelta::FromSeconds(1 * kBuildCfgFactor);
 
-  // Translate the replicas-per-server parameter into the catalog manager's
-  // --max_create_tablets_per_ts flag. Current logic in the catalog manager
-  // does not take into account the replication factor, that's why the
-  // additional division by kReplicationFactor.
-  const int kMaxCreateTabletsPerTs = kNumTablets / kReplicationFactor;
-
   // This test scenario induces a lot of faults/errors and it runs multiple
   // iterations. Tablet servers and master are too chatty in this case, logging
   // a lot of information. Setting --minloglevel=2 allow for logging only of
@@ -134,7 +128,7 @@ TEST_F(RaftConsensusStressITest, RemoveReplaceInCycle) {
   const vector<string> kMasterFlags = {
     Substitute("--minloglevel=$0", FLAGS_test_minloglevel),
     Substitute("--raft_prepare_replacement_before_eviction=$0", is_343_scheme),
-    Substitute("--max_create_tablets_per_ts=$0", kMaxCreateTabletsPerTs),
+    Substitute("--max_create_tablets_per_ts=$0", kNumTablets),
   };
   const vector<string> kTserverFlags = {
     Substitute("--minloglevel=$0", FLAGS_test_minloglevel),

http://git-wip-us.apache.org/repos/asf/kudu/blob/b2213878/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index bffb4bc..6cb52df 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -191,8 +191,9 @@ DEFINE_int32(catalog_manager_bg_task_wait_ms, 1000,
              "between runs");
 TAG_FLAG(catalog_manager_bg_task_wait_ms, hidden);
 
-DEFINE_int32(max_create_tablets_per_ts, 20,
-             "The number of tablets per TS that can be requested for a new table.");
+DEFINE_int32(max_create_tablets_per_ts, 60,
+             "The number of tablet replicas per TS that can be requested for a "
+             "new table. If 0, no limit is enforced.");
 TAG_FLAG(max_create_tablets_per_ts, advanced);
 
 DEFINE_int32(master_failover_catchup_timeout_ms, 30 * 1000, // 30 sec
@@ -1472,22 +1473,11 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
         resp, MasterErrorPB::ILLEGAL_REPLICATION_FACTOR);
   }
 
-  // Verify that the total number of tablets is reasonable, relative to the number
-  // of live tablet servers.
+  // Verify that the number of replicas isn't larger than the number of live tablet
+  // servers.
   TSDescriptorVector ts_descs;
   master_->ts_manager()->GetAllLiveDescriptors(&ts_descs);
   const auto num_live_tservers = ts_descs.size();
-  const auto max_tablets = FLAGS_max_create_tablets_per_ts * num_live_tservers;
-  if (num_replicas > 1 && max_tablets > 0 && partitions.size() > max_tablets) {
-    return SetupError(Status::InvalidArgument(Substitute(
-            "the requested number of tablets is over the maximum permitted at creation time ($0), "
-            "additional tablets may be added by adding range partitions to the table post-creation",
-            max_tablets)),
-        resp, MasterErrorPB::TOO_MANY_TABLETS);
-  }
-
-  // Verify that the number of replicas isn't larger than the number of live tablet
-  // servers.
   if (FLAGS_catalog_manager_check_ts_count_for_create_table && num_replicas > num_live_tservers) {
     // Note: this error message is matched against in master-stress-test.
     return SetupError(Status::InvalidArgument(Substitute(
@@ -1496,6 +1486,30 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
         resp, MasterErrorPB::REPLICATION_FACTOR_TOO_HIGH);
   }
 
+  // Verify that the total number of replicas is reasonable.
+  //
+  // Table creation can generate a fair amount of load, both in the form of RPC
+  // traffic (due to Raft leader elections) and disk I/O (due to durably writing
+  // several files during both replica creation and leader elections).
+  //
+  // Ideally we would have more effective ways of mitigating this load (such
+  // as more efficient on-disk metadata management), but in lieu of that, we
+  // employ this coarse-grained check that prohibits up-front creation of too
+  // many replicas.
+  //
+  // Note: non-replicated tables are exempt because, by not using replication,
+  // they do not generate much of the load described above.
+  const auto max_replicas_total = FLAGS_max_create_tablets_per_ts * num_live_tservers;
+  if (num_replicas > 1 &&
+      max_replicas_total > 0 &&
+      partitions.size() * num_replicas > max_replicas_total) {
+    return SetupError(Status::InvalidArgument(Substitute(
+        "the requested number of tablet replicas is over the maximum permitted "
+        "at creation time ($0), additional tablets may be added by adding "
+        "range partitions to the table post-creation", max_replicas_total)),
+                      resp, MasterErrorPB::TOO_MANY_TABLETS);
+  }
+
   // Warn if the number of live tablet servers is not enough to re-replicate
   // a failed replica of the tablet.
   const auto num_ts_needed_for_rereplication =