You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2018/10/23 19:09:27 UTC
kudu git commit: catalog_manager: adjust maximum tablet count at
table creation time
Repository: kudu
Updated Branches:
refs/heads/master 7d65f495d -> b22138783
catalog_manager: adjust maximum tablet count at table creation time
The maximum tablet check introduced in commit 64983c454 was incomplete in
that it only considered the number of _tablets_ rather than the number of
_replicas_, even though replica creation itself places load on a tserver
during table creation. This patch clarifies the intended behavior and
incorporates the replication factor of the table into the check.
Note: users who override --max_create_tablets_per_ts are in for a rude
surprise as the maximum size of their tables will be effectively cut in
three. Given that the parameter isn't tagged as "stable" though, I think
communicating this change in behavior via release note is sufficient.
Change-Id: I00d91baddb1591476a7be27cba043e6354558208
Reviewed-on: http://gerrit.cloudera.org:8080/11716
Reviewed-by: Dan Burkert <da...@apache.org>
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b2213878
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b2213878
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b2213878
Branch: refs/heads/master
Commit: b22138783b96927b689ea14dc3c96a311e643970
Parents: 7d65f49
Author: Adar Dembo <ad...@cloudera.com>
Authored: Wed Oct 17 15:37:31 2018 -0700
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Tue Oct 23 19:09:14 2018 +0000
----------------------------------------------------------------------
docs/known_issues.adoc | 4 +-
src/kudu/client/client-test.cc | 10 ++++-
.../raft_consensus_stress-itest.cc | 8 +---
src/kudu/master/catalog_manager.cc | 44 +++++++++++++-------
4 files changed, 40 insertions(+), 26 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/b2213878/docs/known_issues.adoc
----------------------------------------------------------------------
diff --git a/docs/known_issues.adoc b/docs/known_issues.adoc
index 687266e..83890be 100644
--- a/docs/known_issues.adoc
+++ b/docs/known_issues.adoc
@@ -139,8 +139,8 @@
* The maximum number of tablets per tablet server is 2000, post-replication,
but we recommend 1000 tablets or fewer per tablet server.
-* Maximum number of tablets per table for each tablet server is 60, post-replication,
- at table-creation time.
+* Maximum number of tablets per table for each tablet server is 60,
+ post-replication (assuming the default replication factor of 3), at table-creation time.
== Replication and Backup Limitations
http://git-wip-us.apache.org/repos/asf/kudu/blob/b2213878/src/kudu/client/client-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index cc4c9ab..efcdd68 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -4426,6 +4426,12 @@ TEST_F(ClientTest, TestCreateDuplicateTable) {
TEST_F(ClientTest, TestCreateTableWithTooManyTablets) {
FLAGS_max_create_tablets_per_ts = 1;
+ // Add two more tservers so that we can create a table with replication factor
+ // of 3 below.
+ ASSERT_OK(cluster_->AddTabletServer());
+ ASSERT_OK(cluster_->AddTabletServer());
+ ASSERT_OK(cluster_->WaitForTabletServerCount(3));
+
KuduPartialRow* split1 = schema_.NewRow();
ASSERT_OK(split1->SetInt32("key", 1));
@@ -4441,8 +4447,8 @@ TEST_F(ClientTest, TestCreateTableWithTooManyTablets) {
.Create();
ASSERT_TRUE(s.IsInvalidArgument());
ASSERT_STR_CONTAINS(s.ToString(),
- "the requested number of tablets is over the "
- "maximum permitted at creation time (1)");
+ "the requested number of tablet replicas is over the "
+ "maximum permitted at creation time (3)");
}
// Tests for too many replicas, too few replicas, even replica count, etc.
http://git-wip-us.apache.org/repos/asf/kudu/blob/b2213878/src/kudu/integration-tests/raft_consensus_stress-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/raft_consensus_stress-itest.cc b/src/kudu/integration-tests/raft_consensus_stress-itest.cc
index 1531caa..fee4cfd 100644
--- a/src/kudu/integration-tests/raft_consensus_stress-itest.cc
+++ b/src/kudu/integration-tests/raft_consensus_stress-itest.cc
@@ -121,12 +121,6 @@ TEST_F(RaftConsensusStressITest, RemoveReplaceInCycle) {
const MonoDelta kTimeout = MonoDelta::FromSeconds(60 * kBuildCfgFactor);
const MonoDelta kShortTimeout = MonoDelta::FromSeconds(1 * kBuildCfgFactor);
- // Translate the replicas-per-server parameter into the catalog manager's
- // --max_create_tablets_per_ts flag. Current logic in the catalog manager
- // does not take into account the replication factor, that's why the
- // additional division by kReplicationFactor.
- const int kMaxCreateTabletsPerTs = kNumTablets / kReplicationFactor;
-
// This test scenario induces a lot of faults/errors and it runs multiple
// iterations. Tablet servers and master are too chatty in this case, logging
// a lot of information. Setting --minloglevel=2 allow for logging only of
@@ -134,7 +128,7 @@ TEST_F(RaftConsensusStressITest, RemoveReplaceInCycle) {
const vector<string> kMasterFlags = {
Substitute("--minloglevel=$0", FLAGS_test_minloglevel),
Substitute("--raft_prepare_replacement_before_eviction=$0", is_343_scheme),
- Substitute("--max_create_tablets_per_ts=$0", kMaxCreateTabletsPerTs),
+ Substitute("--max_create_tablets_per_ts=$0", kNumTablets),
};
const vector<string> kTserverFlags = {
Substitute("--minloglevel=$0", FLAGS_test_minloglevel),
http://git-wip-us.apache.org/repos/asf/kudu/blob/b2213878/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index bffb4bc..6cb52df 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -191,8 +191,9 @@ DEFINE_int32(catalog_manager_bg_task_wait_ms, 1000,
"between runs");
TAG_FLAG(catalog_manager_bg_task_wait_ms, hidden);
-DEFINE_int32(max_create_tablets_per_ts, 20,
- "The number of tablets per TS that can be requested for a new table.");
+DEFINE_int32(max_create_tablets_per_ts, 60,
+ "The number of tablet replicas per TS that can be requested for a "
+ "new table. If 0, no limit is enforced.");
TAG_FLAG(max_create_tablets_per_ts, advanced);
DEFINE_int32(master_failover_catchup_timeout_ms, 30 * 1000, // 30 sec
@@ -1472,22 +1473,11 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
resp, MasterErrorPB::ILLEGAL_REPLICATION_FACTOR);
}
- // Verify that the total number of tablets is reasonable, relative to the number
- // of live tablet servers.
+ // Verify that the number of replicas isn't larger than the number of live tablet
+ // servers.
TSDescriptorVector ts_descs;
master_->ts_manager()->GetAllLiveDescriptors(&ts_descs);
const auto num_live_tservers = ts_descs.size();
- const auto max_tablets = FLAGS_max_create_tablets_per_ts * num_live_tservers;
- if (num_replicas > 1 && max_tablets > 0 && partitions.size() > max_tablets) {
- return SetupError(Status::InvalidArgument(Substitute(
- "the requested number of tablets is over the maximum permitted at creation time ($0), "
- "additional tablets may be added by adding range partitions to the table post-creation",
- max_tablets)),
- resp, MasterErrorPB::TOO_MANY_TABLETS);
- }
-
- // Verify that the number of replicas isn't larger than the number of live tablet
- // servers.
if (FLAGS_catalog_manager_check_ts_count_for_create_table && num_replicas > num_live_tservers) {
// Note: this error message is matched against in master-stress-test.
return SetupError(Status::InvalidArgument(Substitute(
@@ -1496,6 +1486,30 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
resp, MasterErrorPB::REPLICATION_FACTOR_TOO_HIGH);
}
+ // Verify that the total number of replicas is reasonable.
+ //
+ // Table creation can generate a fair amount of load, both in the form of RPC
+ // traffic (due to Raft leader elections) and disk I/O (due to durably writing
+ // several files during both replica creation and leader elections).
+ //
+ // Ideally we would have more effective ways of mitigating this load (such
+ // as more efficient on-disk metadata management), but in lieu of that, we
+ // employ this coarse-grained check that prohibits up-front creation of too
+ // many replicas.
+ //
+ // Note: non-replicated tables are exempt because, by not using replication,
+ // they do not generate much of the load described above.
+ const auto max_replicas_total = FLAGS_max_create_tablets_per_ts * num_live_tservers;
+ if (num_replicas > 1 &&
+ max_replicas_total > 0 &&
+ partitions.size() * num_replicas > max_replicas_total) {
+ return SetupError(Status::InvalidArgument(Substitute(
+ "the requested number of tablet replicas is over the maximum permitted "
+ "at creation time ($0), additional tablets may be added by adding "
+ "range partitions to the table post-creation", max_replicas_total)),
+ resp, MasterErrorPB::TOO_MANY_TABLETS);
+ }
+
// Warn if the number of live tablet servers is not enough to re-replicate
// a failed replica of the tablet.
const auto num_ts_needed_for_rereplication =