You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2018/11/01 03:33:43 UTC

[2/2] kudu git commit: [rebalancer] location-aware rebalancer (part 8/n)

[rebalancer] location-aware rebalancer (part 8/n)

Updated FindBestReplicaToReplace() to handle common and edge cases
of even replication factors.  Added corresponding unit tests as well.

Change-Id: I8f8831d254b2ca0d9a12e0ffbc336a59c3c5c8de
Reviewed-on: http://gerrit.cloudera.org:8080/11761
Tested-by: Kudu Jenkins
Reviewed-by: Will Berkeley <wd...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/4ec2598a
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/4ec2598a
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/4ec2598a

Branch: refs/heads/master
Commit: 4ec2598a355bbbd1c00c4bdc03d37bafe04d0d5f
Parents: 81bba24
Author: Alexey Serbin <as...@cloudera.com>
Authored: Tue Oct 23 17:50:10 2018 -0700
Committer: Alexey Serbin <as...@cloudera.com>
Committed: Thu Nov 1 03:32:50 2018 +0000

----------------------------------------------------------------------
 src/kudu/tools/placement_policy_util-test.cc | 194 ++++++++++++++++++++++
 src/kudu/tools/placement_policy_util.cc      |  31 +++-
 2 files changed, 222 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/4ec2598a/src/kudu/tools/placement_policy_util-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/placement_policy_util-test.cc b/src/kudu/tools/placement_policy_util-test.cc
index 68266f3..7d48cbe 100644
--- a/src/kudu/tools/placement_policy_util-test.cc
+++ b/src/kudu/tools/placement_policy_util-test.cc
@@ -470,6 +470,24 @@ TEST_F(ClusterLocationTest, NoCandidateMovesToFixPolicyViolations) {
       { { "t0", "L0" }, },
       {},
     },
+    {
+      // One RF=6 tablet with replica placement violating the placement policy.
+      {
+        { "T0", 6, { "t0", } },
+      },
+      {
+        { "L0", { "A", "B", "C", } },
+        { "L1", { "D", "E", } },
+        { "L2", { "F", } },
+      },
+      {
+        { "A", { "t0", } }, { "B", { "t0", } }, { "C", { "t0", } },
+        { "D", { "t0", } }, { "E", { "t0", } },
+        { "F", { "t0", } },
+      },
+      { { "t0", "L0" }, },
+      {}
+    },
   };
   for (auto idx = 0; idx < configs.size(); ++idx) {
     SCOPED_TRACE(Substitute("test config index: $0", idx));
@@ -490,5 +508,181 @@ TEST_F(ClusterLocationTest, NoCandidateMovesToFixPolicyViolations) {
   }
 }
 
+TEST_F(ClusterLocationTest, PlacementPolicyViolationsEvenRFEdgeCases) {
+  const vector<TestClusterConfig> configs = {
+    {
+      // One location, RF=2 and RF=4.
+      {
+        { "T0", 2, { "t0", } },
+        { "T1", 4, { "t1", } },
+      },
+      {
+        { "L0", { "A", "B", "C", "D", "E", } },
+      },
+      {
+        { "A", { "t0", "t1", } },
+        { "B", { "t0", "t1", } },
+        { "C", { "t1", } },
+        { "D", { "t1", } },
+      },
+      { { "t0", "L0" }, { "t1", "L0" }, },
+      {}
+    },
+    {
+      // Two locations, RF=2.
+      {
+        { "T0", 2, { "t0", "t1", } },
+      },
+      {
+        { "L0", { "A", "B", } },
+        { "L1", { "D", "E", } },
+      },
+      {
+        { "A", { "t0", } }, { "B", { "t0", } },
+        { "D", { "t1", } }, { "E", { "t1", } },
+      },
+      { { "t0", "L0" }, { "t1", "L1" }, },
+      {}
+    },
+    {
+      // Two locations, RF=2 and RF=4.
+      {
+        { "T0", 2, { "t0", } },
+        { "T1", 4, { "t1", } },
+      },
+      {
+        { "L0", { "A", "B", "C", } },
+        { "L1", { "D", "E", "F", } },
+      },
+      {
+        { "A", { "t0", "t1", } }, { "B", { "t0", "t1", } },
+        { "D", { "t1", } }, { "E", { "t1", } },
+      },
+      { { "t0", "L0" }, { "t1", "L1" }, },
+      {}
+    },
+    {
+      // Two locations, two tablets, RF=2.
+      {
+        { "T0", 2, { "t0", "t1", } },
+      },
+      {
+        { "L0", { "A", "B", } },
+        { "L1", { "C", "D", } },
+      },
+      {
+        { "A", { "t0", } }, { "B", { "t0", } },
+        { "C", { "t1", } }, { "D", { "t1", } },
+      },
+      { { "t0", "L0" }, { "t1", "L1" }, },
+      {}
+    },
+    {
+      // Three locations, RF=4.
+      {
+        { "T0", 4, { "t0", } },
+      },
+      {
+        { "L0", { "A", "B", } },
+        { "L1", { "D", "E", } },
+        { "L2", { "F", "G", } },
+      },
+      {
+        { "A", { "t0", } }, { "B", { "t0", } },
+        { "D", { "t0", } },
+        { "F", { "t0", } },
+      },
+      { { "t0", "L0" }, },
+      {}
+    },
+  };
+  NO_FATALS(RunTest(configs));
+}
+
+TEST_F(ClusterLocationTest, PlacementPolicyViolationsEvenRF) {
+  const vector<TestClusterConfig> configs = {
+    {
+      // Three locations, RF=6.
+      {
+        { "T0", 6, { "t0", } },
+      },
+      {
+        { "L0", { "A", "B", "C", } },
+        { "L1", { "D", "E", "F", } },
+        { "L2", { "G", "H", } },
+      },
+      {
+        { "A", { "t0", } }, { "B", { "t0", } }, { "C", { "t0", } },
+        { "D", { "t0", } }, { "F", { "t0", } },
+        { "H", { "t0", } },
+      },
+      { { "t0", "L0" }, },
+      { { "t0", "B" }, }
+    },
+    {
+      // Three locations, RF=8.
+      {
+        { "T0", 8, { "t0", } },
+      },
+      {
+        { "L0", { "A", "B", "C", } },
+        { "L1", { "D", "E", "F", "G", } },
+        { "L2", { "H", "J", } },
+      },
+      {
+        { "A", { "t0", } }, { "B", { "t0", } }, { "C", { "t0", } },
+        { "D", { "t0", } }, { "E", { "t0", } }, { "F", { "t0", } },
+        { "G", { "t0", } },
+        { "H", { "t0", } },
+      },
+      { { "t0", "L1" }, },
+      { { "t0", "D" }, }
+    },
+  };
+  NO_FATALS(RunTest(configs));
+}
+
+TEST_F(ClusterLocationTest, PlacementPolicyViolationsNoneEvenRF) {
+  const vector<TestClusterConfig> configs = {
+    {
+      // Three locations, RF=6.
+      {
+        { "T0", 6, { "t0", } },
+      },
+      {
+        { "L0", { "A", "B", "C", } },
+        { "L1", { "D", "E", "F", } },
+        { "L2", { "G", "H", } },
+      },
+      {
+        { "A", { "t0", } }, { "B", { "t0", } },
+        { "D", { "t0", } }, { "E", { "t0", } },
+        { "G", { "t0", } }, { "H", { "t0", } },
+      },
+      {},
+      {}
+    },
+    {
+      // Three locations, RF=8.
+      {
+        { "T0", 8, { "t0", } },
+      },
+      {
+        { "L0", { "A", "B", "C", } },
+        { "L1", { "D", "E", "F", } },
+        { "L2", { "G", "H", } },
+      },
+      {
+        { "A", { "t0", } }, { "B", { "t0", } }, { "C", { "t0", } },
+        { "D", { "t0", } }, { "E", { "t0", } }, { "F", { "t0", } },
+        { "G", { "t0", } }, { "H", { "t0", } },
+      },
+      {},
+      {}
+    },
+  };
+  NO_FATALS(RunTest(configs));
+}
+
 } // namespace tools
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/4ec2598a/src/kudu/tools/placement_policy_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/placement_policy_util.cc b/src/kudu/tools/placement_policy_util.cc
index d6494eb..be1b502 100644
--- a/src/kudu/tools/placement_policy_util.cc
+++ b/src/kudu/tools/placement_policy_util.cc
@@ -68,17 +68,42 @@ Status FindBestReplicaToReplace(
   }
 
   const auto& tablet_id = info.tablet_id;
+  const auto location_num = ts_id_by_location.size();
 
   // If a total number of locations is 2, it's impossible to make its replica
   // distribution conform with the placement policy constraints.
   const auto& table_id = FindOrDie(tablets_info.tablet_to_table_id, tablet_id);
   const auto& table_info = FindOrDie(tablets_info.tables_info, table_id);
-  if (ts_id_by_location.size() == 2 && table_info.replication_factor % 2 == 1) {
+
+  // There are a few edge cases which are most likely to occur, so let's have
+  // a special error message for those. In these cases there are too few
+  // locations relative to the replication factor, so it's impossible to find
+  // any replica movements to satisfy the placement policy constraints.
+  //
+  // One interesting case is placing replicas of a tablet with RF=4 in a
+  // cluster with 3 locations. In that case, it's impossible to place the
+  // replicas to satisfy the placement policy's constraints, since no replica
+  // placement keeps the majority of the replicas online regardless of which
+  // single location becomes unavailable. Below are all the possible replica
+  // distributions for that case (modulo permutations of locations): if the
+  // first location becomes unavailable, the majority of the replicas is lost
+  // and the tablet becomes unavailable.
+  //
+  //   4 + 0 + 0
+  //   3 + 1 + 0
+  //   2 + 1 + 1
+  //
+  // Note that with 3 locations and higher replication factors (5, 6, etc.),
+  // there is always a way to place tablet replicas to conform with the
+  // restriction mentioned above.
+  if (location_num == 2 ||
+      (location_num == 3 && table_info.replication_factor == 4)) {
     return Status::ConfigurationError(Substitute(
         "tablet $0 (table name '$1'): replica distribution cannot conform "
         "with the placement policy constraints since its replication "
-        "factor is odd ($2) and there are two locations in the cluster",
-        tablet_id, table_info.name, table_info.replication_factor));
+        "factor is $2 and there are $3 locations in the cluster",
+        tablet_id, table_info.name,
+        table_info.replication_factor, location_num));
   }
 
   const auto& location = info.majority_location;