Posted to commits@kudu.apache.org by al...@apache.org on 2018/10/30 00:25:45 UTC

[1/2] kudu git commit: [rebalancer] location-aware rebalancer (part 5/n)

Repository: kudu
Updated Branches:
  refs/heads/master 34bb7f93b -> f731ea004


[rebalancer] location-aware rebalancer (part 5/n)

Added LocationBalancingAlgo and corresponding unit tests.

Change-Id: I7ffff8446fec8b8f80b7c6112bdd9d53f3dbf506
Reviewed-on: http://gerrit.cloudera.org:8080/11746
Tested-by: Alexey Serbin <as...@cloudera.com>
Reviewed-by: Will Berkeley <wd...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/87084c10
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/87084c10
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/87084c10

Branch: refs/heads/master
Commit: 87084c108e1836ebb7811eace93836ee872a253e
Parents: 34bb7f9
Author: Alexey Serbin <as...@cloudera.com>
Authored: Fri Oct 19 22:59:26 2018 -0700
Committer: Alexey Serbin <as...@cloudera.com>
Committed: Mon Oct 29 23:26:48 2018 +0000

----------------------------------------------------------------------
 src/kudu/tools/rebalance_algo-test.cc | 317 ++++++++++++++++++++++++++++-
 src/kudu/tools/rebalance_algo.cc      | 220 ++++++++++++++++++++
 src/kudu/tools/rebalance_algo.h       |  90 +++++++-
 3 files changed, 623 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/87084c10/src/kudu/tools/rebalance_algo-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/rebalance_algo-test.cc b/src/kudu/tools/rebalance_algo-test.cc
index 212819f..3271339 100644
--- a/src/kudu/tools/rebalance_algo-test.cc
+++ b/src/kudu/tools/rebalance_algo-test.cc
@@ -25,6 +25,7 @@
 #include <memory>
 #include <set>
 #include <string>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 
@@ -33,6 +34,7 @@
 #include <gtest/gtest.h>
 
 #include "kudu/gutil/macros.h"
+#include "kudu/gutil/map-util.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/util/random.h"
 #include "kudu/util/status.h"
@@ -53,11 +55,20 @@ struct TestClusterConfig;
     } \
   } while (false)
 
+#define VERIFY_LOCATION_BALANCING_MOVES(test_config) \
+  do { \
+    for (auto idx = 0; idx < ARRAYSIZE((test_config)); ++idx) { \
+      SCOPED_TRACE(Substitute("test config index: $0", idx)); \
+      NO_FATALS(VerifyLocationRebalancingMoves((test_config)[idx])); \
+    } \
+  } while (false)
+
 using std::endl;
 using std::ostream;
 using std::ostringstream;
 using std::set;
 using std::string;
+using std::unordered_map;
 using std::vector;
 using strings::Substitute;
 
@@ -68,12 +79,19 @@ struct TablePerServerReplicas {
   const string table_id;
 
   // Number of replicas of this table on each server in the cluster.
+  // By definition, the indices in this container correspond to indices
+  // in TestClusterConfig::tserver_uuids.
   const vector<size_t> num_replicas_by_server;
 };
 
 // Structure to describe rebalancing-related state of the cluster expressively
 // enough for the tests.
 struct TestClusterConfig {
+  // Distribution of tablet servers by locations. If the map is empty, it's
+  // interpreted as if the cluster does not have any locations specified
+  // (i.e. all the tablet servers are in the same unnamed location).
+  const unordered_map<string, set<string>> servers_by_location;
+
   // UUIDs of tablet servers; every element must be unique.
   const vector<string> tserver_uuids;
 
@@ -148,6 +166,17 @@ void ClusterConfigToClusterInfo(const TestClusterConfig& tcc,
     table_info_by_skew.emplace(max_count - min_count, std::move(info));
   }
 
+  // TODO(aserbin): add a consistency check on location-related fields.
+  auto& locality = result.locality;
+  locality.servers_by_location = tcc.servers_by_location;
+  for (const auto& elem : tcc.servers_by_location) {
+    const auto& location = elem.first;
+    const auto& server_ids = elem.second;
+    for (const auto& server_id : server_ids) {
+      EmplaceOrDie(&locality.location_by_ts_id, server_id, location);
+    }
+  }
+
   *cluster_info = std::move(result);
 }
 
@@ -163,6 +192,18 @@ void VerifyRebalancingMoves(const TestClusterConfig& cfg) {
   EXPECT_EQ(cfg.expected_moves, moves);
 }
 
+// Similar to VerifyRebalancingMoves(), but for location-aware rebalancing.
+void VerifyLocationRebalancingMoves(const TestClusterConfig& cfg) {
+  vector<TableReplicaMove> moves;
+  {
+    ClusterInfo ci;
+    ClusterConfigToClusterInfo(cfg, &ci);
+    LocationBalancingAlgo algo;
+    ASSERT_OK(algo.GetNextMoves(ci, 0, &moves));
+  }
+  EXPECT_EQ(cfg.expected_moves, moves);
+}
+
 // Is 'cbi' balanced according to the two-dimensional greedy algorithm?
 bool IsBalanced(const ClusterBalanceInfo& cbi) {
   if (cbi.table_info_by_skew.empty()) {
@@ -223,7 +264,7 @@ TEST(RebalanceAlgoUnitTest, NoTableSkewInClusterBalanceInfoGetNextMoves) {
 
 // Test the behavior of the internal (non-public) algorithm's method
 // GetNextMove() when no input information is given.
-TEST(RebalanceAlgoUnitTest, EmptyClusterBalanceInfoGetNextMove) {
+TEST(RebalanceAlgoUnitTest, EmptyBalanceInfoGetNextMove) {
   boost::optional<TableReplicaMove> move;
   const ClusterInfo info;
   const auto s = TwoDimensionalGreedyAlgo().GetNextMove(info, &move);
@@ -231,6 +272,10 @@ TEST(RebalanceAlgoUnitTest, EmptyClusterBalanceInfoGetNextMove) {
   EXPECT_EQ(boost::none, move);
 }
 
+// Workaround for older libstdc++ (like on RH/CentOS 6). With newer
+// libstdc++/libc++, '{}' works as expected for an empty unordered map.
+static const decltype(TestClusterConfig::servers_by_location) kNoLocations;
+
 // Various scenarios of balanced configurations where no moves are expected
 // to happen.
 TEST(RebalanceAlgoUnitTest, AlreadyBalanced) {
@@ -238,6 +283,7 @@ TEST(RebalanceAlgoUnitTest, AlreadyBalanced) {
   const TestClusterConfig kConfigs[] = {
     {
       // A single tablet server with a single replica of the only table.
+      kNoLocations,
       { "0", },
       {
         { "A", { 1 } },
@@ -245,6 +291,7 @@ TEST(RebalanceAlgoUnitTest, AlreadyBalanced) {
     },
     {
       // A single tablet server in the cluster that hosts all replicas.
+      kNoLocations,
       { "0", },
       {
         { "A", { 1 } },
@@ -254,6 +301,7 @@ TEST(RebalanceAlgoUnitTest, AlreadyBalanced) {
     },
     {
       // Single table and 2 TS: 100 and 99 replicas at each.
+      kNoLocations,
       { "0", "1", },
       {
         { "A", { 100, 99, } },
@@ -261,6 +309,7 @@ TEST(RebalanceAlgoUnitTest, AlreadyBalanced) {
     },
     {
       // Table- and cluster-wise balanced configuration with one-off skew.
+      kNoLocations,
       { "0", "1", },
       {
         { "A", { 1, 1, } },
@@ -271,6 +320,7 @@ TEST(RebalanceAlgoUnitTest, AlreadyBalanced) {
       // A configuration which has zero skew cluster-wise, while the table-wise
       // balance has one-off skew: the algorithm should not try to correct
       // the latter.
+      kNoLocations,
       { "0", "1", },
       {
         { "A", { 1, 2, } },
@@ -280,6 +330,7 @@ TEST(RebalanceAlgoUnitTest, AlreadyBalanced) {
       },
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 1, 0, 0, } },
@@ -290,6 +341,7 @@ TEST(RebalanceAlgoUnitTest, AlreadyBalanced) {
     {
       // A simple balanced case: 3 tablet servers, 3 tables with
       // one replica per server.
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 1, 1, 1, } },
@@ -298,6 +350,7 @@ TEST(RebalanceAlgoUnitTest, AlreadyBalanced) {
       },
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 0, 1, 1, } },
@@ -306,6 +359,7 @@ TEST(RebalanceAlgoUnitTest, AlreadyBalanced) {
       },
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 2, 1, 1, } },
@@ -314,6 +368,7 @@ TEST(RebalanceAlgoUnitTest, AlreadyBalanced) {
       },
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 1, 1, 0, } },
@@ -325,6 +380,7 @@ TEST(RebalanceAlgoUnitTest, AlreadyBalanced) {
       },
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 1, 0, 1, } },
@@ -332,6 +388,7 @@ TEST(RebalanceAlgoUnitTest, AlreadyBalanced) {
       },
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "B", { 1, 0, 1, } },
@@ -339,6 +396,7 @@ TEST(RebalanceAlgoUnitTest, AlreadyBalanced) {
       },
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 2, 2, 1, } },
@@ -346,6 +404,7 @@ TEST(RebalanceAlgoUnitTest, AlreadyBalanced) {
       },
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 2, 2, 1, } },
@@ -353,6 +412,7 @@ TEST(RebalanceAlgoUnitTest, AlreadyBalanced) {
       },
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 2, 2, 1, } },
@@ -361,6 +421,7 @@ TEST(RebalanceAlgoUnitTest, AlreadyBalanced) {
       },
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 0, 1, 0, } },
@@ -378,6 +439,7 @@ TEST(RebalanceAlgoUnitTest, AlreadyBalanced) {
 TEST(RebalanceAlgoUnitTest, TableWiseBalanced) {
   const TestClusterConfig kConfigs[] = {
     {
+      kNoLocations,
       { "0", "1", },
       {
         { "A", { 100, 99, } },
@@ -386,6 +448,7 @@ TEST(RebalanceAlgoUnitTest, TableWiseBalanced) {
       { { "A", "0", "1" }, }
     },
     {
+      kNoLocations,
       { "0", "1", },
       {
         { "A", { 1, 2, } },
@@ -396,6 +459,7 @@ TEST(RebalanceAlgoUnitTest, TableWiseBalanced) {
       { { "A", "1", "0" }, }
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 1, 0, 0, } },
@@ -405,6 +469,7 @@ TEST(RebalanceAlgoUnitTest, TableWiseBalanced) {
       { { "A", "0", "2" }, }
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 1, 1, 1, } },
@@ -414,6 +479,7 @@ TEST(RebalanceAlgoUnitTest, TableWiseBalanced) {
       { { "B", "2", "0" }, }
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 1, 1, 0, } },
@@ -423,6 +489,7 @@ TEST(RebalanceAlgoUnitTest, TableWiseBalanced) {
       { { "B", "0", "1" }, }
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "C", { 1, 0, 1, } },
@@ -442,6 +509,7 @@ TEST(RebalanceAlgoUnitTest, OneMoveNoCycling) {
  // that's why there are multiple copies of virtually the same configuration.
   const TestClusterConfig kConfigs[] = {
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 1, 0, 1, } },
@@ -451,6 +519,7 @@ TEST(RebalanceAlgoUnitTest, OneMoveNoCycling) {
       { { "A", "0", "1" }, }
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 1, 0, 1, } },
@@ -460,6 +529,7 @@ TEST(RebalanceAlgoUnitTest, OneMoveNoCycling) {
       { { "A", "0", "1" }, }
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "B", { 1, 0, 1, } },
@@ -469,6 +539,7 @@ TEST(RebalanceAlgoUnitTest, OneMoveNoCycling) {
       { { "B", "0", "1" }, }
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "B", { 1, 0, 1, } },
@@ -478,6 +549,7 @@ TEST(RebalanceAlgoUnitTest, OneMoveNoCycling) {
       { { "B", "0", "1" }, }
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "C", { 1, 0, 1, } },
@@ -487,6 +559,7 @@ TEST(RebalanceAlgoUnitTest, OneMoveNoCycling) {
       { { "C", "0", "1" }, }
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "C", { 1, 0, 1, } },
@@ -505,6 +578,7 @@ TEST(RebalanceAlgoUnitTest, OneMoveNoCycling) {
 TEST(RebalanceAlgoUnitTest, ClusterWiseBalanced) {
   const TestClusterConfig kConfigs[] = {
     {
+      kNoLocations,
       { "0", "1", },
       {
         { "A", { 2, 0, } },
@@ -515,6 +589,7 @@ TEST(RebalanceAlgoUnitTest, ClusterWiseBalanced) {
       }
     },
     {
+      kNoLocations,
       { "0", "1", },
       {
         { "A", { 1, 2, } },
@@ -527,6 +602,7 @@ TEST(RebalanceAlgoUnitTest, ClusterWiseBalanced) {
       }
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 2, 1, 0, } },
@@ -538,6 +614,7 @@ TEST(RebalanceAlgoUnitTest, ClusterWiseBalanced) {
       }
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 2, 1, 0, } },
@@ -558,6 +635,7 @@ TEST(RebalanceAlgoUnitTest, ClusterWiseBalanced) {
 TEST(RebalanceAlgoUnitTest, FewMoves) {
   const TestClusterConfig kConfigs[] = {
     {
+      kNoLocations,
       { "0", "1", },
       {
         { "A", { 2, 0, } },
@@ -565,6 +643,7 @@ TEST(RebalanceAlgoUnitTest, FewMoves) {
       { { "A", "0", "1" }, }
     },
     {
+      kNoLocations,
       { "0", "1", },
       {
         { "A", { 3, 0, } },
@@ -572,6 +651,7 @@ TEST(RebalanceAlgoUnitTest, FewMoves) {
       { { "A", "0", "1" }, }
     },
     {
+      kNoLocations,
       { "0", "1", },
       {
         { "A", { 4, 0, } },
@@ -582,6 +662,7 @@ TEST(RebalanceAlgoUnitTest, FewMoves) {
       }
     },
     {
+      kNoLocations,
       { "0", "1", },
       {
         { "A", { 1, 2, } },
@@ -593,6 +674,7 @@ TEST(RebalanceAlgoUnitTest, FewMoves) {
       }
     },
     {
+      kNoLocations,
       { "0", "1", },
       {
         { "A", { 4, 0, } },
@@ -605,6 +687,7 @@ TEST(RebalanceAlgoUnitTest, FewMoves) {
       }
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 4, 2, 0, } },
@@ -618,6 +701,7 @@ TEST(RebalanceAlgoUnitTest, FewMoves) {
       }
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 2, 1, 0, } },
@@ -631,6 +715,7 @@ TEST(RebalanceAlgoUnitTest, FewMoves) {
       }
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 5, 1, 0, } },
@@ -642,6 +727,7 @@ TEST(RebalanceAlgoUnitTest, FewMoves) {
       }
     },
     {
+      kNoLocations,
       { "0", "1", "2", },
       {
         { "A", { 5, 1, 0, } },
@@ -664,6 +750,7 @@ TEST(RebalanceAlgoUnitTest, FewMoves) {
 // make them balanced moving many replicas around.
 TEST(RebalanceAlgoUnitTest, ManyMoves) {
   const TestClusterConfig kConfig = {
+    kNoLocations,
     { "0", "1", "2", },
     {
       { "A", { 100, 400, 100, } },
@@ -720,6 +807,7 @@ TEST(RebalanceAlgoUnitTest, RandomizedTest) {
       });
     }
     TestClusterConfig cfg{
+      kNoLocations,
       std::move(tserver_uuids),
       std::move(table_replicas),
       {}  // This tests checks achievement of balance, not the path to it.
@@ -740,11 +828,236 @@ TEST(RebalanceAlgoUnitTest, RandomizedTest) {
       while (!IsBalanced(ci.balance)) {
         ASSERT_OK(algo.GetNextMove(ci, &move));
         ASSERT_OK(TwoDimensionalGreedyAlgo::ApplyMove(*move, &ci.balance));
-        ASSERT_GE(num_moves_ub, ++num_moves) << "Too many moves! The algorithm is likely stuck";
+        ASSERT_GE(num_moves_ub, ++num_moves)
+            << "Too many moves! The algorithm is likely stuck";
       }
     }
   }
 }
 
+// Location-based rebalancing: cases where few (if any) moves are expected
+// because the location load imbalance is slight.
+TEST(RebalanceAlgoUnitTest, LocationBalancingFewMoves) {
+  const TestClusterConfig kConfigs[] = {
+    {
+      {
+        { "L0", { "0", "1", }, },
+        { "L1", { "2", }, },
+      },
+      { "0", "1", "2", },
+      { { "A", { 1, 0, 0, } }, },
+      {}
+    },
+    {
+      {
+        { "L0", { "0", "1", }, },
+        { "L1", { "2", }, },
+      },
+      { "0", "1", "2", },
+      { { "A", { 0, 0, 1, } }, },
+      {}
+    },
+    {
+      {
+        { "L0", { "0", "1", }, },
+        { "L1", { "2", }, },
+      },
+      { "0", "1", "2", },
+      { { "A", { 1, 1, 0, } }, },
+      {}
+    },
+    {
+      {
+        { "L0", { "0", "1", }, },
+        { "L1", { "2", }, },
+      },
+      { "0", "1", "2", },
+      { { "A", { 1, 1, 1, } }, },
+      {}
+    },
+    {
+      {
+        { "L0", { "0", "1", }, },
+        { "L1", { "2", }, },
+      },
+      { "0", "1", "2", },
+      { { "A", { 2, 1, 0, } }, },
+      { { "A", "0", "2" }, }
+    },
+    {
+      {
+        { "L0", { "0", "1", }, },
+        { "L1", { "2", }, },
+      },
+      { "0", "1", "2", },
+      { { "A", { 1, 1, 2, } }, },
+      {}
+    },
+    {
+      {
+        { "L0", { "0", "1", }, },
+        { "L1", { "2", }, },
+      },
+      { "0", "1", "2", },
+      { { "A", { 2, 1, 3, } }, },
+      { { "A", "2", "1" }, }
+    },
+    {
+      {
+        { "L0", { "0", "1", }, },
+        { "L1", { "2", }, },
+      },
+      { "0", "1", "2", },
+      { { "A", { 2, 4, 0, } }, },
+      {
+        { "A", "1", "2" },
+        { "A", "1", "2" },
+      }
+    },
+    {
+      {
+        { "L0", { "0", "1", }, },
+        { "L1", { "2", "3", "4", "5", }, },
+      },
+      { "0", "1", "2", "3", "4", "5" },
+      { { "A", { 1, 1, 1, 1, 1, 1, } }, },
+      {}
+    },
+    {
+      {
+        { "L0", { "0", "1", }, },
+        { "L1", { "2", "3", "4", "5", }, },
+      },
+      { "0", "1", "2", "3", "4", "5" },
+      { { "A", { 2, 0, 4, 0, 0, 0, } }, },
+      {}
+    },
+    {
+      {
+        { "L0", { "0", }, },
+        { "L1", { "1", "2", "3", "4", "5", }, },
+      },
+      { "0", "1", "2", "3", "4", "5", },
+      { { "A", { 0, 1, 1, 1, 1, 1, } }, },
+      {}
+    },
+    {
+      {
+        { "L0", { "0", }, },
+        { "L1", { "1", "2", "3", "4", "5", }, },
+      },
+      { "0", "1", "2", "3", "4", "5", },
+      { { "A", { 0, 5, 0, 0, 0, 0, } }, },
+      {}
+    },
+    {
+      {
+        { "L0", { "0", }, },
+        { "L1", { "1", "2", "3", "4", "5", }, },
+      },
+      { "0", "1", "2", "3", "4", "5", },
+      { { "A", { 2, 1, 1, 1, 1, 0, } }, },
+      { { "A", "0", "5" }, }
+    },
+  };
+  VERIFY_LOCATION_BALANCING_MOVES(kConfigs);
+}
+
+// A simple location-based rebalancing scenario, a single table.
+TEST(RebalanceAlgoUnitTest, LocationBalancingSimpleST) {
+  const TestClusterConfig kConfigs[] = {
+    {
+      {
+        { "L0", { "0", }, },
+        { "L1", { "1", }, },
+        { "L2", { "2", }, },
+      },
+      { "0", "1", "2", },
+      { { "A", { 2, 1, 0, } }, },
+      { { "A", "0", "2" }, }
+    },
+    {
+      {
+        { "L0", { "0", }, },
+        { "L1", { "1", }, },
+        { "L2", { "2", }, },
+      },
+      { "0", "1", "2", },
+      { { "A", { 6, 0, 0, } }, },
+      // TODO(aserbin): what about ordering?
+      {
+        { "A", "0", "2" },
+        { "A", "0", "1" },
+        { "A", "0", "1" },
+        { "A", "0", "2" },
+      }
+    },
+    {
+      {
+        { "L0", { "0", }, },
+        { "L1", { "1", }, },
+        { "L2", { "2", }, },
+      },
+      { "0", "1", "2", },
+      {
+        { "A", { 1, 0, 0, } },
+      },
+      {}
+    },
+  };
+  VERIFY_LOCATION_BALANCING_MOVES(kConfigs);
+}
+
+// A simple location-based rebalancing scenario, multiple tables.
+TEST(RebalanceAlgoUnitTest, LocationBalancingSimpleMT) {
+  const TestClusterConfig kConfigs[] = {
+    {
+      {
+        { "L0", { "0", }, },
+        { "L1", { "1", }, },
+        { "L2", { "2", }, },
+      },
+      { "0", "1", "2", },
+      {
+        { "A", { 2, 1, 1, } },
+        { "B", { 0, 0, 2, } },
+      },
+      { { "B", "2", "1" }, }
+    },
+    {
+      {
+        { "L0", { "0", }, },
+        { "L1", { "1", }, },
+        { "L2", { "2", }, },
+      },
+      { "0", "1", "2", },
+      {
+        { "A", { 2, 1, 0, } },
+        { "B", { 0, 0, 3, } },
+      },
+      {
+        { "B", "2", "1" },
+        { "B", "2", "0" },
+        { "A", "0", "2" },
+      }
+    },
+    {
+      {
+        { "L0", { "0", }, },
+        { "L1", { "1", }, },
+        { "L2", { "2", }, },
+      },
+      { "0", "1", "2", },
+      {
+        { "A", { 1, 0, 0, } },
+        { "B", { 1, 1, 2, } },
+        { "C", { 10, 9, 10, } },
+      },
+      {}
+    },
+  };
+  VERIFY_LOCATION_BALANCING_MOVES(kConfigs);
+}
+
 } // namespace tools
 } // namespace kudu
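
Note on the test helper above: the location bookkeeping in
ClusterConfigToClusterInfo() amounts to inverting a one-to-many map
(location -> servers) into a one-to-one map (server -> location). A minimal
standalone sketch of that inversion (plain STL only; the gutil EmplaceOrDie()
helper, which CHECK-fails on duplicates, is approximated with a plain
emplace()):

  #include <iostream>
  #include <set>
  #include <string>
  #include <unordered_map>

  int main() {
    // Location -> set of tablet server UUIDs, mirroring
    // TestClusterConfig::servers_by_location in the tests above.
    const std::unordered_map<std::string, std::set<std::string>>
        servers_by_location = {
          { "L0", { "0", "1" } },
          { "L1", { "2" } },
        };

    // Invert into tablet server UUID -> location. Well-formed input lists
    // every server in exactly one location; the actual test helper enforces
    // this with EmplaceOrDie().
    std::unordered_map<std::string, std::string> location_by_ts_id;
    for (const auto& elem : servers_by_location) {
      for (const auto& ts_id : elem.second) {
        location_by_ts_id.emplace(ts_id, elem.first);
      }
    }
    std::cout << location_by_ts_id.at("2") << std::endl;  // prints "L1"
    return 0;
  }

Keeping the inverted index alongside the original map trades a little memory
for constant-time lookup of a server's location, which the algorithm in
rebalance_algo.cc relies on.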

http://git-wip-us.apache.org/repos/asf/kudu/blob/87084c10/src/kudu/tools/rebalance_algo.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/rebalance_algo.cc b/src/kudu/tools/rebalance_algo.cc
index c788761..a006258 100644
--- a/src/kudu/tools/rebalance_algo.cc
+++ b/src/kudu/tools/rebalance_algo.cc
@@ -18,28 +18,35 @@
 #include "kudu/tools/rebalance_algo.h"
 
 #include <algorithm>
+#include <cmath>
+#include <functional>
 #include <iostream>
 #include <iterator>
 #include <limits>
 #include <random>
 #include <string>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 
 #include <boost/optional/optional.hpp>
 #include <glog/logging.h>
 
+#include "kudu/gutil/map-util.h"
 #include "kudu/gutil/port.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/util/status.h"
 
 using std::back_inserter;
+using std::endl;
+using std::multimap;
 using std::numeric_limits;
 using std::ostringstream;
 using std::set_intersection;
 using std::shuffle;
 using std::sort;
 using std::string;
+using std::unordered_map;
 using std::vector;
 using strings::Substitute;
 
@@ -396,5 +403,218 @@ Status TwoDimensionalGreedyAlgo::GetMinMaxLoadedServers(
   return Status::OK();
 }
 
+Status LocationBalancingAlgo::GetNextMove(
+    const ClusterInfo& cluster_info,
+    boost::optional<TableReplicaMove>* move) {
+  DCHECK(move);
+  *move = boost::none;
+
+  // Per-table information on locations load.
+  // TODO(aserbin): maybe, move this container into ClusterInfo?
+  unordered_map<string, multimap<double, string>> location_load_info_by_table;
+
+  // A dictionary to map location-wise load imbalance into table identifier.
+  // The most imbalanced tables come last.
+  multimap<double, string> table_id_by_load_imbalance;
+  for (const auto& elem : cluster_info.balance.table_info_by_skew) {
+    const auto& table_info = elem.second;
+    // Number of replicas of all tablets comprising the table, per location.
+    unordered_map<string, int32_t> replica_num_per_location;
+    for (const auto& elem : table_info.servers_by_replica_count) {
+      auto replica_count = elem.first;
+      const auto& ts_id = elem.second;
+      const auto& location =
+          FindOrDie(cluster_info.locality.location_by_ts_id, ts_id);
+      LookupOrEmplace(&replica_num_per_location, location, 0) += replica_count;
+    }
+    multimap<double, string> location_by_load;
+    for (const auto& elem : replica_num_per_location) {
+      const auto& location = elem.first;
+      double replica_num = static_cast<double>(elem.second);
+      auto ts_num = FindOrDie(cluster_info.locality.servers_by_location,
+                              location).size();
+      CHECK_NE(0, ts_num);
+      location_by_load.emplace(replica_num / ts_num, location);
+    }
+
+    const auto& table_id = table_info.table_id;
+    const auto load_min = location_by_load.cbegin()->first;
+    const auto load_max = location_by_load.crbegin()->first;
+    const auto imbalance = load_max - load_min;
+    DCHECK(!std::isnan(imbalance));
+    table_id_by_load_imbalance.emplace(imbalance, table_id);
+    EmplaceOrDie(&location_load_info_by_table,
+                 table_id, std::move(location_by_load));
+  }
+
+  string imbalanced_table_id;
+  if (!IsBalancingNeeded(table_id_by_load_imbalance, &imbalanced_table_id)) {
+    // Nothing to do: all tables are location-balanced enough.
+    return Status::OK();
+  }
+
+  // Work on the most location-wise unbalanced table first.
+  const auto& load_info = FindOrDie(
+      location_load_info_by_table, imbalanced_table_id);
+
+  vector<string> loc_loaded_least;
+  {
+    const auto min_range = load_info.equal_range(load_info.cbegin()->first);
+    for (auto it = min_range.first; it != min_range.second; ++it) {
+      loc_loaded_least.push_back(it->second);
+    }
+  }
+  DCHECK(!loc_loaded_least.empty());
+
+  vector<string> loc_loaded_most;
+  {
+    const auto max_range = load_info.equal_range(load_info.crbegin()->first);
+    for (auto it = max_range.first; it != max_range.second; ++it) {
+      loc_loaded_most.push_back(it->second);
+    }
+  }
+  DCHECK(!loc_loaded_most.empty());
+
+  if (PREDICT_FALSE(VLOG_IS_ON(1))) {
+    ostringstream s;
+    s << "[ ";
+    for (const auto& loc : loc_loaded_least) {
+      s << loc << " ";
+    }
+    s << "]";
+    VLOG(1) << "loc_loaded_least: " << s.str();
+
+    s.str("");
+    s << "[ ";
+    for (const auto& loc : loc_loaded_most) {
+      s << loc << " ";
+    }
+    s << "]";
+    VLOG(1) << "loc_leaded_most: " << s.str();
+  }
+
+  return FindBestMove(imbalanced_table_id, loc_loaded_least, loc_loaded_most,
+                      cluster_info, move);
+}
+
+bool LocationBalancingAlgo::IsBalancingNeeded(
+    const TableByLoadImbalance& imbalance_info,
+    string* most_imbalanced_table_id) {
+  if (PREDICT_FALSE(VLOG_IS_ON(1))) {
+    ostringstream ss;
+    ss << "Table imbalance report: " << endl;
+    for (const auto& elem : imbalance_info) {
+      ss << "  " << elem.second << ": " << elem.first << endl;
+    }
+    VLOG(1) << ss.str();
+  }
+
+  if (imbalance_info.empty()) {
+    // Nothing to do -- an empty cluster.
+    return false;
+  }
+
+  // Evaluate the maximum existing imbalance: is it possible to move replicas
+  // between tablet servers in different locations to make the skew less?
+  //
+  // TODO(aserbin): detect 'good enough' vs ideal cases, like (b) vs (a) in
+  //                the class-wide comment. In other words, find the minimum
+  //                load imbalance down to which it makes sense to try
+  //                cross-location rebalancing. Probably, it should be a policy
+  //                wrt what to prefer: ideal location-wide balance or minimum
+  //                number of replica moves between locations?
+  //
+  // The information on the most imbalanced table is in the last element
+  // of the map.
+  const auto it = imbalance_info.crbegin();
+  const auto imbalance = it->first;
+  if (imbalance > 1) {
+    *most_imbalanced_table_id = it->second;
+    return true;
+  }
+  return false;
+}
+
+// Given the set of the most and the least table-wise loaded locations, choose
+// the source and destination tablet server to move a replica of the specified
+// table to improve per-table location load balance as much as possible.
+Status LocationBalancingAlgo::FindBestMove(
+    const string& table_id,
+    const vector<string>& loc_loaded_least,
+    const vector<string>& loc_loaded_most,
+    const ClusterInfo& cluster_info,
+    boost::optional<TableReplicaMove>* move) {
+  // Among the available candidate locations, prefer those having the most and
+  // least loaded tablet servers in terms of total number of hosted replicas.
+  // The rationale is that the per-table location load is a relative metric
+  // (i.e. number of table replicas / number of tablet servers), but it's
+  // always beneficial to have less loaded servers in absolute terms.
+  //
+  // If there are multiple candidate tablet servers with the same extremum load,
+  // choose among them randomly.
+  //
+  // TODO(aserbin): implement fine-grained logic to select the best move among
+  //                the available candidates, if multiple choices are available.
+  //                For example, among candidates with the same number of
+  //                replicas, prefer candidates where the movement from one
+  //                server to another also improves the table-wise skew within
+  //                the destination location.
+  //
+
+  // Building auxiliary containers.
+  // TODO(aserbin): refactor and move some of those into the ClusterBalanceInfo.
+  typedef std::unordered_map<std::string, int32_t> ServerLoadMap;
+  ServerLoadMap load_by_ts;
+  for (const auto& elem : cluster_info.balance.servers_by_total_replica_count) {
+    EmplaceOrDie(&load_by_ts, elem.second, elem.first);
+  }
+
+  // Least loaded tablet servers from the destination locations.
+  multimap<int32_t, string> ts_id_by_load_least;
+  for (const auto& loc : loc_loaded_least) {
+    const auto& loc_ts_ids =
+        FindOrDie(cluster_info.locality.servers_by_location, loc);
+    for (const auto& ts_id : loc_ts_ids) {
+      ts_id_by_load_least.emplace(FindOrDie(load_by_ts, ts_id), ts_id);
+    }
+  }
+  // TODO(aserbin): separate into a function or lambda.
+  const auto min_load = ts_id_by_load_least.cbegin()->first;
+  const auto min_range = ts_id_by_load_least.equal_range(min_load);
+  auto it_min = min_range.first;
+#if 0
+  // TODO(aserbin): add randomness
+  const auto distance_min = distance(min_range.first, min_range.second);
+  std::advance(it_min, Uniform(distance_min));
+  CHECK_NE(min_range.second, it_min);
+#endif
+  const auto& dst_ts_id = it_min->second;
+
+  // Most loaded tablet servers from the source locations.
+  multimap<int32_t, string, std::greater<int32_t>> ts_id_by_load_most;
+  for (const auto& loc : loc_loaded_most) {
+    const auto& loc_ts_ids =
+        FindOrDie(cluster_info.locality.servers_by_location, loc);
+    for (const auto& ts_id : loc_ts_ids) {
+      ts_id_by_load_most.emplace(FindOrDie(load_by_ts, ts_id), ts_id);
+    }
+  }
+  const auto max_load = ts_id_by_load_most.cbegin()->first;
+  const auto max_range = ts_id_by_load_most.equal_range(max_load);
+  auto it_max = max_range.first;
+#if 0
+  // TODO(aserbin): add randomness
+  const auto distance_max = distance(max_range.first, max_range.second);
+  std::advance(it_max, Uniform(distance_max));
+  CHECK_NE(max_range.second, it_max);
+#endif
+  const auto& src_ts_id = it_max->second;
+  CHECK_NE(src_ts_id, dst_ts_id);
+
+  *move = { table_id, src_ts_id, dst_ts_id };
+
+  return Status::OK();
+}
+
 } // namespace tools
 } // namespace kudu
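
Note on the metric above: the per-table cross-location imbalance that
GetNextMove() computes is simply the spread between the most and the least
loaded locations, where a location's load is the table's replica count there
divided by the location's tablet server count. A self-contained sketch
(standalone code, not the Kudu API), using distribution (c) from the class
comment in rebalance_algo.h:

  #include <iostream>
  #include <map>
  #include <string>
  #include <unordered_map>

  int main() {
    // Tablet servers per location, per the class comment's example.
    const std::unordered_map<std::string, int> ts_num = {
      { "L0", 2 }, { "L1", 2 }, { "L2", 1 }, { "L3", 1 }, { "L4", 1 },
    };
    // Replicas of table T per location: distribution (c).
    const std::unordered_map<std::string, int> replica_num = {
      { "L0", 0 }, { "L1", 0 }, { "L2", 1 }, { "L3", 2 }, { "L4", 2 },
    };

    // A multimap keyed by load orders locations from the least to the most
    // loaded, just like 'location_by_load' in GetNextMove().
    std::multimap<double, std::string> location_by_load;
    for (const auto& elem : replica_num) {
      location_by_load.emplace(
          static_cast<double>(elem.second) / ts_num.at(elem.first),
          elem.first);
    }
    const double imbalance = location_by_load.crbegin()->first -
                             location_by_load.cbegin()->first;
    std::cout << "cross-location imbalance: " << imbalance << std::endl;
    return 0;
  }

This prints an imbalance of 2, so IsBalancingNeeded() would flag the table
(the current threshold is imbalance > 1), and FindBestMove() would pick a
source server in L_3 or L_4 and a destination server in L_0 or L_1.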

http://git-wip-us.apache.org/repos/asf/kudu/blob/87084c10/src/kudu/tools/rebalance_algo.h
----------------------------------------------------------------------
diff --git a/src/kudu/tools/rebalance_algo.h b/src/kudu/tools/rebalance_algo.h
index a32fdc7..af9dde0 100644
--- a/src/kudu/tools/rebalance_algo.h
+++ b/src/kudu/tools/rebalance_algo.h
@@ -81,10 +81,9 @@ struct ClusterLocalityInfo {
 };
 
 // Information on a cluster as input for various rebalancing algorithms.
-// As of now, contains only ClusterBalanceInfo, but ClusterLocalityInfo
-// is to be added once corresponding location-aware algorithms are implemented.
 struct ClusterInfo {
   ClusterBalanceInfo balance;
+  ClusterLocalityInfo locality;
 };
 
 // A directive to move some replica of a table between two tablet servers.
@@ -147,6 +146,7 @@ class TwoDimensionalGreedyAlgo : public RebalancingAlgo {
   explicit TwoDimensionalGreedyAlgo(
       EqualSkewOption opt = EqualSkewOption::PICK_RANDOM);
 
+ protected:
   Status GetNextMove(const ClusterInfo& cluster_info,
                      boost::optional<TableReplicaMove>* move) override;
 
@@ -154,6 +154,7 @@ class TwoDimensionalGreedyAlgo : public RebalancingAlgo {
   enum class ExtremumType { MAX, MIN, };
 
   FRIEND_TEST(RebalanceAlgoUnitTest, RandomizedTest);
+  FRIEND_TEST(RebalanceAlgoUnitTest, EmptyBalanceInfoGetNextMove);
   FRIEND_TEST(RebalanceAlgoUnitTest, EmptyClusterInfoGetNextMove);
 
   // Compute the intersection of the least or most loaded tablet servers for a
@@ -196,5 +197,90 @@ class TwoDimensionalGreedyAlgo : public RebalancingAlgo {
   std::mt19937 generator_;
 };
 
+// Algorithm to balance among locations in the cluster.
+//
+// Inter-location rebalancing aims to minimize the location load skew per table.
+// The idea is to equalize the density of the distribution of each table across
+// locations.
+//
+// Q: Why is it beneficial to equalize the density of table replicas across
+//    locations?
+// A: Assuming a homogeneous cluster (e.g., machines with the same
+//    hardware specs across the whole cluster) and a uniform distribution
+//    of requests among all tables in the cluster (the latter is
+//    questionable, but Kudu currently provides no way to specify any
+//    deviations anyway), this gives better utilization of the available
+//    hardware resources.
+//
+// NOTE: probably, in the future we might add a notion of some preference in
+//       table placements regarding selected locations.
+//
+// Q: What is per-table location load skew?
+// A: Consider number of replicas per location for tablets comprising
+//    a table T. Assume we have locations L_0, ..., L_n, where
+//    replica_num(T, L_0), ..., replica_num(T, L_n) are numbers of replicas
+//    of T's tablets at the corresponding locations. We want the following
+//    ratios to deviate as little as possible:
+//
+//    replica_num(T, L_0) / ts_num(L_0), ..., replica_num(T, L_n) / ts_num(L_n)
+//
+// ******* Some Examples *******
+//
+// Consider a table T of replication factor 5 and locations L_0, ..., L_4
+// with the following disposition of tablet servers:
+//
+//   ts_num(L_0): 2
+//   ts_num(L_1): 2
+//   ts_num(L_2): 1
+//   ts_num(L_3): 1
+//   ts_num(L_4): 1
+//
+// What distribution of replicas is preferred for a tablet t0 of table T?
+//  (a) { L_0: 1, L_1: 1, L_2: 1, L_3: 1, L_4: 1 }
+//          skew 0.5: { 0.5, 0.5, 1.0, 1.0, 1.0 }
+//
+//  (b) { L_0: 2, L_1: 2, L_2: 1, L_3: 0, L_4: 0 }
+//          skew 1.0 : { 1.0, 1.0, 1.0, 0.0, 0.0 }
+//
+// The main idea is to prevent moving tablets if the distribution is 'good
+// enough'. E.g., the distribution of (b) is acceptable if the rebalancer finds
+// the replicas already placed like that, and it should not try to move
+// the replicas to achieve the ideal distribution of (a).
+//
+// How about:
+//  (c) { L_0: 0, L_1: 0, L_2: 1, L_3: 2, L_4: 2 }
+//          skew 2.0: { 0.0, 0.0, 1.0, 2.0, 2.0 }
+//
+// We want to move replicas to make distribution (c) more balanced;
+// two moves give us the 'ideal' location-wise replica placement.
+class LocationBalancingAlgo : public RebalancingAlgo {
+ protected:
+  Status GetNextMove(const ClusterInfo& cluster_info,
+                     boost::optional<TableReplicaMove>* move) override;
+ private:
+  FRIEND_TEST(RebalanceAlgoUnitTest, RandomizedTest);
+  typedef std::multimap<double, std::string> TableByLoadImbalance;
+
+  // Check if any rebalancing is needed across cluster locations based on the
+  // information provided by the 'imbalance_info' parameter. Returns 'true'
+  // if rebalancing is needed, 'false' otherwise. Upon returning 'true',
+  // the identifier of the most cross-location imbalanced table is output into
+  // the 'most_imbalanced_table_id' parameter (which must not be null).
+  static bool IsBalancingNeeded(
+      const TableByLoadImbalance& imbalance_info,
+      std::string* most_imbalanced_table_id);
+
+  // Given the set of the most and the least table-wise loaded locations, choose
+  // the source and destination tablet server to move a replica of the specified
+  // table to improve per-table location load balance as much as possible.
+  // If no replica can be moved to balance the load, the 'move' output parameter
+  // is set to 'boost::none'.
+  Status FindBestMove(const std::string& table_id,
+      const std::vector<std::string>& loc_loaded_least,
+      const std::vector<std::string>& loc_loaded_most,
+      const ClusterInfo& cluster_info,
+      boost::optional<TableReplicaMove>* move);
+};
+
 } // namespace tools
 } // namespace kudu
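
Note on usage: as the unit tests above show, callers drive the algorithm
through the public RebalancingAlgo::GetNextMoves() entry point rather than
the now-protected GetNextMove(). A hedged sketch of that calling pattern
(it assumes a ClusterInfo populated elsewhere, and passes 0 as the limit to
request as many moves as balancing takes, matching the tests' usage):

  #include <vector>

  #include "kudu/tools/rebalance_algo.h"
  #include "kudu/util/status.h"

  namespace kudu {
  namespace tools {

  Status GetCrossLocationMoves(const ClusterInfo& ci,
                               std::vector<TableReplicaMove>* moves) {
    LocationBalancingAlgo algo;
    // Each resulting TableReplicaMove names the table and the source and
    // destination tablet server UUIDs for one replica movement.
    return algo.GetNextMoves(ci, 0, moves);
  }

  } // namespace tools
  } // namespace kudu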


[2/2] kudu git commit: [rebalancer] location-aware rebalancer (part 6/n)

Posted by al...@apache.org.
[rebalancer] location-aware rebalancer (part 6/n)

Added SetReplace() and CheckCompleteReplace() auxiliary functions.
A follow-up patch will start using those.

Change-Id: I80b560d70c4d7383ee89917a359b4bb2f41bfd31
Reviewed-on: http://gerrit.cloudera.org:8080/11747
Tested-by: Alexey Serbin <as...@cloudera.com>
Reviewed-by: Will Berkeley <wd...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/f731ea00
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/f731ea00
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/f731ea00

Branch: refs/heads/master
Commit: f731ea004590217fe21b133ed093a7e9d21e7d42
Parents: 87084c1
Author: Alexey Serbin <as...@cloudera.com>
Authored: Fri Oct 19 23:21:26 2018 -0700
Committer: Alexey Serbin <as...@cloudera.com>
Committed: Mon Oct 29 23:27:01 2018 +0000

----------------------------------------------------------------------
 src/kudu/tools/tool_replica_util.cc | 134 +++++++++++++++++++++++++++++++
 src/kudu/tools/tool_replica_util.h  |  23 ++++++
 2 files changed, 157 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/f731ea00/src/kudu/tools/tool_replica_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_replica_util.cc b/src/kudu/tools/tool_replica_util.cc
index ccec47e..45bac17 100644
--- a/src/kudu/tools/tool_replica_util.cc
+++ b/src/kudu/tools/tool_replica_util.cc
@@ -377,6 +377,140 @@ Status CheckCompleteMove(const vector<string>& master_addresses,
   return Status::OK();
 }
 
+Status SetReplace(const client::sp::shared_ptr<client::KuduClient>& client,
+                  const string& tablet_id,
+                  const string& ts_uuid,
+                  const boost::optional<int64_t>& cas_opid_idx,
+                  bool* cas_failed) {
+  // Set the 'cas_failed' output parameter to 'false' up front so that it
+  // carries a defined value even on an early return due to an error.
+  if (cas_failed) {
+    *cas_failed = false;
+  }
+  // Find this tablet's leader replica. We need its UUID and RPC address.
+  string leader_uuid;
+  HostPort leader_hp;
+  RETURN_NOT_OK(GetTabletLeader(client, tablet_id, &leader_uuid, &leader_hp));
+  unique_ptr<ConsensusServiceProxy> proxy;
+  RETURN_NOT_OK(BuildProxy(leader_hp.host(), leader_hp.port(), &proxy));
+
+  // Get information on current replication scheme: the move scenario depends
+  // on the replication scheme used.
+  bool is_343_scheme;
+  ConsensusStatePB cstate;
+  RETURN_NOT_OK(GetConsensusState(proxy, tablet_id, leader_uuid,
+                                  client->default_admin_operation_timeout(),
+                                  &cstate, &is_343_scheme));
+  // The 3-2-3 replica management scheme (pre-KUDU-1097) does not process
+  // the REPLACE attribute as expected.
+  if (!is_343_scheme) {
+    return Status::ConfigurationError(
+        "cluster is running in 3-2-3 management scheme");
+  }
+
+  // Check whether the REPLACE attribute is already set for the source replica.
+  for (const auto& peer : cstate.committed_config().peers()) {
+    if (peer.permanent_uuid() == ts_uuid && peer.attrs().replace()) {
+      // The replica is already marked with the REPLACE attribute.
+      return Status::OK();
+    }
+  }
+
+  BulkChangeConfigRequestPB req;
+  auto* change = req.add_config_changes();
+  change->set_type(MODIFY_PEER);
+  *change->mutable_peer()->mutable_permanent_uuid() = ts_uuid;
+  change->mutable_peer()->mutable_attrs()->set_replace(true);
+  consensus::ChangeConfigResponsePB resp;
+  RpcController rpc;
+  rpc.set_timeout(client->default_admin_operation_timeout());
+  req.set_dest_uuid(leader_uuid);
+  req.set_tablet_id(tablet_id);
+  if (cas_opid_idx) {
+    req.set_cas_config_opid_index(*cas_opid_idx);
+  }
+  RETURN_NOT_OK(proxy->BulkChangeConfig(req, &resp, &rpc));
+  if (resp.has_error()) {
+    if (resp.error().code() == tserver::TabletServerErrorPB::CAS_FAILED &&
+        cas_failed) {
+      *cas_failed = true;
+    }
+    return StatusFromPB(resp.error().status());
+  }
+  return Status::OK();
+}
+
+Status CheckCompleteReplace(const client::sp::shared_ptr<client::KuduClient>& client,
+                            const string& tablet_id,
+                            const string& ts_uuid,
+                            bool* is_complete,
+                            Status* completion_status) {
+  DCHECK(completion_status);
+  DCHECK(is_complete);
+  *is_complete = false;
+  // Get the latest leader info. It may change later, due to our actions or
+  // outside factors.
+  string leader_uuid;
+  HostPort leader_hp;
+  RETURN_NOT_OK(GetTabletLeader(client, tablet_id, &leader_uuid, &leader_hp));
+  unique_ptr<ConsensusServiceProxy> proxy;
+  RETURN_NOT_OK(BuildProxy(leader_hp.host(), leader_hp.port(), &proxy));
+
+  ConsensusStatePB cstate;
+  bool is_343_scheme;
+  RETURN_NOT_OK(GetConsensusState(proxy, tablet_id, leader_uuid,
+                                  client->default_admin_operation_timeout(),
+                                  &cstate, &is_343_scheme));
+  if (!is_343_scheme) {
+    return Status::ConfigurationError(
+        "cluster is not running in 3-4-3 replica management scheme");
+  }
+
+  bool is_all_voters = true;
+  for (const auto& peer : cstate.committed_config().peers()) {
+    if (peer.member_type() != RaftPeerPB::VOTER) {
+      is_all_voters = false;
+      break;
+    }
+  }
+
+  // Check if the replica slated for removal is still in the config.
+  bool ts_uuid_in_config = false;
+  for (const auto& peer : cstate.committed_config().peers()) {
+    if (peer.permanent_uuid() == ts_uuid) {
+      ts_uuid_in_config = true;
+      if (!peer.attrs().replace()) {
+        // Sanity check: the replica must have the REPLACE attribute set.
+        // Otherwise, something has changed in the middle and the replica will
+        // never be evicted, so it does not make sense to await its removal.
+        *is_complete = true;
+        *completion_status = Status::IllegalState(Substitute(
+            "$0: replica $1 does not have the REPLACE attribute set",
+            tablet_id, ts_uuid));
+      }
+      // There is not much sense in demoting the current leader if a newly
+      // added non-voter hasn't been promoted to voter yet: the former leader
+      // replica will not be evicted before the new non-voter replica is
+      // promoted to voter. Demoting the former leader too early might even
+      // delay the promotion of an already caught-up non-voter replica.
+      if (is_all_voters &&
+          leader_uuid == ts_uuid && leader_uuid == cstate.leader_uuid()) {
+        // The leader is the node we intend to remove; make it step down.
+        ignore_result(DoLeaderStepDown(tablet_id, leader_uuid, leader_hp,
+                                       LeaderStepDownMode::GRACEFUL, boost::none,
+                                       client->default_admin_operation_timeout()));
+      }
+      break;
+    }
+  }
+
+  if (!ts_uuid_in_config) {
+    *is_complete = true;
+    *completion_status = Status::OK();
+  }
+  return Status::OK();
+}
+
 
 Status ScheduleReplicaMove(const vector<string>& master_addresses,
                            const client::sp::shared_ptr<client::KuduClient>& client,

http://git-wip-us.apache.org/repos/asf/kudu/blob/f731ea00/src/kudu/tools/tool_replica_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_replica_util.h b/src/kudu/tools/tool_replica_util.h
index 07b47c0..6c13e01 100644
--- a/src/kudu/tools/tool_replica_util.h
+++ b/src/kudu/tools/tool_replica_util.h
@@ -117,6 +117,29 @@ Status CheckCompleteMove(
     bool* is_complete,
     Status* completion_status);
 
+// Set the REPLACE attribute for the specified tablet replica. This is a no-op
+// if the replica already has the REPLACE attribute set.
+Status SetReplace(const client::sp::shared_ptr<client::KuduClient>& client,
+                  const std::string& tablet_id,
+                  const std::string& ts_uuid,
+                  const boost::optional<int64_t>& cas_opid_idx,
+                  bool* cas_failed = nullptr);
+
+// Check whether the replica of tablet 'tablet_id' slated for replacement is
+// no longer hosted by the tablet server identified by 'ts_uuid'.
+// If there was a problem checking whether the replica is still in the config,
+// a non-OK status is returned. On successful removal of the replica from the
+// tablet server, Status::OK() is returned and the 'is_complete' output
+// parameter is set to 'true'. If the replica is still there but no error
+// occurred while checking its status in the config, Status::OK() is returned
+// and 'is_complete' is set to 'false'. The 'completion_status' parameter
+// contains valid information only if 'is_complete' is set to 'true'.
+Status CheckCompleteReplace(const client::sp::shared_ptr<client::KuduClient>& client,
+                            const std::string& tablet_id,
+                            const std::string& ts_uuid,
+                            bool* is_complete,
+                            Status* completion_status);
+
 // Schedule replica move operation for tablet with 'tablet_id', moving replica
 // from the tablet server 'from_ts_uuid' to tablet server 'to_ts_uuid'.
 Status ScheduleReplicaMove(
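
Note on usage: the follow-up patch that exercises these helpers is not part
of this commit, but the intended calling pattern follows from the contract
above: mark the replica with the REPLACE attribute via SetReplace(), then
poll CheckCompleteReplace() until the replica is evicted. A hedged sketch of
such a caller (hypothetical code, not from the patch; it assumes a connected
KuduClient, passes boost::none to skip the CAS check on the config opid
index, and elides any overall timeout):

  #include <string>

  #include <boost/optional/optional.hpp>

  #include "kudu/client/client.h"
  #include "kudu/client/shared_ptr.h"
  #include "kudu/tools/tool_replica_util.h"
  #include "kudu/util/monotime.h"
  #include "kudu/util/status.h"

  namespace kudu {
  namespace tools {

  Status ReplaceReplica(
      const client::sp::shared_ptr<client::KuduClient>& client,
      const std::string& tablet_id,
      const std::string& ts_uuid) {
    // Ask the tablet leader to mark the replica with the REPLACE attribute;
    // with boost::none there is no CAS on the config opid index.
    RETURN_NOT_OK(SetReplace(client, tablet_id, ts_uuid, boost::none));
    // Poll until the marked replica is evicted from the Raft config.
    while (true) {
      bool is_complete = false;
      Status completion_status;
      RETURN_NOT_OK(CheckCompleteReplace(client, tablet_id, ts_uuid,
                                         &is_complete, &completion_status));
      if (is_complete) {
        return completion_status;
      }
      SleepFor(MonoDelta::FromSeconds(1));
    }
  }

  } // namespace tools
  } // namespace kudu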