You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2019/11/25 17:38:34 UTC

[kudu] 02/02: KUDU-3008 Spread replicas evenly with 2 locations and odd replica factors

This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 8a30baea1a89749d9bd7aab90ac34db031094dd3
Author: triplesheep <tr...@gmail.com>
AuthorDate: Fri Nov 22 15:04:25 2019 +0800

    KUDU-3008 Spread replicas evenly with 2 locations and odd replica factors
    
    In case of distributing 3 replicas among 2 locations, 1 + 2 is better
    than 3 + 0 because if all servers in the first location become unavailable,
    in the former case the tablet is still available, while in the latter it's
    not. Also, 1 + 2 is better than 0 + 3 because in case of catastrophic
    non-recoverable failure of the second location, no replica survives and
    all data is lost in the latter case, while in the former case there will
    be 1 replica and it may be used for manual data recovery.
    
    Change-Id: If96361f6be686679dd72f08e28d964eae25875d0
    Reviewed-on: http://gerrit.cloudera.org:8080/14783
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
 src/kudu/master/placement_policy-test.cc | 36 ++++++++++++++++++++++++++++++++
 src/kudu/master/placement_policy.cc      | 15 ++++++++-----
 2 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/src/kudu/master/placement_policy-test.cc b/src/kudu/master/placement_policy-test.cc
index caf16d3..f1569c6 100644
--- a/src/kudu/master/placement_policy-test.cc
+++ b/src/kudu/master/placement_policy-test.cc
@@ -843,6 +843,42 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasEmptyCluster_L2_EvenRFEdgeCase) {
   }
 }
 
+// Odd RF case: edge cases with 2 locaitons.
+// Make sure replicas are placed into both locations, even if ideal density
+// distribution would have them in a single location.
+TEST_F(PlacementPolicyTest, PlaceTabletReplicasCluster_L2_OddRFEdgeCase) {
+  const vector<LocationInfo> cluster_info = {
+      { "A", { { "A_ts0", 10 }, { "A_ts1", 10 }, { "A_ts2", 10 }, } },
+      { "B", { { "B_ts0", 9 }, { "B_ts1", 9 }, { "B_ts2", 9 }, } },
+  };
+  ASSERT_OK(Prepare(cluster_info));
+
+  const auto& all = descriptors();
+  PlacementPolicy policy(all, rng());
+
+  {
+    static constexpr auto num_replicas = 3;
+    TSDescriptorVector result;
+    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, none, &result));
+    ASSERT_EQ(num_replicas, result.size());
+    TSDescriptorsMap m;
+    ASSERT_OK(TSDescriptorVectorToMap(result, &m));
+    ASSERT_EQ(1, m.count("A_ts0") + m.count("A_ts1") + m.count("A_ts2"));
+    ASSERT_EQ(2, m.count("B_ts0") + m.count("B_ts1") + m.count("B_ts2"));
+  }
+
+  {
+    static constexpr auto num_replicas = 5;
+    TSDescriptorVector result;
+    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, none, &result));
+    ASSERT_EQ(num_replicas, result.size());
+    TSDescriptorsMap m;
+    ASSERT_OK(TSDescriptorVectorToMap(result, &m));
+    ASSERT_EQ(2, m.count("A_ts0") + m.count("A_ts1") + m.count("A_ts2"));
+    ASSERT_EQ(3, m.count("B_ts0") + m.count("B_ts1") + m.count("B_ts2"));
+  }
+}
+
 // Even RF case: place tablet replicas into a cluster with 3 locations.
 TEST_F(PlacementPolicyTest, PlaceTabletReplicasEmptyCluster_L3_EvenRF) {
   const vector<LocationInfo> cluster_info = {
diff --git a/src/kudu/master/placement_policy.cc b/src/kudu/master/placement_policy.cc
index c81a8ce..4b78076 100644
--- a/src/kudu/master/placement_policy.cc
+++ b/src/kudu/master/placement_policy.cc
@@ -334,11 +334,16 @@ Status PlacementPolicy::SelectLocation(
       // in case of 2 locations the best placement for 4 replicas would be
       // (2 + 2), while in case of 4 and more locations that's (1 + 1 + 1 + 1).
       // Similarly, in case of 2 locations and 6 replicas, the best placement
-      // is (3 + 3), while for 3 locations that's (2 + 2 + 2).
-      if ((num_locations == 2 && num_replicas % 2 == 0 &&
-           location_replicas_num + 1 > num_replicas / 2) ||
-          (num_locations > 2 &&
-           location_replicas_num + 1 >= (num_replicas + 1) / 2)) {
+      // is (3 + 3), while for 3 locations that's (2 + 2 + 2). In case of
+      // distributing 3 replicas among 2 locations, 1 + 2 is better than 3 + 0
+      // because if all servers in the first location become unavailable, in the
+      // former case the tablet is still available, while in the latter it's not.
+      // Also, 1 + 2 is better than 0 + 3 because in case of catastrophic
+      // non-recoverable failure of the second location, no replica survives and
+      // all data is lost in the latter case, while in the former case there will
+      // be 1 replica and it may be used for manual data recovery.
+      if ((num_locations == 2 && location_replicas_num + 1 > num_replicas / 2) ||
+          (num_locations > 2 && location_replicas_num + 1 >= (num_replicas + 1) / 2)) {
         // If possible, avoid placing the majority of the tablet's replicas
         // into a single location even if load-based criterion would favor that.
         // Prefer such a distribution of replicas that will keep the majority