You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2019/11/25 17:38:34 UTC
[kudu] 02/02: KUDU-3008 Spread replicas evenly with 2 locations and
odd replica factors
This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 8a30baea1a89749d9bd7aab90ac34db031094dd3
Author: triplesheep <tr...@gmail.com>
AuthorDate: Fri Nov 22 15:04:25 2019 +0800
KUDU-3008 Spread replicas evenly with 2 locations and odd replica factors
In case of distributing 3 replicas among 2 locations, 1 + 2 is better
than 3 + 0 because if all servers in the first location become unavailable,
in the former case the tablet is still available, while in the latter it's
not. Also, 1 + 2 is better than 0 + 3 because in case of catastrophic
non-recoverable failure of the second location, no replica survives and
all data is lost in the latter case, while in the former case there will
be 1 replica and it may be used for manual data recovery.
Change-Id: If96361f6be686679dd72f08e28d964eae25875d0
Reviewed-on: http://gerrit.cloudera.org:8080/14783
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
src/kudu/master/placement_policy-test.cc | 36 ++++++++++++++++++++++++++++++++
src/kudu/master/placement_policy.cc | 15 ++++++++-----
2 files changed, 46 insertions(+), 5 deletions(-)
diff --git a/src/kudu/master/placement_policy-test.cc b/src/kudu/master/placement_policy-test.cc
index caf16d3..f1569c6 100644
--- a/src/kudu/master/placement_policy-test.cc
+++ b/src/kudu/master/placement_policy-test.cc
@@ -843,6 +843,42 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasEmptyCluster_L2_EvenRFEdgeCase) {
}
}
+// Odd RF case: edge cases with 2 locaitons.
+// Make sure replicas are placed into both locations, even if ideal density
+// distribution would have them in a single location.
+TEST_F(PlacementPolicyTest, PlaceTabletReplicasCluster_L2_OddRFEdgeCase) {
+ const vector<LocationInfo> cluster_info = {
+ { "A", { { "A_ts0", 10 }, { "A_ts1", 10 }, { "A_ts2", 10 }, } },
+ { "B", { { "B_ts0", 9 }, { "B_ts1", 9 }, { "B_ts2", 9 }, } },
+ };
+ ASSERT_OK(Prepare(cluster_info));
+
+ const auto& all = descriptors();
+ PlacementPolicy policy(all, rng());
+
+ {
+ static constexpr auto num_replicas = 3;
+ TSDescriptorVector result;
+ ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, none, &result));
+ ASSERT_EQ(num_replicas, result.size());
+ TSDescriptorsMap m;
+ ASSERT_OK(TSDescriptorVectorToMap(result, &m));
+ ASSERT_EQ(1, m.count("A_ts0") + m.count("A_ts1") + m.count("A_ts2"));
+ ASSERT_EQ(2, m.count("B_ts0") + m.count("B_ts1") + m.count("B_ts2"));
+ }
+
+ {
+ static constexpr auto num_replicas = 5;
+ TSDescriptorVector result;
+ ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, none, &result));
+ ASSERT_EQ(num_replicas, result.size());
+ TSDescriptorsMap m;
+ ASSERT_OK(TSDescriptorVectorToMap(result, &m));
+ ASSERT_EQ(2, m.count("A_ts0") + m.count("A_ts1") + m.count("A_ts2"));
+ ASSERT_EQ(3, m.count("B_ts0") + m.count("B_ts1") + m.count("B_ts2"));
+ }
+}
+
// Even RF case: place tablet replicas into a cluster with 3 locations.
TEST_F(PlacementPolicyTest, PlaceTabletReplicasEmptyCluster_L3_EvenRF) {
const vector<LocationInfo> cluster_info = {
diff --git a/src/kudu/master/placement_policy.cc b/src/kudu/master/placement_policy.cc
index c81a8ce..4b78076 100644
--- a/src/kudu/master/placement_policy.cc
+++ b/src/kudu/master/placement_policy.cc
@@ -334,11 +334,16 @@ Status PlacementPolicy::SelectLocation(
// in case of 2 locations the best placement for 4 replicas would be
// (2 + 2), while in case of 4 and more locations that's (1 + 1 + 1 + 1).
// Similarly, in case of 2 locations and 6 replicas, the best placement
- // is (3 + 3), while for 3 locations that's (2 + 2 + 2).
- if ((num_locations == 2 && num_replicas % 2 == 0 &&
- location_replicas_num + 1 > num_replicas / 2) ||
- (num_locations > 2 &&
- location_replicas_num + 1 >= (num_replicas + 1) / 2)) {
+ // is (3 + 3), while for 3 locations that's (2 + 2 + 2). In case of
+ // distributing 3 replicas among 2 locations, 1 + 2 is better than 3 + 0
+ // because if all servers in the first location become unavailable, in the
+ // former case the tablet is still available, while in the latter it's not.
+ // Also, 1 + 2 is better than 0 + 3 because in case of catastrophic
+ // non-recoverable failure of the second location, no replica survives and
+ // all data is lost in the latter case, while in the former case there will
+ // be 1 replica and it may be used for manual data recovery.
+ if ((num_locations == 2 && location_replicas_num + 1 > num_replicas / 2) ||
+ (num_locations > 2 && location_replicas_num + 1 >= (num_replicas + 1) / 2)) {
// If possible, avoid placing the majority of the tablet's replicas
// into a single location even if load-based criterion would favor that.
// Prefer such a distribution of replicas that will keep the majority