You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by rn...@apache.org on 2014/08/01 11:11:00 UTC

[14/48] mem3 commit: updated refs/heads/windsor-merge to ff02b9a

Add function to assist with rebalancing

This function takes either a database name or a list of shards and a
list of target nodes to balance the shards across. Every node with
less than a fair share of shards will steal shards from the node with
the most shards as long as both shards are in the same zone.

BugzID: 18638


Project: http://git-wip-us.apache.org/repos/asf/couchdb-mem3/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-mem3/commit/d2171e9b
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-mem3/tree/d2171e9b
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-mem3/diff/d2171e9b

Branch: refs/heads/windsor-merge
Commit: d2171e9b4d7cfe78858a6139cdf74d79607fe3e6
Parents: 43cd763
Author: Robert Newson <ro...@cloudant.com>
Authored: Wed Apr 3 20:13:35 2013 +0100
Committer: Robert Newson <rn...@apache.org>
Committed: Wed Jul 23 18:46:25 2014 +0100

----------------------------------------------------------------------
 src/mem3_rebalance.erl | 81 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-mem3/blob/d2171e9b/src/mem3_rebalance.erl
----------------------------------------------------------------------
diff --git a/src/mem3_rebalance.erl b/src/mem3_rebalance.erl
new file mode 100644
index 0000000..ca3c4a7
--- /dev/null
+++ b/src/mem3_rebalance.erl
@@ -0,0 +1,81 @@
+% Copyright 2013 Cloudant
+%
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(mem3_rebalance).
+
+-export([rebalance/1, rebalance/2]).
+-include("mem3.hrl").
+
+rebalance(DbName) ->
+    rebalance(DbName, mem3:nodes()).
+
+rebalance(DbName, TargetNodes) when is_binary(DbName) ->
+    rebalance(mem3:shards(DbName), TargetNodes);
+rebalance(Shards, TargetNodes) when is_list(Shards) ->
+    TargetLevel = length(Shards) div length(TargetNodes),
+    rebalance2(TargetLevel, Shards, TargetNodes, TargetNodes, []).
+
+rebalance2(_TargetLevel, Shards, _Nodes, [], Moves) ->
+    {Shards, Moves};
+rebalance2(TargetLevel, Shards, Nodes, [Node | Rest], Moves) ->
+    ShardsForNode = [S || S <- Shards, S#shard.node =:= Node],
+    CurrentLevel = length(ShardsForNode),
+    case CurrentLevel < TargetLevel of
+        true ->
+            case victim(TargetLevel, Shards, Nodes, Node) of
+                {ok, Victim} ->
+                    rebalance2(TargetLevel,
+                             replace(Victim, Victim#shard{node=Node}, Shards),
+                             Nodes, [Node|Rest], [{Victim, Node}|Moves]);
+                false ->
+                    rebalance2(TargetLevel, Shards, Nodes, Rest, Moves)
+            end;
+        false ->
+            rebalance2(TargetLevel, Shards, Nodes, Rest, Moves)
+    end.
+
+victim(TargetLevel, Shards, Nodes, TargetNode) ->
+    TargetZone = mem3:node_info(TargetNode, <<"zone">>),
+    CandidateNodes = lists:usort([Node || Node <- Nodes,
+                                     Node =/= TargetNode,
+                                     mem3:node_info(Node, <<"zone">>) =:= TargetZone]),
+    %% make {Node, ShardsInNode} list
+    GroupedByNode0 = [{Node, [S || S <- Shards, S#shard.node =:= Node]} || Node <- CandidateNodes],
+    %% don't take from a node below target level
+    GroupedByNode1 = [{N, SS} || {N, SS} <- GroupedByNode0, length(SS) > TargetLevel],
+    %% prefer to take from a node with more shards than others
+    GroupedByNode2 = lists:sort(fun largest_first/2, GroupedByNode1),
+    %% don't take a shard for a range the target already has
+    TargetRanges = lists:usort([S#shard.range || S <- Shards, S#shard.node =:= TargetNode]),
+    GroupedByNode3 = [{N, lists:filter(fun(S) -> not lists:member(S#shard.range, TargetRanges) end, SS)}
+                      || {N, SS} <- GroupedByNode2],
+    %% remove nodes with no candidates shards
+    GroupedByNode4 = [{N, SS} || {N, SS} <- GroupedByNode3, SS =/= []],
+    case GroupedByNode4 of
+        [{_, [Victim|_]} | _] -> {ok, Victim};
+        [] -> false
+    end.
+
+largest_first({_, A}, {_, B}) ->
+    length(A) >= length(B).
+
+replace(A, B, List) ->
+    replace(A, B, List, []).
+
+replace(_A, _B, [], Acc) ->
+    Acc;
+replace(A, B, [A | Rest], Acc) ->
+    replace(A, B, Rest, [B | Acc]);
+replace(A, B, [C | Rest], Acc) ->
+    replace(A, B, Rest, [C | Acc]).