You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by rn...@apache.org on 2022/08/17 21:05:10 UTC

[couchdb] 05/05: separate follower and candidate timeouts

This is an automated email from the ASF dual-hosted git repository.

rnewson pushed a commit to branch raft_storemodule
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 725b2cdc1bff165ec4cfb4512440edaee575a470
Author: Robert Newson <rn...@apache.org>
AuthorDate: Wed Aug 17 21:57:53 2022 +0100

    separate follower and candidate timeouts
    
    From:
    
    ARC: Analysis of Raft Consensus - 4.2
    
    "As the authors use the same timer range for candidates and followers,
    in Figure 4.1 we are waiting a minimum of 150ms (and up to twice that)
    before restarting an election, despite the fact that, on average, a
    node receives all of its responses within 15ms"
    
    We separate the timeouts and make the candidate timeout (16-30ms)
    much shorter than the follower timeout (151-300ms). In a contested
    election (where multiple candidates each gain only a minority of
    votes) candidates therefore retry sooner, so we should elect a
    leader faster than we would with a single shared timeout.
---
 src/couch/src/couch_raft.erl | 41 +++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/src/couch/src/couch_raft.erl b/src/couch/src/couch_raft.erl
index 06025784e..98fb6f926 100644
--- a/src/couch/src/couch_raft.erl
+++ b/src/couch/src/couch_raft.erl
@@ -15,9 +15,6 @@
 -module(couch_raft).
 -behaviour(gen_statem).
 
--define(ELECTION_DELAY, 150).
--define(ELECTION_SPLAY, 150).
--define(LEADER_HEARTBEAT, 75).
 -define(CLIENT_TIMEOUT, 5_000).
 
 % maximum number of entries to send in one go.
@@ -78,12 +75,12 @@ handle_event(enter, _OldState, follower, Data) ->
     couch_log:notice("~p became follower in term ~B", [node(), Term]),
     Replies = [{reply, From, {error, deposed}} || From <- maps:values(Froms)],
     persist({keep_state, maps:without([nextIndex, matchIndex], Data#{votedFor => undefined, votesGranted => undefined, froms => #{}}),
-        [restart_election_timeout() | Replies]});
+        [state_timeout(follower) | Replies]});
 
 handle_event(enter, _OldState, candidate, Data) ->
     #{term := Term} = Data,
     couch_log:notice("~p became candidate in term ~B", [node(), Term]),
-    persist({keep_state, start_election(Data), restart_election_timeout()});
+    persist({keep_state, start_election(Data), state_timeout(candidate)});
 
 handle_event(enter, _OldState, leader, Data) ->
     #{store_module := StoreModule, cohort := Cohort, term := Term} = Data,
@@ -93,9 +90,9 @@ handle_event(enter, _OldState, leader, Data) ->
     {keep_state, Data#{
         nextIndex => maps:from_list([{Peer, LastIndex + 1} || Peer <- Peers]),
         matchIndex => maps:from_list([{Peer, 0} || Peer <- Peers])
-    }, restart_heartbeat_timeout()};
+    }, state_timeout(leader)};
 
-handle_event(cast, #{type := 'RequestVoteRequest', term := Term} = Msg, _State, #{term := CurrentTerm} = Data)
+handle_event(cast, #{type := 'RequestVoteRequest', term := Term} = Msg, State, #{term := CurrentTerm} = Data)
   when Term =< CurrentTerm ->
     #{
         source := MSource,
@@ -119,9 +116,9 @@ handle_event(cast, #{type := 'RequestVoteRequest', term := Term} = Msg, _State,
     cast(MSource, Reply, Data),
     if
         Grant ->
-            persist({keep_state, Data#{votedFor => MSource}, restart_election_timeout()});
+            persist({keep_state, Data#{votedFor => MSource}, state_timeout(State)});
         true ->
-            {keep_state_and_data, restart_election_timeout()}
+            {keep_state_and_data, state_timeout(State)}
     end;
 
 handle_event(cast, #{type := 'RequestVoteResponse', term := PastTerm}, _State, #{term := CurrentTerm}) when PastTerm < CurrentTerm ->
@@ -171,7 +168,7 @@ handle_event(cast, #{type := 'AppendEntriesRequest', term := Term} = Msg, State,
                 State == leader ->
                     keep_state_and_data;
                 true ->
-                    {keep_state_and_data, restart_election_timeout()}
+                    {keep_state_and_data, state_timeout(State)}
             end;
         Term == CurrentTerm andalso State == candidate ->
             {next_state, follower, Data, {next_event, cast, Msg}};
@@ -187,7 +184,7 @@ handle_event(cast, #{type := 'AppendEntriesRequest', term := Term} = Msg, State,
                     },
                     couch_log:debug("~p received heartbeat and everything matches, sending matchIndex:~p", [node(), MPrevLogIndex]),
                     cast(MSource, Reply, Data),
-                    {keep_state, update_state_machine(Data#{commitIndex => MCommitIndex}), restart_election_timeout()};
+                    {keep_state, update_state_machine(Data#{commitIndex => MCommitIndex}), state_timeout(State)};
                 true ->
                     Index = MPrevLogIndex + 1,
                     if
@@ -205,12 +202,12 @@ handle_event(cast, #{type := 'AppendEntriesRequest', term := Term} = Msg, State,
                                     },
                                     couch_log:notice("~p received entry:~p that's already applied, sending matchIndex:~p", [node(), MEntries, MPrevLogIndex + length(MEntries)]),
                                     cast(MSource, Reply, Data),
-                                    {keep_state, update_state_machine(Data#{commitIndex => MCommitIndex}), restart_election_timeout()};
+                                    {keep_state, update_state_machine(Data#{commitIndex => MCommitIndex}), state_timeout(State)};
                                 NthLogTerm /= FirstEntryTerm ->
                                     couch_log:notice("~p received conflicting entry:~p, deleting it", [node(), MEntries]),
                                     case StoreModule:truncate(LastIndex - 1, Data) of
                                         {ok, NewData} ->
-                                            {keep_state, NewData, [{next_event, cast, Msg}, restart_election_timeout()]};
+                                            {keep_state, NewData, [{next_event, cast, Msg}, state_timeout(State)]};
                                         {error, Reason} ->
                                             {stop, Reason}
                                     end
@@ -219,7 +216,7 @@ handle_event(cast, #{type := 'AppendEntriesRequest', term := Term} = Msg, State,
                             couch_log:notice("~p received new entries:~p, appending it to log", [node(), MEntries]),
                             case StoreModule:append(MEntries, Data) of
                                 {ok, _EntryIndex, NewData} ->
-                                    {keep_state, NewData, [{next_event, cast, Msg}, restart_election_timeout()]};
+                                    {keep_state, NewData, [{next_event, cast, Msg}, state_timeout(State)]};
                                 {error, Reason} ->
                                     {stop, Reason}
                             end
@@ -268,13 +265,13 @@ handle_event({call, From}, #{type := 'ClientRequest'}, _State, _Data) ->
 handle_event(state_timeout, new_election, State, Data) when State == follower; State == candidate ->
     #{term := Term} = Data,
     couch_log:notice("~p election timeout in state ~p, term ~B", [node(), State, Term]),
-    persist({next_state, candidate, start_election(Data), restart_election_timeout()});
+    persist({next_state, candidate, start_election(Data), state_timeout(State)});
 
 handle_event(state_timeout, heartbeat, leader, Data) ->
     #{term := Term} = Data,
     couch_log:debug("~p leader sending a heartbeat in term ~B", [node(), Term]),
     ok = send_append_entries(Data),
-    {keep_state, advance_commit_index(Data), restart_heartbeat_timeout()};
+    {keep_state, advance_commit_index(Data), state_timeout(leader)};
 
 handle_event(EventType, EventContent, State, Data) ->
     {stop, {unknown_event, EventType, EventContent, State, Data}}.
@@ -360,11 +357,15 @@ start_election(Data) ->
 cast(Node, Msg, #{name := Name}) ->
     gen_statem:cast({Name, Node}, Msg).
 
-restart_election_timeout() ->
-    {state_timeout, ?ELECTION_DELAY + rand:uniform(?ELECTION_SPLAY), new_election}.
 
-restart_heartbeat_timeout() ->
-    {state_timeout, ?LEADER_HEARTBEAT, heartbeat}.
+state_timeout(follower) ->
+    {state_timeout, 150 + rand:uniform(150), new_election};
+
+state_timeout(candidate) ->
+    {state_timeout, 15 + rand:uniform(15), new_election};
+
+state_timeout(leader) ->
+    {state_timeout, 75, heartbeat}.
 
 peers(Cohort) ->
     Cohort -- [node()].