You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@couchdb.apache.org by GitBox <gi...@apache.org> on 2021/09/08 23:08:23 UTC

[GitHub] [couchdb] rnewson commented on a change in pull request #3734: Improve fabric_util get_db timeout logic

rnewson commented on a change in pull request #3734:
URL: https://github.com/apache/couchdb/pull/3734#discussion_r704834096



##########
File path: src/fabric/src/fabric_util.erl
##########
@@ -125,15 +128,40 @@ get_shard([#shard{node = Node, name = Name} | Rest], Opts, Timeout, Factor) ->
             throw(Error);
         {Ref, Reason} ->
             couch_log:debug("Failed to open shard ~p because: ~p", [Name, Reason]),
-            get_shard(Rest, Opts, Timeout, Factor)
+            FmtReason = lists:flatten(io_lib:format("~p", [Reason])),
+            get_shard(Rest, Opts, Timeout, Factor, FmtReason)
         after Timeout ->
             couch_log:debug("Failed to open shard ~p after: ~p", [Name, Timeout]),
-            get_shard(Rest, Opts, Factor * Timeout, Factor)
+            NextTimeout = min(request_timeout(), Factor * Timeout),
+            FmtReason = lists:flatten(io_lib:format("timeout ~B msec", [Timeout])),
+            get_shard(Rest, Opts, NextTimeout, Factor, FmtReason)
         end
     after
         rexi_monitor:stop(Mon)
     end.
 
+
+get_db_timeout(N, Factor, MinTimeout, MaxTimeout) ->
+    %
+    % The progression of timeouts forms a geometric series:
+    %
+    %     MaxTimeout = T + T*F + T*F^2 + T*F^3 ...
+    %
+    % Where T is the initial timeout and F is the factor. The formula for
+    % the sum is:
+    %
+    %     Sum[T * F^I, I <- 0..N] = T * (1 - F^(N + 1)) / (1 - F)
+    %
+    % Then, for a given sum and factor we can calculate the initial timeout T:
+    %
+    %     T = Sum / ((1 - F^(N+1)) / (1 - F))
+    %
+    Timeout = MaxTimeout / ((1 - math:pow(Factor, N + 1)) / (1 - Factor)),

Review comment:
       We have exponential backoff algorithms elsewhere (and maybe some simple linear ones too); the proliferation of different timeout algorithms is confusing.

##########
File path: src/fabric/src/fabric_util.erl
##########
@@ -100,19 +100,22 @@ get_db(DbName) ->
     get_db(DbName, []).
 
 get_db(DbName, Options) ->
-    {Local, SameZone, DifferentZone} = mem3:group_by_proximity(mem3:shards(DbName)),
+    LiveShards = mem3:live_shards(DbName, [node() | nodes()]),

Review comment:
       This change is unrelated to the PR topic.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org