You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2021/04/26 20:09:41 UTC

[couchdb] 01/02: Improve tx retry resilience when transactions restart

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch improve-retry-handling-in-dbs-endpoints
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 12c34bb5ad970100b08b80cbbb374d07fa8bd485
Author: Nick Vatamaniuc <va...@gmail.com>
AuthorDate: Mon Apr 26 15:25:42 2021 -0400

    Improve tx retry resilience when transactions restart
    
    When running integration tests with a "buggified" client [1],
    `fold_range_not_progressing` is sometimes triggered because retriable errors
    can be thrown 3 times in a row, which exhausts the hard-coded
    `MAX_FOLD_RANGE_RETRIES` limit of 3. Instead of bumping that limit
    arbitrarily, use the retry limit we already have in fabric2_server.
    
    [1] ```
     ERL_ZFLAGS="-erlfdb network_options '[client_buggify_enable, {client_buggify_section_activated_probability, 25}, {client_buggify_section_fired_probability, 25}]'" make elixir tests=test/elixir/test/basics_test.exs
    ```
---
 src/fabric/src/fabric2_fdb.erl                 | 6 ++----
 src/fabric/src/fabric2_server.erl              | 9 ++++++++-
 src/fabric/test/fabric2_changes_fold_tests.erl | 4 ++++
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl
index a4c3f89..e86b037 100644
--- a/src/fabric/src/fabric2_fdb.erl
+++ b/src/fabric/src/fabric2_fdb.erl
@@ -91,9 +91,6 @@
 -include("fabric2.hrl").
 
 
--define(MAX_FOLD_RANGE_RETRIES, 3).
-
-
 -record(fold_acc, {
     db,
     restart_tx,
@@ -1882,10 +1879,11 @@ restart_fold(Tx, #fold_acc{} = Acc) ->
 
     ok = erlfdb:reset(Tx),
 
+    MaxRetries = fabric2_server:get_retry_limit(),
     case {erase(?PDICT_FOLD_ACC_STATE), Acc#fold_acc.retries} of
         {#fold_acc{db = Db} = Acc1, _} ->
             Acc1#fold_acc{db = check_db_instance(Db), retries = 0};
-        {undefined, Retries} when Retries < ?MAX_FOLD_RANGE_RETRIES ->
+        {undefined, Retries} when Retries < MaxRetries ->
             Db = check_db_instance(Acc#fold_acc.db),
             Acc#fold_acc{db = Db, retries = Retries + 1};
         {undefined, _} ->
diff --git a/src/fabric/src/fabric2_server.erl b/src/fabric/src/fabric2_server.erl
index e427f20..8a4a8d8 100644
--- a/src/fabric/src/fabric2_server.erl
+++ b/src/fabric/src/fabric2_server.erl
@@ -27,7 +27,8 @@
     maybe_remove/1,
 
     fdb_directory/0,
-    fdb_cluster/0
+    fdb_cluster/0,
+    get_retry_limit/0
 ]).
 
 
@@ -194,6 +195,12 @@ fdb_directory() ->
 fdb_cluster() ->
     get_env(?FDB_CLUSTER).
 
+
+get_retry_limit() ->
+    Default = list_to_integer(?DEFAULT_RETRY_LIMIT),
+    config:get_integer(?TX_OPTIONS_SECTION, "retry_limit", Default).
+
+
 get_env(Key) ->
     case get(Key) of
         undefined ->
diff --git a/src/fabric/test/fabric2_changes_fold_tests.erl b/src/fabric/test/fabric2_changes_fold_tests.erl
index a8578f9..2f63883 100644
--- a/src/fabric/test/fabric2_changes_fold_tests.erl
+++ b/src/fabric/test/fabric2_changes_fold_tests.erl
@@ -81,6 +81,7 @@ changes_fold_test_() ->
 setup_all() ->
     Ctx = test_util:start_couch([fabric]),
     meck:new(erlfdb, [passthrough]),
+    meck:new(fabric2_server, [passthrough]),
     Ctx.
 
 
@@ -91,6 +92,7 @@ teardown_all(Ctx) ->
 
 setup() ->
     fabric2_test_util:tx_too_old_mock_erlfdb(),
+    meck:expect(fabric2_server, get_retry_limit, 0, 3),
     {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]),
     Rows = lists:map(fun(Val) ->
         DocId = fabric2_util:uuid(),
@@ -111,6 +113,8 @@ setup() ->
 
 
 cleanup({Db, _DocIdRevs}) ->
+    meck:reset(fabric2_server),
+    meck:expect(fabric2_server, get_retry_limit, 0, meck:passthrough()),
     fabric2_test_util:tx_too_old_reset_errors(),
     ok = fabric2_db:delete(fabric2_db:name(Db), []).