You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2020/06/09 22:06:45 UTC

[couchdb] 01/01: In replicator, when rescheduling, pick only pending jobs which are not running

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch only-consider-non-running-job-in-replicator-pending-jobs-fold
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 345f11cf0f6957b909cfbc2ecfac8e2ada797095
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Tue Jun 9 17:53:56 2020 -0400

    In replicator, when rescheduling, pick only pending jobs which are not running
    
    Previously, when pending jobs were picked in the ets:foldl traversal, both
    running and non-running jobs were considered and a large number of running jobs
    could displace pending jobs in the accumulator. In the worst case, no crashed
    jobs would be restarted during rescheduling.
---
 src/couch_replicator/src/couch_replicator_scheduler.erl | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/couch_replicator/src/couch_replicator_scheduler.erl b/src/couch_replicator/src/couch_replicator_scheduler.erl
index 53c040e..641443a 100644
--- a/src/couch_replicator/src/couch_replicator_scheduler.erl
+++ b/src/couch_replicator/src/couch_replicator_scheduler.erl
@@ -456,6 +456,9 @@ pending_jobs(Count) when is_integer(Count), Count > 0 ->
     [Job || {_Started, Job} <- gb_sets:to_list(Set1)].
 
 
+pending_fold(#job{pid = Pid}, Acc) when is_pid(Pid) ->
+    Acc;
+
 pending_fold(Job, {Set, Now, Count, HealthThreshold}) ->
     Set1 = case {not_recently_crashed(Job, Now, HealthThreshold),
         gb_sets:size(Set) >= Count} of
@@ -1051,6 +1054,7 @@ scheduler_test_() ->
             [
                 t_pending_jobs_simple(),
                 t_pending_jobs_skip_crashed(),
+                t_pending_jobs_skip_running(),
                 t_one_job_starts(),
                 t_no_jobs_start_if_max_is_0(),
                 t_one_job_starts_if_max_is_1(),
@@ -1112,6 +1116,18 @@ t_pending_jobs_skip_crashed() ->
     end).
 
 
+t_pending_jobs_skip_running() ->
+   ?_test(begin
+        Job1 = continuous(1),
+        Job2 = continuous_running(2),
+        Job3 = oneshot(3),
+        Job4 = oneshot_running(4),
+        Jobs = [Job1, Job2, Job3, Job4],
+        setup_jobs(Jobs),
+        ?assertEqual([Job1, Job3], pending_jobs(4))
+    end).
+
+
 t_one_job_starts() ->
     ?_test(begin
         setup_jobs([oneshot(1)]),