Posted to commits@couchdb.apache.org by va...@apache.org on 2019/11/01 19:30:08 UTC

[couchdb] branch handle-replication-process-restart-better updated (37a083e -> 89dc385)

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a change to branch handle-replication-process-restart-better
in repository https://gitbox.apache.org/repos/asf/couchdb.git.


 discard 37a083e  Do not mark replication jobs as failed if doc processor crashes
     new 89dc385  Do not mark replication jobs as failed if doc processor crashes

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs when a
user force-pushes a change, leaving the repository with a history
like this:

 * -- * -- B -- O -- O -- O   (37a083e)
            \
             N -- N -- N   refs/heads/handle-replication-process-restart-better (89dc385)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/couch_replicator/src/couch_replicator_doc_processor.erl | 1 -
 1 file changed, 1 deletion(-)


[couchdb] 01/01: Do not mark replication jobs as failed if doc processor crashes

Posted by va...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch handle-replication-process-restart-better
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 89dc3852a1f1df4483dd1194dfa3561c16b0d4d3
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Fri Nov 1 13:46:44 2019 -0400

    Do not mark replication jobs as failed if doc processor crashes
    
    Previously, if couch_replicator_doc_processor crashed, the job was marked as
    "failed". We now ignore that case. It's safe to do so since the supervisor
    will restart the processor anyway, and it will then rescan all the docs
    again. Most of all, we want to prevent the job from becoming permanently
    failed and needing manual intervention to restart it.
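
For context, the exit the new catch clause matches is the standard shape that
gen_server:call/2,3 raises in the caller when the server is down or dies
mid-call; a minimal shell sketch (nonexistent_server is a hypothetical name,
used only for illustration):

    %% Calling a gen_server that is not running exits the caller with
    %% {Reason, {gen_server, call, [Name, Request, Timeout]}} -- the exact
    %% tuple shape the new catch clause pattern-matches on.
    1> catch gen_server:call(nonexistent_server, ping, 100).
    {'EXIT',{noproc,{gen_server,call,[nonexistent_server,ping,100]}}}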
---
 .../src/couch_replicator_doc_processor.erl         | 29 +++++++++++++++++++---
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/src/couch_replicator/src/couch_replicator_doc_processor.erl b/src/couch_replicator/src/couch_replicator_doc_processor.erl
index 772037d..23cdeea 100644
--- a/src/couch_replicator/src/couch_replicator_doc_processor.erl
+++ b/src/couch_replicator/src/couch_replicator_doc_processor.erl
@@ -101,6 +101,9 @@ db_change(DbName, {ChangeProps} = Change, Server) ->
     try
         ok = process_change(DbName, Change)
     catch
+    exit:{Error, {gen_server, call, [?MODULE, _, _]}} ->
+        ErrMsg = "~p exited ~p while processing change from db ~p",
+        couch_log:error(ErrMsg, [?MODULE, Error, DbName]);
     _Tag:Error ->
         {RepProps} = get_json_value(doc, ChangeProps),
         DocId = get_json_value(<<"_id">>, RepProps),
@@ -611,6 +614,7 @@ cluster_membership_foldl(#rdoc{id = {DbName, DocId} = Id, rid = RepId}, nil) ->
 -include_lib("eunit/include/eunit.hrl").
 
 -define(DB, <<"db">>).
+-define(EXIT_DB, <<"exit_db">>).
 -define(DOC1, <<"doc1">>).
 -define(DOC2, <<"doc2">>).
 -define(R1, {"1", ""}).
@@ -625,6 +629,7 @@ doc_processor_test_() ->
         [
             t_bad_change(),
             t_regular_change(),
+            t_change_with_doc_processor_crash(),
             t_change_with_existing_job(),
             t_deleted_change(),
             t_triggered_change(),
@@ -658,6 +663,15 @@ t_regular_change() ->
     end).
 
 
+% Handle cases where doc processor exits or crashes while processing a change
+t_change_with_doc_processor_crash() ->
+    ?_test(begin
+        mock_existing_jobs_lookup([]),
+        ?assertEqual(acc, db_change(?EXIT_DB, change(), acc)),
+        ?assert(failed_state_not_updated())
+    end).
+
+
 % Regular change, parse to a #rep{} and then add job but there is already
 % a running job with same Id found.
 t_change_with_existing_job() ->
@@ -834,16 +848,19 @@ setup() ->
     meck:expect(couch_replicator_clustering, owner, 2, node()),
     meck:expect(couch_replicator_clustering, link_cluster_event_listener, 3,
         ok),
-    meck:expect(couch_replicator_doc_processor_worker, spawn_worker, 4, pid),
+    meck:expect(couch_replicator_doc_processor_worker, spawn_worker, fun
+        ({?EXIT_DB, _}, _, _, _) -> exit(kapow);
+        (_, _, _, _) -> pid
+    end),
     meck:expect(couch_replicator_scheduler, remove_job, 1, ok),
     meck:expect(couch_replicator_docs, remove_state_fields, 2, ok),
     meck:expect(couch_replicator_docs, update_failed, 3, ok),
     {ok, Pid} = start_link(),
+    unlink(Pid),
     Pid.
 
 
 teardown(Pid) ->
-    unlink(Pid),
     exit(Pid, kill),
     meck:unload().
 
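
Moving unlink(Pid) from teardown/1 into setup/0 is presumably what lets the
new test survive: start_link/0 links the doc processor to the test process,
and the mocked spawn_worker now makes the server exit(kapow) mid-test, an
abnormal exit that would propagate over the link and kill the test runner
before teardown/1 ever ran. A minimal shell sketch of the link semantics
(the 100 ms delay is illustrative only):

    %% An abnormal exit propagates over a link and kills the peer...
    1> Pid = spawn_link(fun() -> timer:sleep(100), exit(kapow) end).
    <0.85.0>
    %% ...unless the peer unlinks (or traps exits) before the exit fires.
    2> unlink(Pid).
    true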
@@ -871,10 +888,14 @@ did_not_spawn_worker() ->
 updated_doc_with_failed_state() ->
     1 == meck:num_calls(couch_replicator_docs, update_failed, '_').
 
+failed_state_not_updated() ->
+    0 == meck:num_calls(couch_replicator_docs, update_failed, '_').
 
 mock_existing_jobs_lookup(ExistingJobs) ->
-    meck:expect(couch_replicator_scheduler, find_jobs_by_doc,
-        fun(?DB, ?DOC1) -> ExistingJobs end).
+    meck:expect(couch_replicator_scheduler, find_jobs_by_doc, fun
+        (?EXIT_DB, ?DOC1) -> [];
+        (?DB, ?DOC1) -> ExistingJobs
+    end).
 
 
 test_rep(Id) ->
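
Both multi-clause mocks above lean on meck:expect/3 accepting an ordinary
Erlang fun, so clauses dispatch on the mocked function's arguments just as
they would in regular code. A minimal standalone sketch (my_mod and lookup
are hypothetical names used only for illustration):

    %% Mock a module so that one argument crashes and everything else succeeds.
    meck:new(my_mod, [non_strict]),
    meck:expect(my_mod, lookup, fun
        (special) -> exit(boom);   % simulate a crash, like exit(kapow) above
        (_Other) -> ok
    end),
    ok = my_mod:lookup(anything),
    meck:unload(my_mod).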