You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by wo...@apache.org on 2017/10/31 20:34:46 UTC

[couchdb] 17/17: Configurable delay before retrying on missing_doc error

This is an automated email from the ASF dual-hosted git repository.

wohali pushed a commit to branch 211-update-2
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit be5c3c7980b128259d8a6ce02adf845120c71938
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Mon Oct 30 14:41:23 2017 -0400

    Configurable delay before retrying on missing_doc error
    
    Implement a configurable delay before retrying a document fetch in replicator.
    
    missing_doc exceptions usually happen when there is a continuous replication
    set up and the source is updated. The change might appear in the changes feed,
    but when the worker tries to fetch the document's revisions it talks to a
    node where internal replication hasn't caught up and so it throws an exception.
    
    Previously the delay was hard-coded at 0 (that is, retrying was immediate). The
    replication would still make progress, but after crashing, retrying and
    generating a lot of unnecessary log noise. Since updating a source while
    continuous replication is running is a common scenario, it's worth optimizing
    for it and avoiding wasting resources and spamming logs.
---
 rel/overlay/etc/default.ini                          | 9 +++++++++
 src/couch_replicator/src/couch_replicator_worker.erl | 7 +++++++
 2 files changed, 16 insertions(+)

diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index 4e61deb..745e5a8 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -399,6 +399,15 @@ verify_ssl_certificates = false
 ssl_certificate_max_depth = 3
 ; Maximum document ID length for replication.
 ;max_document_id_length = 0
+; How much time to wait before retrying after a missing doc exception. This
+; exception happens if the document was seen in the changes feed, but internal
+; replication hasn't caught up yet, and fetching document's revisions
+; fails. This is a common scenario when the source is updated while continuous
+; replication is running. The retry period would depend on how quickly internal
+; replication is expected to catch up. In general this is an optimisation to
+; avoid crashing the whole replication job, which would consume more resources
+; and add log noise.
+;missing_doc_retry_msec = 2000
 
 [compaction_daemon]
 ; The delay, in seconds, between each check for which database and view indexes
diff --git a/src/couch_replicator/src/couch_replicator_worker.erl b/src/couch_replicator/src/couch_replicator_worker.erl
index 45ccefa..db6b72b 100644
--- a/src/couch_replicator/src/couch_replicator_worker.erl
+++ b/src/couch_replicator/src/couch_replicator_worker.erl
@@ -31,6 +31,7 @@
 -define(MAX_BULK_ATT_SIZE, 64 * 1024).
 -define(MAX_BULK_ATTS_PER_DOC, 8).
 -define(STATS_DELAY, 10000000).              % 10 seconds (in microseconds)
+-define(MISSING_DOC_RETRY_MSEC, 2000).
 
 -import(couch_replicator_utils, [
     open_db/1,
@@ -314,11 +315,17 @@ fetch_doc(Source, {Id, Revs, PAs}, DocHandler, Acc) ->
         couch_log:error("Retrying fetch and update of document `~s` as it is "
             "unexpectedly missing. Missing revisions are: ~s",
             [Id, couch_doc:revs_to_strs(Revs)]),
+        WaitMSec = config:get_integer("replicator", "missing_doc_retry_msec",
+            ?MISSING_DOC_RETRY_MSEC),
+        timer:sleep(WaitMSec),
         couch_replicator_api_wrap:open_doc_revs(Source, Id, Revs, [latest], DocHandler, Acc);
     throw:{missing_stub, _} ->
         couch_log:error("Retrying fetch and update of document `~s` due to out of "
             "sync attachment stubs. Missing revisions are: ~s",
             [Id, couch_doc:revs_to_strs(Revs)]),
+        WaitMSec = config:get_integer("replicator", "missing_doc_retry_msec",
+            ?MISSING_DOC_RETRY_MSEC),
+        timer:sleep(WaitMSec),
         couch_replicator_api_wrap:open_doc_revs(Source, Id, Revs, [latest], DocHandler, Acc)
     end.
 

-- 
To stop receiving notification emails like this one, please contact
"commits@couchdb.apache.org" <co...@couchdb.apache.org>.