You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2017/07/31 23:41:18 UTC

[couchdb] branch master updated: Save migrated replicator checkpoint documents immediately

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/couchdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 1022c25  Save migrated replicator checkpoint documents immediately
1022c25 is described below

commit 1022c2507631cc642693a6efc615c43bc4c1617f
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Sat Jul 29 01:50:56 2017 -0400

    Save migrated replicator checkpoint documents immediately
    
    Previously, if the replication id algorithm was updated, replicator would
    migrate checkpoint documents but keep them in memory. They would be written to
    their respective databases only if checkpoints need to be updated, which
    doesn't happen unless the source database changes. As a result it was possible
    for checkpoints to be lost. Here is how it could happen:
    
    1. Checkpoints were created for current (3) version of the replicator document.
    Assume the replication document contains some credentials that look like
    'adm:pass', and the computed v3 replication id is "3abc...".
    
    2. Replication id algorithm is updated to version 4. Version 4 ignores
    passwords, such that changing authentication from 'adm:pass' to 'adm:pass2'
    would not change the replication ids.
    
    3. Server code is updated with version 4. Replicator looks for checkpoints with
    the new version 4, which it calculates to be "4def...". It can't find it, so it
    looks for v3, it finds "3abc..." and decides to migrate it. However migration
    only happens in memory. That is, the checkpoint document is updated but it
    needs a checkpoint to happen for it to be written to disk.
    
    4. There are no changes to the source db. So no checkpoints are forced to
    happen.
    
    5. User hears that the new replicator version is improved and passwords
    shouldn't alter the replication ids and all the checkpoints are reused. They
    update the replication document with their new credentials - adm:pass2.
    
    6. The updated document with 'adm:pass2' credentials is processed by the
    replicator. It computes the v4 replication id - "4def...". It's the same as
    before since it wasn't affected by pass -> pass2 change. That replication
    checkpoint document is found on neither source nor target. Replicator then
    computes v3 of the id to find the older version. However, v3 is affected by the
    passwords, so there it computes "3ghi..." which is different from previous v3
    which was "3abc..." It cannot find it. Computes v2 and checks, then v1, and
    eventually gives up without finding a checkpoint and restarts the changes feed
    from 0 again.
    
    To fix it, update `find_replication_logs` to also write the migrated
    replication checkpoint documents to their respective databases as soon as it
    finds them.
---
 .../src/couch_replicator_scheduler_job.erl               | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/couch_replicator/src/couch_replicator_scheduler_job.erl b/src/couch_replicator/src/couch_replicator_scheduler_job.erl
index 88abe7c..6a57225 100644
--- a/src/couch_replicator/src/couch_replicator_scheduler_job.erl
+++ b/src/couch_replicator/src/couch_replicator_scheduler_job.erl
@@ -568,7 +568,7 @@ init_state(Rep) ->
     {ok, SourceInfo} = couch_replicator_api_wrap:get_db_info(Source),
     {ok, TargetInfo} = couch_replicator_api_wrap:get_db_info(Target),
 
-    [SourceLog, TargetLog] = find_replication_logs([Source, Target], Rep),
+    [SourceLog, TargetLog] = find_and_migrate_logs([Source, Target], Rep),
 
     {StartSeq0, History} = compare_replication_logs(SourceLog, TargetLog),
     StartSeq1 = get_value(since_seq, Options, StartSeq0),
@@ -610,7 +610,7 @@ init_state(Rep) ->
     State#rep_state{timer = start_timer(State)}.
 
 
-find_replication_logs(DbList, #rep{id = {BaseId, _}} = Rep) ->
+find_and_migrate_logs(DbList, #rep{id = {BaseId, _}} = Rep) ->
     LogId = ?l2b(?LOCAL_DOC_PREFIX ++ BaseId),
     fold_replication_logs(DbList, ?REP_ID_VERSION, LogId, LogId, Rep, []).
 
@@ -632,11 +632,23 @@ fold_replication_logs([Db | Rest] = Dbs, Vsn, LogId, NewId, Rep, Acc) ->
             Rest, ?REP_ID_VERSION, NewId, NewId, Rep, [Doc | Acc]);
     {ok, Doc} ->
         MigratedLog = #doc{id = NewId, body = Doc#doc.body},
+        maybe_save_migrated_log(Rep, Db, MigratedLog, Doc#doc.id),
         fold_replication_logs(
             Rest, ?REP_ID_VERSION, NewId, NewId, Rep, [MigratedLog | Acc])
     end.
 
 
+maybe_save_migrated_log(Rep, Db, #doc{} = Doc, OldId) ->
+    case get_value(use_checkpoints, Rep#rep.options, true) of
+        true ->
+            update_checkpoint(Db, Doc),
+            Msg = "Migrated replication checkpoint. Db:~p ~p -> ~p",
+            couch_log:notice(Msg, [httpdb_strip_creds(Db), OldId, Doc#doc.id]);
+        false ->
+            ok
+    end.
+
+
 spawn_changes_manager(Parent, ChangesQueue, BatchSize) ->
     spawn_link(fun() ->
         changes_manager_loop_open(Parent, ChangesQueue, BatchSize, 1)

-- 
To stop receiving notification emails like this one, please contact
['"commits@couchdb.apache.org" <co...@couchdb.apache.org>'].