You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2017/03/15 19:25:32 UTC

[03/10] mem3 commit: updated refs/heads/COUCHDB-3326-clustered-purge to e4e8928

Chunk missing revisions before attempting to save on target

In cases with pathological document revision patterns (e.g., 10000 open
conflicts and tree depth of 300000 on a single document), attempting to
replicate the full revision tree in one batch can crash the system by sending
an oversized message. We've observed messages of > 4GB in the wild.

This patch divides the set of revisions-to-replicate for a single document into
chunks of a configurable size, thereby allowing operators to keep the system
stable when attempting to replicate these troublesome documents.

BugzID: 37676


Project: http://git-wip-us.apache.org/repos/asf/couchdb-mem3/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-mem3/commit/c4da61c8
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-mem3/tree/c4da61c8
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-mem3/diff/c4da61c8

Branch: refs/heads/COUCHDB-3326-clustered-purge
Commit: c4da61c8eb98cedd3cf7a28c293cb1f6d3ec8571
Parents: 252467c
Author: Benjamin Anderson <b...@banjiewen.net>
Authored: Wed Oct 29 12:52:30 2014 -0700
Committer: Eric Avdey <ei...@eiri.ca>
Committed: Thu Nov 24 13:55:18 2016 -0400

----------------------------------------------------------------------
 src/mem3_rep.erl | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-mem3/blob/c4da61c8/src/mem3_rep.erl
----------------------------------------------------------------------
diff --git a/src/mem3_rep.erl b/src/mem3_rep.erl
index 938260d..ad7ac55 100644
--- a/src/mem3_rep.erl
+++ b/src/mem3_rep.erl
@@ -252,8 +252,10 @@ replicate_batch(#acc{target = #shard{node=Node, name=Name}} = Acc) ->
     [] ->
         ok;
     Missing ->
-        Docs = open_docs(Acc, Missing),
-        ok = save_on_target(Node, Name, Docs)
+        lists:map(fun(Chunk) ->
+            Docs = open_docs(Acc, Chunk),
+            ok = save_on_target(Node, Name, Docs)
+        end, chunk_revs(Missing))
     end,
     update_locals(Acc),
     {ok, Acc#acc{revcount=0, infos=[]}}.
@@ -271,6 +273,32 @@ find_missing_revs(Acc) ->
     ]).
 
 
+chunk_revs(Revs) ->
+    Limit = list_to_integer(config:get("mem3", "rev_chunk_size", "5000")),
+    chunk_revs(Revs, Limit).
+
+chunk_revs(Revs, Limit) ->
+    chunk_revs(Revs, {0, []}, [], Limit).
+
+chunk_revs([], {_Count, Chunk}, Chunks, _Limit) ->
+    [Chunk|Chunks];
+chunk_revs([{Id, R, A}|Revs], {Count, Chunk}, Chunks, Limit) when length(R) =< Limit - Count ->
+    chunk_revs(
+        Revs,
+        {Count + length(R), [{Id, R, A}|Chunk]},
+        Chunks,
+        Limit
+    );
+chunk_revs([{Id, R, A}|Revs], {Count, Chunk}, Chunks, Limit) ->
+    {This, Next} = lists:split(Limit - Count, R),
+    chunk_revs(
+        [{Id, Next, A}|Revs],
+        {0, []},
+        [[{Id, This, A}|Chunk]|Chunks],
+        Limit
+    ).
+
+
 open_docs(#acc{source=Source, infos=Infos}, Missing) ->
     lists:flatmap(fun({Id, Revs, _}) ->
         FDI = lists:keyfind(Id, #full_doc_info.id, Infos),