You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2019/01/15 19:35:58 UTC

[couchdb] branch master updated: Fix fabric_open_doc_revs

This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/couchdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 5269d79  Fix fabric_open_doc_revs
5269d79 is described below

commit 5269d7967667a5b8b9dfd923bf53d4c1b3ffcfb5
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Mon Jan 14 18:32:34 2019 -0600

    Fix fabric_open_doc_revs
    
    There was a subtle bug when opening specific revisions in
    fabric_doc_open_revs due to a race condition between updates being
    applied across a cluster.
    
    The underlying cause here was due to the stemming after a document had
    been updated more than revs_limit number of times along with concurrent
    reads to a node that had not yet made the update. To illustrate lets
    consider a document A which has a revision history from `{N, RevN}` to
    `{N+1000, RevN+1000}` (assuming revs_limit is the default 1000). If we
    consider a single node perspective when an update comes in we added the
    new revision and stem the oldest revision. The docs the revisions on the
    node would be `{N+1, RevN+1}` to `{N+1001, RevN+1001}`.
    
    The bug exists when we attempt to open revisions on a different node
    that has yet to apply the new update. In this case when
    fabric_doc_open_revs could be called with `{N+1000, RevN+1000}`. This
    results in a response from fabric_doc_open_revs that includes two
    different `{ok, Doc}` results instead of the expected one instance. The
    reason for this is that one document has revisions `{N+1, RevN+1}` to
    `{N+1000, RevN+1000}` from the node that has applied the update, while
    the node without the update responds with revisions `{N, RevN}` to
    {N+1000, RevN+1000}`.
    
    To rephrase that, a node that has applied an update can end up returning
    a revision path that contains `revs_limit - 1` revisions while a node
    wihtout the update returns all `revs_limit` revisions. This slight
    change in the path prevented the responses from being properly combined
    into a single response.
    
    This bug has existed for many years. However, read repair effectively
    prevents it from being a significant issue by immediately fixing the
    revision history discrepancy. This was discovered due to the recent bug
    in read repair during a mixed cluster upgrade to a release including
    clustered purge. In this situation we end up crashing the design
    document cache which then leads to all of the design document requests
    being direct reads which can end up causing cluster nodes to OOM and
    die. The conditions require a significant number of design document
    edits coupled with already significant load to those modified design
    documents. The most direct example observed was a clustered that had a
    significant number of filtered replications in and out of the cluster.
---
 src/fabric/src/fabric_doc_open_revs.erl | 116 +++++++++++++++++++++++++-------
 1 file changed, 91 insertions(+), 25 deletions(-)

diff --git a/src/fabric/src/fabric_doc_open_revs.erl b/src/fabric/src/fabric_doc_open_revs.erl
index 15b8eac..8ac3f30 100644
--- a/src/fabric/src/fabric_doc_open_revs.erl
+++ b/src/fabric/src/fabric_doc_open_revs.erl
@@ -243,8 +243,7 @@ format_reply(true, Replies, _) ->
     tree_format_replies(Replies);
 
 format_reply(false, Replies, _) ->
-    Filtered = filter_reply(Replies),
-    dict_format_replies(Filtered).
+    dict_format_replies(Replies).
 
 
 tree_format_replies(RevTree) ->
@@ -260,22 +259,59 @@ tree_format_replies(RevTree) ->
 
 
 dict_format_replies(Dict) ->
-    lists:sort([Reply || {_, {Reply, _}} <- Dict]).
-
-filter_reply(Replies) ->
-    AllFoundRevs = lists:foldl(fun
-        ({{{not_found, missing}, _}, _}, Acc) ->
-            Acc;
-        ({{_, {Pos, [Rev | _]}}, _}, Acc) ->
-            [{Pos, Rev} | Acc]
-    end, [], Replies),
-    %% keep not_found replies only for the revs that don't also have doc reply
-    lists:filter(fun
-        ({{{not_found, missing}, Rev}, _}) ->
-            not lists:member(Rev, AllFoundRevs);
-        (_) ->
-            true
-    end, Replies).
+    Replies0 = [Reply || {_, {Reply, _}} <- Dict],
+
+    AllFoundRevs = lists:foldl(fun(Reply, Acc) ->
+        case Reply of
+            {ok, #doc{revs = {Pos, [RevId | _]}}} ->
+                [{Pos, RevId} | Acc];
+            _ ->
+                Acc
+        end
+    end, [], Replies0),
+
+    %% Drop any not_found replies for which we
+    %% found the revision on a different node.
+    Replies1 = lists:filter(fun(Reply) ->
+        case Reply of
+            {{not_found, missing}, Rev} ->
+                not lists:member(Rev, AllFoundRevs);
+            _ ->
+                true
+        end
+    end, Replies0),
+
+    % Remove replies with shorter revision
+    % paths for a given revision.
+    collapse_duplicate_revs(Replies1).
+
+
+collapse_duplicate_revs(Replies) ->
+    % The collapse logic requires that replies are
+    % sorted so that shorter rev paths are in
+    % the list just before longer lists.
+    %
+    % This somewhat implicitly relies on Erlang's
+    % sorting of [A, B] < [A, B, C] for all values
+    % of C.
+    collapse_duplicate_revs_int(lists:sort(Replies)).
+
+
+collapse_duplicate_revs_int([]) ->
+    [];
+
+collapse_duplicate_revs_int([{ok, Doc1}, {ok, Doc2} | Rest]) ->
+    {D1, R1} = Doc1#doc.revs,
+    {D2, R2} = Doc2#doc.revs,
+    Head = case D1 == D2 andalso lists:prefix(R1, R2) of
+        true -> [];
+        false -> [{ok, Doc1}]
+    end,
+    Head ++ collapse_duplicate_revs([{ok, Doc2} | Rest]);
+
+collapse_duplicate_revs_int([Reply | Rest]) ->
+    [Reply | collapse_duplicate_revs(Rest)].
+
 
 -ifdef(TEST).
 -include_lib("eunit/include/eunit.hrl").
@@ -313,7 +349,9 @@ revs() -> [{1,<<"foo">>}, {1,<<"bar">>}, {1,<<"baz">>}].
 
 foo1() -> {ok, #doc{revs = {1, [<<"foo">>]}}}.
 foo2() -> {ok, #doc{revs = {2, [<<"foo2">>, <<"foo">>]}}}.
+foo2stemmed() -> {ok, #doc{revs = {2, [<<"foo2">>]}}}.
 fooNF() -> {{not_found, missing}, {1,<<"foo">>}}.
+foo2NF() -> {{not_found, missing}, {2, <<"foo2">>}}.
 bar1() -> {ok, #doc{revs = {1, [<<"bar">>]}}}.
 barNF() -> {{not_found, missing}, {1,<<"bar">>}}.
 bazNF() -> {{not_found, missing}, {1,<<"baz">>}}.
@@ -351,7 +389,10 @@ open_doc_revs_test_() ->
             check_node_rev_unmodified_on_down_or_exit(),
             check_not_found_replies_are_removed_when_doc_found(),
             check_not_found_returned_when_one_of_docs_not_found(),
-            check_not_found_returned_when_doc_not_found()
+            check_not_found_returned_when_doc_not_found(),
+            check_longer_rev_list_returned(),
+            check_longer_rev_list_not_combined(),
+            check_not_found_removed_and_longer_rev_list()
         ]
     }.
 
@@ -685,24 +726,49 @@ check_node_rev_unmodified_on_down_or_exit() ->
 check_not_found_replies_are_removed_when_doc_found() ->
     ?_test(begin
         Replies = replies_to_dict([foo1(), bar1(), fooNF()]),
-        Expect = replies_to_dict([foo1(), bar1()]),
-        ?assertEqual(Expect, filter_reply(Replies))
+        Expect = [bar1(), foo1()],
+        ?assertEqual(Expect, dict_format_replies(Replies))
     end).
 
 check_not_found_returned_when_one_of_docs_not_found() ->
     ?_test(begin
         Replies = replies_to_dict([foo1(), foo2(), barNF()]),
-        Expect = replies_to_dict([foo1(), foo2(), barNF()]),
-        ?assertEqual(Expect, filter_reply(Replies))
+        Expect = [foo1(), foo2(), barNF()],
+        ?assertEqual(Expect, dict_format_replies(Replies))
     end).
 
 check_not_found_returned_when_doc_not_found() ->
     ?_test(begin
         Replies = replies_to_dict([fooNF(), barNF(), bazNF()]),
-        Expect = replies_to_dict([fooNF(), barNF(), bazNF()]),
-        ?assertEqual(Expect, filter_reply(Replies))
+        Expect = [barNF(), bazNF(), fooNF()],
+        ?assertEqual(Expect, dict_format_replies(Replies))
     end).
 
+check_longer_rev_list_returned() ->
+    ?_test(begin
+        Replies = replies_to_dict([foo2(), foo2stemmed()]),
+        Expect = [foo2()],
+        ?assertEqual(2, length(Replies)),
+        ?assertEqual(Expect, dict_format_replies(Replies))
+    end).
+
+check_longer_rev_list_not_combined() ->
+    ?_test(begin
+        Replies = replies_to_dict([foo2(), foo2stemmed(), bar1()]),
+        Expect = [bar1(), foo2()],
+        ?assertEqual(3, length(Replies)),
+        ?assertEqual(Expect, dict_format_replies(Replies))
+    end).
+
+check_not_found_removed_and_longer_rev_list() ->
+    ?_test(begin
+        Replies = replies_to_dict([foo2(), foo2stemmed(), foo2NF()]),
+        Expect = [foo2()],
+        ?assertEqual(3, length(Replies)),
+        ?assertEqual(Expect, dict_format_replies(Replies))
+    end).
+
+
 replies_to_dict(Replies) ->
     [reply_to_element(R) || R <- Replies].