You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2019/09/20 20:46:35 UTC

[couchdb] 01/01: Add revision stemming for interactive docs

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch add-interactive-stemming
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 9cce962846c88c168326047b748cbd7fb1bb97c0
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Fri Sep 20 16:45:44 2019 -0400

    Add revision stemming for interactive docs
---
 src/fabric/src/fabric2_db.erl            |  29 +++--
 src/fabric/test/fabric2_rev_stemming.erl | 204 +++++++++++++++++++++++++++++++
 2 files changed, 224 insertions(+), 9 deletions(-)

diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl
index a316517..9ef0bd3 100644
--- a/src/fabric/src/fabric2_db.erl
+++ b/src/fabric/src/fabric2_db.erl
@@ -442,7 +442,7 @@ is_users_db(DbName) when is_binary(DbName) ->
 
 set_revs_limit(#{} = Db, RevsLimit) ->
     check_is_admin(Db),
-    RevsLimBin = ?uint2bin(RevsLimit),
+    RevsLimBin = ?uint2bin(max(1, RevsLimit)),
     Resp = fabric2_fdb:transactional(Db, fun(TxDb) ->
         fabric2_fdb:set_config(TxDb, <<"revs_limit">>, RevsLimBin)
     end),
@@ -1325,10 +1325,12 @@ update_doc_interactive(Db, Doc0, Future, _Options) ->
     % new revinfo map
     Doc2 = prep_and_validate(Db, Doc1, Target),
 
+    Doc3 = new_revid(Db, Doc2),
+
     #doc{
         deleted = NewDeleted,
         revs = {NewRevPos, [NewRev | NewRevPath]}
-    } = Doc3 = new_revid(Db, Doc2),
+    } = Doc4 = stem_revisions(Db, Doc3),
 
     NewRevInfo = #{
         winner => undefined,
@@ -1341,9 +1343,9 @@ update_doc_interactive(Db, Doc0, Future, _Options) ->
 
     % Gather the list of possible winnig revisions
     Possible = case Target == Winner of
-        true when not Doc3#doc.deleted ->
+        true when not Doc4#doc.deleted ->
             [NewRevInfo];
-        true when Doc3#doc.deleted ->
+        true when Doc4#doc.deleted ->
             case SecondPlace of
                 #{} -> [NewRevInfo, SecondPlace];
                 not_found -> [NewRevInfo]
@@ -1368,7 +1370,7 @@ update_doc_interactive(Db, Doc0, Future, _Options) ->
 
     ok = fabric2_fdb:write_doc(
             Db,
-            Doc3,
+            Doc4,
             NewWinner,
             Winner,
             ToUpdate,
@@ -1403,6 +1405,7 @@ update_doc_replicated(Db, Doc0, _Options) ->
     end, [], AllRevInfos),
 
     DocRevPath = fabric2_util:revinfo_to_path(DocRevInfo0),
+
     {NewTree, Status} = couch_key_tree:merge(RevTree, DocRevPath),
     if Status /= internal_node -> ok; true ->
         % We already know this revision so nothing
@@ -1416,10 +1419,9 @@ update_doc_replicated(Db, Doc0, _Options) ->
     % tree and use the combined path after stemming.
     {[{_, {RevPos, UnstemmedRevs}}], []}
             = couch_key_tree:get(NewTree, [{RevPos, Rev}]),
-    RevsLimit = fabric2_db:get_revs_limit(Db),
-    Doc1 = Doc0#doc{
-        revs = {RevPos, lists:sublist(UnstemmedRevs, RevsLimit)}
-    },
+
+    Doc1 = stem_revisions(Db, Doc0#doc{revs = {RevPos, UnstemmedRevs}}),
+
     {RevPos, [Rev | NewRevPath]} = Doc1#doc.revs,
     DocRevInfo1 = DocRevInfo0#{rev_path := NewRevPath},
 
@@ -1736,3 +1738,12 @@ set_design_doc_end_key(Options, rev) ->
             Key2 = max(EKeyGT, ?FIRST_DDOC_KEY),
             lists:keystore(end_key_gt, 1, Options, {end_key_gt, Key2})
     end.
+
+
+stem_revisions(#{} = Db, #doc{} = Doc) -> % Cap Doc's revision path at the db's revs_limit; returns a #doc{}
+    #{revs_limit := RevsLimit} = Db, % limit comes from the db handle map; badmatch if the key is missing
+    #doc{revs = {RevPos, Revs}} = Doc,
+    case RevPos >= RevsLimit of % presumably length(Revs) =< RevPos, so shorter paths need no trimming -- confirm
+        true -> Doc#doc{revs = {RevPos, lists:sublist(Revs, RevsLimit)}}; % keep the first RevsLimit revs, RevPos unchanged
+        false -> Doc
+    end.
diff --git a/src/fabric/test/fabric2_rev_stemming.erl b/src/fabric/test/fabric2_rev_stemming.erl
new file mode 100644
index 0000000..99e086e
--- /dev/null
+++ b/src/fabric/test/fabric2_rev_stemming.erl
@@ -0,0 +1,204 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(fabric2_rev_stemming).
+
+
+-include_lib("couch/include/couch_db.hrl").
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("eunit/include/eunit.hrl").
+
+
+doc_crud_test_() -> % EUnit generator: runs the stemming cases listed below inside one setup/cleanup fixture
+    {
+        "Test document CRUD operations with stemming",
+        {
+            setup, % setup/0 runs once for the whole list, so every case uses the same Db
+            fun setup/0,
+            fun cleanup/1,
+            {with, [ % each fun is called with the {Db, Ctx} tuple returned by setup/0
+                fun update_doc/1,
+                fun update_doc_replicated_no_stemming/1,
+                fun update_doc_replicated_with_stemming/1,
+                fun update_doc_replicate_existing_rev/1,
+                fun update_winning_conflict_branch/1,
+                fun update_non_winning_conflict_branch/1,
+                fun delete_doc_basic/1,
+                fun recreate_doc_basic/1
+            ]}
+        }
+    }.
+
+
+setup() -> % Fixture setup: start couch with fabric and create a db; returns {Db, Ctx}
+    Ctx = test_util:start_couch([fabric]), % Ctx is handed back to test_util:stop_couch/1 in cleanup/1
+    {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), % db name from ?tempdb(), created with the ?ADMIN_USER context
+    {Db, Ctx}.
+
+
+cleanup({Db, Ctx}) -> % Fixture teardown: delete the db made by setup/0, then stop couch
+    ok = fabric2_db:delete(fabric2_db:name(Db), []), % delete is by name, looked up from the handle
+    test_util:stop_couch(Ctx).
+
+
+update_doc({Db, _}) -> % Interactive updates with revs_limit = 2: the stored path keeps at most two revs
+    ok = fabric2_db:set_revs_limit(Db, 2),
+    Doc1 = #doc{id = fabric2_util:uuid()},
+    {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, Doc1), % first revision
+    Doc2 = Doc1#doc{revs = {Pos1, [Rev1]}},
+    {ok, {Pos2, Rev2}} = fabric2_db:update_doc(Db, Doc2), % second revision, built on Rev1
+    Doc3 = Doc2#doc{revs = {Pos2, [Rev2, Rev1]}},
+    ?assertEqual({ok, Doc3}, fabric2_db:open_doc(Db, Doc2#doc.id)), % two revs fit the limit, so both are returned
+
+    {ok, {_, Rev3}} = fabric2_db:update_doc(Db, Doc3), % third revision pushes the path past the limit
+    {ok, Doc4} = fabric2_db:open_doc(Db, Doc3#doc.id),
+    ?assertEqual({3, [Rev3, Rev2]}, Doc4#doc.revs). % Rev1 is expected to be stemmed away
+
+
+update_doc_replicated_no_stemming({Db, _}) -> % Replicated write whose path length equals revs_limit is stored untrimmed
+    ok = fabric2_db:set_revs_limit(Db, 2),
+    Rev1 = fabric2_util:uuid(), % uuids stand in for revision ids
+    Rev2 = fabric2_util:uuid(),
+    Doc = #doc{
+        id = fabric2_util:uuid(),
+        revs = {2, [Rev2, Rev1]} % two-entry path at position 2
+    },
+    {ok, _} = fabric2_db:update_doc(Db, Doc, [replicated_changes]), % replicated_changes: the doc carries its own rev path
+    {ok, #doc{revs = Revs}} = fabric2_db:open_doc(Db, Doc#doc.id),
+    ?assertEqual({2, [Rev2, Rev1]}, Revs). % both revs are expected back
+
+
+update_doc_replicated_with_stemming({Db, _}) -> % Replicated write with a path longer than revs_limit is trimmed on store
+    ok = fabric2_db:set_revs_limit(Db, 1),
+    Rev1 = fabric2_util:uuid(),
+    Rev2 = fabric2_util:uuid(),
+    Doc = #doc{
+        id = fabric2_util:uuid(),
+        revs = {2, [Rev2, Rev1]} % two-entry path, one more than the limit
+    },
+    {ok, _} = fabric2_db:update_doc(Db, Doc, [replicated_changes]),
+    {ok, #doc{revs = Revs}} = fabric2_db:open_doc(Db, Doc#doc.id),
+    ?assertEqual({2, [Rev2]}, Revs). % position stays 2, only the newest rev remains
+
+
+update_doc_replicate_existing_rev({Db, _}) -> % Re-sending an already stored replicated rev leaves the stemmed path as is
+    ok = fabric2_db:set_revs_limit(Db, 1),
+    Rev1 = fabric2_util:uuid(),
+    Rev2 = fabric2_util:uuid(),
+    Doc1 = #doc{
+        id = fabric2_util:uuid(),
+        revs = {2, [Rev2, Rev1]}
+    },
+    {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), % first write stores the rev
+    {ok, []} = fabric2_db:update_docs(Db, [Doc1], [replicated_changes]), % same doc again; the test requires an empty result list
+    {ok, Doc} =  fabric2_db:open_doc(Db, Doc1#doc.id),
+    ?assertEqual({2, [Rev2]}, Doc#doc.revs). % path is still the single stemmed rev
+
+
+update_winning_conflict_branch({Db, _}) -> % Interactive update on the winning conflict branch, revs_limit = 2
+    ok = fabric2_db:set_revs_limit(Db, 2),
+    [Rev1, Rev2, Rev3] = lists:sort([ % sorted so that Rev1 < Rev2 < Rev3
+            fabric2_util:uuid(),
+            fabric2_util:uuid(),
+            fabric2_util:uuid()
+        ]),
+    Doc1 = #doc{
+        id = fabric2_util:uuid(),
+        revs = {2, [Rev3, Rev1]}, % branch A: Rev1 -> Rev3
+        body = {[{<<"foo">>, <<"bar">>}]}
+    },
+    {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]),
+    Doc2 = Doc1#doc{
+        revs = {2, [Rev2, Rev1]}, % branch B: Rev1 -> Rev2, conflicts with branch A at position 2
+        body = {[{<<"bar">>, <<"foo">>}]}
+    },
+    {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]),
+    % Update the winning branch
+    Doc3 = Doc1#doc{
+        revs = {2, [Rev3, Rev1]}, % presumably Rev3 wins as the higher-sorting rev at position 2 -- confirm
+        body = {[{<<"baz">>, 2}]}
+    },
+    {ok, {3, Rev4}} = fabric2_db:update_doc(Db, Doc3), % interactive write adds a revision at position 3
+    {ok, Doc4} = fabric2_db:open_doc(Db, Doc3#doc.id),
+    % Assert we've got the correct winner
+    ?assertEqual({3, [Rev4, Rev3]}, Doc4#doc.revs), % Rev1 is absent: path is capped at two entries
+    ?assertEqual(Doc3#doc{revs = undefined}, Doc4#doc{revs = undefined}). % all fields other than revs must match Doc3
+
+
+update_non_winning_conflict_branch({Db, _}) -> % Interactive update on the non-winning conflict branch, revs_limit = 2
+    ok = fabric2_db:set_revs_limit(Db, 2),
+    [Rev1, Rev2, Rev3] = lists:sort([ % sorted so that Rev1 < Rev2 < Rev3
+            fabric2_util:uuid(),
+            fabric2_util:uuid(),
+            fabric2_util:uuid()
+        ]),
+    Doc1 = #doc{
+        id = fabric2_util:uuid(),
+        revs = {2, [Rev3, Rev1]}, % branch A: Rev1 -> Rev3
+        body = {[{<<"foo">>, <<"bar">>}]}
+    },
+    {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]),
+    Doc2 = Doc1#doc{
+        revs = {2, [Rev2, Rev1]}, % branch B: Rev1 -> Rev2, conflicts with branch A at position 2
+        body = {[{<<"bar">>, <<"foo">>}]}
+    },
+    {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]),
+    % Update the non winning branch
+    Doc3 = Doc1#doc{
+        revs = {2, [Rev2, Rev1]}, % builds on Rev2 rather than Rev3
+        body = {[{<<"baz">>, 2}]}
+    },
+    {ok, {3, Rev4}} = fabric2_db:update_doc(Db, Doc3), % interactive write adds a revision at position 3
+    {ok, Doc4} = fabric2_db:open_doc(Db, Doc3#doc.id),
+    % Assert we've got the correct winner
+    ?assertEqual({3, [Rev4, Rev2]}, Doc4#doc.revs). % open_doc now returns the extended branch, capped at two entries
+
+
+delete_doc_basic({Db, _}) -> % Create then delete a doc with revs_limit = 1; the deleted rev path holds one entry
+    ok = fabric2_db:set_revs_limit(Db, 1),
+    Doc1 = #doc{
+        id = fabric2_util:uuid(),
+        body = {[{<<"state">>, 1}]}
+    },
+    {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, Doc1), % initial revision
+    Doc2 = Doc1#doc{
+        revs = {Pos1, [Rev1]},
+        deleted = true, % the second write is a deletion on top of Rev1
+        body = {[{<<"state">>, 2}]}
+    },
+    {ok, {Pos2, Rev2}} = fabric2_db:update_doc(Db, Doc2),
+    Doc3 = Doc2#doc{revs = {Pos2, [Rev2]}}, % expected stored form: Rev1 is not in the path
+    ?assertEqual({ok, Doc3}, fabric2_db:open_doc(Db, Doc2#doc.id, [deleted])). % the deleted option is passed to read the deleted doc
+
+
+recreate_doc_basic({Db, _}) -> % Create, delete, then recreate a doc with revs_limit = 1
+    ok = fabric2_db:set_revs_limit(Db, 1),
+    Doc1 = #doc{
+        id = fabric2_util:uuid(),
+        body = {[{<<"state">>, 1}]}
+    },
+    {ok, {1, Rev1}} = fabric2_db:update_doc(Db, Doc1), % creation lands at position 1
+    Doc2 = Doc1#doc{
+        revs = {1, [Rev1]},
+        deleted = true,
+        body = {[{<<"state">>, 2}]}
+    },
+    {ok, {2, Rev2}} = fabric2_db:update_doc(Db, Doc2), % deletion lands at position 2
+    Doc3 = Doc1#doc{
+        revs = {0, []}, % no parent revision is supplied for the recreate
+        deleted = false,
+        body = {[{<<"state">>, 3}]}
+    },
+    {ok, {3, Rev3}} = fabric2_db:update_doc(Db, Doc3), % the recreate is required to land at position 3
+    {ok, Doc4} = fabric2_db:open_doc(Db, Doc3#doc.id),
+    ?assertEqual({3, [Rev3]}, Doc4#doc.revs), % with limit 1 only the newest rev is kept
+    ?assertEqual(Doc3#doc{revs = undefined}, Doc4#doc{revs = undefined}). % all fields other than revs must match Doc3