You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2019/09/20 20:46:34 UTC

[couchdb] branch add-interactive-stemming created (now 9cce962)

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a change to branch add-interactive-stemming
in repository https://gitbox.apache.org/repos/asf/couchdb.git.


      at 9cce962  Add revision stemming for interactive docs

This branch includes the following new commits:

     new 9cce962  Add revision stemming for interactive docs

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  Revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[couchdb] 01/01: Add revision stemming for interactive docs

Posted by va...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch add-interactive-stemming
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 9cce962846c88c168326047b748cbd7fb1bb97c0
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Fri Sep 20 16:45:44 2019 -0400

    Add revision stemming for interactive docs
---
 src/fabric/src/fabric2_db.erl            |  29 +++--
 src/fabric/test/fabric2_rev_stemming.erl | 204 +++++++++++++++++++++++++++++++
 2 files changed, 224 insertions(+), 9 deletions(-)

diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl
index a316517..9ef0bd3 100644
--- a/src/fabric/src/fabric2_db.erl
+++ b/src/fabric/src/fabric2_db.erl
@@ -442,7 +442,7 @@ is_users_db(DbName) when is_binary(DbName) ->
 
 set_revs_limit(#{} = Db, RevsLimit) ->
     check_is_admin(Db),
-    RevsLimBin = ?uint2bin(RevsLimit),
+    RevsLimBin = ?uint2bin(max(1, RevsLimit)),
     Resp = fabric2_fdb:transactional(Db, fun(TxDb) ->
         fabric2_fdb:set_config(TxDb, <<"revs_limit">>, RevsLimBin)
     end),
@@ -1325,10 +1325,12 @@ update_doc_interactive(Db, Doc0, Future, _Options) ->
     % new revinfo map
     Doc2 = prep_and_validate(Db, Doc1, Target),
 
+    Doc3 = new_revid(Db, Doc2),
+
     #doc{
         deleted = NewDeleted,
         revs = {NewRevPos, [NewRev | NewRevPath]}
-    } = Doc3 = new_revid(Db, Doc2),
+    } = Doc4 = stem_revisions(Db, Doc3),
 
     NewRevInfo = #{
         winner => undefined,
@@ -1341,9 +1343,9 @@ update_doc_interactive(Db, Doc0, Future, _Options) ->
 
     % Gather the list of possible winnig revisions
     Possible = case Target == Winner of
-        true when not Doc3#doc.deleted ->
+        true when not Doc4#doc.deleted ->
             [NewRevInfo];
-        true when Doc3#doc.deleted ->
+        true when Doc4#doc.deleted ->
             case SecondPlace of
                 #{} -> [NewRevInfo, SecondPlace];
                 not_found -> [NewRevInfo]
@@ -1368,7 +1370,7 @@ update_doc_interactive(Db, Doc0, Future, _Options) ->
 
     ok = fabric2_fdb:write_doc(
             Db,
-            Doc3,
+            Doc4,
             NewWinner,
             Winner,
             ToUpdate,
@@ -1403,6 +1405,7 @@ update_doc_replicated(Db, Doc0, _Options) ->
     end, [], AllRevInfos),
 
     DocRevPath = fabric2_util:revinfo_to_path(DocRevInfo0),
+
     {NewTree, Status} = couch_key_tree:merge(RevTree, DocRevPath),
     if Status /= internal_node -> ok; true ->
         % We already know this revision so nothing
@@ -1416,10 +1419,9 @@ update_doc_replicated(Db, Doc0, _Options) ->
     % tree and use the combined path after stemming.
     {[{_, {RevPos, UnstemmedRevs}}], []}
             = couch_key_tree:get(NewTree, [{RevPos, Rev}]),
-    RevsLimit = fabric2_db:get_revs_limit(Db),
-    Doc1 = Doc0#doc{
-        revs = {RevPos, lists:sublist(UnstemmedRevs, RevsLimit)}
-    },
+
+    Doc1 = stem_revisions(Db, Doc0#doc{revs = {RevPos, UnstemmedRevs}}),
+
     {RevPos, [Rev | NewRevPath]} = Doc1#doc.revs,
     DocRevInfo1 = DocRevInfo0#{rev_path := NewRevPath},
 
@@ -1736,3 +1738,12 @@ set_design_doc_end_key(Options, rev) ->
             Key2 = max(EKeyGT, ?FIRST_DDOC_KEY),
             lists:keystore(end_key_gt, 1, Options, {end_key_gt, Key2})
     end.
+
+
+% Trim the revision path of Doc to the limit stored under the revs_limit
+% key of the Db map. The path is only cut once the revision position has
+% reached the limit; below that the document is returned untouched.
+stem_revisions(#{} = Db, #doc{} = Doc) ->
+    #{revs_limit := Limit} = Db,
+    #doc{revs = {Pos, Path}} = Doc,
+    if
+        Pos < Limit -> Doc;
+        true -> Doc#doc{revs = {Pos, lists:sublist(Path, Limit)}}
+    end.
diff --git a/src/fabric/test/fabric2_rev_stemming.erl b/src/fabric/test/fabric2_rev_stemming.erl
new file mode 100644
index 0000000..99e086e
--- /dev/null
+++ b/src/fabric/test/fabric2_rev_stemming.erl
@@ -0,0 +1,204 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(fabric2_rev_stemming).
+
+
+-include_lib("couch/include/couch_db.hrl").
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("eunit/include/eunit.hrl").
+
+
+% EUnit generator for the stemming tests. A single setup/cleanup pair
+% wraps all of the listed cases.
+doc_crud_test_() ->
+    Cases = [
+        fun update_doc/1,
+        fun update_doc_replicated_no_stemming/1,
+        fun update_doc_replicated_with_stemming/1,
+        fun update_doc_replicate_existing_rev/1,
+        fun update_winning_conflict_branch/1,
+        fun update_non_winning_conflict_branch/1,
+        fun delete_doc_basic/1,
+        fun recreate_doc_basic/1
+    ],
+    Fixture = {setup, fun setup/0, fun cleanup/1, {with, Cases}},
+    {"Test document CRUD operations with stemming", Fixture}.
+
+
+% Start couch with the fabric application and create a temporary database
+% using the admin user context. Returns {Db, Ctx}.
+setup() ->
+    CouchCtx = test_util:start_couch([fabric]),
+    CreateResult = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]),
+    {ok, TestDb} = CreateResult,
+    {TestDb, CouchCtx}.
+
+
+% Delete the database created by setup/0, then stop couch.
+cleanup({Db, Ctx}) ->
+    DbName = fabric2_db:name(Db),
+    ok = fabric2_db:delete(DbName, []),
+    test_util:stop_couch(Ctx).
+
+
+% Three interactive updates with a limit of two: the full path is kept
+% after the second write, and only the two newest revisions remain after
+% the third.
+update_doc({Db, _}) ->
+    ok = fabric2_db:set_revs_limit(Db, 2),
+    DocId = fabric2_util:uuid(),
+    Fresh = #doc{id = DocId},
+    {ok, {PosA, RevA}} = fabric2_db:update_doc(Db, Fresh),
+    AfterFirst = Fresh#doc{revs = {PosA, [RevA]}},
+    {ok, {PosB, RevB}} = fabric2_db:update_doc(Db, AfterFirst),
+    AfterSecond = AfterFirst#doc{revs = {PosB, [RevB, RevA]}},
+    ?assertEqual({ok, AfterSecond}, fabric2_db:open_doc(Db, DocId)),
+
+    % The third write pushes the path past the limit.
+    {ok, {_, RevC}} = fabric2_db:update_doc(Db, AfterSecond),
+    {ok, Stored} = fabric2_db:open_doc(Db, DocId),
+    ?assertEqual({3, [RevC, RevB]}, Stored#doc.revs).
+
+
+% A replicated write whose path is exactly as long as the limit must be
+% read back unchanged.
+update_doc_replicated_no_stemming({Db, _}) ->
+    ok = fabric2_db:set_revs_limit(Db, 2),
+    Parent = fabric2_util:uuid(),
+    Leaf = fabric2_util:uuid(),
+    Path = {2, [Leaf, Parent]},
+    Doc = #doc{id = fabric2_util:uuid(), revs = Path},
+    {ok, _} = fabric2_db:update_doc(Db, Doc, [replicated_changes]),
+    {ok, #doc{revs = StoredPath}} = fabric2_db:open_doc(Db, Doc#doc.id),
+    ?assertEqual(Path, StoredPath).
+
+
+% A replicated write with a two-entry path and a limit of one must be
+% read back with only the leaf revision left in the path.
+update_doc_replicated_with_stemming({Db, _}) ->
+    ok = fabric2_db:set_revs_limit(Db, 1),
+    Parent = fabric2_util:uuid(),
+    Leaf = fabric2_util:uuid(),
+    Doc = #doc{id = fabric2_util:uuid(), revs = {2, [Leaf, Parent]}},
+    {ok, _} = fabric2_db:update_doc(Db, Doc, [replicated_changes]),
+    {ok, #doc{revs = StoredPath}} = fabric2_db:open_doc(Db, Doc#doc.id),
+    ?assertEqual({2, [Leaf]}, StoredPath).
+
+
+% Replicating the same revision a second time returns an empty result
+% list from update_docs and leaves the stemmed path as it was.
+update_doc_replicate_existing_rev({Db, _}) ->
+    ok = fabric2_db:set_revs_limit(Db, 1),
+    Parent = fabric2_util:uuid(),
+    Leaf = fabric2_util:uuid(),
+    ReplOpts = [replicated_changes],
+    Incoming = #doc{id = fabric2_util:uuid(), revs = {2, [Leaf, Parent]}},
+    {ok, {2, _}} = fabric2_db:update_doc(Db, Incoming, ReplOpts),
+    {ok, []} = fabric2_db:update_docs(Db, [Incoming], ReplOpts),
+    {ok, Stored} = fabric2_db:open_doc(Db, Incoming#doc.id),
+    ?assertEqual({2, [Leaf]}, Stored#doc.revs).
+
+
+% Replicate two conflicting branches that share a root, then extend the
+% winning one interactively. The stored path must be stemmed to two
+% entries and the rest of the document must match what was written.
+update_winning_conflict_branch({Db, _}) ->
+    ok = fabric2_db:set_revs_limit(Db, 2),
+    % Sorting makes WinLeaf the largest revision id of the three.
+    [Root, LoseLeaf, WinLeaf] = lists:sort(
+        [fabric2_util:uuid() || _ <- lists:seq(1, 3)]
+    ),
+    ReplOpts = [replicated_changes],
+    WinDoc = #doc{
+        id = fabric2_util:uuid(),
+        revs = {2, [WinLeaf, Root]},
+        body = {[{<<"foo">>, <<"bar">>}]}
+    },
+    {ok, {2, _}} = fabric2_db:update_doc(Db, WinDoc, ReplOpts),
+    LoseDoc = WinDoc#doc{
+        revs = {2, [LoseLeaf, Root]},
+        body = {[{<<"bar">>, <<"foo">>}]}
+    },
+    {ok, {2, _}} = fabric2_db:update_doc(Db, LoseDoc, ReplOpts),
+    % WinDoc already carries the winning branch's path, so only the body
+    % changes for the interactive edit.
+    Edit = WinDoc#doc{body = {[{<<"baz">>, 2}]}},
+    {ok, {3, NewRev}} = fabric2_db:update_doc(Db, Edit),
+    {ok, Stored} = fabric2_db:open_doc(Db, Edit#doc.id),
+    ?assertEqual({3, [NewRev, WinLeaf]}, Stored#doc.revs),
+    ?assertEqual(Edit#doc{revs = undefined}, Stored#doc{revs = undefined}).
+
+
+% Replicate two conflicting branches that share a root, then extend the
+% non-winning one interactively. open_doc must then return the extended
+% branch with its path stemmed to two entries.
+update_non_winning_conflict_branch({Db, _}) ->
+    ok = fabric2_db:set_revs_limit(Db, 2),
+    % Sorting makes WinLeaf the largest revision id of the three.
+    [Root, LoseLeaf, WinLeaf] = lists:sort(
+        [fabric2_util:uuid() || _ <- lists:seq(1, 3)]
+    ),
+    ReplOpts = [replicated_changes],
+    WinDoc = #doc{
+        id = fabric2_util:uuid(),
+        revs = {2, [WinLeaf, Root]},
+        body = {[{<<"foo">>, <<"bar">>}]}
+    },
+    {ok, {2, _}} = fabric2_db:update_doc(Db, WinDoc, ReplOpts),
+    LoseDoc = WinDoc#doc{
+        revs = {2, [LoseLeaf, Root]},
+        body = {[{<<"bar">>, <<"foo">>}]}
+    },
+    {ok, {2, _}} = fabric2_db:update_doc(Db, LoseDoc, ReplOpts),
+    % LoseDoc already carries the non-winning branch's path, so only the
+    % body changes for the interactive edit.
+    Edit = LoseDoc#doc{body = {[{<<"baz">>, 2}]}},
+    {ok, {3, NewRev}} = fabric2_db:update_doc(Db, Edit),
+    {ok, Stored} = fabric2_db:open_doc(Db, Edit#doc.id),
+    ?assertEqual({3, [NewRev, LoseLeaf]}, Stored#doc.revs).
+
+
+% With a limit of one, deleting a document leaves only the deletion
+% revision in the path returned by open_doc with the deleted option.
+delete_doc_basic({Db, _}) ->
+    ok = fabric2_db:set_revs_limit(Db, 1),
+    Live = #doc{
+        id = fabric2_util:uuid(),
+        body = {[{<<"state">>, 1}]}
+    },
+    {ok, {LivePos, LiveRev}} = fabric2_db:update_doc(Db, Live),
+    Tombstone = Live#doc{
+        revs = {LivePos, [LiveRev]},
+        deleted = true,
+        body = {[{<<"state">>, 2}]}
+    },
+    {ok, {DelPos, DelRev}} = fabric2_db:update_doc(Db, Tombstone),
+    Expected = Tombstone#doc{revs = {DelPos, [DelRev]}},
+    Opened = fabric2_db:open_doc(Db, Tombstone#doc.id, [deleted]),
+    ?assertEqual({ok, Expected}, Opened).
+
+
+% Create, delete and then recreate a document with a limit of one. The
+% recreated document must sit at position 3 with only its own revision in
+% the path, and otherwise match what was written.
+recreate_doc_basic({Db, _}) ->
+    ok = fabric2_db:set_revs_limit(Db, 1),
+    Doc1 = #doc{
+        id = fabric2_util:uuid(),
+        body = {[{<<"state">>, 1}]}
+    },
+    {ok, {1, Rev1}} = fabric2_db:update_doc(Db, Doc1),
+    Doc2 = Doc1#doc{
+        revs = {1, [Rev1]},
+        deleted = true,
+        body = {[{<<"state">>, 2}]}
+    },
+    % Only the position of the deletion is checked; its revision id is not
+    % needed afterwards, so it is not bound.
+    {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2),
+    % The recreation is submitted without any revision history.
+    Doc3 = Doc1#doc{
+        revs = {0, []},
+        deleted = false,
+        body = {[{<<"state">>, 3}]}
+    },
+    {ok, {3, Rev3}} = fabric2_db:update_doc(Db, Doc3),
+    {ok, Doc4} = fabric2_db:open_doc(Db, Doc3#doc.id),
+    ?assertEqual({3, [Rev3]}, Doc4#doc.revs),
+    ?assertEqual(Doc3#doc{revs = undefined}, Doc4#doc{revs = undefined}).