You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2019/12/06 21:24:57 UTC

[couchdb] branch prototype/fdb-layer-get-dbs-info updated (f4945ca -> 1f8cc31)

This is an automated email from the ASF dual-hosted git repository.

davisp pushed a change to branch prototype/fdb-layer-get-dbs-info
in repository https://gitbox.apache.org/repos/asf/couchdb.git.


 discard f4945ca  Add database size tests
 discard 1613319  Delete attachments when possible
     new cb03642  Delete attachments when possible
     new 1f8cc31  Add database size tests

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (f4945ca)
            \
             N -- N -- N   refs/heads/prototype/fdb-layer-get-dbs-info (1f8cc31)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/fabric/src/fabric2_fdb.erl | 77 ++++++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 33 deletions(-)


[couchdb] 01/02: Delete attachments when possible

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch prototype/fdb-layer-get-dbs-info
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit cb03642eee65144e4ef62a4f711fe6f827274fe4
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Fri Dec 6 14:40:55 2019 -0600

    Delete attachments when possible
---
 src/fabric/include/fabric2.hrl  |   4 +-
 src/fabric/src/fabric2_db.erl   |  12 ++--
 src/fabric/src/fabric2_fdb.erl  | 127 +++++++++++++++++++++++++++++++++++-----
 src/fabric/src/fabric2_util.erl |  13 ++++
 4 files changed, 136 insertions(+), 20 deletions(-)

diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl
index fc07f33..78b59c3 100644
--- a/src/fabric/include/fabric2.hrl
+++ b/src/fabric/include/fabric2.hrl
@@ -37,14 +37,16 @@
 -define(DB_ATTS, 23).
 -define(DB_VIEWS, 24).
 -define(DB_LOCAL_DOC_BODIES, 25).
+-define(DB_ATT_NAMES, 26).
 
 
 % Versions
 
 % 0 - Initial implementation
 % 1 - Added size information
+% 2 - Added attachment hash
 
--define(CURR_REV_FORMAT, 1).
+-define(CURR_REV_FORMAT, 2).
 
 % 0 - Adding local doc versions
 
diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl
index 1bc53ca..c57d33d 100644
--- a/src/fabric/src/fabric2_db.erl
+++ b/src/fabric/src/fabric2_db.erl
@@ -1411,7 +1411,8 @@ update_doc_interactive(Db, Doc0, Future, _Options) ->
 
     #doc{
         deleted = NewDeleted,
-        revs = {NewRevPos, [NewRev | NewRevPath]}
+        revs = {NewRevPos, [NewRev | NewRevPath]},
+        atts = Atts
     } = Doc4 = stem_revisions(Db, Doc3),
 
     NewRevInfo = #{
@@ -1421,7 +1422,8 @@ update_doc_interactive(Db, Doc0, Future, _Options) ->
         rev_path => NewRevPath,
         sequence => undefined,
         branch_count => undefined,
-        rev_size => null
+        rev_size => null,
+        att_hash => fabric2_util:hash_atts(Atts)
     },
 
     % Gather the list of possible winnig revisions
@@ -1467,7 +1469,8 @@ update_doc_replicated(Db, Doc0, _Options) ->
     #doc{
         id = DocId,
         deleted = Deleted,
-        revs = {RevPos, [Rev | RevPath]}
+        revs = {RevPos, [Rev | RevPath]},
+        atts = Atts
     } = Doc0,
 
     DocRevInfo0 = #{
@@ -1477,7 +1480,8 @@ update_doc_replicated(Db, Doc0, _Options) ->
         rev_path => RevPath,
         sequence => undefined,
         branch_count => undefined,
-        rev_size => null
+        rev_size => null,
+        att_hash => fabric2_util:hash_atts(Atts)
     },
 
     AllRevInfos = fabric2_fdb:get_all_revs(Db, DocId),
diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl
index a91f82a..6fefbc0 100644
--- a/src/fabric/src/fabric2_fdb.erl
+++ b/src/fabric/src/fabric2_fdb.erl
@@ -623,13 +623,40 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) ->
 
     #doc{
         id = DocId,
-        deleted = Deleted
+        deleted = Deleted,
+        atts = Atts
     } = Doc,
 
     % Doc body
 
     {ok, RevSize} = write_doc_body(Db, Doc),
 
+    % Attachment bookkeeping
+
+    % If a document's attachments have changed we have to scan
+    % for any attachments that may need to be deleted. The check
+    % for `>= 2` is a bit subtle. The important point is that
+    % one of the revisions will be from the new document so we
+    % have to find at least one more beyond that to assert that
+    % the attachments have not changed.
+    AttHash = fabric2_util:hash_atts(Atts),
+    RevsToCheck = [NewWinner0] ++ ToUpdate ++ ToRemove,
+    AttHashCount = lists:foldl(fun(Att, Count) ->
+        #{att_hash := RevAttHash} = Att,
+        case RevAttHash == AttHash of
+            true -> Count + 1;
+            false -> Count
+        end
+    end, 0, RevsToCheck),
+    if
+        %% AttHashCount == length(RevsToCheck) ->
+        %%     ok;
+        AttHashCount >= 2 ->
+            ok;
+        true ->
+            cleanup_attachments(Db, DocId, Doc, ToRemove)
+    end,
+
     % Revision tree
 
     NewWinner = NewWinner0#{
@@ -823,6 +850,9 @@ write_attachment(#{} = Db, DocId, Data) when is_binary(Data) ->
     AttId = fabric2_util:uuid(),
     Chunks = chunkify_binary(Data),
 
+    IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix),
+    ok = erlfdb:set(Tx, IdKey, <<>>),
+
     lists:foldl(fun(Chunk, ChunkId) ->
         AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId, ChunkId}, DbPrefix),
         ok = erlfdb:set(Tx, AttKey, Chunk),
@@ -1084,17 +1114,73 @@ clear_doc_body(#{} = Db, DocId, #{} = RevInfo) ->
     ok = erlfdb:clear_range(Tx, StartKey, EndKey).
 
 
+% Delete attachment data for DocId that is no longer referenced by
+% NewDoc or by any stored revision that is not listed in ToRemove.
+% All reads and clears go through the transaction held in Db.
+cleanup_attachments(Db, DocId, NewDoc, ToRemove) ->
+    #{tx := Tx, db_prefix := DbPrefix} = Db,
+
+    % Rev infos carry their revision under `rev_id`, not `revid`
+    RemoveRevs = lists:map(fun(#{rev_id := RevId}) -> RevId end, ToRemove),
+
+    % Gather all known document revisions
+    {ok, DiskDocs} = fabric2_db:open_doc_revs(Db, DocId, all, []),
+    AllDocs = [{ok, NewDoc} | DiskDocs],
+
+    % Get referenced attachment ids
+    ActiveIdSet = lists:foldl(fun({ok, Doc}, Acc) ->
+        #doc{revs = {Pos, [Rev | _]}} = Doc,
+        case lists:member({Pos, Rev}, RemoveRevs) of
+            true ->
+                Acc;
+            false ->
+                lists:foldl(fun(Att, InnerAcc) ->
+                    {loc, _Db, _DocId, AttId} = couch_att:fetch(data, Att),
+                    sets:add_element(AttId, InnerAcc)
+                end, Acc, Doc#doc.atts)
+        end
+    end, sets:new(), AllDocs),
+
+    % Collect every attachment id recorded for this document
+    AttPrefix = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId}, DbPrefix),
+    Options = [{streaming_mode, want_all}],
+    Future = erlfdb:get_range_startswith(Tx, AttPrefix, Options),
+
+    ExistingIdSet = lists:foldl(fun({K, _}, Acc) ->
+        {?DB_ATT_NAMES, DocId, AttId} = erlfdb_tuple:unpack(K, DbPrefix),
+        sets:add_element(AttId, Acc)
+    end, sets:new(), erlfdb:wait(Future)),
+
+    AttsToRemove = sets:subtract(ExistingIdSet, ActiveIdSet),
+
+    lists:foreach(fun(AttId) ->
+        IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix),
+        erlfdb:clear(Tx, IdKey),
+
+        ChunkKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix),
+        erlfdb:clear_range_startswith(Tx, ChunkKey)
+    end, sets:to_list(AttsToRemove)).
+
+
 revinfo_to_fdb(Tx, DbPrefix, DocId, #{winner := true} = RevId) ->
     #{
         deleted := Deleted,
         rev_id := {RevPos, Rev},
         rev_path := RevPath,
         branch_count := BranchCount,
-        rev_size := RevSize
+        rev_size := RevSize,
+        att_hash := AttHash
     } = RevId,
     VS = new_versionstamp(Tx),
     Key = {?DB_REVS, DocId, not Deleted, RevPos, Rev},
-    Val = {?CURR_REV_FORMAT, VS, BranchCount, list_to_tuple(RevPath), RevSize},
+    Val = {
+        ?CURR_REV_FORMAT,
+        VS,
+        BranchCount,
+        list_to_tuple(RevPath),
+        RevSize,
+        AttHash
+    },
     KBin = erlfdb_tuple:pack(Key, DbPrefix),
     VBin = erlfdb_tuple:pack_vs(Val),
     {KBin, VBin, VS};
@@ -1104,18 +1190,19 @@ revinfo_to_fdb(_Tx, DbPrefix, DocId, #{} = RevId) ->
         deleted := Deleted,
         rev_id := {RevPos, Rev},
         rev_path := RevPath,
-        rev_size := RevSize
+        rev_size := RevSize,
+        att_hash := AttHash
     } = RevId,
     Key = {?DB_REVS, DocId, not Deleted, RevPos, Rev},
-    Val = {?CURR_REV_FORMAT, list_to_tuple(RevPath), RevSize},
+    Val = {?CURR_REV_FORMAT, list_to_tuple(RevPath), RevSize, AttHash},
     KBin = erlfdb_tuple:pack(Key, DbPrefix),
     VBin = erlfdb_tuple:pack(Val),
     {KBin, VBin, undefined}.
 
 
-fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _, _} = Val) ->
+fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _, _, _} = Val) ->
     {?DB_REVS, _DocId, NotDeleted, RevPos, Rev} = Key,
-    {_RevFormat, Sequence, BranchCount, RevPath, RevSize} = Val,
+    {_RevFormat, Sequence, BranchCount, RevPath, RevSize, AttHash} = Val,
     #{
         winner => true,
         deleted => not NotDeleted,
@@ -1123,12 +1210,13 @@ fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _, _} = Val) ->
         rev_path => tuple_to_list(RevPath),
         sequence => Sequence,
         branch_count => BranchCount,
-        rev_size => RevSize
+        rev_size => RevSize,
+        att_hash => AttHash
     };
 
-fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _} = Val)  ->
+fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _} = Val)  ->
     {?DB_REVS, _DocId, NotDeleted, RevPos, Rev} = Key,
-    {_RevFormat, RevPath, RevSize} = Val,
+    {_RevFormat, RevPath, RevSize, AttHash} = Val,
     #{
         winner => false,
         deleted => not NotDeleted,
@@ -1136,15 +1224,24 @@ fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _} = Val)  ->
         rev_path => tuple_to_list(RevPath),
         sequence => undefined,
         branch_count => undefined,
-        rev_size => RevSize
+        rev_size => RevSize,
+        att_hash => AttHash
     };
 
-fdb_to_revinfo(Key, {0, S, B, R}) ->
-    Val = {?CURR_REV_FORMAT, S, B, R, null},
+fdb_to_revinfo(Key, {0, Seq, BCount, RPath}) ->
+    Val = {?CURR_REV_FORMAT, Seq, BCount, RPath, null, <<>>},
+    fdb_to_revinfo(Key, Val);
+
+fdb_to_revinfo(Key, {0, RPath}) ->
+    Val = {?CURR_REV_FORMAT, RPath, null, <<>>},
+    fdb_to_revinfo(Key, Val);
+
+fdb_to_revinfo(Key, {1, Seq, BCount, RPath, Size}) ->
+    Val = {?CURR_REV_FORMAT, Seq, BCount, RPath, Size, <<>>},
     fdb_to_revinfo(Key, Val);
 
-fdb_to_revinfo(Key, {0, R}) ->
-    Val = {?CURR_REV_FORMAT, R, null},
+fdb_to_revinfo(Key, {1, RPath, Size}) ->
+    Val = {?CURR_REV_FORMAT, RPath, Size, <<>>},
     fdb_to_revinfo(Key, Val).
 
 
diff --git a/src/fabric/src/fabric2_util.erl b/src/fabric/src/fabric2_util.erl
index 2b8e49e..6bd4324 100644
--- a/src/fabric/src/fabric2_util.erl
+++ b/src/fabric/src/fabric2_util.erl
@@ -25,6 +25,8 @@
 
     validate_security_object/1,
 
+    hash_atts/1,
+
     dbname_ends_with/2,
 
     get_value/2,
@@ -124,6 +126,17 @@ validate_json_list_of_strings(Member, Props) ->
     end.
 
 
+hash_atts([]) ->
+    <<>>;
+
+hash_atts(Atts) ->
+    Md5St = lists:foldl(fun(Att, Acc) ->
+        {loc, _Db, _DocId, AttId} = couch_att:fetch(data, Att),
+        couch_hash:md5_hash_update(Acc, AttId)
+    end, couch_hash:md5_hash_init(), Atts),
+    couch_hash:md5_hash_final(Md5St).
+
+
 dbname_ends_with(#{} = Db, Suffix) ->
     dbname_ends_with(fabric2_db:name(Db), Suffix);
 


[couchdb] 02/02: Add database size tests

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch prototype/fdb-layer-get-dbs-info
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 1f8cc31a38e29e386c428bbb9a99ac87450aedf9
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Fri Dec 6 14:59:30 2019 -0600

    Add database size tests
---
 src/fabric/test/fabric2_db_size_tests.erl | 198 ++++++++++++++++++++++++++++++
 1 file changed, 198 insertions(+)

diff --git a/src/fabric/test/fabric2_db_size_tests.erl b/src/fabric/test/fabric2_db_size_tests.erl
new file mode 100644
index 0000000..fc95e0e
--- /dev/null
+++ b/src/fabric/test/fabric2_db_size_tests.erl
@@ -0,0 +1,198 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(fabric2_db_size_tests).
+
+
+-include_lib("couch/include/couch_db.hrl").
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("eunit/include/eunit.hrl").
+-include("fabric2_test.hrl").
+
+
+% EUnit generator: runs every size test against one shared database.
+db_size_test_() ->
+    {
+        "Test database size calculations",
+        {
+            setup,
+            fun setup/0, fun cleanup/1,
+            with([
+                ?TDEF(empty_size),
+                ?TDEF(new_doc),
+                ?TDEF(edit_doc),
+                ?TDEF(del_doc),
+                ?TDEF(conflicted_doc),
+                ?TDEF(del_conflict)
+            ])
+        }
+    }.
+
+
+setup() ->
+    Ctx = test_util:start_couch([fabric]),
+    {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]),
+    {Db, Ctx}.
+
+
+cleanup({Db, Ctx}) ->
+    ok = fabric2_db:delete(fabric2_db:name(Db), []),
+    test_util:stop_couch(Ctx).
+
+
+empty_size({Db, _}) ->
+    ?assertEqual(2, db_size(Db)).
+
+
+new_doc({Db, _}) ->
+    increases(Db, fun() ->
+        create_doc(Db)
+    end).
+
+
+edit_doc({Db, _}) ->
+    DocId = fabric2_util:uuid(),
+    {ok, RevId1} = increases(Db, fun() ->
+        create_doc(Db, DocId)
+    end),
+    {ok, RevId2} = increases(Db, fun() ->
+        update_doc(Db, DocId, RevId1, {[{<<"foo">>, <<"bar">>}]})
+    end),
+    decreases(Db, fun() ->
+        update_doc(Db, DocId, RevId2)
+    end).
+
+
+del_doc({Db, _}) ->
+    DocId = fabric2_util:uuid(),
+    {ok, RevId} = increases(Db, fun() ->
+        create_doc(Db, DocId, {[{<<"foo">>, <<"bar">>}]})
+    end),
+    % The change here is -11 because we're going from
+    % {"foo":"bar"} == 13 bytes to {} == 2 bytes.
+    % I.e., 2 - 13 == -11
+    diff(Db, fun() ->
+        delete_doc(Db, DocId, RevId)
+    end, -11).
+
+
+conflicted_doc({Db, _}) ->
+    DocId = fabric2_util:uuid(),
+    Before = db_size(Db),
+    {ok, RevId1} = increases(Db, fun() ->
+        create_doc(Db, DocId, {[{<<"foo">>, <<"bar">>}]})
+    end),
+    Between = db_size(Db),
+    increases(Db, fun() ->
+        create_conflict(Db, DocId, RevId1, {[{<<"foo">>, <<"bar">>}]})
+    end),
+    After = db_size(Db),
+    ?assertEqual(After - Between, Between - Before).
+
+
+% Deleting one branch of a conflicted document must shrink the
+% reported database size.
+del_conflict({Db, _}) ->
+    DocId = fabric2_util:uuid(),
+    {ok, RevId1} = increases(Db, fun() ->
+        create_doc(Db, DocId, {[{<<"foo">>, <<"bar">>}]})
+    end),
+    {ok, RevId2} = increases(Db, fun() ->
+        create_conflict(Db, DocId, RevId1, {[{<<"foo">>, <<"bar">>}]})
+    end),
+    % Assert the delete succeeded; decreases/2 checks the size change
+    decreases(Db, fun() -> {ok, _} = delete_doc(Db, DocId, RevId2) end).
+
+
+create_doc(Db) ->
+    create_doc(Db, fabric2_util:uuid()).
+
+
+create_doc(Db, DocId) when is_binary(DocId) ->
+    create_doc(Db, DocId, {[]});
+create_doc(Db, {Props} = Body) when is_list(Props) ->
+    create_doc(Db, fabric2_util:uuid(), Body).
+
+
+create_doc(Db, DocId, Body) ->
+    Doc = #doc{
+        id = DocId,
+        body = Body
+    },
+    fabric2_db:update_doc(Db, Doc).
+
+
+create_conflict(Db, DocId, RevId) ->
+    create_conflict(Db, DocId, RevId, {[]}).
+
+
+create_conflict(Db, DocId, RevId, Body) ->
+    {Pos, _} = RevId,
+    % Only keep the first 16 bytes of the UUID
+    % so that we match the normal sized revs
+    <<NewRev:16/binary, _/binary>> = fabric2_util:uuid(),
+    Doc = #doc{
+        id = DocId,
+        revs = {Pos, [NewRev]},
+        body = Body
+    },
+    fabric2_db:update_doc(Db, Doc, [replicated_changes]).
+
+
+update_doc(Db, DocId, RevId) ->
+    update_doc(Db, DocId, RevId, {[]}).
+
+
+update_doc(Db, DocId, {Pos, Rev}, Body) ->
+    Doc = #doc{
+        id = DocId,
+        revs = {Pos, [Rev]},
+        body = Body
+    },
+    fabric2_db:update_doc(Db, Doc).
+
+
+delete_doc(Db, DocId, RevId) ->
+    delete_doc(Db, DocId, RevId, {[]}).
+
+
+delete_doc(Db, DocId, {Pos, Rev}, Body) ->
+    Doc = #doc{
+        id = DocId,
+        revs = {Pos, [Rev]},
+        deleted = true,
+        body = Body
+    },
+    fabric2_db:update_doc(Db, Doc).
+
+
+constant(Db, Fun) -> check(Db, Fun, fun erlang:'=='/2).
+increases(Db, Fun) -> check(Db, Fun, fun erlang:'>'/2).
+decreases(Db, Fun) -> check(Db, Fun, fun erlang:'<'/2).
+diff(Db, Fun, Change) -> check(Db, Fun, fun(A, B) -> (A - B) == Change end).
+
+% Run Fun and assert Cmp(SizeAfter, SizeBefore); returns Fun's result.
+check(Db, Fun, Cmp) ->
+    Before = db_size(Db),
+    Result = Fun(),
+    After = db_size(Db),
+    ?assertEqual({true, After, Before}, {Cmp(After, Before), After, Before}),
+    Result.
+
+
+db_size(Info) when is_list(Info) ->
+    {sizes, {Sizes}} = lists:keyfind(sizes, 1, Info),
+    {<<"external">>, External} = lists:keyfind(<<"external">>, 1, Sizes),
+    External;
+db_size(Db) when is_map(Db) ->
+    {ok, Info} = fabric2_db:get_db_info(Db),
+    db_size(Info).