You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2019/01/02 20:53:52 UTC

[couchdb] branch feature/database-partition-limits updated (cc00b4f -> 06f4445)

This is an automated email from the ASF dual-hosted git repository.

davisp pushed a change to branch feature/database-partition-limits
in repository https://gitbox.apache.org/repos/asf/couchdb.git.


 discard cc00b4f  Enforce partition size limits
 discard d0a4ac4  Use an accumulator when merging revision trees
    omit 60bbe3f  Add Elixir tests for database partitions
    omit 6312363  Support partitioned queries in Mango
    omit d609bec  Use index names when testing index selection
    omit 021aa7b  Optimize offset/limit for partition queries
    omit 2463ee3  Optimize all_docs queries in a single partition
    omit 16e53ee  Implement partitioned views
     add f7b0822  Implement partitioned views
     add 546894f  Optimize all_docs queries in a single partition
     add b90c6c5  Optimize offset/limit for partition queries
     add 739c262  Use index names when testing index selection
     add 56c1404  Support partitioned queries in Mango
     add 3c8e0c6  Add Elixir tests for database partitions
     new cb79853  Use an accumulator when merging revision trees
     new 06f4445  Enforce partition size limits

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (cc00b4f)
            \
             N -- N -- N   refs/heads/feature/database-partition-limits (06f4445)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/couch_mrview/src/couch_mrview_updater.erl | 6 ++++--
 test/elixir/test/partition_crud_test.exs      | 6 +++---
 2 files changed, 7 insertions(+), 5 deletions(-)


[couchdb] 01/02: Use an accumulator when merging revision trees

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch feature/database-partition-limits
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit cb79853f988b23d5ede6c7f76fb464e027fe31e2
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Fri Dec 14 10:31:02 2018 -0600

    Use an accumulator when merging revision trees
    
    This cleans up `couch_db_updater:merge_rev_trees/7` by replacing its
    positional state arguments with a single accumulator argument.
---
 src/couch/src/couch_db_updater.erl | 57 ++++++++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 14 deletions(-)

diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl
index c0974aa..95508e2 100644
--- a/src/couch/src/couch_db_updater.erl
+++ b/src/couch/src/couch_db_updater.erl
@@ -23,6 +23,15 @@
 -define(IDLE_LIMIT_DEFAULT, 61000).
 
 
+-record(merge_acc, {
+    revs_limit,
+    merge_conflicts,
+    add_infos = [],
+    rem_seqs = [],
+    cur_seq
+}).
+
+
 init({Engine, DbName, FilePath, Options0}) ->
     erlang:put(io_priority, {db_update, DbName}),
     update_idle_limit_from_config(),
@@ -450,11 +459,18 @@ doc_tag(#doc{meta=Meta}) ->
         Else -> throw({invalid_doc_tag, Else})
     end.
 
-merge_rev_trees(_Limit, _Merge, [], [], AccNewInfos, AccRemoveSeqs, AccSeq) ->
-    {ok, lists:reverse(AccNewInfos), AccRemoveSeqs, AccSeq};
-merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList],
-        [OldDocInfo|RestOldInfo], AccNewInfos, AccRemoveSeqs, AccSeq) ->
-    erlang:put(last_id_merged, OldDocInfo#full_doc_info.id), % for debugging
+merge_rev_trees([], [], Acc) ->
+    {ok, Acc#merge_acc{
+        add_infos = lists:reverse(Acc#merge_acc.add_infos)
+    }};
+merge_rev_trees([NewDocs | RestDocsList], [OldDocInfo | RestOldInfo], Acc) ->
+    #merge_acc{
+        revs_limit = Limit,
+        merge_conflicts = MergeConflicts
+    } = Acc,
+
+    % Track doc ids so we can debug large revision trees
+    erlang:put(last_id_merged, OldDocInfo#full_doc_info.id),
     NewDocInfo0 = lists:foldl(fun({Client, NewDoc}, OldInfoAcc) ->
         merge_rev_tree(OldInfoAcc, NewDoc, Client, MergeConflicts)
     end, OldDocInfo, NewDocs),
@@ -475,22 +491,25 @@ merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList],
     end,
     if NewDocInfo2 == OldDocInfo ->
         % nothing changed
-        merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo,
-            AccNewInfos, AccRemoveSeqs, AccSeq);
+        merge_rev_trees(RestDocsList, RestOldInfo, Acc);
     true ->
         % We have updated the document, give it a new update_seq. Its
         % important to note that the update_seq on OldDocInfo should
         % be identical to the value on NewDocInfo1.
         OldSeq = OldDocInfo#full_doc_info.update_seq,
         NewDocInfo3 = NewDocInfo2#full_doc_info{
-            update_seq = AccSeq + 1
+            update_seq = Acc#merge_acc.cur_seq + 1
         },
         RemoveSeqs = case OldSeq of
-            0 -> AccRemoveSeqs;
-            _ -> [OldSeq | AccRemoveSeqs]
+            0 -> Acc#merge_acc.rem_seqs;
+            _ -> [OldSeq | Acc#merge_acc.rem_seqs]
         end,
-        merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo,
-            [NewDocInfo3|AccNewInfos], RemoveSeqs, AccSeq+1)
+        NewAcc = Acc#merge_acc{
+            add_infos = [NewDocInfo3 | Acc#merge_acc.add_infos],
+            rem_seqs = RemoveSeqs,
+            cur_seq = Acc#merge_acc.cur_seq + 1
+        },
+        merge_rev_trees(RestDocsList, RestOldInfo, NewAcc)
     end.
 
 merge_rev_tree(OldInfo, NewDoc, Client, false)
@@ -599,8 +618,18 @@ update_docs_int(Db, DocsList, LocalDocs, MergeConflicts, FullCommit) ->
             #full_doc_info{id=Id}
     end, Ids, OldDocLookups),
     % Merge the new docs into the revision trees.
-    {ok, NewFullDocInfos, RemSeqs, _} = merge_rev_trees(RevsLimit,
-            MergeConflicts, DocsList, OldDocInfos, [], [], UpdateSeq),
+    AccIn = #merge_acc{
+        revs_limit = RevsLimit,
+        merge_conflicts = MergeConflicts,
+        add_infos = [],
+        rem_seqs = [],
+        cur_seq = UpdateSeq
+    },
+    {ok, AccOut} = merge_rev_trees(DocsList, OldDocInfos, AccIn),
+    #merge_acc{
+        add_infos = NewFullDocInfos,
+        rem_seqs = RemSeqs
+    } = AccOut,
 
     % Write out the document summaries (the bodies are stored in the nodes of
     % the trees, the attachments are already written to disk)


[couchdb] 02/02: Enforce partition size limits

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch feature/database-partition-limits
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 06f4445a04d6a9b00e4ca3ed07c405f1f7a6d3b5
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Fri Dec 14 11:06:03 2018 -0600

    Enforce partition size limits
    
    This limit helps prevent users from inadvertently misusing partitions by
    refusing to add documents once the size of a partition reaches the
    configured limit (10GiB by default).
    
    Co-authored-by: Robert Newson <rn...@apache.org>
---
 rel/overlay/etc/default.ini        |  5 +++
 src/chttpd/src/chttpd.erl          |  3 ++
 src/couch/src/couch_db_updater.erl | 81 ++++++++++++++++++++++++++++++++++++--
 3 files changed, 85 insertions(+), 4 deletions(-)

diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index a77add4..ae9d313 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -64,6 +64,11 @@ default_engine = couch
 ; move deleted databases/shards there instead. You can then manually delete
 ; these files later, as desired.
 ;enable_database_recovery = false
+;
+; Set the maximum size allowed for a partition. This helps users avoid
+; inadvertently abusing partitions resulting in hot shards. The default
+; is 10GiB. A value of 0 or less will disable partition size checks.
+;max_partition_size = 10737418240
 
 [couchdb_engines]
 ; The keys in this section are the filename extension that
diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl
index 2f241cd..6558b1e 100644
--- a/src/chttpd/src/chttpd.erl
+++ b/src/chttpd/src/chttpd.erl
@@ -873,6 +873,9 @@ error_info(conflict) ->
     {409, <<"conflict">>, <<"Document update conflict.">>};
 error_info({conflict, _}) ->
     {409, <<"conflict">>, <<"Document update conflict.">>};
+error_info({partition_overflow, DocId}) ->
+    Descr = <<"'", DocId/binary, "' exceeds partition limit">>,
+    {403, <<"partition_overflow">>, Descr};
 error_info({{not_found, missing}, {_, _}}) ->
     {409, <<"not_found">>, <<"missing_rev">>};
 error_info({forbidden, Error, Msg}) ->
diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl
index 95508e2..00fee90 100644
--- a/src/couch/src/couch_db_updater.erl
+++ b/src/couch/src/couch_db_updater.erl
@@ -21,6 +21,7 @@
 -include("couch_db_int.hrl").
 
 -define(IDLE_LIMIT_DEFAULT, 61000).
+-define(DEFAULT_MAX_PARTITION_SIZE, 16#280000000). % 10 GiB
 
 
 -record(merge_acc, {
@@ -28,7 +29,8 @@
     merge_conflicts,
     add_infos = [],
     rem_seqs = [],
-    cur_seq
+    cur_seq,
+    full_partitions = []
 }).
 
 
@@ -466,13 +468,22 @@ merge_rev_trees([], [], Acc) ->
 merge_rev_trees([NewDocs | RestDocsList], [OldDocInfo | RestOldInfo], Acc) ->
     #merge_acc{
         revs_limit = Limit,
-        merge_conflicts = MergeConflicts
+        merge_conflicts = MergeConflicts,
+        full_partitions = FullPartitions
     } = Acc,
 
     % Track doc ids so we can debug large revision trees
     erlang:put(last_id_merged, OldDocInfo#full_doc_info.id),
     NewDocInfo0 = lists:foldl(fun({Client, NewDoc}, OldInfoAcc) ->
-        merge_rev_tree(OldInfoAcc, NewDoc, Client, MergeConflicts)
+        NewInfo = merge_rev_tree(OldInfoAcc, NewDoc, Client, MergeConflicts),
+        case is_overflowed(NewInfo, OldInfoAcc, FullPartitions) of
+            true when not MergeConflicts ->
+                DocId = NewInfo#doc.id,
+                send_result(Client, NewDoc, {partition_overflow, DocId}),
+                OldInfoAcc;
+            false ->
+                NewInfo
+        end
     end, OldDocInfo, NewDocs),
     NewDocInfo1 = maybe_stem_full_doc_info(NewDocInfo0, Limit),
     % When MergeConflicts is false, we updated #full_doc_info.deleted on every
@@ -595,6 +606,16 @@ merge_rev_tree(OldInfo, NewDoc, _Client, true) ->
     {NewTree, _} = couch_key_tree:merge(OldTree, NewTree0),
     OldInfo#full_doc_info{rev_tree = NewTree}.
 
+is_overflowed(_New, _Old, []) ->
+    false;
+is_overflowed(Old, Old, _FullPartitions) ->
+    false;
+is_overflowed(New, Old, FullPartitions) ->
+    Partition = couch_partition:from_docid(New#full_doc_info.id),
+    NewSize = estimate_size(New),
+    OldSize = estimate_size(Old),
+    lists:member(Partition, FullPartitions) andalso NewSize > OldSize.
+
 maybe_stem_full_doc_info(#full_doc_info{rev_tree = Tree} = Info, Limit) ->
     case config:get_boolean("couchdb", "stem_interactive_updates", true) of
         true ->
@@ -617,13 +638,31 @@ update_docs_int(Db, DocsList, LocalDocs, MergeConflicts, FullCommit) ->
         (Id, not_found) ->
             #full_doc_info{id=Id}
     end, Ids, OldDocLookups),
+
+    %% Get the list of full partitions
+    FullPartitions = case couch_db:is_partitioned(Db) of
+        true ->
+            case max_partition_size() of
+                N when N =< 0 ->
+                    [];
+                Max ->
+                    Partitions = lists:usort(lists:map(fun(Id) ->
+                        couch_partition:from_docid(Id)
+                    end, Ids)),
+                    [P || P <- Partitions, partition_size(Db, P) >= Max]
+            end;
+        false ->
+            []
+    end,
+
     % Merge the new docs into the revision trees.
     AccIn = #merge_acc{
         revs_limit = RevsLimit,
         merge_conflicts = MergeConflicts,
         add_infos = [],
         rem_seqs = [],
-        cur_seq = UpdateSeq
+        cur_seq = UpdateSeq,
+        full_partitions = FullPartitions
     },
     {ok, AccOut} = merge_rev_trees(DocsList, OldDocInfos, AccIn),
     #merge_acc{
@@ -685,6 +724,40 @@ increment_local_doc_revs(#doc{revs = {0, [RevStr | _]}} = Doc) ->
 increment_local_doc_revs(#doc{}) ->
     {error, <<"Invalid rev format">>}.
 
+max_partition_size() ->
+    config:get_integer("couchdb", "max_partition_size",
+            ?DEFAULT_MAX_PARTITION_SIZE).
+
+partition_size(Db, Partition) ->
+    {ok, Info} = couch_db:get_partition_info(Db, Partition),
+    Sizes = couch_util:get_value(sizes, Info),
+    couch_util:get_value(external, Sizes).
+
+estimate_size(#full_doc_info{} = FDI) ->
+    #full_doc_info{rev_tree = RevTree} = FDI,
+    Fun = fun
+        (_Rev, Value, leaf, SizesAcc) ->
+            case Value of
+                #doc{} = Doc ->
+                    ExternalSize = get_meta_body_size(Value#doc.meta),
+                    {size_info, AttSizeInfo} =
+                        lists:keyfind(size_info, 1, Doc#doc.meta),
+                    Leaf = #leaf{
+                        sizes = #size_info{
+                            external = ExternalSize
+                        },
+                        atts = AttSizeInfo
+                    },
+                    add_sizes(leaf, Leaf, SizesAcc);
+                #leaf{} ->
+                    add_sizes(leaf, Value, SizesAcc)
+            end;
+        (_Rev, _Value, branch, SizesAcc) ->
+            SizesAcc
+    end,
+    {_, FinalES, FinalAtts} = couch_key_tree:fold(Fun, {0, 0, []}, RevTree),
+    TotalAttSize = lists:foldl(fun({_, S}, A) -> S + A end, 0, FinalAtts),
+    FinalES + TotalAttSize.
 
 purge_docs(Db, []) ->
     {ok, Db, []};