You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2023/04/19 03:51:42 UTC

[couchdb] 02/11: mango: introduce support for covering indexes

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit c83b5ccb6757dafdee982deb3dfe7bb3cee9f67f
Author: Gabor Pali <ga...@ibm.com>
AuthorDate: Thu Mar 16 20:23:44 2023 +0100

    mango: introduce support for covering indexes
    
    As a performance improvement, shorten the gap between Mango
    queries and the underlying map-reduce views: try to serve
    requests without pulling documents from the primary data set, i.e.
    run the query with `include_docs` set to `false` when there is a
    chance that it can be "covered" by the chosen index.  The rows in
    the results are then built from the information stored there.
    
    Extend the response on the `_explain` endpoint to show, in the
    `covered` Boolean attribute, whether the query would be covered
    by the index or not.
    
    Remarks:
    
    - This should be a transparent optimization, without any semantic
      effect on the queries.
    
    - Because the main purpose of indexes is to store keys and the
      document identifiers, the change will only work in cases when
      the selected fields overlap with those.  The chance of being
      covered could be increased by adding more non-key fields to the
      index, but that is not in scope here.
---
 src/mango/src/mango_cursor_view.erl | 86 ++++++++++++++++++++++++++-----------
 src/mango/src/mango_idx_view.erl    | 18 +++++++-
 2 files changed, 77 insertions(+), 27 deletions(-)

diff --git a/src/mango/src/mango_cursor_view.erl b/src/mango/src/mango_cursor_view.erl
index a8a255f72..8e79f608e 100644
--- a/src/mango/src/mango_cursor_view.erl
+++ b/src/mango/src/mango_cursor_view.erl
@@ -39,15 +39,19 @@
 % viewcbargs wraps up the arguments that view_cb uses into a single
 % entry in the mrargs.extra list. We use a Map to allow us to later
 % add fields without having old messages causing errors/crashes.
-viewcbargs_new(Selector, Fields) ->
+viewcbargs_new(Selector, Fields, CoveringIndex) ->
     #{
         selector => Selector,
-        fields => Fields
+        fields => Fields,
+        covering_index => CoveringIndex
     }.
+
 viewcbargs_get(selector, Args) when is_map(Args) ->
     maps:get(selector, Args, undefined);
 viewcbargs_get(fields, Args) when is_map(Args) ->
-    maps:get(fields, Args, undefined).
+    maps:get(fields, Args, undefined);
+viewcbargs_get(covering_index, Args) when is_map(Args) ->
+    maps:get(covering_index, Args, undefined).
 
 create(Db, Indexes, Selector, Opts) ->
     FieldRanges = mango_idx_view:field_ranges(Selector),
@@ -73,13 +77,11 @@ create(Db, Indexes, Selector, Opts) ->
         bookmark = Bookmark
     }}.
 
-explain(Cursor) ->
-    #cursor{
-        opts = Opts
-    } = Cursor,
-
+explain(#cursor{opts = Opts} = Cursor) ->
     BaseArgs = base_args(Cursor),
-    Args = apply_opts(Opts, BaseArgs),
+    Args0 = apply_opts(Opts, BaseArgs),
+    #cursor{index = Index, fields = Fields} = Cursor,
+    Args = consider_index_coverage(Index, Fields, Args0),
 
     [
         {mrargs,
@@ -94,7 +96,8 @@ explain(Cursor) ->
                 {stable, Args#mrargs.stable},
                 {update, Args#mrargs.update},
                 {conflicts, Args#mrargs.conflicts}
-            ]}}
+            ]}},
+        {covered, mango_idx_view:covers(Index, Fields)}
     ].
 
 % replace internal values that cannot
@@ -125,6 +128,13 @@ base_args(#cursor{index = Idx, selector = Selector, fields = Fields} = Cursor) -
                     mango_idx:end_key(Idx, Cursor#cursor.ranges)
                 }
         end,
+    CoveringIndex =
+        case mango_idx_view:covers(Idx, Fields) of
+            true ->
+                Idx;
+            false ->
+                undefined
+        end,
     #mrargs{
         view_type = map,
         reduce = false,
@@ -137,7 +147,7 @@ base_args(#cursor{index = Idx, selector = Selector, fields = Fields} = Cursor) -
             {callback, {?MODULE, view_cb}},
             % TODO remove selector. It supports older nodes during version upgrades.
             {selector, Selector},
-            {callback_args, viewcbargs_new(Selector, Fields)},
+            {callback_args, viewcbargs_new(Selector, Fields, CoveringIndex)},
 
             {ignore_partition_query_limit, true}
         ]
@@ -157,7 +167,8 @@ execute(#cursor{db = Db, index = Idx, execution_stats = Stats} = Cursor0, UserFu
             BaseArgs = base_args(Cursor),
             #cursor{opts = Opts, bookmark = Bookmark} = Cursor,
             Args0 = apply_opts(Opts, BaseArgs),
-            Args = mango_json_bookmark:update_args(Bookmark, Args0),
+            Args1 = consider_index_coverage(Idx, Cursor#cursor.fields, Args0),
+            Args = mango_json_bookmark:update_args(Bookmark, Args1),
             UserCtx = couch_util:get_value(user_ctx, Opts, #user_ctx{}),
             DbOpts = [{user_ctx, UserCtx}],
             Result =
@@ -280,29 +291,25 @@ view_cb({row, Row}, #mrargs{extra = Options} = Acc) ->
     % or in the new record in `callback_args`. This is to support mid-upgrade
     % clusters where the non-upgraded coordinator nodes will send the older style.
     % TODO remove this in a couple of couchdb versions.
-    {Selector, Fields} =
+    {Selector, Fields, CoveringIndex} =
         case couch_util:get_value(callback_args, Options) of
             % old style
             undefined ->
-                {couch_util:get_value(selector, Options), undefined};
+                {couch_util:get_value(selector, Options), undefined, undefined};
             % new style - assume a viewcbargs
             Args = #{} ->
-                {viewcbargs_get(selector, Args), viewcbargs_get(fields, Args)}
+                {
+                    viewcbargs_get(selector, Args),
+                    viewcbargs_get(fields, Args),
+                    viewcbargs_get(covering_index, Args)
+                }
         end,
-    case ViewRow#view_row.doc of
-        null ->
-            maybe_send_mango_ping();
-        undefined ->
-            % include_docs=false. Use quorum fetch at coordinator
-            ok = rexi:stream2(ViewRow),
-            set_mango_msg_timestamp();
-        Doc ->
-            % We slightly abuse the doc field in the view response here,
+    Process =
+        fun(Doc) ->
+            % We slightly abuse the doc field in the view response here,
             % because we may return something other than the full document:
             % we may have projected the requested `fields` from the query.
             % However, this oddness is confined to being visible in this module.
-            put(mango_docs_examined, get(mango_docs_examined) + 1),
-            couch_stats:increment_counter([mango, docs_examined]),
             case match_and_extract_doc(Doc, Selector, Fields) of
                 {match, FinalDoc} ->
                     FinalViewRow = ViewRow#view_row{doc = FinalDoc},
@@ -311,6 +318,21 @@ view_cb({row, Row}, #mrargs{extra = Options} = Acc) ->
                 {no_match, undefined} ->
                     maybe_send_mango_ping()
             end
+        end,
+    case {ViewRow#view_row.doc, CoveringIndex} of
+        {null, _} ->
+            maybe_send_mango_ping();
+        {undefined, Index = #idx{}} ->
+            Doc = derive_doc_from_index(Index, ViewRow),
+            Process(Doc);
+        {undefined, _} ->
+            % include_docs=false. Use quorum fetch at coordinator
+            ok = rexi:stream2(ViewRow),
+            set_mango_msg_timestamp();
+        {Doc, _} ->
+            put(mango_docs_examined, get(mango_docs_examined) + 1),
+            couch_stats:increment_counter([mango, docs_examined]),
+            Process(Doc)
     end,
     {ok, Acc};
 view_cb(complete, Acc) ->
@@ -338,6 +360,14 @@ match_and_extract_doc(Doc, Selector, Fields) ->
             {no_match, undefined}
     end.
 
+derive_doc_from_index(Index, #view_row{id = DocId, key = Keys}) ->
+    Columns = mango_idx:columns(Index),
+    lists:foldr(
+        fun({Column, Key}, Doc) -> mango_doc:set_field(Doc, Column, Key) end,
+        mango_doc:set_field({[]}, <<"_id">>, DocId),
+        lists:zip(Columns, Keys)
+    ).
+
 maybe_send_mango_ping() ->
     Current = os:timestamp(),
     LastPing = get(mango_last_msg_timestamp),
@@ -482,6 +512,10 @@ apply_opts([{_, _} | Rest], Args) ->
     % Ignore unknown options
     apply_opts(Rest, Args).
 
+consider_index_coverage(Index, Fields, #mrargs{include_docs = IncludeDocs0} = Args) ->
+    IncludeDocs = IncludeDocs0 andalso (not mango_idx_view:covers(Index, Fields)),
+    Args#mrargs{include_docs = IncludeDocs}.
+
 doc_member_and_extract(Cursor, RowProps) ->
     Db = Cursor#cursor.db,
     Opts = Cursor#cursor.opts,
diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl
index ff8f6c6bb..3ef410e12 100644
--- a/src/mango/src/mango_idx_view.erl
+++ b/src/mango/src/mango_idx_view.erl
@@ -26,7 +26,9 @@
 
     indexable_fields/1,
     field_ranges/1,
-    field_ranges/2
+    field_ranges/2,
+
+    covers/2
 ]).
 
 -include_lib("couch/include/couch_db.hrl").
@@ -521,3 +523,17 @@ can_use_sort([Col | RestCols], SortFields, Selector) ->
         true -> can_use_sort(RestCols, SortFields, Selector);
         false -> false
     end.
+
+% There is no information available about the full set of fields, which
+% comes with the following consequences: an index cannot (reliably) cover
+% an "all fields" type of query and nested fields are out of scope.
+covers(_, all_fields) ->
+    false;
+covers(Idx, Fields) ->
+    case mango_idx:def(Idx) of
+        all_docs ->
+            false;
+        _ ->
+            Available = [<<"_id">> | columns(Idx)],
+            sets:is_subset(sets:from_list(Fields), sets:from_list(Available))
+    end.