You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@couchdb.apache.org by GitBox <gi...@apache.org> on 2022/10/25 22:58:40 UTC

[GitHub] [couchdb] jaydoane commented on a diff in pull request #4238: Optimize _bulk_get endpoint

jaydoane commented on code in PR #4238:
URL: https://github.com/apache/couchdb/pull/4238#discussion_r1004912556


##########
src/chttpd/src/chttpd_db.erl:
##########
@@ -2363,6 +2288,175 @@ set_namespace(NS, #mrargs{} = Args) ->
 
 %% /db/_bulk_get stuff
 
+bulk_get_is_multipart(#httpd{mochi_req = MochiReq}) ->
+    Json = MochiReq:accepts_content_type("application/json"),
+    Mixed = MochiReq:accepts_content_type("multipart/mixed"),
+    Related = MochiReq:accepts_content_type("multipart/related"),
+    not Json andalso (Mixed orelse Related).
+
+bulk_get_docs(Db, #{} = ArgsMap, Options) ->
+    % Sort args by doc ID to hopefully make querying B-trees a bit faster
+    KeyFun = fun({Ref, {DocId, _, _}}) -> {DocId, Ref} end,
+    CmpFun = fun(A, B) -> KeyFun(A) =< KeyFun(B) end,
+    ArgsList = lists:sort(CmpFun, maps:to_list(ArgsMap)),
+    % Split out known errors. Later, before returning, recombine them back into
+    % the final result map.
+    PartFun = fun({_Ref, {_DocId, RevsOrError, _DocOpts}}) ->
+        case RevsOrError of
+            L when is_list(L) -> true;
+            all -> true;
+            {error, _} -> false
+        end
+    end,
+    {ValidArgs, ErrorArgs} = lists:partition(PartFun, ArgsList),
+    UseBatches = config:get_boolean("chttpd", "bulk_get_use_batches", true),
+    Responses =
+        case UseBatches of
+            true -> bulk_get_docs_batched(Db, ValidArgs, Options);
+            false -> bulk_get_docs_individually(Db, ValidArgs, Options)
+        end,
+    MapFun = fun({Ref, {DocId, Response, _}} = RespTuple) ->
+        case Response of
+            [] ->
+                % Remap empty reponses to errors. This is a peculiarity of the

Review Comment:
   Calling this annoyance a "peculiarity" seems quite gracious!



##########
src/chttpd/src/chttpd_db.erl:
##########
@@ -2363,6 +2288,175 @@ set_namespace(NS, #mrargs{} = Args) ->
 
 %% /db/_bulk_get stuff
 
+bulk_get_is_multipart(#httpd{mochi_req = MochiReq}) ->
+    Json = MochiReq:accepts_content_type("application/json"),
+    Mixed = MochiReq:accepts_content_type("multipart/mixed"),
+    Related = MochiReq:accepts_content_type("multipart/related"),
+    not Json andalso (Mixed orelse Related).
+
+bulk_get_docs(Db, #{} = ArgsMap, Options) ->
+    % Sort args by doc ID to hopefully make querying B-trees a bit faster
+    KeyFun = fun({Ref, {DocId, _, _}}) -> {DocId, Ref} end,
+    CmpFun = fun(A, B) -> KeyFun(A) =< KeyFun(B) end,
+    ArgsList = lists:sort(CmpFun, maps:to_list(ArgsMap)),
+    % Split out known errors. Later, before returning, recombine them back into
+    % the final result map.
+    PartFun = fun({_Ref, {_DocId, RevsOrError, _DocOpts}}) ->
+        case RevsOrError of
+            L when is_list(L) -> true;
+            all -> true;
+            {error, _} -> false
+        end
+    end,
+    {ValidArgs, ErrorArgs} = lists:partition(PartFun, ArgsList),
+    UseBatches = config:get_boolean("chttpd", "bulk_get_use_batches", true),
+    Responses =
+        case UseBatches of
+            true -> bulk_get_docs_batched(Db, ValidArgs, Options);
+            false -> bulk_get_docs_individually(Db, ValidArgs, Options)
+        end,
+    MapFun = fun({Ref, {DocId, Response, _}} = RespTuple) ->
+        case Response of
+            [] ->
+                % Remap empty reponses to errors. This is a peculiarity of the
+                % _bulk_get HTTP API. If revision was not specifed, `undefined`
+                % must be returned as the error revision ID.
+                #{Ref := {_, Revs, _}} = ArgsMap,
+                RevStr = bulk_get_rev_error(Revs),
+                Error = {RevStr, <<"not_found">>, <<"missing">>},
+                {Ref, {DocId, {error, Error}, []}};
+            [_ | _] = DocRevisions ->
+                chttpd_stats:incr_reads(length(DocRevisions)),
+                RespTuple;
+            _ ->
+                RespTuple
+        end
+    end,
+    % Recombine with the inital known errors and return as a map
+    maps:from_list(lists:map(MapFun, Responses) ++ ErrorArgs).
+
+bulk_get_docs_batched(Db, Args, Options) when is_list(Args) ->
+    % Args is [{Ref, {DocId, Revs, DocOpts}}, ...] but fabric:open_revs/3
+    % accepts [{{DocId, Revs}, DocOpts}, ...] so we need to transform them
+    ArgsFun = fun({_Ref, {DocId, Revs, DocOpts}}) ->
+        {{DocId, Revs}, DocOpts}
+    end,
+    OpenRevsArgs = lists:map(ArgsFun, Args),
+    case fabric:open_revs(Db, OpenRevsArgs, Options) of
+        {ok, Responses} ->
+            ZipFun = fun({Ref, {DocId, _Rev, DocOpts}}, Response) ->
+                {Ref, {DocId, Response, DocOpts ++ Options}}
+            end,
+            lists:zipwith(ZipFun, Args, Responses);
+        {error, Error} ->
+            % Distribute error to all request args, so it looks like they
+            % individually failed with that error
+            MapFun = fun({Ref, {DocId, Revs, _DocOpts}}) ->
+                RevStr = bulk_get_rev_error(Revs),
+                Tag = internal_fabric_error,
+                % This error will be emitted as json so make sure it's rendered
+                % to a string first.
+                Reason = couch_util:to_binary(Error),
+                {Ref, {DocId, {error, {RevStr, Tag, Reason}}, []}}
+            end,
+            lists:map(MapFun, Args)
+    end.
+
+bulk_get_docs_individually(Db, Args, Options) when is_list(Args) ->
+    MapFun = fun({Ref, {DocId, Revs, DocOpts}}) ->
+        case fabric:open_revs(Db, DocId, Revs, DocOpts ++ Options) of
+            {ok, Response} ->
+                {Ref, {DocId, Response, DocOpts}};
+            {error, Error} ->
+                RevStr = bulk_get_rev_error(Revs),
+                Tag = internal_fabric_error,
+                % This error will be emitted as json so make sure it's rendered
+                % to a string first.
+                Reason = couch_util:to_binary(Error),
+                {Ref, {DocId, {error, {RevStr, Tag, Reason}}, []}}
+        end
+    end,
+    lists:map(MapFun, Args).
+
+bulk_get_ret_json(#httpd{} = Req, ArgsRefs, ResultsMap, Options) ->
+    send_json(Req, 200, #{
+        <<"results">> => lists:map(
+            fun(Ref) ->
+                #{Ref := {DocId, Result, DocOpts}} = ResultsMap,
+                % We are about to encode the document into json and some of the
+                % provided general options might affect that so we make sure to
+                % combine all doc options and the general options together
+                AllOptions = DocOpts ++ Options,
+                #{
+                    <<"id">> => DocId,
+                    <<"docs">> => bulk_get_result(DocId, Result, AllOptions)
+                }
+            end,
+            ArgsRefs
+        )
+    }).
+
+bulk_get_result(DocId, {error, {Rev, Error, Reason}}, _Options) ->
+    [bulk_get_json_error_map(DocId, Rev, Error, Reason)];
+bulk_get_result(DocId, [_ | _] = DocRevs, Options) ->
+    MapFun = fun
+        ({ok, Doc}) ->
+            #{<<"ok">> => couch_doc:to_json_obj(Doc, Options)};
+        ({{Error, Reason}, RevId}) ->
+            Rev = couch_doc:rev_to_str(RevId),
+            bulk_get_json_error_map(DocId, Rev, Error, Reason)
+    end,
+    lists:map(MapFun, DocRevs).
+
+bulk_get_json_error_map(DocId, Rev, Error, Reason) ->
+    #{
+        <<"error">> => #{
+            <<"id">> => DocId,
+            <<"rev">> => Rev,
+            <<"error">> => Error,
+            <<"reason">> => Reason
+        }
+    }.
+
+bulk_get_ret_multipart(#httpd{} = Req, ArgsRefs, ResultsMap, Options) ->
+    MochiReq = Req#httpd.mochi_req,
+    Mixed = MochiReq:accepts_content_type("multipart/mixed"),
+    MpType =
+        case Mixed of
+            true -> "multipart/mixed";
+            false -> "multipart/related"
+        end,
+    Boundary = bulk_get_multipart_boundary(),
+    BoundaryCType = MpType ++ "; boundary=\"" ++ ?b2l(Boundary) ++ "\"",
+    CType = {"Content-Type", BoundaryCType},
+    {ok, Resp} = start_chunked_response(Req, 200, [CType]),
+    ForeachFun = fun(Ref) ->
+        #{Ref := {DocId, Result, DocOpts}} = ResultsMap,
+        case Result of
+            [_ | _] = DocRevs ->
+                AllOptions = DocOpts ++ Options,
+                send_docs_multipart_bulk_get(DocRevs, AllOptions, Boundary, Resp);
+            {error, {RevId, Error, Reason}} ->
+                EJson = bulk_get_json_error_map(DocId, RevId, Error, Reason),
+                Json = ?JSON_ENCODE(map_get(<<"error">>, EJson)),
+                ErrCType = <<"Content-Type: application/json">>,
+                Prefix = <<"\r\n", ErrCType/binary, "; error=\"true\"\r\n\r\n">>,
+                ErrorChunk = [<<"\r\n--", Boundary/binary>>, Prefix, Json],
+                couch_httpd:send_chunk(Resp, ErrorChunk)
+        end
+    end,
+    lists:foreach(ForeachFun, ArgsRefs),
+    case ArgsRefs of
+        [] ->
+            % Didn't send any docs, don't need to send a closing boundary
+            ok;
+        [_ | _] ->

Review Comment:
   👍 



##########
src/chttpd/test/eunit/chttpd_bulk_get_test.erl:
##########
@@ -654,6 +1303,34 @@ t_missing_rev_latest({_, DbUrl}) ->
         Res
     ).
 
+t_fabric_worker_error({_, DbUrl}) ->
+    % Check the case of latest and a missing rev

Review Comment:
   Copy-pasta error?



##########
src/chttpd/src/chttpd_db.erl:
##########
@@ -2363,6 +2288,175 @@ set_namespace(NS, #mrargs{} = Args) ->
 
 %% /db/_bulk_get stuff
 
+bulk_get_is_multipart(#httpd{mochi_req = MochiReq}) ->
+    Json = MochiReq:accepts_content_type("application/json"),
+    Mixed = MochiReq:accepts_content_type("multipart/mixed"),
+    Related = MochiReq:accepts_content_type("multipart/related"),
+    not Json andalso (Mixed orelse Related).
+
+bulk_get_docs(Db, #{} = ArgsMap, Options) ->
+    % Sort args by doc ID to hopefully make querying B-trees a bit faster
+    KeyFun = fun({Ref, {DocId, _, _}}) -> {DocId, Ref} end,
+    CmpFun = fun(A, B) -> KeyFun(A) =< KeyFun(B) end,
+    ArgsList = lists:sort(CmpFun, maps:to_list(ArgsMap)),
+    % Split out known errors. Later, before returning, recombine them back into
+    % the final result map.
+    PartFun = fun({_Ref, {_DocId, RevsOrError, _DocOpts}}) ->
+        case RevsOrError of
+            L when is_list(L) -> true;
+            all -> true;
+            {error, _} -> false
+        end
+    end,
+    {ValidArgs, ErrorArgs} = lists:partition(PartFun, ArgsList),
+    UseBatches = config:get_boolean("chttpd", "bulk_get_use_batches", true),
+    Responses =
+        case UseBatches of
+            true -> bulk_get_docs_batched(Db, ValidArgs, Options);
+            false -> bulk_get_docs_individually(Db, ValidArgs, Options)
+        end,
+    MapFun = fun({Ref, {DocId, Response, _}} = RespTuple) ->
+        case Response of
+            [] ->
+                % Remap empty reponses to errors. This is a peculiarity of the
+                % _bulk_get HTTP API. If revision was not specifed, `undefined`
+                % must be returned as the error revision ID.
+                #{Ref := {_, Revs, _}} = ArgsMap,
+                RevStr = bulk_get_rev_error(Revs),
+                Error = {RevStr, <<"not_found">>, <<"missing">>},
+                {Ref, {DocId, {error, Error}, []}};
+            [_ | _] = DocRevisions ->

Review Comment:
   Although it gives you the opportunity to use this awesome pattern a lot ;)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org