You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2018/08/23 20:43:42 UTC

[couchdb] branch master updated: Fix builtin _sum reduce function

This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/couchdb.git


The following commit(s) were added to refs/heads/master by this push:
     new d2d2261  Fix builtin _sum reduce function
d2d2261 is described below

commit d2d2261c460bd91770100011e10351d48644038b
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Wed Aug 22 14:54:31 2018 -0500

    Fix builtin _sum reduce function
    
    The builting _sum reduce function has no protection against overflowing
    reduce values. Users can emit objects with enough unique keys to cause
    the builtin _sum to create objects that are exceedingly large in the
    inner nodes of the view B+Tree.
    
    This change adds the same logic that applies to JavaScript reduce
    functions to check if a reduce function is properly reducing its input.
---
 src/couch/src/couch_query_servers.erl        | 27 +++++++-
 src/couch/test/couch_query_servers_tests.erl | 95 ++++++++++++++++++++++++++++
 2 files changed, 121 insertions(+), 1 deletion(-)

diff --git a/src/couch/src/couch_query_servers.erl b/src/couch/src/couch_query_servers.erl
index 7047364..c6d255f 100644
--- a/src/couch/src/couch_query_servers.erl
+++ b/src/couch/src/couch_query_servers.erl
@@ -171,7 +171,8 @@ builtin_reduce(_Re, [], _KVs, Acc) ->
     {ok, lists:reverse(Acc)};
 builtin_reduce(Re, [<<"_sum",_/binary>>|BuiltinReds], KVs, Acc) ->
     Sum = builtin_sum_rows(KVs, 0),
-    builtin_reduce(Re, BuiltinReds, KVs, [Sum|Acc]);
+    Red = check_sum_overflow(?term_size(KVs), ?term_size(Sum), Sum),
+    builtin_reduce(Re, BuiltinReds, KVs, [Red|Acc]);
 builtin_reduce(reduce, [<<"_count",_/binary>>|BuiltinReds], KVs, Acc) ->
     Count = length(KVs),
     builtin_reduce(reduce, BuiltinReds, KVs, [Count|Acc]);
@@ -247,6 +248,30 @@ sum_arrays([X|Xs], [Y|Ys]) when is_number(X), is_number(Y) ->
 sum_arrays(Else, _) ->
     throw_sum_error(Else).
 
+check_sum_overflow(InSize, OutSize, Sum) ->
+    Overflowed = OutSize > 4906 andalso OutSize * 2 > InSize,
+    case config:get("query_server_config", "reduce_limit", "true") of
+        "true" when Overflowed ->
+            Msg = log_sum_overflow(InSize, OutSize),
+            {[
+                {<<"error">>, <<"builtin_reduce_error">>},
+                {<<"reason">>, Msg}
+            ]};
+        "log" when Overflowed ->
+            log_sum_overflow(InSize, OutSize),
+            Sum;
+        _ ->
+            Sum
+    end.
+
+log_sum_overflow(InSize, OutSize) ->
+    Fmt = "Reduce output must shrink more rapidly: "
+            "input size: ~b "
+            "output size: ~b",
+    Msg = iolist_to_binary(io_lib:format(Fmt, [InSize, OutSize])),
+    couch_log:error(Msg, []),
+    Msg.
+
 builtin_stats(_, []) ->
     {0, 0, 0, 0, 0};
 builtin_stats(_, [[_,First]|Rest]) ->
diff --git a/src/couch/test/couch_query_servers_tests.erl b/src/couch/test/couch_query_servers_tests.erl
new file mode 100644
index 0000000..f8df896
--- /dev/null
+++ b/src/couch/test/couch_query_servers_tests.erl
@@ -0,0 +1,95 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_query_servers_tests).
+
+-include_lib("couch/include/couch_eunit.hrl").
+
+
+setup() ->
+    meck:new([config, couch_log]).
+
+
+teardown(_) ->
+    meck:unload().
+
+
+sum_overflow_test_() ->
+    {
+        "Test overflow detection in the _sum reduce function",
+        {
+            setup,
+            fun setup/0,
+            fun teardown/1,
+            [
+                fun should_return_error_on_overflow/0,
+                fun should_return_object_on_log/0,
+                fun should_return_object_on_false/0
+            ]
+        }
+    }.
+
+
+should_return_error_on_overflow() ->
+    meck:reset([config, couch_log]),
+    meck:expect(
+            config, get, ["query_server_config", "reduce_limit", "true"],
+            "true"
+        ),
+    meck:expect(couch_log, error, ['_', '_'], ok),
+    KVs = gen_sum_kvs(),
+    {ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs),
+    ?assertMatch({[{<<"error">>, <<"builtin_reduce_error">>} | _]}, Result),
+    ?assert(meck:called(config, get, '_')),
+    ?assert(meck:called(couch_log, error, '_')).
+
+
+should_return_object_on_log() ->
+    meck:reset([config, couch_log]),
+    meck:expect(
+            config, get, ["query_server_config", "reduce_limit", "true"],
+            "log"
+        ),
+    meck:expect(couch_log, error, ['_', '_'], ok),
+    KVs = gen_sum_kvs(),
+    {ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs),
+    ?assertMatch({[_ | _]}, Result),
+    Keys = [K || {K, _} <- element(1, Result)],
+    ?assert(not lists:member(<<"error">>, Keys)),
+    ?assert(meck:called(config, get, '_')),
+    ?assert(meck:called(couch_log, error, '_')).
+
+
+should_return_object_on_false() ->
+    meck:reset([config, couch_log]),
+    meck:expect(
+            config, get, ["query_server_config", "reduce_limit", "true"],
+            "false"
+        ),
+    meck:expect(couch_log, error, ['_', '_'], ok),
+    KVs = gen_sum_kvs(),
+    {ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs),
+    ?assertMatch({[_ | _]}, Result),
+    Keys = [K || {K, _} <- element(1, Result)],
+    ?assert(not lists:member(<<"error">>, Keys)),
+    ?assert(meck:called(config, get, '_')),
+    ?assertNot(meck:called(couch_log, error, '_')).
+
+
+gen_sum_kvs() ->
+    lists:map(fun(I) ->
+        Props = lists:map(fun(_) ->
+            K = couch_util:encodeBase64Url(crypto:strong_rand_bytes(16)),
+            {K, 1}
+        end, lists:seq(1, 20)),
+        [I, {Props}]
+    end, lists:seq(1, 10)).