You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2018/08/23 18:57:23 UTC

[couchdb] branch fix-builtin-sum updated (84505f0 -> 2d141af)

This is an automated email from the ASF dual-hosted git repository.

davisp pushed a change to branch fix-builtin-sum
in repository https://gitbox.apache.org/repos/asf/couchdb.git.


    omit 84505f0  Fix builtin _sum reduce function
     add 2350cd4  Switch rexi_sup restart strategy to rest_for_one
     new 2d141af  Fix builtin _sum reduce function

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (84505f0)
            \
             N -- N -- N   refs/heads/fix-builtin-sum (2d141af)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/rexi/src/rexi_sup.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


[couchdb] 01/01: Fix builtin _sum reduce function

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch fix-builtin-sum
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 2d141af2ccea6370d70b52996d68392b505f48f9
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Wed Aug 22 14:54:31 2018 -0500

    Fix builtin _sum reduce function
    
    The builtin _sum reduce function has no protection against overflowing
    reduce values. Users can emit objects with enough unique keys to cause
    the builtin _sum to create objects that are exceedingly large in the
    inner nodes of the view B+Tree.
    
    This change adds the same logic that applies to JavaScript reduce
    functions to check if a reduce function is properly reducing its input.
---
 src/couch/src/couch_query_servers.erl        | 27 +++++++-
 src/couch/test/couch_query_servers_tests.erl | 95 ++++++++++++++++++++++++++++
 2 files changed, 121 insertions(+), 1 deletion(-)

diff --git a/src/couch/src/couch_query_servers.erl b/src/couch/src/couch_query_servers.erl
index 7047364..c6d255f 100644
--- a/src/couch/src/couch_query_servers.erl
+++ b/src/couch/src/couch_query_servers.erl
@@ -171,7 +171,8 @@ builtin_reduce(_Re, [], _KVs, Acc) ->
     {ok, lists:reverse(Acc)};
 builtin_reduce(Re, [<<"_sum",_/binary>>|BuiltinReds], KVs, Acc) ->
     Sum = builtin_sum_rows(KVs, 0),
-    builtin_reduce(Re, BuiltinReds, KVs, [Sum|Acc]);
+    Red = check_sum_overflow(?term_size(KVs), ?term_size(Sum), Sum),
+    builtin_reduce(Re, BuiltinReds, KVs, [Red|Acc]);
 builtin_reduce(reduce, [<<"_count",_/binary>>|BuiltinReds], KVs, Acc) ->
     Count = length(KVs),
     builtin_reduce(reduce, BuiltinReds, KVs, [Count|Acc]);
@@ -247,6 +248,30 @@ sum_arrays([X|Xs], [Y|Ys]) when is_number(X), is_number(Y) ->
 sum_arrays(Else, _) ->
     throw_sum_error(Else).
 
+% Apply the reduce_limit setting to a builtin _sum result. Overflow means
+% OutSize exceeds 4096 and is more than half of InSize; 4096 is assumed to
+% be the JavaScript reduce threshold (was 4906, likely a typo) - confirm.
+check_sum_overflow(InSize, OutSize, Sum) ->
+    Overflowed = OutSize > 4096 andalso OutSize * 2 > InSize,
+    case config:get("query_server_config", "reduce_limit", "true") of
+        "true" when Overflowed ->
+            Msg = log_sum_overflow(InSize, OutSize),
+            {[{<<"error">>, <<"builtin_reduce_error">>}, {<<"reason">>, Msg}]};
+        "log" when Overflowed ->
+            log_sum_overflow(InSize, OutSize),
+            Sum;
+        _ ->
+            Sum
+    end.
+
+% Build the overflow message as a binary, log it as an error, and return it.
+log_sum_overflow(InSize, OutSize) ->
+    Template = "Reduce output must shrink more rapidly: "
+        "input size: ~b output size: ~b",
+    Text = iolist_to_binary(io_lib:format(Template, [InSize, OutSize])),
+    couch_log:error(Text, []),
+    Text.
+
 builtin_stats(_, []) ->
     {0, 0, 0, 0, 0};
 builtin_stats(_, [[_,First]|Rest]) ->
diff --git a/src/couch/test/couch_query_servers_tests.erl b/src/couch/test/couch_query_servers_tests.erl
new file mode 100644
index 0000000..f8df896
--- /dev/null
+++ b/src/couch/test/couch_query_servers_tests.erl
@@ -0,0 +1,95 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_query_servers_tests).
+
+-include_lib("couch/include/couch_eunit.hrl").
+
+
+% Fixture setup: create meck mocks for the config and couch_log modules.
+setup() -> meck:new([config, couch_log]).
+
+
+% Fixture cleanup: unload the meck mocks; the setup result is ignored.
+teardown(_) -> meck:unload().
+
+
+% EUnit generator for the _sum overflow checks.
+% Returns a titled `setup` fixture: setup/0 runs before the listed cases
+% and teardown/1 afterwards. The three cases cover the "true", "log" and
+% "false" values of the reduce_limit setting; each case resets the mocks
+% and installs its own expectations.
+sum_overflow_test_() ->
+    Title = "Test overflow detection in the _sum reduce function",
+    Cases = [
+        fun should_return_error_on_overflow/0,
+        fun should_return_object_on_log/0,
+        fun should_return_object_on_false/0
+    ],
+    Fixture = {setup, fun setup/0, fun teardown/1, Cases},
+    {Title, Fixture}.
+
+
+% reduce_limit "true": the oversized _sum result must come back as a
+% builtin_reduce_error object, and the overflow must have been logged.
+should_return_error_on_overflow() ->
+    meck:reset([config, couch_log]),
+    ConfigArgs = ["query_server_config", "reduce_limit", "true"],
+    meck:expect(config, get, ConfigArgs, "true"),
+    meck:expect(couch_log, error, ['_', '_'], ok),
+    KVs = gen_sum_kvs(),
+    {ok, [Red]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs),
+    ?assertMatch({[{<<"error">>, <<"builtin_reduce_error">>} | _]}, Red),
+    ?assert(meck:called(config, get, '_')),
+    ?assert(meck:called(couch_log, error, '_')).
+
+
+% reduce_limit "log": the _sum object is returned (no "error" key) but logged.
+should_return_object_on_log() ->
+    meck:reset([config, couch_log]),
+    ConfigArgs = ["query_server_config", "reduce_limit", "true"],
+    meck:expect(config, get, ConfigArgs, "log"),
+    meck:expect(couch_log, error, ['_', '_'], ok),
+    KVs = gen_sum_kvs(),
+    {ok, [Red]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs),
+    ?assertMatch({[_ | _]}, Red),
+    {Props} = Red,
+    Names = [Name || {Name, _} <- Props],
+    ?assertNot(lists:member(<<"error">>, Names)),
+    ?assert(meck:called(config, get, '_')),
+    ?assert(meck:called(couch_log, error, '_')).
+
+
+% reduce_limit "false": the _sum object is returned and nothing is logged.
+should_return_object_on_false() ->
+    meck:reset([config, couch_log]),
+    ConfigArgs = ["query_server_config", "reduce_limit", "true"],
+    meck:expect(config, get, ConfigArgs, "false"),
+    meck:expect(couch_log, error, ['_', '_'], ok),
+    KVs = gen_sum_kvs(),
+    {ok, [Red]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs),
+    ?assertMatch({[_ | _]}, Red),
+    {Props} = Red,
+    Names = [Name || {Name, _} <- Props],
+    ?assertNot(lists:member(<<"error">>, Names)),
+    ?assert(meck:called(config, get, '_')),
+    ?assertNot(meck:called(couch_log, error, '_')).
+
+
+% Generate 10 input rows shaped [Index, {Props}].
+% Each Props list holds 20 {Key, 1} pairs; every key is the base64url
+% encoding of 16 freshly generated random bytes, so keys are expected
+% (not guaranteed) to be distinct across rows.
+gen_sum_kvs() ->
+    NewKey = fun() -> couch_util:encodeBase64Url(crypto:strong_rand_bytes(16)) end,
+    NewProps = fun() -> [{NewKey(), 1} || _ <- lists:seq(1, 20)] end,
+    [[Idx, {NewProps()}] || Idx <- lists:seq(1, 10)].