You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2018/08/22 19:56:34 UTC

[couchdb] 01/01: Fix builtin _sum reduce function

This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch fix-builtin-sum
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit dd0039950ce109292f8dc5e4d099b62c4f6902ff
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Wed Aug 22 14:54:31 2018 -0500

    Fix builtin _sum reduce function
    
    The builtin _sum reduce function has no protection against overflowing
    reduce values. Users can emit objects with enough unique keys to cause
    the builtin _sum to create objects that are exceedingly large in the
    inner nodes of the view B+Tree.
    
    This change adds the same logic that applies to JavaScript reduce
    functions to check if a reduce function is properly reducing its input.
---
 src/couch/src/couch_query_servers.erl | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/couch/src/couch_query_servers.erl b/src/couch/src/couch_query_servers.erl
index 7047364..dce11dc 100644
--- a/src/couch/src/couch_query_servers.erl
+++ b/src/couch/src/couch_query_servers.erl
@@ -171,7 +171,8 @@ builtin_reduce(_Re, [], _KVs, Acc) ->
     {ok, lists:reverse(Acc)};
 builtin_reduce(Re, [<<"_sum",_/binary>>|BuiltinReds], KVs, Acc) ->
     Sum = builtin_sum_rows(KVs, 0),
-    builtin_reduce(Re, BuiltinReds, KVs, [Sum|Acc]);
+    Red = check_sum_overflow(?term_size(KVs), ?term_size(Sum), Sum),
+    builtin_reduce(Re, BuiltinReds, KVs, [Red|Acc]);
 builtin_reduce(reduce, [<<"_count",_/binary>>|BuiltinReds], KVs, Acc) ->
     Count = length(KVs),
     builtin_reduce(reduce, BuiltinReds, KVs, [Count|Acc]);
@@ -247,6 +248,30 @@ sum_arrays([X|Xs], [Y|Ys]) when is_number(X), is_number(Y) ->
 sum_arrays(Else, _) ->
     throw_sum_error(Else).
 
+%% Returns Sum, or an error object when reduce_limit is "true" and the output
+%% overflowed; with "log" an overflow is only logged and Sum is still returned.
+check_sum_overflow(InSize, OutSize, Sum) ->
+    %% Overflow: output larger than 4096 and more than half the input size.
+    Overflowed = OutSize > 4096 andalso OutSize * 2 > InSize,
+    case config:get("query_server_config", "reduce_limit", "true") of
+        "true" when Overflowed ->
+            Msg = log_sum_overflow(InSize, OutSize),
+            {[{<<"error">>, <<"builtin_reduce_error">>}, {<<"reason">>, Msg}]};
+        "log" when Overflowed ->
+            log_sum_overflow(InSize, OutSize),
+            Sum;
+        _ ->
+            Sum
+    end.
+
+%% Log the reduce-overflow error for the given sizes; returns the message binary.
+log_sum_overflow(InSize, OutSize) ->
+    Template = "Reduce output must shrink more rapidly: input size: ~b output size: ~b",
+    Rendered = io_lib:format(Template, [InSize, OutSize]),
+    Text = iolist_to_binary(Rendered),
+    couch_log:error(Text, []),
+    Text.
+
 builtin_stats(_, []) ->
     {0, 0, 0, 0, 0};
 builtin_stats(_, [[_,First]|Rest]) ->