You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2018/08/22 19:56:33 UTC

[couchdb] branch fix-builtin-sum created (now dd00399)

This is an automated email from the ASF dual-hosted git repository.

davisp pushed a change to branch fix-builtin-sum
in repository https://gitbox.apache.org/repos/asf/couchdb.git.


      at dd00399  Fix builtin _sum reduce function

This branch includes the following new commits:

     new dd00399  Fix builtin _sum reduce function

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[couchdb] 01/01: Fix builtin _sum reduce function

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch fix-builtin-sum
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit dd0039950ce109292f8dc5e4d099b62c4f6902ff
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Wed Aug 22 14:54:31 2018 -0500

    Fix builtin _sum reduce function
    
    The builtin _sum reduce function has no protection against overflowing
    reduce values. Users can emit objects with enough unique keys to cause
    the builtin _sum to create objects that are exceedingly large in the
    inner nodes of the view B+Tree.
    
    This change adds the same logic that applies to JavaScript reduce
    functions to check if a reduce function is properly reducing its input.
---
 src/couch/src/couch_query_servers.erl | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/couch/src/couch_query_servers.erl b/src/couch/src/couch_query_servers.erl
index 7047364..dce11dc 100644
--- a/src/couch/src/couch_query_servers.erl
+++ b/src/couch/src/couch_query_servers.erl
@@ -171,7 +171,8 @@ builtin_reduce(_Re, [], _KVs, Acc) ->
     {ok, lists:reverse(Acc)};
 builtin_reduce(Re, [<<"_sum",_/binary>>|BuiltinReds], KVs, Acc) ->
     Sum = builtin_sum_rows(KVs, 0),
-    builtin_reduce(Re, BuiltinReds, KVs, [Sum|Acc]);
+    Red = check_sum_overflow(?term_size(KVs), ?term_size(Sum), Sum),
+    builtin_reduce(Re, BuiltinReds, KVs, [Red|Acc]);
 builtin_reduce(reduce, [<<"_count",_/binary>>|BuiltinReds], KVs, Acc) ->
     Count = length(KVs),
     builtin_reduce(reduce, BuiltinReds, KVs, [Count|Acc]);
@@ -247,6 +248,30 @@ sum_arrays([X|Xs], [Y|Ys]) when is_number(X), is_number(Y) ->
 sum_arrays(Else, _) ->
     throw_sum_error(Else).
 
+%% Guard against a builtin _sum reduction that fails to shrink its input.
+%%
+%% InSize and OutSize are the sizes of the reduce input and of the computed
+%% Sum, as measured by the caller. The reduction counts as overflowed when
+%% OutSize exceeds 4096 and is more than half of InSize.
+%%
+%% The "reduce_limit" setting in the "query_server_config" section selects
+%% what happens on overflow:
+%%   "true" (the default) - log the overflow and return an error object
+%%                          with <<"error">> and <<"reason">> members in
+%%                          place of Sum;
+%%   "log"                - log the overflow and return Sum unchanged;
+%%   anything else        - return Sum unchanged without logging.
+check_sum_overflow(InSize, OutSize, Sum) ->
+    %% A single predicate is shared by both modes so that "log" reports
+    %% exactly the reductions that "true" would reject.
+    Overflowed = OutSize > 4096 andalso OutSize * 2 > InSize,
+    case config:get("query_server_config", "reduce_limit", "true") of
+        "true" when Overflowed ->
+            Msg = log_sum_overflow(InSize, OutSize),
+            {[
+                {<<"error">>, <<"builtin_reduce_error">>},
+                {<<"reason">>, Msg}
+            ]};
+        "log" when Overflowed ->
+            log_sum_overflow(InSize, OutSize),
+            Sum;
+        _ ->
+            Sum
+    end.
+
+%% Build the "output must shrink" message from the two sizes, log it at
+%% error level through couch_log, and return the message as a binary so
+%% the caller can embed it in an error response.
+log_sum_overflow(InSize, OutSize) ->
+    Text = io_lib:format(
+        "Reduce output must shrink more rapidly: "
+        "input size: ~b output size: ~b",
+        [InSize, OutSize]
+    ),
+    Report = iolist_to_binary(Text),
+    couch_log:error(Report, []),
+    Report.
+
 builtin_stats(_, []) ->
     {0, 0, 0, 0, 0};
 builtin_stats(_, [[_,First]|Rest]) ->