You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@couchdb.apache.org by va...@apache.org on 2021/10/13 06:14:56 UTC

[couchdb] branch fix-reduce-collation-bug created (now 8d4f03c)

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a change to branch fix-reduce-collation-bug
in repository https://gitbox.apache.org/repos/asf/couchdb.git.


      at 8d4f03c  Fix reduce view row collation with unicode equivalent keys

This branch includes the following new commits:

     new 8d4f03c  Fix reduce view row collation with unicode equivalent keys

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.

[couchdb] 01/01: Fix reduce view row collation with unicode equivalent keys

Posted by va...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch fix-reduce-collation-bug
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 8d4f03c41c8160e63cb8de610703ffd3862dc057
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Wed Oct 13 01:52:00 2021 -0400

    Fix reduce view row collation with unicode equivalent keys
    
    Previously, view reduce collation with keys relied on the keys in the
    rows returned from the view shards to exactly match (=:=) the keys
    specified in the args. However, in the case when there are multiple
    rows which compare equal with the unicode collator, that may not
    always be the case.
    
    In that case when the rows are fetched from the row dict by key, they
    should be matched using the same collation algorithm as the one used
    on the view shards. Since the collation module we use doesn't export
    an equivalence function, we mimic it by using the `(not A < B) and
    (not B < A)` logic. This relies on the subtle fact that all the things
    we pass into the less/2 function have a defined comparison
    order. Another option is to use the Erlang standard library
    `string:equal(A, B, false, nfd)` function, which is more
    direct. However, it could eventually hit a case when the Erlang
    standard library's NFD equivalence algorithm might not agree with our
    collation library's (libicu) idea of equivalence.
---
 src/fabric/src/fabric_view.erl | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/src/fabric/src/fabric_view.erl b/src/fabric/src/fabric_view.erl
index bd5e42f..6c30646 100644
--- a/src/fabric/src/fabric_view.erl
+++ b/src/fabric/src/fabric_view.erl
@@ -242,9 +242,8 @@ get_next_row(#collector{reducer = RedSrc} = St) when RedSrc =/= undefined ->
         collation = Collation
     } = St,
     {Key, RestKeys} = find_next_key(Keys, Dir, Collation, RowDict),
-    case dict:find(Key, RowDict) of
-    {ok, Records} ->
-        NewRowDict = dict:erase(Key, RowDict),
+    case reduce_row_dict_take(Key, RowDict, Collation) of
+    {Records, NewRowDict} ->
         Counters = lists:foldl(fun(#view_row{worker={Worker,From}}, CntrsAcc) ->
             case From of
                 {Pid, _} when is_pid(Pid) ->
@@ -269,6 +268,24 @@ get_next_row(State) ->
     Counters1 = fabric_dict:update_counter(Worker, -1, Counters0),
     {Row, State#collector{rows = Rest, counters=Counters1}}.
 
+reduce_row_dict_take(Key, Dict, <<"raw">>) ->
+    dict:take(Key, Dict);
+reduce_row_dict_take(Key, Dict, _Collation) ->
+    IsEq = fun(K, _) -> (not couch_ejson_compare:less_json(K, Key)) andalso
+        (not couch_ejson_compare:less_json(Key, K))
+    end,
+    KVs = dict:to_list(dict:filter(IsEq, Dict)),
+    case KVs of
+        [] ->
+            error;
+        [_ | _] ->
+            {Keys, Vals} = lists:unzip(KVs),
+            NewDict = lists:foldl(fun(K, Acc) ->
+                dict:erase(K, Acc)
+            end, Dict, Keys),
+            {lists:flatten(Vals), NewDict}
+    end.
+
 %% TODO: rectify nil <-> undefined discrepancies
 find_next_key(nil, Dir, Collation, RowDict) ->
     find_next_key(undefined, Dir, Collation, RowDict);