You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@couchdb.apache.org by GitBox <gi...@apache.org> on 2020/03/05 18:25:15 UTC

[GitHub] [couchdb] davisp opened a new pull request #2638: Prototype/fdb layer couch views size tests

davisp opened a new pull request #2638: Prototype/fdb layer couch views size tests
URL: https://github.com/apache/couchdb/pull/2638
 
 
   ## Overview
   
   Increase test coverage of tracking view size information and fix a couple of bugs found while adding the tests.
   
   ## Testing recommendations
   
   `make check-fdb`
   
   ## Checklist
   
   - [x] Code is written and works correctly
   - [x] Changes are covered by tests
   - [x] Any new configurable parameters are documented in `rel/overlay/etc/default.ini`
   - [ ] A PR for documentation changes has been made in https://github.com/apache/couchdb-documentation
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] davisp commented on a change in pull request #2638: Prototype/fdb layer couch views size tests

Posted by GitBox <gi...@apache.org>.
davisp commented on a change in pull request #2638: Prototype/fdb layer couch views size tests
URL: https://github.com/apache/couchdb/pull/2638#discussion_r388512261
 
 

 ##########
 File path: src/couch_views/test/couch_views_size_test.erl
 ##########
 @@ -0,0 +1,562 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_views_size_test).
+
+-include_lib("eunit/include/eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("couch_mrview/include/couch_mrview.hrl").
+-include_lib("fabric/include/fabric2.hrl").
+-include_lib("couch_views/include/couch_views.hrl").
+
+% N.B., we should move to couch_ejson_size instead
+% of erlang:external_size
+%
+% to calculate view size:
+% total = 0
+% for (fdb_k, fdb_v) in VIEW_MAP_RANGE:
+%   {EncUserKey, EncUserVal} = erlfdb_tuple:unpack(fdb_v),
+%   UserKey = couch_views_encoding:decode(EncUserKey),
+%   UserVal = couch_views_encoding:decode(EncUserVal),
+%   total += erlang:external_size(UserKey),
+%   total += erlang:external_size(UserVal)
+%
+% Our goal in checking the size calculations is that we cover
+% as much of the possible key mutation space as possible while
+% not relying on fuzzing out the edge cases. Conceptually we have
+% two sets of keys E and U. E is the set of keys that currently exist in the
+% view, and U is the new set of keys corresponding to an update.
+%
+% Both sets E and U have the same possible set of state variables:
+%
+% 1. N unique keys, where 0 =< N =< infinity
+% 2. D keys with duplicates, where 0 =< D =< N,
+% 3. R repeats for each member of D, for 2 =< R =< infinity
+%
+% Given two sets S1 and S2, we then have a set of transition variables:
+%
+% 1. deltaN - shared unique keys, where 0 =< deltaN =< N
+% 2. deltaD - shared duplicates, where 0 =< deltaD =< N
+% 3. deltaR - shared repeats for each D, where 2 =< deltaR =< infinity
+%
+% To search our state transition space, we can create two functions to
+% first define our start and end states, and for each transition we have
+% a function that defines the shared overlap between states.
+%
+% Given a list of transitions, our checks then become simple in that
+% we can iterate over each transition checking that our index is valid
+% after each one. Index validation will purely look at the existing
+% state of the index in fdb and validate correctness.
+
+-define(NUM_SINGLE_TESTS, 100).
+-define(NUM_MULTI_TESTS, 100).
+
+-define(N_DOMAIN, [0, 1, 2, 5]).
+-define(D_DOMAIN, [0, 1, 2, 5]).
+-define(R_DOMAIN, [2, 4]).
+
+-define(DELTA_N_DOMAIN, [0, 1, 2, 5]).
+-define(DELTA_D_DOMAIN, [0, 1, 2, 5]).
+-define(DELTA_R_DOMAIN, [1, 2, 4]).
+
+
+generate_sets() ->
+    permute(?N_DOMAIN, ?D_DOMAIN, ?R_DOMAIN, fun(N, D, R) ->
+        % We can't have more duplicates than total keys
+        case D > N of
+            true -> throw(skip);
+            false -> ok
+        end,
+
+        % Only include one of the repeat values
+        % for our zero sets
+        case D == 0 of
+            true when R == 2 -> ok;
+            true -> throw(skip);
+            false -> ok
+        end,
+
+        % Replace R with a sentinel value for sanity
+        % when there are no dupes to have repeats
+        ActualR = if D == 0 -> 0; true -> R end,
+
+        {N, D, ActualR}
+    end).
+
+
+generate_transitions() ->
+    Sets = generate_sets(),
+    Pairs = [{Set1, Set2} || Set1 <- Sets, Set2 <- Sets],
+    lists:flatmap(fun({{N1, D1, _R1} = S1, {N2, D2, _R2} = S2}) ->
+        Filter = fun(DeltaN, DeltaD, DeltaR) ->
+            % Can't share more keys than the smaller of the
+            % two sets
+            case DeltaN > min(N1, N2) of
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % For DeltaN == 0, all combinations of DeltaD and
+            % DeltaR are equivalent tests
+            case DeltaN == 0 of
+                true when DeltaD == 0, DeltaR == 1 -> ok;
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % Can't share more dupes than exist in either set
+            % or the total number of shared keys
+            case DeltaD > min(D1, D2) orelse DeltaD > DeltaN of
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % For DeltaD == 0, all DeltaR correspond to the
+            % same test so only include one instance
+            case DeltaD == 0 of
+                true when DeltaR == 1 -> ok;
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % If we have more non-repeated keys in our
+            % transition than there's "room" for in the target
+            % set it isn't a valid test case.
+            TransitionNonRepeats = DeltaN - DeltaD,
+            TargetNonRepeats = N2 - D2,
+            case TransitionNonRepeats > TargetNonRepeats of
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            {S1, S2, {DeltaN, DeltaD, DeltaR}}
+        end,
+        permute(?DELTA_N_DOMAIN, ?DELTA_D_DOMAIN, ?DELTA_R_DOMAIN, Filter)
+    end, Pairs).
+
+
+permute(NList, DList, RList, Filter) ->
+    % Technically we could call into Filter in each
+    % outer loop to conditionally skip inner loops.
+    % If someone comes along looking to speed up the
+    % fixture setup time, this would likely be an
+    % easy win.
+    lists:foldl(fun(N, NAcc) ->
+        lists:foldl(fun(D, DAcc) ->
+            lists:foldl(fun(R, RAcc) ->
+                try
+                    [Filter(N, D, R) | RAcc]
+                catch throw:skip ->
+                    RAcc
+                end
+            end, DAcc, RList)
+        end, NAcc, DList)
+    end, [], NList).
+
+
+row_transition_test_() ->
+    {
+        "Test view size tracking",
+        {
+            setup,
+            fun setup/0,
+            fun cleanup/1,
+            fun create_transition_tests/1
+        }
+    }.
+
+
+setup() ->
+    Ctx = test_util:start_couch([
+            fabric,
+            couch_jobs,
+            couch_js,
+            couch_views
+        ]),
+    {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]),
+    {Ctx, Db}.
+
+
+cleanup({Ctx, Db}) ->
+    ok = fabric2_db:delete(fabric2_db:name(Db), []),
+    test_util:stop_couch(Ctx).
+
+
+create_transition_tests({_Ctx, Db}) ->
+    Transitions = generate_transitions(),
+    Single = lists:flatmap(fun(T) ->
+        Name = lists:flatten(io_lib:format("single ~s", [tname(T)])),
+        [{Name, fun() -> check_single_transition(Db, T) end}]
+    end, lists:sort(Transitions)),
+    Multi = lists:flatmap(fun(T) ->
+        Name = lists:flatten(io_lib:format("multi ~s", [tname(T)])),
+        [{Name, fun() -> check_multi_transition(Db, T) end}]
+    end, lists:sort(group(shuffle(Transitions)))),
+    subset(?NUM_SINGLE_TESTS, Single) ++ subset(?NUM_MULTI_TESTS, Multi).
+
+
+check_single_transition(Db, {Set1, Set2, Transition}) ->
+    clear_views(Db),
+    InitKVs = init_set(Set1, [a, b, c, d, e]),
+    CommonKVs = reduce_set(Transition, InitKVs),
+    FinalKVs = fill_set(Set2, CommonKVs, [v, w, x, y, z]),
+    {InitJSONKVs, Bindings} = unlabel(InitKVs, #{}),
+    {FinalJSONKVs, _} = unlabel(FinalKVs, Bindings),
+
+    Sig = couch_uuids:random(),
+    DocId = couch_uuids:random(),
+
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
+        write_docs(TxDb, Sig, [make_doc(DocId, InitJSONKVs)])
+    end),
+
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
 
 Review comment:
   I kept them separate because of the atomic operations around mutating sizes. I think *technically* they could be mixed in terms of fdb's transaction semantics, however, that wouldn't be testing the same situations that we have in real life where these two updates are triggered by document updates and so on.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] davisp commented on a change in pull request #2638: Prototype/fdb layer couch views size tests

Posted by GitBox <gi...@apache.org>.
davisp commented on a change in pull request #2638: Prototype/fdb layer couch views size tests
URL: https://github.com/apache/couchdb/pull/2638#discussion_r388496678
 
 

 ##########
 File path: src/couch_views/test/couch_views_indexer_test.erl
 ##########
 @@ -388,6 +389,77 @@ multipe_identical_keys_from_same_doc(Db) ->
         ], Out).
 
 
+fewer_multipe_identical_keys_from_same_doc(Db) ->
+    DDoc = create_ddoc(multi_emit_same),
+
+    Doc0 = couch_doc:from_json_obj({[
+        {<<"_id">>, list_to_binary(integer_to_list(0))},
+        {<<"val">>, 1},
+        {<<"extra">>, 3}
+    ]}),
+
+    {ok, _} = fabric2_db:update_doc(Db, DDoc, []),
+    {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc0, []),
+
+    {ok, Out1} = couch_views:query(
+            Db,
+            DDoc,
+            <<"map_fun1">>,
+            fun fold_fun/2,
+            [],
+            #mrargs{}
+        ),
+
+    ?assertEqual([
+            {row, [
 
 Review comment:
   Done.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] davisp commented on a change in pull request #2638: Prototype/fdb layer couch views size tests

Posted by GitBox <gi...@apache.org>.
davisp commented on a change in pull request #2638: Prototype/fdb layer couch views size tests
URL: https://github.com/apache/couchdb/pull/2638#discussion_r388522123
 
 

 ##########
 File path: src/couch_views/test/couch_views_size_test.erl
 ##########
 @@ -0,0 +1,562 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_views_size_test).
+
+-include_lib("eunit/include/eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("couch_mrview/include/couch_mrview.hrl").
+-include_lib("fabric/include/fabric2.hrl").
+-include_lib("couch_views/include/couch_views.hrl").
+
+% N.B., we should move to couch_ejson_size instead
+% of erlang:external_size
+%
+% to calculate view size:
+% total = 0
+% for (fdb_k, fdb_v) in VIEW_MAP_RANGE:
+%   {EncUserKey, EncUserVal} = erlfdb_tuple:unpack(fdb_v),
+%   UserKey = couch_views_encoding:decode(EncUserKey),
+%   UserVal = couch_views_encoding:decode(EncUserVal),
+%   total += erlang:external_size(UserKey),
+%   total += erlang:external_size(UserVal)
+%
+% Our goal in checking the size calculations is that we cover
+% as much of the possible key mutation space as possible while
+% not relying on fuzzing out the edge cases. Conceptually we have
+% two sets of keys E and U. E is the set of keys that currently exist in the
+% view, and U is the new set of keys corresponding to an update.
+%
+% Both sets E and U have the same possible set of state variables:
+%
+% 1. N unique keys, where 0 =< N =< infinity
+% 2. D keys with duplicates, where 0 =< D =< N,
+% 3. R repeats for each member of D, for 2 =< R =< infinity
+%
+% Given two sets S1 and S2, we then have a set of transition variables:
+%
+% 1. deltaN - shared unique keys, where 0 =< deltaN =< N
+% 2. deltaD - shared duplicates, where 0 =< deltaD =< N
+% 3. deltaR - shared repeats for each D, where 2 =< deltaR =< infinity
+%
+% To search our state transition space, we can create two functions to
+% first define our start and end states, and for each transition we have
+% a function that defines the shared overlap between states.
+%
+% Given a list of transitions, our checks then become simple in that
+% we can iterate over each transition checking that our index is valid
+% after each one. Index validation will purely look at the existing
+% state of the index in fdb and validate correctness.
+
+-define(NUM_SINGLE_TESTS, 100).
+-define(NUM_MULTI_TESTS, 100).
+
+-define(N_DOMAIN, [0, 1, 2, 5]).
+-define(D_DOMAIN, [0, 1, 2, 5]).
+-define(R_DOMAIN, [2, 4]).
+
+-define(DELTA_N_DOMAIN, [0, 1, 2, 5]).
+-define(DELTA_D_DOMAIN, [0, 1, 2, 5]).
+-define(DELTA_R_DOMAIN, [1, 2, 4]).
+
+
+generate_sets() ->
+    permute(?N_DOMAIN, ?D_DOMAIN, ?R_DOMAIN, fun(N, D, R) ->
+        % We can't have more duplicates than total keys
+        case D > N of
+            true -> throw(skip);
+            false -> ok
+        end,
+
+        % Only include one of the repeat values
+        % for our zero sets
+        case D == 0 of
+            true when R == 2 -> ok;
+            true -> throw(skip);
+            false -> ok
+        end,
+
+        % Replace R with a sentinel value for sanity
+        % when there are no dupes to have repeats
+        ActualR = if D == 0 -> 0; true -> R end,
+
+        {N, D, ActualR}
+    end).
+
+
+generate_transitions() ->
+    Sets = generate_sets(),
+    Pairs = [{Set1, Set2} || Set1 <- Sets, Set2 <- Sets],
+    lists:flatmap(fun({{N1, D1, _R1} = S1, {N2, D2, _R2} = S2}) ->
+        Filter = fun(DeltaN, DeltaD, DeltaR) ->
+            % Can't share more keys than the smaller of the
+            % two sets
+            case DeltaN > min(N1, N2) of
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % For DeltaN == 0, all combinations of DeltaD and
+            % DeltaR are equivalent tests
+            case DeltaN == 0 of
+                true when DeltaD == 0, DeltaR == 1 -> ok;
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % Can't share more dupes than exist in either set
+            % or the total number of shared keys
+            case DeltaD > min(D1, D2) orelse DeltaD > DeltaN of
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % For DeltaD == 0, all DeltaR correspond to the
+            % same test so only include one instance
+            case DeltaD == 0 of
+                true when DeltaR == 1 -> ok;
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % If we have more non-repeated keys in our
+            % transition than there's "room" for in the target
+            % set it isn't a valid test case.
+            TransitionNonRepeats = DeltaN - DeltaD,
+            TargetNonRepeats = N2 - D2,
+            case TransitionNonRepeats > TargetNonRepeats of
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            {S1, S2, {DeltaN, DeltaD, DeltaR}}
+        end,
+        permute(?DELTA_N_DOMAIN, ?DELTA_D_DOMAIN, ?DELTA_R_DOMAIN, Filter)
+    end, Pairs).
+
+
+permute(NList, DList, RList, Filter) ->
+    % Technically we could call into Filter in each
+    % outer loop to conditionally skip inner loops.
+    % If someone comes along looking to speed up the
+    % fixture setup time, this would likely be an
+    % easy win.
+    lists:foldl(fun(N, NAcc) ->
+        lists:foldl(fun(D, DAcc) ->
+            lists:foldl(fun(R, RAcc) ->
+                try
+                    [Filter(N, D, R) | RAcc]
+                catch throw:skip ->
+                    RAcc
+                end
+            end, DAcc, RList)
+        end, NAcc, DList)
+    end, [], NList).
+
+
+row_transition_test_() ->
+    {
+        "Test view size tracking",
+        {
+            setup,
+            fun setup/0,
+            fun cleanup/1,
+            fun create_transition_tests/1
+        }
+    }.
+
+
+setup() ->
+    Ctx = test_util:start_couch([
+            fabric,
+            couch_jobs,
+            couch_js,
+            couch_views
+        ]),
+    {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]),
+    {Ctx, Db}.
+
+
+cleanup({Ctx, Db}) ->
+    ok = fabric2_db:delete(fabric2_db:name(Db), []),
+    test_util:stop_couch(Ctx).
+
+
+create_transition_tests({_Ctx, Db}) ->
+    Transitions = generate_transitions(),
+    Single = lists:flatmap(fun(T) ->
+        Name = lists:flatten(io_lib:format("single ~s", [tname(T)])),
+        [{Name, fun() -> check_single_transition(Db, T) end}]
+    end, lists:sort(Transitions)),
+    Multi = lists:flatmap(fun(T) ->
+        Name = lists:flatten(io_lib:format("multi ~s", [tname(T)])),
+        [{Name, fun() -> check_multi_transition(Db, T) end}]
+    end, lists:sort(group(shuffle(Transitions)))),
+    subset(?NUM_SINGLE_TESTS, Single) ++ subset(?NUM_MULTI_TESTS, Multi).
+
+
+check_single_transition(Db, {Set1, Set2, Transition}) ->
+    clear_views(Db),
+    InitKVs = init_set(Set1, [a, b, c, d, e]),
+    CommonKVs = reduce_set(Transition, InitKVs),
+    FinalKVs = fill_set(Set2, CommonKVs, [v, w, x, y, z]),
+    {InitJSONKVs, Bindings} = unlabel(InitKVs, #{}),
+    {FinalJSONKVs, _} = unlabel(FinalKVs, Bindings),
+
+    Sig = couch_uuids:random(),
+    DocId = couch_uuids:random(),
+
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
+        write_docs(TxDb, Sig, [make_doc(DocId, InitJSONKVs)])
+    end),
+
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
+        write_docs(TxDb, Sig, [make_doc(DocId, FinalJSONKVs)])
+    end),
+
+    validate_index(Db, Sig, #{DocId => FinalJSONKVs}).
+
+
+check_multi_transition(Db, Transitions) ->
+    clear_views(Db),
+
+    {Docs, IdMap} = lists:mapfoldl(fun({Set1, Set2, Transition}, IdMapAcc) ->
+        DocId = couch_uuids:random(),
+        InitKVs = init_set(Set1, [a, b, c, d, e]),
+        CommonKVs = reduce_set(Transition, InitKVs),
+        FinalKVs = fill_set(Set2, CommonKVs, [v, w, x, y, z]),
+        {InitJSONKVs, Bindings} = unlabel(InitKVs, #{}),
+        {FinalJSONKVs, _} = unlabel(FinalKVs, Bindings),
+        InitDoc = make_doc(DocId, InitJSONKVs),
+        FinalDoc = make_doc(DocId, FinalJSONKVs),
+        {{InitDoc, FinalDoc}, maps:put(DocId, FinalJSONKVs, IdMapAcc)}
+    end, #{}, Transitions),
+
+    {InitDocs, FinalDocs} = lists:unzip(Docs),
+
+    Sig = couch_uuids:random(),
+
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
+        write_docs(TxDb, Sig, InitDocs)
+    end),
+
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
+        write_docs(TxDb, Sig, FinalDocs)
+    end),
+
+    validate_index(Db, Sig, IdMap).
+
+
+clear_views(Db) ->
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
+        #{
+            tx := Tx,
+            db_prefix := DbPrefix
+        } = TxDb,
+        {Start, End} = erlfdb_tuple:range({?DB_VIEWS}, DbPrefix),
+        erlfdb:clear_range(Tx, Start, End),
+
+        GlobalKey = {?DB_STATS, <<"sizes">>, <<"views">>},
+        BinGlobalKey = erlfdb_tuple:pack(GlobalKey, DbPrefix),
+        erlfdb:set(Tx, BinGlobalKey, ?uint2bin(0))
+    end).
+
+
+write_docs(TxDb, Sig, Docs) ->
+    Mrst = #mrst{
+        sig = Sig,
+        views = [#mrview{
+            id_num = 1
+        }]
+    },
+    IdxState = #{
+        last_seq => <<"foo">>
+    },
+    couch_views_indexer:write_docs(TxDb, Mrst, Docs, IdxState).
+
+
+validate_index(Db, Sig, JSONRows) ->
+    #{
+        db_prefix := DbPrefix
+    } = Db,
+    Rows = fabric2_fdb:transactional(Db, fun(TxDb) ->
+        #{
+            tx := Tx
+        } = TxDb,
+        {Start, End} = erlfdb_tuple:range({?DB_VIEWS}, DbPrefix),
+        erlfdb:get_range(Tx, Start, End)
+    end),
+
+    InitAcc = #{
+        row_count => 0,
+        kv_size => 0,
+        ids => #{},
+        rows => []
+    },
+
+    MapData = lists:foldl(fun({Key, Value}, Acc) ->
+        case erlfdb_tuple:unpack(Key, DbPrefix) of
+            {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Sig} ->
+                ?assertEqual(<<"foo">>, Value),
+                Acc;
+            {?DB_VIEWS, ?VIEW_INFO, ?VIEW_ROW_COUNT, Sig, 1} ->
+                maps:put(row_count, ?bin2uint(Value), Acc);
+            {?DB_VIEWS, ?VIEW_INFO, ?VIEW_KV_SIZE, Sig, 1} ->
+                maps:put(kv_size, ?bin2uint(Value), Acc);
+            {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, 1} ->
+                [
+                    TotalKeys, TotalSize, UniqueKeys
+                ] = couch_views_encoding:decode(Value),
+                maps:update_with(ids, fun(Ids) ->
+                    false = maps:is_key(DocId, Ids),
+                    maps:put(DocId, {TotalKeys, TotalSize, UniqueKeys}, Ids)
+                end, Acc);
+            {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_MAP_RANGE, 1, MapKey, _DupeId} ->
+                {EncKey, DocId} = MapKey,
+                {UserKey, UserVal} = erlfdb_tuple:unpack(Value),
+
+                UserJsonKey = couch_views_encoding:decode(UserKey),
+                UserJsonVal = couch_views_encoding:decode(UserVal),
+
+                % Assert our encoded sort key is correct
+                EncKey = couch_views_encoding:encode(UserJsonKey, key),
 
 Review comment:
   Done.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] nickva commented on a change in pull request #2638: Prototype/fdb layer couch views size tests

Posted by GitBox <gi...@apache.org>.
nickva commented on a change in pull request #2638: Prototype/fdb layer couch views size tests
URL: https://github.com/apache/couchdb/pull/2638#discussion_r388518621
 
 

 ##########
 File path: src/couch_views/test/couch_views_size_test.erl
 ##########
 @@ -0,0 +1,562 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_views_size_test).
+
+-include_lib("eunit/include/eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("couch_mrview/include/couch_mrview.hrl").
+-include_lib("fabric/include/fabric2.hrl").
+-include_lib("couch_views/include/couch_views.hrl").
+
+% N.B., we should move to couch_ejson_size instead
+% of erlang:external_size
+%
+% to calculate view size:
+% total = 0
+% for (fdb_k, fdb_v) in VIEW_MAP_RANGE:
+%   {EncUserKey, EncUserVal} = erlfdb_tuple:unpack(fdb_v),
+%   UserKey = couch_views_encoding:decode(EncUserKey),
+%   UserVal = couch_views_encoding:decode(EncUserVal),
+%   total += erlang:external_size(UserKey),
+%   total += erlang:external_size(UserVal)
+%
+% Our goal in checking the size calculations is that we cover
+% as much of the possible key mutation space as possible while
+% not relying on fuzzing out the edge cases. Conceptually we have
+% two sets of keys E and U. E is the set of keys that currently exist in the
+% view, and U is the new set of keys corresponding to an update.
+%
+% Both sets E and U have the same possible set of state variables:
+%
+% 1. N unique keys, where 0 =< N =< infinity
+% 2. D keys with duplicates, where 0 =< D =< N,
+% 3. R repeats for each member of D, for 2 =< R =< infinity
+%
+% Given two sets S1 and S2, we then have a set of transition variables:
+%
+% 1. deltaN - shared unique keys, where 0 =< deltaN =< N
+% 2. deltaD - shared duplicates, where 0 =< deltaD =< N
+% 3. deltaR - shared repeats for each D, where 2 =< deltaR =< infinity
+%
+% To search our state transition space, we can create two functions to
+% first define our start and end states, and for each transition we have
+% a function that defines the shared overlap between states.
+%
+% Given a list of transitions, our checks then become simple in that
+% we can iterate over each transition checking that our index is valid
+% after each one. Index validation will purely look at the existing
+% state of the index in fdb and validate correctness.
+
+-define(NUM_SINGLE_TESTS, 100).
+-define(NUM_MULTI_TESTS, 100).
+
+-define(N_DOMAIN, [0, 1, 2, 5]).
+-define(D_DOMAIN, [0, 1, 2, 5]).
+-define(R_DOMAIN, [2, 4]).
+
+-define(DELTA_N_DOMAIN, [0, 1, 2, 5]).
+-define(DELTA_D_DOMAIN, [0, 1, 2, 5]).
+-define(DELTA_R_DOMAIN, [1, 2, 4]).
+
+
+generate_sets() ->
+    permute(?N_DOMAIN, ?D_DOMAIN, ?R_DOMAIN, fun(N, D, R) ->
+        % We can't have more duplicates than total keys
+        case D > N of
+            true -> throw(skip);
+            false -> ok
+        end,
+
+        % Only include one of the repeat values
+        % for our zero sets
+        case D == 0 of
+            true when R == 2 -> ok;
+            true -> throw(skip);
+            false -> ok
+        end,
+
+        % Replace R with a sentinel value for sanity
+        % when there are no dupes to have repeats
+        ActualR = if D == 0 -> 0; true -> R end,
+
+        {N, D, ActualR}
+    end).
+
+
+generate_transitions() ->
+    Sets = generate_sets(),
+    Pairs = [{Set1, Set2} || Set1 <- Sets, Set2 <- Sets],
+    lists:flatmap(fun({{N1, D1, _R1} = S1, {N2, D2, _R2} = S2}) ->
+        Filter = fun(DeltaN, DeltaD, DeltaR) ->
+            % Can't share more keys than the smaller of the
+            % two sets
+            case DeltaN > min(N1, N2) of
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % For DeltaN == 0, all combinations of DeltaD and
+            % DeltaR are equivalent tests
+            case DeltaN == 0 of
+                true when DeltaD == 0, DeltaR == 1 -> ok;
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % Can't share more dupes than exist in either set
+            % or the total number of shared keys
+            case DeltaD > min(D1, D2) orelse DeltaD > DeltaN of
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % For DeltaD == 0, all DeltaR correspond to the
+            % same test so only include one instance
+            case DeltaD == 0 of
+                true when DeltaR == 1 -> ok;
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % If we have more non-repeated keys in our
+            % transition than there's "room" for in the target
+            % set it isn't a valid test case.
+            TransitionNonRepeats = DeltaN - DeltaD,
+            TargetNonRepeats = N2 - D2,
+            case TransitionNonRepeats > TargetNonRepeats of
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            {S1, S2, {DeltaN, DeltaD, DeltaR}}
+        end,
+        permute(?DELTA_N_DOMAIN, ?DELTA_D_DOMAIN, ?DELTA_R_DOMAIN, Filter)
+    end, Pairs).
+
+
+permute(NList, DList, RList, Filter) ->
+    % Technically we could call into Filter in each
+    % outer loop to conditionally skip inner loops.
+    % If someone comes along looking to speed up the
+    % fixture setup time, this would likely be an
+    % easy win.
+    lists:foldl(fun(N, NAcc) ->
+        lists:foldl(fun(D, DAcc) ->
+            lists:foldl(fun(R, RAcc) ->
+                try
+                    [Filter(N, D, R) | RAcc]
+                catch throw:skip ->
+                    RAcc
+                end
+            end, DAcc, RList)
+        end, NAcc, DList)
+    end, [], NList).
+
+
+row_transition_test_() ->
+    {
+        "Test view size tracking",
+        {
+            setup,
+            fun setup/0,
+            fun cleanup/1,
+            fun create_transition_tests/1
+        }
+    }.
+
+
+setup() ->
+    Ctx = test_util:start_couch([
+            fabric,
+            couch_jobs,
+            couch_js,
+            couch_views
+        ]),
+    {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]),
+    {Ctx, Db}.
+
+
+cleanup({Ctx, Db}) ->
+    ok = fabric2_db:delete(fabric2_db:name(Db), []),
+    test_util:stop_couch(Ctx).
+
+
+create_transition_tests({_Ctx, Db}) ->
+    Transitions = generate_transitions(),
+    Single = lists:flatmap(fun(T) ->
+        Name = lists:flatten(io_lib:format("single ~s", [tname(T)])),
+        [{Name, fun() -> check_single_transition(Db, T) end}]
+    end, lists:sort(Transitions)),
+    Multi = lists:flatmap(fun(T) ->
+        Name = lists:flatten(io_lib:format("multi ~s", [tname(T)])),
+        [{Name, fun() -> check_multi_transition(Db, T) end}]
+    end, lists:sort(group(shuffle(Transitions)))),
+    subset(?NUM_SINGLE_TESTS, Single) ++ subset(?NUM_MULTI_TESTS, Multi).
+
+
+check_single_transition(Db, {Set1, Set2, Transition}) ->
+    clear_views(Db),
+    InitKVs = init_set(Set1, [a, b, c, d, e]),
+    CommonKVs = reduce_set(Transition, InitKVs),
+    FinalKVs = fill_set(Set2, CommonKVs, [v, w, x, y, z]),
+    {InitJSONKVs, Bindings} = unlabel(InitKVs, #{}),
+    {FinalJSONKVs, _} = unlabel(FinalKVs, Bindings),
+
+    Sig = couch_uuids:random(),
+    DocId = couch_uuids:random(),
+
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
+        write_docs(TxDb, Sig, [make_doc(DocId, InitJSONKVs)])
+    end),
+
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
 
 Review comment:
   Ah, ok. Makes sense.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] davisp commented on a change in pull request #2638: Prototype/fdb layer couch views size tests

Posted by GitBox <gi...@apache.org>.
davisp commented on a change in pull request #2638: Prototype/fdb layer couch views size tests
URL: https://github.com/apache/couchdb/pull/2638#discussion_r388494724
 
 

 ##########
 File path: src/couch_views/test/couch_views_size_test.erl
 ##########
 @@ -0,0 +1,564 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_views_size_test).
+
+-include_lib("eunit/include/eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("couch_mrview/include/couch_mrview.hrl").
+-include_lib("fabric/include/fabric2.hrl").
+-include_lib("couch_views/include/couch_views.hrl").
+
+% N.B., we should move to couch_ejson_size instead
+% of erlang:external_size
+%
+% to calculate view size:
+% total = 0
+% for (fdb_k, fdb_v) in VIEW_MAP_RANGE:
+%   {EncUserKey, EncUserVal} = erlfdb_tuple:unpack(fdb_v),
+%   UserKey = couch_views_encoding:decode(EncUserKey),
+%   UserVal = couch_views_encoding:decode(EncUserVal),
+%   total += erlang:external_size(UserKey),
+%   total += erlang:external_size(UserVal)
+%
+% Our goal in checking the size calculations is that we cover
+% as much of the possible key mutation space as possible while
+% not relying on fuzzing out the edge cases. Conceptually we have
+% two sets of keys E and U. E is keys as currently exist in the
+% view, and U is the new set of keys corresponding to an update.
+%
+% Both sets E and U have the same possible set of state variables:
+%
+% 1. N unique keys, where 0 =< N =< infinity
+% 2. D keys with duplicates, where 0 =< D =< N,
+% 3. R repeats for each member of D, for 2 =< R =< infinity
+%
+% Given two sets S1 and S2, we then have a set of transition variables:
+%
+% 1. deltaN - shared unique keys, where 0 =< deltaN =< N
+% 2. deltaD - shared duplicates, where 0 =< deltaD =< N
+% 3. deltaR - shared repeats for each D, where 2 =< deltaR =< infinity
+%
+% To search our state transition space, we can create two functions to
+% first define our start and end states, and for each transition we have
+% a function that defines the shared overlap between states.
+%
+% Given a list of transitions, our checks then become simple in that
+% we can iterate over each transition checking that our index is valid
+% after each one. Index validation will purely look at the existing
+% state of the index in fdb and validate correctness.
+
+-define(NUM_SINGLE_TESTS, 100).
+-define(NUM_MULTI_TESTS, 100).
+
+-define(N_DOMAIN, [0, 1, 2, 5]).
+-define(D_DOMAIN, [0, 1, 2, 5]).
+-define(R_DOMAIN, [2, 4]).
+
+-define(DELTA_N_DOMAIN, [0, 1, 2, 5]).
+-define(DELTA_D_DOMAIN, [0, 1, 2, 5]).
+-define(DELTA_R_DOMAIN, [1, 2, 4]).
+
+
+generate_sets() ->  % Build every valid {N, D, R} set from the domain macros
+    permute(?N_DOMAIN, ?D_DOMAIN, ?R_DOMAIN, fun(N, D, R) ->
+        % We can't have more duplicates than total keys
+        case D > N of
+            true -> throw(skip);  % caught by permute/4, drops this combination
+            false -> ok
+        end,
+
+        % Only include one of the repeat values
+        % for our zero sets
+        case D == 0 of
+            true when R == 2 -> ok;  % keep only the R == 2 instance
+            true -> throw(skip);
+            false -> ok
+        end,
+
+        % Replace R with a sentinel value for sanity
+        % when there are no dupes to have repeats
+        ActualR = if D == 0 -> 0; true -> R end,
+
+        {N, D, ActualR}
+    end).
+
+
+generate_transitions() ->
+    Sets = generate_sets(),
+    %Pairs0 = [{Set1, Set2} || Set1 <- Sets, Set2 <- Sets],
 
 Review comment:
   Ooops, meant to remove that after testing.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] nickva commented on a change in pull request #2638: Prototype/fdb layer couch views size tests

Posted by GitBox <gi...@apache.org>.
nickva commented on a change in pull request #2638: Prototype/fdb layer couch views size tests
URL: https://github.com/apache/couchdb/pull/2638#discussion_r388487449
 
 

 ##########
 File path: src/couch_views/test/couch_views_indexer_test.erl
 ##########
 @@ -388,6 +389,77 @@ multipe_identical_keys_from_same_doc(Db) ->
         ], Out).
 
 
+fewer_multipe_identical_keys_from_same_doc(Db) ->
+    DDoc = create_ddoc(multi_emit_same),
+
+    Doc0 = couch_doc:from_json_obj({[
+        {<<"_id">>, list_to_binary(integer_to_list(0))},
+        {<<"val">>, 1},
+        {<<"extra">>, 3}
+    ]}),
+
+    {ok, _} = fabric2_db:update_doc(Db, DDoc, []),
+    {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc0, []),
+
+    {ok, Out1} = couch_views:query(
+            Db,
+            DDoc,
+            <<"map_fun1">>,
+            fun fold_fun/2,
+            [],
+            #mrargs{}
+        ),
+
+    ?assertEqual([
+            {row, [
+                {id, <<"0">>},
+                {key, 1},
+                {value, 1}
+            ]},
+            {row, [
+                {id, <<"0">>},
+                {key, 1},
+                {value, 2}
+            ]},
+            {row, [
+                {id, <<"0">>},
+                {key, 1},
+                {value, 3}
+            ]}
+        ], Out1),
+
+    Doc1 = couch_doc:from_json_obj({[
+        {<<"_id">>, list_to_binary(integer_to_list(0))},
 
 Review comment:
   integer_to_binary/1

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] nickva commented on a change in pull request #2638: Prototype/fdb layer couch views size tests

Posted by GitBox <gi...@apache.org>.
nickva commented on a change in pull request #2638: Prototype/fdb layer couch views size tests
URL: https://github.com/apache/couchdb/pull/2638#discussion_r388493215
 
 

 ##########
 File path: src/couch_views/test/couch_views_size_test.erl
 ##########
 @@ -0,0 +1,564 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_views_size_test).
+
+-include_lib("eunit/include/eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("couch_mrview/include/couch_mrview.hrl").
+-include_lib("fabric/include/fabric2.hrl").
+-include_lib("couch_views/include/couch_views.hrl").
+
+% N.B., we should move to couch_ejson_size instead
+% of erlang:external_size
+%
+% to calculate view size:
+% total = 0
+% for (fdb_k, fdb_v) in VIEW_MAP_RANGE:
+%   {EncUserKey, EncUserVal} = erlfdb_tuple:unpack(fdb_v),
+%   UserKey = couch_views_encoding:decode(EncUserKey),
+%   UserVal = couch_views_encoding:decode(EncUserVal),
+%   total += erlang:external_size(UserKey),
+%   total += erlang:external_size(UserVal)
+%
+% Our goal in checking the size calculations is that we cover
+% as much of the possible key mutation space as possible while
+% not relying on fuzzing out the edge cases. Conceptually we have
+% two sets of keys E and U. E is keys as currently exist in the
+% view, and U is the new set of keys corresponding to an update.
+%
+% Both sets E and U have the same possible set of state variables:
+%
+% 1. N unique keys, where 0 =< N =< infinity
+% 2. D keys with duplicates, where 0 =< D =< N,
+% 3. R repeats for each member of D, for 2 =< R =< infinity
+%
+% Given two sets S1 and S2, we then have a set of transition variables:
+%
+% 1. deltaN - shared unique keys, where 0 =< deltaN =< N
+% 2. deltaD - shared duplicates, where 0 =< deltaD =< N
+% 3. deltaR - shared repeats for each D, where 2 =< deltaR =< infinity
+%
+% To search our state transition space, we can create two functions to
+% first define our start and end states, and for each transition we have
+% a function that defines the shared overlap between states.
+%
+% Given a list of transitions, our checks then become simple in that
+% we can iterate over each transition checking that our index is valid
+% after each one. Index validation will purely look at the existing
+% state of the index in fdb and validate correctness.
+
+-define(NUM_SINGLE_TESTS, 100).
+-define(NUM_MULTI_TESTS, 100).
+
+-define(N_DOMAIN, [0, 1, 2, 5]).
+-define(D_DOMAIN, [0, 1, 2, 5]).
+-define(R_DOMAIN, [2, 4]).
+
+-define(DELTA_N_DOMAIN, [0, 1, 2, 5]).
+-define(DELTA_D_DOMAIN, [0, 1, 2, 5]).
+-define(DELTA_R_DOMAIN, [1, 2, 4]).
+
+
+generate_sets() ->  % Build every valid {N, D, R} set from the domain macros
+    permute(?N_DOMAIN, ?D_DOMAIN, ?R_DOMAIN, fun(N, D, R) ->
+        % We can't have more duplicates than total keys
+        case D > N of
+            true -> throw(skip);  % caught by permute/4, drops this combination
+            false -> ok
+        end,
+
+        % Only include one of the repeat values
+        % for our zero sets
+        case D == 0 of
+            true when R == 2 -> ok;  % keep only the R == 2 instance
+            true -> throw(skip);
+            false -> ok
+        end,
+
+        % Replace R with a sentinel value for sanity
+        % when there are no dupes to have repeats
+        ActualR = if D == 0 -> 0; true -> R end,
+
+        {N, D, ActualR}
+    end).
+
+
+generate_transitions() ->
+    Sets = generate_sets(),
+    %Pairs0 = [{Set1, Set2} || Set1 <- Sets, Set2 <- Sets],
 
 Review comment:
   The comment is how to generate a smaller list of transitions?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] nickva commented on a change in pull request #2638: Prototype/fdb layer couch views size tests

Posted by GitBox <gi...@apache.org>.
nickva commented on a change in pull request #2638: Prototype/fdb layer couch views size tests
URL: https://github.com/apache/couchdb/pull/2638#discussion_r388485753
 
 

 ##########
 File path: src/couch_views/test/couch_views_indexer_test.erl
 ##########
 @@ -388,6 +389,77 @@ multipe_identical_keys_from_same_doc(Db) ->
         ], Out).
 
 
+fewer_multipe_identical_keys_from_same_doc(Db) ->
+    DDoc = create_ddoc(multi_emit_same),
+
+    Doc0 = couch_doc:from_json_obj({[
+        {<<"_id">>, list_to_binary(integer_to_list(0))},
 
 Review comment:
   minor nit: can use `integer_to_binary/1` directly

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] davisp commented on issue #2638: Prototype/fdb layer couch views size tests

Posted by GitBox <gi...@apache.org>.
davisp commented on issue #2638: Prototype/fdb layer couch views size tests
URL: https://github.com/apache/couchdb/pull/2638#issuecomment-595427326
 
 
   A note on the last line in @nickva's comment about view sizes. That last line is 0 because the value sizes in the script exceed the recently added thresholds, so no docs are included in the view, which gives a size of 0.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] davisp commented on a change in pull request #2638: Prototype/fdb layer couch views size tests

Posted by GitBox <gi...@apache.org>.
davisp commented on a change in pull request #2638: Prototype/fdb layer couch views size tests
URL: https://github.com/apache/couchdb/pull/2638#discussion_r388496956
 
 

 ##########
 File path: src/couch_views/test/couch_views_indexer_test.erl
 ##########
 @@ -388,6 +389,77 @@ multipe_identical_keys_from_same_doc(Db) ->
         ], Out).
 
 
+fewer_multipe_identical_keys_from_same_doc(Db) ->
+    DDoc = create_ddoc(multi_emit_same),
+
+    Doc0 = couch_doc:from_json_obj({[
+        {<<"_id">>, list_to_binary(integer_to_list(0))},
+        {<<"val">>, 1},
+        {<<"extra">>, 3}
+    ]}),
+
+    {ok, _} = fabric2_db:update_doc(Db, DDoc, []),
+    {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc0, []),
+
+    {ok, Out1} = couch_views:query(
+            Db,
+            DDoc,
+            <<"map_fun1">>,
+            fun fold_fun/2,
+            [],
+            #mrargs{}
+        ),
+
+    ?assertEqual([
+            {row, [
+                {id, <<"0">>},
+                {key, 1},
+                {value, 1}
+            ]},
+            {row, [
+                {id, <<"0">>},
+                {key, 1},
+                {value, 2}
+            ]},
+            {row, [
+                {id, <<"0">>},
+                {key, 1},
+                {value, 3}
+            ]}
+        ], Out1),
+
+    Doc1 = couch_doc:from_json_obj({[
+        {<<"_id">>, list_to_binary(integer_to_list(0))},
 
 Review comment:
   Went with `<<"0">>` directly.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] nickva commented on a change in pull request #2638: Prototype/fdb layer couch views size tests

Posted by GitBox <gi...@apache.org>.
nickva commented on a change in pull request #2638: Prototype/fdb layer couch views size tests
URL: https://github.com/apache/couchdb/pull/2638#discussion_r388506274
 
 

 ##########
 File path: src/couch_views/test/couch_views_size_test.erl
 ##########
 @@ -0,0 +1,562 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_views_size_test).
+
+-include_lib("eunit/include/eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("couch_mrview/include/couch_mrview.hrl").
+-include_lib("fabric/include/fabric2.hrl").
+-include_lib("couch_views/include/couch_views.hrl").
+
+% N.B., we should move to couch_ejson_size instead
+% of erlang:external_size
+%
+% to calculate view size:
+% total = 0
+% for (fdb_k, fdb_v) in VIEW_MAP_RANGE:
+%   {EncUserKey, EncUserVal} = erlfdb_tuple:unpack(fdb_v),
+%   UserKey = couch_views_encoding:decode(EncUserKey),
+%   UserVal = couch_views_encoding:decode(EncUserVal),
+%   total += erlang:external_size(UserKey),
+%   total += erlang:external_size(UserVal)
+%
+% Our goal in checking the size calculations is that we cover
+% as much of the possible key mutation space as possible while
+% not relying on fuzzing out the edge cases. Conceptually we have
+% two sets of keys E and U. E is keys as currently exist in the
+% view, and U is the new set of keys corresponding to an update.
+%
+% Both sets E and U have the same possible set of state variables:
+%
+% 1. N unique keys, where 0 =< N =< infinity
+% 2. D keys with duplicates, where 0 =< D =< N,
+% 3. R repeats for each member of D, for 2 =< R =< infinity
+%
+% Given two sets S1 and S2, we then have a set of transition variables:
+%
+% 1. deltaN - shared unique keys, where 0 =< deltaN =< N
+% 2. deltaD - shared duplicates, where 0 =< deltaD =< N
+% 3. deltaR - shared repeats for each D, where 2 =< deltaR =< infinity
+%
+% To search our state transition space, we can create two functions to
+% first define our start and end states, and for each transition we have
+% a function that defines the shared overlap between states.
+%
+% Given a list of transitions, our checks then become simple in that
+% we can iterate over each transition checking that our index is valid
+% after each one. Index validation will purely look at the existing
+% state of the index in fdb and validate correctness.
+
+-define(NUM_SINGLE_TESTS, 100).
+-define(NUM_MULTI_TESTS, 100).
+
+-define(N_DOMAIN, [0, 1, 2, 5]).
+-define(D_DOMAIN, [0, 1, 2, 5]).
+-define(R_DOMAIN, [2, 4]).
+
+-define(DELTA_N_DOMAIN, [0, 1, 2, 5]).
+-define(DELTA_D_DOMAIN, [0, 1, 2, 5]).
+-define(DELTA_R_DOMAIN, [1, 2, 4]).
+
+
+generate_sets() ->  % Build every valid {N, D, R} set from the domain macros
+    permute(?N_DOMAIN, ?D_DOMAIN, ?R_DOMAIN, fun(N, D, R) ->
+        % We can't have more duplicates than total keys
+        case D > N of
+            true -> throw(skip);  % caught by permute/4, drops this combination
+            false -> ok
+        end,
+
+        % Only include one of the repeat values
+        % for our zero sets
+        case D == 0 of
+            true when R == 2 -> ok;  % keep only the R == 2 instance
+            true -> throw(skip);
+            false -> ok
+        end,
+
+        % Replace R with a sentinel value for sanity
+        % when there are no dupes to have repeats
+        ActualR = if D == 0 -> 0; true -> R end,
+
+        {N, D, ActualR}
+    end).
+
+
+generate_transitions() ->  % All {Set1, Set2, {DeltaN, DeltaD, DeltaR}} cases
+    Sets = generate_sets(),
+    Pairs = [{Set1, Set2} || Set1 <- Sets, Set2 <- Sets],  % all ordered pairs
+    lists:flatmap(fun({{N1, D1, _R1} = S1, {N2, D2, _R2} = S2}) ->
+        Filter = fun(DeltaN, DeltaD, DeltaR) ->
+            % Can't share more keys than the smaller of the
+            % two sets
+            case DeltaN > min(N1, N2) of
+                true -> throw(skip);  % caught by permute/4, drops this case
+                false -> ok
+            end,
+
+            % For DeltaN == 0, all combinations of DeltaD and
+            % DeltaR are equivalent tests, so keep only one of them
+            case DeltaN == 0 of
+                true when DeltaD == 0, DeltaR == 1 -> ok;
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % Can't share more dupes than exist in either set
+            % or the total number of shared keys
+            case DeltaD > min(D1, D2) orelse DeltaD > DeltaN of
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % For DeltaD == 0, all DeltaR correspond to the
+            % same test so only include one instance
+            case DeltaD == 0 of
+                true when DeltaR == 1 -> ok;
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % If we have more non-repeated keys in our
+            % transition than there's "room" for in the target
+            % set it isn't a valid test case.
+            TransitionNonRepeats = DeltaN - DeltaD,
+            TargetNonRepeats = N2 - D2,
+            case TransitionNonRepeats > TargetNonRepeats of
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            {S1, S2, {DeltaN, DeltaD, DeltaR}}
+        end,
+        permute(?DELTA_N_DOMAIN, ?DELTA_D_DOMAIN, ?DELTA_R_DOMAIN, Filter)
+    end, Pairs).
+
+
+permute(NList, DList, RList, Filter) ->  % Filter(N, D, R) over the full product
+    % Technically we could call into Filter in each
+    % outer loops to conditionally skip inner loops.
+    % If someone comes along looking to speed up the
+    % fixture setup time, this would likely be an
+    % easy win.
+    lists:foldl(fun(N, NAcc) ->
+        lists:foldl(fun(D, DAcc) ->
+            lists:foldl(fun(R, RAcc) ->
+                try
+                    [Filter(N, D, R) | RAcc]
+                catch throw:skip ->  % Filter rejected this combination
+                    RAcc
+                end
+            end, DAcc, RList)
+        end, NAcc, DList)
+    end, [], NList).  % one shared accumulator; results are prepended
+
+
+row_transition_test_() ->  % EUnit generator wrapping the tests in a fixture
+    {
+        "Test view size tracking",
+        {
+            setup,
+            fun setup/0,  % returns the {Ctx, Db} pair used below
+            fun cleanup/1,  % receives the value returned by setup/0
+            fun create_transition_tests/1  % builds the tests from {Ctx, Db}
+        }
+    }.
+
+
+setup() ->  % Fixture setup: start_couch with these apps, then create a db
+    Ctx = test_util:start_couch([
+            fabric,
+            couch_jobs,
+            couch_js,
+            couch_views
+        ]),
+    {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]),
+    {Ctx, Db}.  % Ctx is needed by stop_couch, Db by the generated tests
+
+
+cleanup({Ctx, Db}) ->  % Fixture teardown for the {Ctx, Db} built by setup/0
+    ok = fabric2_db:delete(fabric2_db:name(Db), []),  % badmatch unless ok
+    test_util:stop_couch(Ctx).
+
+
+create_transition_tests({_Ctx, Db}) ->  % Build named {Name, Fun} EUnit tests
+    Transitions = generate_transitions(),  % shared input for both test lists
+    Single = lists:flatmap(fun(T) ->  % one test per sorted transition
+        Name = lists:flatten(io_lib:format("single ~s", [tname(T)])),
+        [{Name, fun() -> check_single_transition(Db, T) end}]
+    end, lists:sort(Transitions)),
+    Multi = lists:flatmap(fun(T) ->  % T is one element of group(shuffle(...))
+        Name = lists:flatten(io_lib:format("multi ~s", [tname(T)])),
+        [{Name, fun() -> check_multi_transition(Db, T) end}]
+    end, lists:sort(group(shuffle(Transitions)))),
+    subset(?NUM_SINGLE_TESTS, Single) ++ subset(?NUM_MULTI_TESTS, Multi).
+
+
+check_single_transition(Db, {Set1, Set2, Transition}) ->
+    clear_views(Db),
+    InitKVs = init_set(Set1, [a, b, c, d, e]),
+    CommonKVs = reduce_set(Transition, InitKVs),
+    FinalKVs = fill_set(Set2, CommonKVs, [v, w, x, y, z]),
+    {InitJSONKVs, Bindings} = unlabel(InitKVs, #{}),
+    {FinalJSONKVs, _} = unlabel(FinalKVs, Bindings),
+
+    Sig = couch_uuids:random(),
+    DocId = couch_uuids:random(),
+
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
+        write_docs(TxDb, Sig, [make_doc(DocId, InitJSONKVs)])
+    end),
+
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
 
 Review comment:
   Just curious, is it important that they run in separate transactions for the test purpose or just to avoid timeouts by keeping the size down?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] davisp merged pull request #2638: Prototype/fdb layer couch views size tests

Posted by GitBox <gi...@apache.org>.
davisp merged pull request #2638: Prototype/fdb layer couch views size tests
URL: https://github.com/apache/couchdb/pull/2638
 
 
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] davisp commented on a change in pull request #2638: Prototype/fdb layer couch views size tests

Posted by GitBox <gi...@apache.org>.
davisp commented on a change in pull request #2638: Prototype/fdb layer couch views size tests
URL: https://github.com/apache/couchdb/pull/2638#discussion_r388496737
 
 

 ##########
 File path: src/couch_views/test/couch_views_size_test.erl
 ##########
 @@ -0,0 +1,564 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_views_size_test).
+
+-include_lib("eunit/include/eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("couch_mrview/include/couch_mrview.hrl").
+-include_lib("fabric/include/fabric2.hrl").
+-include_lib("couch_views/include/couch_views.hrl").
+
+% N.B., we should move to couch_ejson_size instead
+% of erlang:external_size
+%
+% to calculate view size:
+% total = 0
+% for (fdb_k, fdb_v) in VIEW_MAP_RANGE:
+%   {EncUserKey, EncUserVal} = erlfdb_tuple:unpack(fdb_v),
+%   UserKey = couch_views_encoding:decode(EncUserKey),
+%   UserVal = couch_views_encoding:decode(EncUserVal),
+%   total += erlang:external_size(UserKey),
+%   total += erlang:external_size(UserVal)
+%
+% Our goal in checking the size calculations is that we cover
+% as much of the possible key mutation space as possible while
+% not relying on fuzzing out the edge cases. Conceptually we have
+% two sets of keys E and U. E is keys as currently exist in the
+% view, and U is the new set of keys corresponding to an update.
+%
+% Both sets E and U have the same possible set of state variables:
+%
+% 1. N unique keys, where 0 =< N =< infinity
+% 2. D keys with duplicates, where 0 =< D =< N,
+% 3. R repeats for each member of D, for 2 =< R =< infinity
+%
+% Given two sets S1 and S2, we then have a set of transition variables:
+%
+% 1. deltaN - shared unique keys, where 0 =< deltaN =< N
+% 2. deltaD - shared duplicates, where 0 =< deltaD =< N
+% 3. deltaR - shared repeats for each D, where 2 =< deltaR =< infinity
+%
+% To search our state transition space, we can create two functions to
+% first define our start and end states, and for each transition we have
+% a function that defines the shared overlap between states.
+%
+% Given a list of transitions, our checks then become simple in that
+% we can iterate over each transition checking that our index is valid
+% after each one. Index validation will purely look at the existing
+% state of the index in fdb and validate correctness.
+
+-define(NUM_SINGLE_TESTS, 100).
+-define(NUM_MULTI_TESTS, 100).
+
+-define(N_DOMAIN, [0, 1, 2, 5]).
+-define(D_DOMAIN, [0, 1, 2, 5]).
+-define(R_DOMAIN, [2, 4]).
+
+-define(DELTA_N_DOMAIN, [0, 1, 2, 5]).
+-define(DELTA_D_DOMAIN, [0, 1, 2, 5]).
+-define(DELTA_R_DOMAIN, [1, 2, 4]).
+
+
+generate_sets() ->  % Build every valid {N, D, R} set from the domain macros
+    permute(?N_DOMAIN, ?D_DOMAIN, ?R_DOMAIN, fun(N, D, R) ->
+        % We can't have more duplicates than total keys
+        case D > N of
+            true -> throw(skip);  % caught by permute/4, drops this combination
+            false -> ok
+        end,
+
+        % Only include one of the repeat values
+        % for our zero sets
+        case D == 0 of
+            true when R == 2 -> ok;  % keep only the R == 2 instance
+            true -> throw(skip);
+            false -> ok
+        end,
+
+        % Replace R with a sentinel value for sanity
+        % when there are no dupes to have repeats
+        ActualR = if D == 0 -> 0; true -> R end,
+
+        {N, D, ActualR}
+    end).
+
+
+generate_transitions() ->
+    Sets = generate_sets(),
+    %Pairs0 = [{Set1, Set2} || Set1 <- Sets, Set2 <- Sets],
 
 Review comment:
   Removed.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] nickva commented on a change in pull request #2638: Prototype/fdb layer couch views size tests

Posted by GitBox <gi...@apache.org>.
nickva commented on a change in pull request #2638: Prototype/fdb layer couch views size tests
URL: https://github.com/apache/couchdb/pull/2638#discussion_r388511667
 
 

 ##########
 File path: src/couch_views/test/couch_views_size_test.erl
 ##########
 @@ -0,0 +1,562 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_views_size_test).
+
+-include_lib("eunit/include/eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("couch_mrview/include/couch_mrview.hrl").
+-include_lib("fabric/include/fabric2.hrl").
+-include_lib("couch_views/include/couch_views.hrl").
+
+% N.B., we should move to couch_ejson_size instead
+% of erlang:external_size
+%
+% to calculate view size:
+% total = 0
+% for (fdb_k, fdb_v) in VIEW_MAP_RANGE:
+%   {EncUserKey, EncUserVal} = erlfdb_tuple:unpack(fdb_v),
+%   UserKey = couch_views_encoding:decode(EncUserKey),
+%   UserVal = couch_views_encoding:decode(EncUserVal),
+%   total += erlang:external_size(UserKey),
+%   total += erlang:external_size(UserVal)
+%
+% Our goal in checking the size calculations is that we cover
+% as much of the possible key mutation space as possible while
+% not relying on fuzzing out the edge cases. Conceptually we have
+% two sets of keys E and U. E is keys as currently exist in the
+% view, and U is the new set of keys corresponding to an update.
+%
+% Both sets E and U have the same possible set of state variables:
+%
+% 1. N unique keys, where 0 =< N =< infinity
+% 2. D keys with duplicates, where 0 =< D =< N,
+% 3. R repeats for each member of D, for 2 =< R =< infinity
+%
+% Given two sets S1 and S2, we then have a set of transition variables:
+%
+% 1. deltaN - shared unique keys, where 0 =< deltaN =< N
+% 2. deltaD - shared duplicates, where 0 =< deltaD =< N
+% 3. deltaR - shared repeats for each D, where 2 =< deltaR =< infinity
+%
+% To search our state transition space, we can create two functions to
+% first define our start and end states, and for each transition we have
+% a function that defines the shared overlap between states.
+%
+% Given a list of transitions, our checks then become simple in that
+% we can iterate over each transition checking that our index is valid
+% after each one. Index validation will purely look at the existing
+% state of the index in fdb and validate correctness.
+
+-define(NUM_SINGLE_TESTS, 100).
+-define(NUM_MULTI_TESTS, 100).
+
+-define(N_DOMAIN, [0, 1, 2, 5]).
+-define(D_DOMAIN, [0, 1, 2, 5]).
+-define(R_DOMAIN, [2, 4]).
+
+-define(DELTA_N_DOMAIN, [0, 1, 2, 5]).
+-define(DELTA_D_DOMAIN, [0, 1, 2, 5]).
+-define(DELTA_R_DOMAIN, [1, 2, 4]).
+
+
+generate_sets() ->
+    permute(?N_DOMAIN, ?D_DOMAIN, ?R_DOMAIN, fun(N, D, R) ->
+        % We can't have more duplicates than total keys
+        case D > N of
+            true -> throw(skip);
+            false -> ok
+        end,
+
+        % Only include one of the repeat values
+        % for our zero sets
+        case D == 0 of
+            true when R == 2 -> ok;
+            true -> throw(skip);
+            false -> ok
+        end,
+
+        % Replace R with a sentinel value for sanity
+        % when there are no dupes to have repeats
+        ActualR = if D == 0 -> 0; true -> R end,
+
+        {N, D, ActualR}
+    end).
+
+
+generate_transitions() ->
+    Sets = generate_sets(),
+    Pairs = [{Set1, Set2} || Set1 <- Sets, Set2 <- Sets],
+    lists:flatmap(fun({{N1, D1, _R1} = S1, {N2, D2, _R2} = S2}) ->
+        Filter = fun(DeltaN, DeltaD, DeltaR) ->
+            % Can't share more keys than the smaller of the
+            % two sets
+            case DeltaN > min(N1, N2) of
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % For DeltaN == 0, all combinations of DeltaD and
+            % DeltaR are equivalent tests
+            case DeltaN == 0 of
+                true when DeltaD == 0, DeltaR == 1 -> ok;
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % Can't share more dupes than exist in either set
+            % or the total number of shared keys
+            case DeltaD > min(D1, D2) orelse DeltaD > DeltaN of
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % For DeltaD == 0, all DeltaR correspond to the
+            % same test so only include one instance
+            case DeltaD == 0 of
+                true when DeltaR == 1 -> ok;
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % If we have more non-repeated keys in our
+            % transition than there's "room" for in the target
+            % set it isn't a valid test case.
+            TransitionNonRepeats = DeltaN - DeltaD,
+            TargetNonRepeats = N2 - D2,
+            case TransitionNonRepeats > TargetNonRepeats of
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            {S1, S2, {DeltaN, DeltaD, DeltaR}}
+        end,
+        permute(?DELTA_N_DOMAIN, ?DELTA_D_DOMAIN, ?DELTA_R_DOMAIN, Filter)
+    end, Pairs).
+
+
+permute(NList, DList, RList, Filter) ->
+    % Technically we could call into Filter in each
+    % outer loop to conditionally skip inner loops.
+    % If someone comes along looking to speed up the
+    % fixture setup time, this would likely be an
+    % easy win.
+    lists:foldl(fun(N, NAcc) ->
+        lists:foldl(fun(D, DAcc) ->
+            lists:foldl(fun(R, RAcc) ->
+                try
+                    [Filter(N, D, R) | RAcc]
+                catch throw:skip ->
+                    RAcc
+                end
+            end, DAcc, RList)
+        end, NAcc, DList)
+    end, [], NList).
+
+
+row_transition_test_() ->
+    {
+        "Test view size tracking",
+        {
+            setup,
+            fun setup/0,
+            fun cleanup/1,
+            fun create_transition_tests/1
+        }
+    }.
+
+
+setup() ->
+    Ctx = test_util:start_couch([
+            fabric,
+            couch_jobs,
+            couch_js,
+            couch_views
+        ]),
+    {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]),
+    {Ctx, Db}.
+
+
+cleanup({Ctx, Db}) ->
+    ok = fabric2_db:delete(fabric2_db:name(Db), []),
+    test_util:stop_couch(Ctx).
+
+
+create_transition_tests({_Ctx, Db}) ->
+    Transitions = generate_transitions(),
+    Single = lists:flatmap(fun(T) ->
+        Name = lists:flatten(io_lib:format("single ~s", [tname(T)])),
+        [{Name, fun() -> check_single_transition(Db, T) end}]
+    end, lists:sort(Transitions)),
+    Multi = lists:flatmap(fun(T) ->
+        Name = lists:flatten(io_lib:format("multi ~s", [tname(T)])),
+        [{Name, fun() -> check_multi_transition(Db, T) end}]
+    end, lists:sort(group(shuffle(Transitions)))),
+    subset(?NUM_SINGLE_TESTS, Single) ++ subset(?NUM_MULTI_TESTS, Multi).
+
+
+check_single_transition(Db, {Set1, Set2, Transition}) ->
+    clear_views(Db),
+    InitKVs = init_set(Set1, [a, b, c, d, e]),
+    CommonKVs = reduce_set(Transition, InitKVs),
+    FinalKVs = fill_set(Set2, CommonKVs, [v, w, x, y, z]),
+    {InitJSONKVs, Bindings} = unlabel(InitKVs, #{}),
+    {FinalJSONKVs, _} = unlabel(FinalKVs, Bindings),
+
+    Sig = couch_uuids:random(),
+    DocId = couch_uuids:random(),
+
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
+        write_docs(TxDb, Sig, [make_doc(DocId, InitJSONKVs)])
+    end),
+
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
+        write_docs(TxDb, Sig, [make_doc(DocId, FinalJSONKVs)])
+    end),
+
+    validate_index(Db, Sig, #{DocId => FinalJSONKVs}).
+
+
+check_multi_transition(Db, Transitions) ->
+    clear_views(Db),
+
+    {Docs, IdMap} = lists:mapfoldl(fun({Set1, Set2, Transition}, IdMapAcc) ->
+        DocId = couch_uuids:random(),
+        InitKVs = init_set(Set1, [a, b, c, d, e]),
+        CommonKVs = reduce_set(Transition, InitKVs),
+        FinalKVs = fill_set(Set2, CommonKVs, [v, w, x, y, z]),
+        {InitJSONKVs, Bindings} = unlabel(InitKVs, #{}),
+        {FinalJSONKVs, _} = unlabel(FinalKVs, Bindings),
+        InitDoc = make_doc(DocId, InitJSONKVs),
+        FinalDoc = make_doc(DocId, FinalJSONKVs),
+        {{InitDoc, FinalDoc}, maps:put(DocId, FinalJSONKVs, IdMapAcc)}
+    end, #{}, Transitions),
+
+    {InitDocs, FinalDocs} = lists:unzip(Docs),
+
+    Sig = couch_uuids:random(),
+
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
+        write_docs(TxDb, Sig, InitDocs)
+    end),
+
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
+        write_docs(TxDb, Sig, FinalDocs)
+    end),
+
+    validate_index(Db, Sig, IdMap).
+
+
+clear_views(Db) ->
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
+        #{
+            tx := Tx,
+            db_prefix := DbPrefix
+        } = TxDb,
+        {Start, End} = erlfdb_tuple:range({?DB_VIEWS}, DbPrefix),
+        erlfdb:clear_range(Tx, Start, End),
+
+        GlobalKey = {?DB_STATS, <<"sizes">>, <<"views">>},
+        BinGlobalKey = erlfdb_tuple:pack(GlobalKey, DbPrefix),
+        erlfdb:set(Tx, BinGlobalKey, ?uint2bin(0))
+    end).
+
+
+write_docs(TxDb, Sig, Docs) ->
+    Mrst = #mrst{
+        sig = Sig,
+        views = [#mrview{
+            id_num = 1
+        }]
+    },
+    IdxState = #{
+        last_seq => <<"foo">>
+    },
+    couch_views_indexer:write_docs(TxDb, Mrst, Docs, IdxState).
+
+
+validate_index(Db, Sig, JSONRows) ->
+    #{
+        db_prefix := DbPrefix
+    } = Db,
+    Rows = fabric2_fdb:transactional(Db, fun(TxDb) ->
+        #{
+            tx := Tx
+        } = TxDb,
+        {Start, End} = erlfdb_tuple:range({?DB_VIEWS}, DbPrefix),
+        erlfdb:get_range(Tx, Start, End)
+    end),
+
+    InitAcc = #{
+        row_count => 0,
+        kv_size => 0,
+        ids => #{},
+        rows => []
+    },
+
+    MapData = lists:foldl(fun({Key, Value}, Acc) ->
+        case erlfdb_tuple:unpack(Key, DbPrefix) of
+            {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Sig} ->
+                ?assertEqual(<<"foo">>, Value),
+                Acc;
+            {?DB_VIEWS, ?VIEW_INFO, ?VIEW_ROW_COUNT, Sig, 1} ->
+                maps:put(row_count, ?bin2uint(Value), Acc);
+            {?DB_VIEWS, ?VIEW_INFO, ?VIEW_KV_SIZE, Sig, 1} ->
+                maps:put(kv_size, ?bin2uint(Value), Acc);
+            {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, 1} ->
+                [
+                    TotalKeys, TotalSize, UniqueKeys
+                ] = couch_views_encoding:decode(Value),
+                maps:update_with(ids, fun(Ids) ->
+                    false = maps:is_key(DocId, Ids),
+                    maps:put(DocId, {TotalKeys, TotalSize, UniqueKeys}, Ids)
+                end, Acc);
+            {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_MAP_RANGE, 1, MapKey, _DupeId} ->
+                {EncKey, DocId} = MapKey,
+                {UserKey, UserVal} = erlfdb_tuple:unpack(Value),
+
+                UserJsonKey = couch_views_encoding:decode(UserKey),
+                UserJsonVal = couch_views_encoding:decode(UserVal),
+
+                % Assert our encoded sort key is correct
+                EncKey = couch_views_encoding:encode(UserJsonKey, key),
 
 Review comment:
   Since this is a test, we could use `?assertEqual` here.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] davisp commented on a change in pull request #2638: Prototype/fdb layer couch views size tests

Posted by GitBox <gi...@apache.org>.
davisp commented on a change in pull request #2638: Prototype/fdb layer couch views size tests
URL: https://github.com/apache/couchdb/pull/2638#discussion_r388497060
 
 

 ##########
 File path: src/couch_views/test/couch_views_indexer_test.erl
 ##########
 @@ -388,6 +389,77 @@ multipe_identical_keys_from_same_doc(Db) ->
         ], Out).
 
 
+fewer_multipe_identical_keys_from_same_doc(Db) ->
+    DDoc = create_ddoc(multi_emit_same),
+
+    Doc0 = couch_doc:from_json_obj({[
+        {<<"_id">>, list_to_binary(integer_to_list(0))},
 
 Review comment:
   Went with <<"0">> directly.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] davisp commented on a change in pull request #2638: Prototype/fdb layer couch views size tests

Posted by GitBox <gi...@apache.org>.
davisp commented on a change in pull request #2638: Prototype/fdb layer couch views size tests
URL: https://github.com/apache/couchdb/pull/2638#discussion_r388512954
 
 

 ##########
 File path: src/couch_views/test/couch_views_size_test.erl
 ##########
 @@ -0,0 +1,562 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_views_size_test).
+
+-include_lib("eunit/include/eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("couch_mrview/include/couch_mrview.hrl").
+-include_lib("fabric/include/fabric2.hrl").
+-include_lib("couch_views/include/couch_views.hrl").
+
+% N.B., we should move to couch_ejson_size instead
+% of erlang:external_size
+%
+% to calculate view size:
+% total = 0
+% for (fdb_k, fdb_v) in VIEW_MAP_RANGE:
+%   {EncUserKey, EncUserVal} = erlfdb_tuple:unpack(fdb_v),
+%   UserKey = couch_views_encoding:decode(EncUserKey),
+%   UserVal = couch_views_encoding:decode(EncUserVal),
+%   total += erlang:external_size(UserKey),
+%   total += erlang:external_size(UserVal)
+%
+% Our goal in checking the size calculations is that we cover
+% as much of the possible key mutation space as possible while
+% not relying on fuzzing out the edge cases. Conceptually we have
+% two sets of keys E and U. E is keys as currently exist in the
+% view, and U is the new set of keys corresponding to an update.
+%
+% Both sets E and U have the same possible set of state variables:
+%
+% 1. N unique keys, where 0 =< N =< infinity
+% 2. D keys with duplicates, where 0 =< D =< N,
+% 3. R repeats for each member of D, for 2 =< R =< infinity
+%
+% Given two sets S1 and S2, we then have a set of transition variables:
+%
+% 1. deltaN - shared unique keys, where 0 =< deltaN =< N
+% 2. deltaD - shared duplicates, where 0 =< deltaD =< N
+% 3. deltaR - shared repeats for each D, where 2 =< deltaR =< infinity
+%
+% To search our state transition space, we can create two functions to
+% first define our start and end states, and for each transition we have
+% a function that defines the shared overlap between states.
+%
+% Given a list of transitions, our checks then become simple in that
+% we can iterate over each transition checking that our index is valid
+% after each one. Index validation will purely look at the existing
+% state of the index in fdb and validate correctness.
+
+-define(NUM_SINGLE_TESTS, 100).
+-define(NUM_MULTI_TESTS, 100).
+
+-define(N_DOMAIN, [0, 1, 2, 5]).
+-define(D_DOMAIN, [0, 1, 2, 5]).
+-define(R_DOMAIN, [2, 4]).
+
+-define(DELTA_N_DOMAIN, [0, 1, 2, 5]).
+-define(DELTA_D_DOMAIN, [0, 1, 2, 5]).
+-define(DELTA_R_DOMAIN, [1, 2, 4]).
+
+
+generate_sets() ->
+    permute(?N_DOMAIN, ?D_DOMAIN, ?R_DOMAIN, fun(N, D, R) ->
+        % We can't have more duplicates than total keys
+        case D > N of
+            true -> throw(skip);
+            false -> ok
+        end,
+
+        % Only include one of the repeat values
+        % for our zero sets
+        case D == 0 of
+            true when R == 2 -> ok;
+            true -> throw(skip);
+            false -> ok
+        end,
+
+        % Replace R with a sentinel value for sanity
+        % when there are no dupes to have repeats
+        ActualR = if D == 0 -> 0; true -> R end,
+
+        {N, D, ActualR}
+    end).
+
+
+generate_transitions() ->
+    Sets = generate_sets(),
+    Pairs = [{Set1, Set2} || Set1 <- Sets, Set2 <- Sets],
+    lists:flatmap(fun({{N1, D1, _R1} = S1, {N2, D2, _R2} = S2}) ->
+        Filter = fun(DeltaN, DeltaD, DeltaR) ->
+            % Can't share more keys than the smaller of the
+            % two sets
+            case DeltaN > min(N1, N2) of
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % For DeltaN == 0, all combinations of DeltaD and
+            % DeltaR are equivalent tests
+            case DeltaN == 0 of
+                true when DeltaD == 0, DeltaR == 1 -> ok;
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % Can't share more dupes than exist in either set
+            % or the total number of shared keys
+            case DeltaD > min(D1, D2) orelse DeltaD > DeltaN of
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % For DeltaD == 0, all DeltaR correspond to the
+            % same test so only include one instance
+            case DeltaD == 0 of
+                true when DeltaR == 1 -> ok;
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % If we have more non-repeated keys in our
+            % transition than there's "room" for in the target
+            % set it isn't a valid test case.
+            TransitionNonRepeats = DeltaN - DeltaD,
+            TargetNonRepeats = N2 - D2,
+            case TransitionNonRepeats > TargetNonRepeats of
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            {S1, S2, {DeltaN, DeltaD, DeltaR}}
+        end,
+        permute(?DELTA_N_DOMAIN, ?DELTA_D_DOMAIN, ?DELTA_R_DOMAIN, Filter)
+    end, Pairs).
+
+
+permute(NList, DList, RList, Filter) ->
+    % Technically we could call into Filter in each
+    % outer loop to conditionally skip inner loops.
+    % If someone comes along looking to speed up the
+    % fixture setup time, this would likely be an
+    % easy win.
+    lists:foldl(fun(N, NAcc) ->
+        lists:foldl(fun(D, DAcc) ->
+            lists:foldl(fun(R, RAcc) ->
+                try
+                    [Filter(N, D, R) | RAcc]
+                catch throw:skip ->
+                    RAcc
+                end
+            end, DAcc, RList)
+        end, NAcc, DList)
+    end, [], NList).
+
+
+row_transition_test_() ->
+    {
+        "Test view size tracking",
+        {
+            setup,
+            fun setup/0,
+            fun cleanup/1,
+            fun create_transition_tests/1
+        }
+    }.
+
+
+setup() ->
+    Ctx = test_util:start_couch([
+            fabric,
+            couch_jobs,
+            couch_js,
+            couch_views
+        ]),
+    {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]),
+    {Ctx, Db}.
+
+
+cleanup({Ctx, Db}) ->
+    ok = fabric2_db:delete(fabric2_db:name(Db), []),
+    test_util:stop_couch(Ctx).
+
+
+create_transition_tests({_Ctx, Db}) ->
+    Transitions = generate_transitions(),
+    Single = lists:flatmap(fun(T) ->
+        Name = lists:flatten(io_lib:format("single ~s", [tname(T)])),
+        [{Name, fun() -> check_single_transition(Db, T) end}]
+    end, lists:sort(Transitions)),
+    Multi = lists:flatmap(fun(T) ->
+        Name = lists:flatten(io_lib:format("multi ~s", [tname(T)])),
+        [{Name, fun() -> check_multi_transition(Db, T) end}]
+    end, lists:sort(group(shuffle(Transitions)))),
+    subset(?NUM_SINGLE_TESTS, Single) ++ subset(?NUM_MULTI_TESTS, Multi).
+
+
+check_single_transition(Db, {Set1, Set2, Transition}) ->
+    clear_views(Db),
+    InitKVs = init_set(Set1, [a, b, c, d, e]),
+    CommonKVs = reduce_set(Transition, InitKVs),
+    FinalKVs = fill_set(Set2, CommonKVs, [v, w, x, y, z]),
+    {InitJSONKVs, Bindings} = unlabel(InitKVs, #{}),
+    {FinalJSONKVs, _} = unlabel(FinalKVs, Bindings),
+
+    Sig = couch_uuids:random(),
+    DocId = couch_uuids:random(),
+
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
+        write_docs(TxDb, Sig, [make_doc(DocId, InitJSONKVs)])
+    end),
+
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
 
 Review comment:
   To put that another way, I wanted to avoid all of the iterating over changes and calling out to JavaScript so that I was just testing this particular bit of code. But I didn't want to remove the way that it's actually executed when it's part of the larger indexer system.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] nickva commented on a change in pull request #2638: Prototype/fdb layer couch views size tests

Posted by GitBox <gi...@apache.org>.
nickva commented on a change in pull request #2638: Prototype/fdb layer couch views size tests
URL: https://github.com/apache/couchdb/pull/2638#discussion_r388487260
 
 

 ##########
 File path: src/couch_views/test/couch_views_indexer_test.erl
 ##########
 @@ -388,6 +389,77 @@ multipe_identical_keys_from_same_doc(Db) ->
         ], Out).
 
 
+fewer_multipe_identical_keys_from_same_doc(Db) ->
+    DDoc = create_ddoc(multi_emit_same),
+
+    Doc0 = couch_doc:from_json_obj({[
+        {<<"_id">>, list_to_binary(integer_to_list(0))},
+        {<<"val">>, 1},
+        {<<"extra">>, 3}
+    ]}),
+
+    {ok, _} = fabric2_db:update_doc(Db, DDoc, []),
+    {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc0, []),
+
+    {ok, Out1} = couch_views:query(
+            Db,
+            DDoc,
+            <<"map_fun1">>,
+            fun fold_fun/2,
+            [],
+            #mrargs{}
+        ),
+
+    ?assertEqual([
+            {row, [
 
 Review comment:
   To reduce the number of lines, we could have a `row(Id, Key, Val)` helper so that it looks like
   
   ```
   ?assertEqual([
       row(<<"0">>, 1, 1),
       row(<<"0">>, 1, 2),
       row(<<"0">>, 1, 3)
   ], Out1).
   ```
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services