You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by pg...@apache.org on 2023/10/05 10:16:55 UTC
[couchdb] 03/03: mango: de-duplicate results on returning them in `nouveau`
This is an automated email from the ASF dual-hosted git repository.
pgj pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit ce2607a5c4cbddee39f0f66eabd6948489a8014e
Author: Gabor Pali <ga...@ibm.com>
AuthorDate: Wed Oct 4 13:18:48 2023 +0200
mango: de-duplicate results on returning them in `nouveau`
This is a port of a fix applied to the `text` cursor, which filters out
the duplicated documents that occasionally show up in the results. Such
duplicates can occur when an update is interleaved with moving between pages.
---
src/mango/src/mango_cursor_nouveau.erl | 55 +++++++++++++++++++++-------------
1 file changed, 35 insertions(+), 20 deletions(-)
diff --git a/src/mango/src/mango_cursor_nouveau.erl b/src/mango/src/mango_cursor_nouveau.erl
index 26358409a..2588e94d9 100644
--- a/src/mango/src/mango_cursor_nouveau.erl
+++ b/src/mango/src/mango_cursor_nouveau.erl
@@ -35,7 +35,8 @@
user_fun,
user_acc,
fields,
- execution_stats
+ execution_stats,
+ documents_seen
}).
create(Db, {Indexes, Trace}, Selector, Opts) ->
@@ -103,7 +104,8 @@ execute(Cursor, UserFun, UserAcc) ->
user_fun = UserFun,
user_acc = UserAcc,
fields = Cursor#cursor.fields,
- execution_stats = mango_execution_stats:log_start(Stats)
+ execution_stats = mango_execution_stats:log_start(Stats),
+ documents_seen = sets:new([{version, 2}])
},
try
case Query of
@@ -171,28 +173,41 @@ handle_hit(CAcc0, Hit, Doc) ->
#cacc{
limit = Limit,
skip = Skip,
- execution_stats = Stats
+ execution_stats = Stats,
+ documents_seen = Seen
} = CAcc0,
- CAcc1 = update_bookmark(CAcc0, Hit),
Stats1 = mango_execution_stats:incr_docs_examined(Stats),
couch_stats:increment_counter([mango, docs_examined]),
- CAcc2 = CAcc1#cacc{execution_stats = Stats1},
- case mango_selector:match(CAcc2#cacc.selector, Doc) of
- true when Skip > 0 ->
- CAcc2#cacc{skip = Skip - 1};
- true when Limit == 0 ->
- % We hit this case if the user spcified with a
- % zero limit. Notice that in this case we need
- % to return the bookmark from before this match
- throw({stop, CAcc0});
- true when Limit == 1 ->
- NewCAcc = apply_user_fun(CAcc2, Doc),
- throw({stop, NewCAcc});
- true when Limit > 1 ->
- NewCAcc = apply_user_fun(CAcc2, Doc),
- NewCAcc#cacc{limit = Limit - 1};
+ CAcc1 = CAcc0#cacc{execution_stats = Stats1},
+ case mango_selector:match(CAcc1#cacc.selector, Doc) of
+ true ->
+ DocId = mango_doc:get_field(Doc, <<"_id">>),
+ case sets:is_element(DocId, Seen) of
+ true ->
+ CAcc1;
+ false ->
+ CAcc2 = update_bookmark(CAcc1, Hit),
+ CAcc3 = CAcc2#cacc{
+ documents_seen = sets:add_element(DocId, Seen)
+ },
+ if
+ Skip > 0 ->
+ CAcc3#cacc{skip = Skip - 1};
+ Limit == 0 ->
+ % We hit this case if the user specified with a
+ % zero limit. Notice that in this case we need
+ % to return the bookmark from before this match.
+ throw({stop, CAcc0});
+ Limit == 1 ->
+ CAcc4 = apply_user_fun(CAcc3, Doc),
+ throw({stop, CAcc4});
+ Limit > 1 ->
+ CAcc4 = apply_user_fun(CAcc3, Doc),
+ CAcc4#cacc{limit = Limit - 1}
+ end
+ end;
false ->
- CAcc2
+ CAcc1
end.
apply_user_fun(CAcc, Doc) ->