You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by pg...@apache.org on 2023/10/05 10:16:55 UTC

[couchdb] 03/03: mango: de-duplicate results on returning them in `nouveau`

This is an automated email from the ASF dual-hosted git repository.

pgj pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit ce2607a5c4cbddee39f0f66eabd6948489a8014e
Author: Gabor Pali <ga...@ibm.com>
AuthorDate: Wed Oct 4 13:18:48 2023 +0200

    mango: de-duplicate results on returning them in `nouveau`
    
    This is a port of a fix applied to the `text` cursor, which filters
    out documents that are occasionally duplicated in the results.  Such
    duplicates can appear when moving between pages while an interleaved
    update is applied.
---
 src/mango/src/mango_cursor_nouveau.erl | 55 +++++++++++++++++++++-------------
 1 file changed, 35 insertions(+), 20 deletions(-)

diff --git a/src/mango/src/mango_cursor_nouveau.erl b/src/mango/src/mango_cursor_nouveau.erl
index 26358409a..2588e94d9 100644
--- a/src/mango/src/mango_cursor_nouveau.erl
+++ b/src/mango/src/mango_cursor_nouveau.erl
@@ -35,7 +35,8 @@
     user_fun,
     user_acc,
     fields,
-    execution_stats
+    execution_stats,
+    documents_seen
 }).
 
 create(Db, {Indexes, Trace}, Selector, Opts) ->
@@ -103,7 +104,8 @@ execute(Cursor, UserFun, UserAcc) ->
         user_fun = UserFun,
         user_acc = UserAcc,
         fields = Cursor#cursor.fields,
-        execution_stats = mango_execution_stats:log_start(Stats)
+        execution_stats = mango_execution_stats:log_start(Stats),
+        documents_seen = sets:new([{version, 2}])
     },
     try
         case Query of
@@ -171,28 +173,41 @@ handle_hit(CAcc0, Hit, Doc) ->
     #cacc{
         limit = Limit,
         skip = Skip,
-        execution_stats = Stats
+        execution_stats = Stats,
+        documents_seen = Seen
     } = CAcc0,
-    CAcc1 = update_bookmark(CAcc0, Hit),
     Stats1 = mango_execution_stats:incr_docs_examined(Stats),
     couch_stats:increment_counter([mango, docs_examined]),
-    CAcc2 = CAcc1#cacc{execution_stats = Stats1},
-    case mango_selector:match(CAcc2#cacc.selector, Doc) of
-        true when Skip > 0 ->
-            CAcc2#cacc{skip = Skip - 1};
-        true when Limit == 0 ->
-            % We hit this case if the user spcified with a
-            % zero limit. Notice that in this case we need
-            % to return the bookmark from before this match
-            throw({stop, CAcc0});
-        true when Limit == 1 ->
-            NewCAcc = apply_user_fun(CAcc2, Doc),
-            throw({stop, NewCAcc});
-        true when Limit > 1 ->
-            NewCAcc = apply_user_fun(CAcc2, Doc),
-            NewCAcc#cacc{limit = Limit - 1};
+    CAcc1 = CAcc0#cacc{execution_stats = Stats1},
+    case mango_selector:match(CAcc1#cacc.selector, Doc) of
+        true ->
+            DocId = mango_doc:get_field(Doc, <<"_id">>),
+            case sets:is_element(DocId, Seen) of
+                true ->
+                    CAcc1;
+                false ->
+                    CAcc2 = update_bookmark(CAcc1, Hit),
+                    CAcc3 = CAcc2#cacc{
+                        documents_seen = sets:add_element(DocId, Seen)
+                    },
+                    if
+                        Skip > 0 ->
+                            CAcc3#cacc{skip = Skip - 1};
+                        Limit == 0 ->
+                            % We hit this case if the user specified a
+                            % zero limit. Notice that in this case we need
+                            % to return the bookmark from before this match.
+                            throw({stop, CAcc0});
+                        Limit == 1 ->
+                            CAcc4 = apply_user_fun(CAcc3, Doc),
+                            throw({stop, CAcc4});
+                        Limit > 1 ->
+                            CAcc4 = apply_user_fun(CAcc3, Doc),
+                            CAcc4#cacc{limit = Limit - 1}
+                    end
+            end;
         false ->
-            CAcc2
+            CAcc1
     end.
 
 apply_user_fun(CAcc, Doc) ->