You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by to...@apache.org on 2021/10/21 00:15:46 UTC

[couchdb] 01/01: fix case_clause when HitId and DocId do not match when include_docs=true

This is an automated email from the ASF dual-hosted git repository.

tonysun83 pushed a commit to branch fix-case-clause-dreyfus
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 653ab26dc545a8fbfb07723a64f01080be39a08d
Author: Tony Sun <to...@gmail.com>
AuthorDate: Wed Oct 20 17:07:06 2021 -0700

    fix case_clause when HitId and DocId do not match when include_docs=true
    
    Sometimes, a clouseau index may become out of sync with the db or
    become corrupt under certain conditions (heap exhaustion, garbage collection).
    This leads to certain HitIds that get returned which may not exist
    in the current db. Before this fix, we assume the Hit's Id will
    automatically match an Id return from:
    
    fabric:all_docs(DbName, fun callback/2, [], [{keys, DocIds}, {include_docs, true}]).
    
    When the document does not exist, {error, false} is returned for the row.
    This fix  accounts for this scenario and possible other scenarios
    by returning an error to the user and also logging the error when the
    Hit Id does not match what is returned from the _all_docs call.
---
 src/dreyfus/src/dreyfus_httpd.erl | 49 +++++++++++++++++++++++++++++++++------
 1 file changed, 42 insertions(+), 7 deletions(-)

diff --git a/src/dreyfus/src/dreyfus_httpd.erl b/src/dreyfus/src/dreyfus_httpd.erl
index 007dace..167086c 100644
--- a/src/dreyfus/src/dreyfus_httpd.erl
+++ b/src/dreyfus/src/dreyfus_httpd.erl
@@ -532,16 +532,18 @@ hits_to_json(DbName, IncludeDocs, Hits) ->
     if IncludeDocs ->
         chttpd_stats:incr_reads(length(Hits)),
         {ok, JsonDocs} = dreyfus_fabric:get_json_docs(DbName, Ids),
-        lists:zipwith(fun(Hit, {Id, Doc}) ->
+        lists:zipwith(fun
+            (Hit, {Id, Doc}) ->
                 case Hit of
-                    {Id, Order, Fields} ->
-                        {[{id, Id}, {order, Order}, {fields, {Fields}}, Doc]};
-                    {Id, Order, Fields, Highlights} ->
+                    {HitId, Order, Fields} ->
+                        Doc1 = hits_id_validator(HitId, {Id, Doc}),
+                        {[{id, Id}, {order, Order}, {fields, {Fields}}, Doc1]};
+                    {HitId, Order, Fields, Highlights} ->
+                        Doc1 = hits_id_validator(HitId, {Id, Doc}),
                         {[{id, Id}, {order, Order}, {fields, {Fields}},
-                          {highlights, {Highlights}}, Doc]}
+                          {highlights, {Highlights}}, Doc1]}
                 end
-            end, HitData, JsonDocs);
-
+        end, HitData, JsonDocs);
     true ->
         lists:map(fun(Hit) ->
                 case Hit of
@@ -553,6 +555,21 @@ hits_to_json(DbName, IncludeDocs, Hits) ->
           end, HitData)
     end.
 
+
+% _all_docs returned {error, false} meaning document  doesn't exist
+hits_id_validator(HitId, {error, false}) ->
+    couch_log:error("HitId: ~p does not exist in db. Index may be out of sync or corrupt", [HitId]),
+    {error, <<"not_found">>};
+hits_id_validator(HitId, {error, Reason}) ->
+    couch_log:error("HitId: ~p does not exist in db. Reason: ~p", [HitId, Reason]),
+    {error, Reason};
+hits_id_validator(HitId, {DocId, Doc}) when DocId =:= HitId ->
+    Doc;
+hits_id_validator(HitId, {DocId, _}) ->
+    couch_log:error("HitId: ~p DocId: ~p mismatch. Index maybe out of sync or corrupt.", [HitId, DocId]),
+    {error, <<"docid mismatch">>}.
+
+
 get_hit_data(Hit) ->
     Id = couch_util:get_value(<<"_id">>, Hit#hit.fields),
     Fields = lists:keydelete(<<"_id">>, 1, Hit#hit.fields),
@@ -564,6 +581,7 @@ get_hit_data(Hit) ->
             {Id, {Id, Hit#hit.order, Fields0, Highlights}}
     end.
 
+
 group_to_json(DbName, IncludeDocs, {Name, TotalHits, Hits}, UseNewApi) ->
     {TotalHitsKey, HitsKey} = case UseNewApi of
         true -> {total_rows, rows};
@@ -612,3 +630,20 @@ backoff_and_retry(Req, Db, DDoc, RetryCount, RetryPause, Error) ->
             timer:sleep(RetryPause),
             handle_search_req(Req, Db, DDoc, RetryCount + 1, RetryPause * 2)
     end.
+
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+
+hits_id_validator_test() ->
+    Doc = {doc, {[
+        {<<"_id">>, <<"foo">>},
+        {<<"_rev">>, <<"1-4c6114c65e295552ab1019e2b046b10e">>},
+        {<<"foo">>, <<"bar">>}
+    ]}},
+    ?assertEqual(Doc, hits_id_validator(<<"foo">>, {<<"foo">>, Doc})),
+    ?assertEqual({error, <<"not_found">>}, hits_id_validator(<<"foo>">>, {error, false})),
+    ?assertEqual({error, some_reason}, hits_id_validator(<<"foo>">>, {error, some_reason})),
+    ?assertEqual({error, <<"docid mismatch">>}, hits_id_validator(<<"bar">>, {<<"foo">>, Doc})).
+
+-endif.
\ No newline at end of file