Posted to commits@couchdb.apache.org by va...@apache.org on 2022/11/08 14:42:34 UTC

[couchdb] branch optimize-smoosh updated (d7feed4ac -> 7eaad646b)

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a change to branch optimize-smoosh
in repository https://gitbox.apache.org/repos/asf/couchdb.git


    from d7feed4ac Improve smoosh_priority_queue
     new 3d04371b2 Add warning comment about couch_db:get_design_docs/1
     new 7eaad646b Improve fabric index cleanup

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/couch/src/couch_db.erl                         |   5 +
 src/couch/test/exunit/fabric_test.exs              | 101 ---------
 src/couch_mrview/src/couch_mrview_cleanup.erl      |  97 +++++----
 src/couch_mrview/src/couch_mrview_util.erl         |  93 ++++++--
 .../test/eunit/couch_mrview_util_tests.erl         | 139 ++++++++++++
 src/fabric/src/fabric.erl                          |  74 ++-----
 src/fabric/test/eunit/fabric_tests.erl             | 237 ++++++++++++++++++---
 7 files changed, 500 insertions(+), 246 deletions(-)
 delete mode 100644 src/couch/test/exunit/fabric_test.exs


[couchdb] 02/02: Improve fabric index cleanup

Posted by va...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch optimize-smoosh
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 7eaad646b0fd0eaadbd7045d84e088dcc1b9a3c4
Author: Nick Vatamaniuc <va...@gmail.com>
AuthorDate: Mon Nov 7 22:42:14 2022 -0500

    Improve fabric index cleanup
    
     * Clean up stale view purge checkpoints. Previously we didn't, and
       inactive, lagging purge checkpoints left behind could have stalled
       purge progress.
    
     * couch_mrview_cleanup attempted to clean purge checkpoints, but that only
       worked for node-local databases, not clustered ones. Nowadays most dbs
       are clustered, so make sure those work as well.
    
     * DRY out the code shared between the fabric inactive index cleanup and
       the couch_mrview_cleanup module. Move some of the common code to the
       couch_mrview_util module. couch_mrview_cleanup is now the only place in
       charge of the cleanup logic (a sketch of the consolidated flow follows
       below).
    
     * Consolidate and improve tests. Utility functions to get all index files,
       purge checkpoints and signatures are now tested in the couch_mrview_util
       tests, and end-to-end fabric cleanup tests live in fabric_tests. Since
       fabric_tests covers all the test scenarios from fabric_test.exs, remove
       fabric_test.exs so we don't have duplicated tests while keeping the same
       coverage.
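    A minimal sketch of the consolidated flow, for illustration only. It
    assumes the new couch_mrview_util and couch_mrview_cleanup exports added
    in the diff below; the module name cleanup_sketch and the function
    cleanup_shard/1 are hypothetical and not part of this commit:

        -module(cleanup_sketch).
        -export([cleanup_shard/1]).

        %% Hypothetical helper: runs the same steps as couch_mrview_cleanup:run/1,
        %% but starting from a shard name binary rather than an open db handle.
        cleanup_shard(ShardName) when is_binary(ShardName) ->
            %% Everything currently on disk and in _local purge checkpoint docs
            Indices = couch_mrview_util:get_index_files(ShardName),
            Checkpoints = couch_mrview_util:get_purge_checkpoints(ShardName),
            %% Active signatures come from the design docs; anything not in
            %% this map is considered stale and gets cleaned up
            Sigs = couch_mrview_util:get_signatures(ShardName),
            ok = couch_mrview_cleanup:cleanup_purges(ShardName, Sigs, Checkpoints),
            ok = couch_mrview_cleanup:cleanup_indices(Sigs, Indices).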
---
 src/couch/test/exunit/fabric_test.exs              | 101 ---------
 src/couch_mrview/src/couch_mrview_cleanup.erl      |  97 +++++----
 src/couch_mrview/src/couch_mrview_util.erl         |  93 ++++++--
 .../test/eunit/couch_mrview_util_tests.erl         | 139 ++++++++++++
 src/fabric/src/fabric.erl                          |  74 ++-----
 src/fabric/test/eunit/fabric_tests.erl             | 237 ++++++++++++++++++---
 6 files changed, 495 insertions(+), 246 deletions(-)

diff --git a/src/couch/test/exunit/fabric_test.exs b/src/couch/test/exunit/fabric_test.exs
deleted file mode 100644
index bdb84e9a2..000000000
--- a/src/couch/test/exunit/fabric_test.exs
+++ /dev/null
@@ -1,101 +0,0 @@
-defmodule Couch.Test.Fabric do
-  use Couch.Test.ExUnit.Case
-  alias Couch.Test.Utils
-
-  alias Couch.Test.Setup
-
-  alias Couch.Test.Setup.Step
-
-  import Couch.DBTest
-
-  import Utils
-
-  @admin {:user_ctx, user_ctx(roles: ["_admin"])}
-
-  def with_db(context, setup) do
-    setup =
-      setup
-      |> Setup.Common.with_db()
-      |> Setup.run()
-
-    context =
-      Map.merge(context, %{
-        db_name: setup |> Setup.get(:db) |> Step.Create.DB.name()
-      })
-
-    {context, setup}
-  end
-
-  describe "Fabric miscellaneous API" do
-    @describetag setup: &__MODULE__.with_db/2
-    test "Get inactive_index_files", ctx do
-      {:ok, _rev} = update_doc(ctx.db_name, %{"_id" => "doc1"})
-
-      design_doc = %{
-        "_id" => "_design/test",
-        "language" => "javascript",
-        "views" => %{
-          "view" => %{
-            "map" => "function(doc){emit(doc._id, doc._rev)}"
-          }
-        }
-      }
-
-      {:ok, rev1} = update_doc(ctx.db_name, design_doc)
-      wait_sig_update(ctx.db_name, "test", "")
-      prev_active = get_active_sig(ctx.db_name, "test")
-
-      updated_design_doc =
-        put_in(design_doc, ["views", "view", "map"], "function(doc){emit(doc._id, null)}")
-
-      {:ok, rev2} =
-        update_doc(
-          ctx.db_name,
-          Map.put(updated_design_doc, "_rev", rev1)
-        )
-
-      assert rev1 != rev2
-      wait_sig_update(ctx.db_name, "test", prev_active)
-
-      {:ok, info} = :fabric.get_view_group_info(ctx.db_name, "_design/test")
-      active = info[:signature]
-
-      files = Enum.map(:fabric.inactive_index_files(ctx.db_name), &List.to_string/1)
-
-      assert [] != files, "We should have some inactive"
-
-      assert not Enum.any?(files, fn
-               file_path -> String.contains?(file_path, active)
-             end),
-             "We are not suppose to return active views"
-
-      assert Enum.all?(files, fn
-               file_path -> String.contains?(file_path, prev_active)
-             end),
-             "We expect all files to contain previous active signature"
-    end
-  end
-
-  defp update_doc(db_name, body) do
-    json_body = :jiffy.decode(:jiffy.encode(body))
-
-    case :fabric.update_doc(db_name, json_body, [@admin]) do
-      {:ok, rev} ->
-        {:ok, :couch_doc.rev_to_str(rev)}
-
-      error ->
-        error
-    end
-  end
-
-  defp get_active_sig(db_name, ddoc_id) do
-    {:ok, info} = :fabric.get_view_group_info(db_name, "_design/#{ddoc_id}")
-    info[:signature]
-  end
-
-  defp wait_sig_update(db_name, ddoc_id, prev_active) do
-    retry_until(fn ->
-      get_active_sig(db_name, ddoc_id) != prev_active
-    end)
-  end
-end
diff --git a/src/couch_mrview/src/couch_mrview_cleanup.erl b/src/couch_mrview/src/couch_mrview_cleanup.erl
index 417605c55..5b5afbdce 100644
--- a/src/couch_mrview/src/couch_mrview_cleanup.erl
+++ b/src/couch_mrview/src/couch_mrview_cleanup.erl
@@ -12,57 +12,62 @@
 
 -module(couch_mrview_cleanup).
 
--export([run/1]).
+-export([
+    run/1,
+    cleanup_purges/3,
+    cleanup_indices/2
+]).
 
 -include_lib("couch/include/couch_db.hrl").
--include_lib("couch_mrview/include/couch_mrview.hrl").
 
 run(Db) ->
-    RootDir = couch_index_util:root_dir(),
-    DbName = couch_db:name(Db),
+    Indices = couch_mrview_util:get_index_files(Db),
+    Checkpoints = couch_mrview_util:get_purge_checkpoints(Db),
+    {ok, Db1} = couch_db:reopen(Db),
+    Sigs = couch_mrview_util:get_signatures(Db1),
+    ok = cleanup_purges(Db1, Sigs, Checkpoints),
+    ok = cleanup_indices(Sigs, Indices).
 
-    {ok, DesignDocs} = couch_db:get_design_docs(Db),
-    SigFiles = lists:foldl(
-        fun(DDocInfo, SFAcc) ->
-            {ok, DDoc} = couch_db:open_doc_int(Db, DDocInfo, [ejson_body]),
-            {ok, InitState} = couch_mrview_util:ddoc_to_mrst(DbName, DDoc),
-            Sig = InitState#mrst.sig,
-            IFName = couch_mrview_util:index_file(DbName, Sig),
-            CFName = couch_mrview_util:compaction_file(DbName, Sig),
-            [IFName, CFName | SFAcc]
-        end,
-        [],
-        [DD || DD <- DesignDocs, DD#full_doc_info.deleted == false]
-    ),
+cleanup_purges(DbName, Sigs, Checkpoints) when is_binary(DbName) ->
+    couch_util:with_db(DbName, fun(Db) ->
+        cleanup_purges(Db, Sigs, Checkpoints)
+    end);
+cleanup_purges(Db, #{} = Sigs, #{} = CheckpointsMap) ->
+    InactiveMap = maps:without(maps:keys(Sigs), CheckpointsMap),
+    InactiveCheckpoints = maps:values(InactiveMap),
+    DeleteFun = fun(DocId) -> delete_checkpoint(Db, DocId) end,
+    lists:foreach(DeleteFun, InactiveCheckpoints).
 
-    IdxDir = couch_index_util:index_dir(mrview, DbName),
-    DiskFiles = filelib:wildcard(filename:join(IdxDir, "*")),
+cleanup_indices(#{} = Sigs, #{} = IndexMap) ->
+    Fun = fun(_, Files) -> lists:foreach(fun delete_file/1, Files) end,
+    maps:map(Fun, maps:without(maps:keys(Sigs), IndexMap)),
+    ok.
 
-    % We need to delete files that have no ddoc.
-    ToDelete = DiskFiles -- SigFiles,
+delete_file(File) ->
+    RootDir = couch_index_util:root_dir(),
+    couch_log:debug("~p : deleting inactive index : ~s", [?MODULE, File]),
+    try
+        couch_file:delete(RootDir, File, [sync])
+    catch
+        Tag:Error ->
+            ErrLog = "~p : error deleting inactive index file ~s ~p:~p",
+            couch_log:error(ErrLog, [?MODULE, File, Tag, Error]),
+            ok
+    end.
 
-    lists:foreach(
-        fun(FN) ->
-            couch_log:debug("Deleting stale view file: ~s", [FN]),
-            couch_file:delete(RootDir, FN, [sync]),
-            case couch_mrview_util:verify_view_filename(FN) of
-                true ->
-                    Sig = couch_mrview_util:get_signature_from_filename(FN),
-                    DocId = couch_mrview_util:get_local_purge_doc_id(Sig),
-                    case couch_db:open_doc(Db, DocId, []) of
-                        {ok, LocalPurgeDoc} ->
-                            couch_db:update_doc(
-                                Db,
-                                LocalPurgeDoc#doc{deleted = true},
-                                [?ADMIN_CTX]
-                            );
-                        {not_found, _} ->
-                            ok
-                    end;
-                false ->
-                    ok
-            end
-        end,
-        ToDelete
-    ),
-    ok.
+delete_checkpoint(Db, DocId) ->
+    DbName = couch_db:name(Db),
+    LogMsg = "~p : deleting inactive purge checkpoint ~s : ~s",
+    couch_log:debug(LogMsg, [?MODULE, DbName, DocId]),
+    try couch_db:open_doc(Db, DocId, []) of
+        {ok, Doc = #doc{}} ->
+            Deleted = Doc#doc{deleted = true, body = {[]}},
+            couch_db:update_doc(Db, Deleted, [?ADMIN_CTX]);
+        {not_found, _} ->
+            ok
+    catch
+        Tag:Error ->
+            ErrLog = "~p : error deleting checkpoint ~s : ~s error: ~p:~p",
+            couch_log:error(ErrLog, [?MODULE, DbName, DocId, Tag, Error]),
+            ok
+    end.
diff --git a/src/couch_mrview/src/couch_mrview_util.erl b/src/couch_mrview/src/couch_mrview_util.erl
index 9e3d292ed..65e1dd24c 100644
--- a/src/couch_mrview/src/couch_mrview_util.erl
+++ b/src/couch_mrview/src/couch_mrview_util.erl
@@ -15,6 +15,7 @@
 -export([get_view/4, get_view_index_pid/4]).
 -export([get_local_purge_doc_id/1, get_value_from_options/2]).
 -export([verify_view_filename/1, get_signature_from_filename/1]).
+-export([get_signatures/1, get_purge_checkpoints/1, get_index_files/1]).
 -export([ddoc_to_mrst/2, init_state/4, reset_index/3]).
 -export([make_header/1]).
 -export([index_file/2, compaction_file/2, open_file/1]).
@@ -53,6 +54,11 @@
         true -> B
     end)
 ).
+-define(IS_HEX(C),
+    ((C >= $0 andalso C =< $9) orelse
+        (C >= $a andalso C =< $f) orelse
+        (C >= $A andalso C =< $F))
+).
 
 -include_lib("couch/include/couch_db.hrl").
 -include_lib("couch_mrview/include/couch_mrview.hrl").
@@ -70,31 +76,78 @@ get_value_from_options(Key, Options) ->
     end.
 
 verify_view_filename(FileName) ->
-    FilePathList = filename:split(FileName),
-    PureFN = lists:last(FilePathList),
-    case filename:extension(PureFN) of
+    case filename:extension(FileName) of
         ".view" ->
-            Sig = filename:basename(PureFN),
-            case
-                [
-                    Ch
-                 || Ch <- Sig,
-                    not (((Ch >= $0) and (Ch =< $9)) orelse
-                        ((Ch >= $a) and (Ch =< $f)) orelse
-                        ((Ch >= $A) and (Ch =< $F)))
-                ] == []
-            of
-                true -> true;
-                false -> false
-            end;
+            Sig = get_signature_from_filename(FileName),
+            lists:all(fun(C) -> ?IS_HEX(C) end, Sig);
         _ ->
             false
     end.
 
-get_signature_from_filename(FileName) ->
-    FilePathList = filename:split(FileName),
-    PureFN = lists:last(FilePathList),
-    filename:basename(PureFN, ".view").
+get_signature_from_filename(Path) ->
+    filename:basename(filename:basename(Path, ".view"), ".compact").
+
+% Returns a map of `Sig => true` elements with all the active signatures.
+% Sig is a hex-encoded binary.
+%
+get_signatures(DbName) when is_binary(DbName) ->
+    couch_util:with_db(DbName, fun get_signatures/1);
+get_signatures(Db) ->
+    DbName = couch_db:name(Db),
+    % get_design_docs/1 returns ejson for clustered shards, and
+    % #full_doc_info{}'s for other cases.
+    {ok, DDocs} = couch_db:get_design_docs(Db),
+    FoldFun = fun
+        ({[_ | _]} = EJsonDoc, Acc) ->
+            Doc = couch_doc:from_json_obj(EJsonDoc),
+            {ok, Mrst} = ddoc_to_mrst(DbName, Doc),
+            Sig = couch_util:to_hex_bin(Mrst#mrst.sig),
+            Acc#{Sig => true};
+        (#full_doc_info{} = FDI, Acc) ->
+            {ok, Doc} = couch_db:open_doc_int(Db, FDI, [ejson_body]),
+            {ok, Mrst} = ddoc_to_mrst(DbName, Doc),
+            Sig = couch_util:to_hex_bin(Mrst#mrst.sig),
+            Acc#{Sig => true}
+    end,
+    lists:foldl(FoldFun, #{}, DDocs).
+
+% Returns a map of `Sig => DocId` elements for all the purge view
+% checkpoint docs. Sig is a hex-encoded binary.
+%
+get_purge_checkpoints(DbName) when is_binary(DbName) ->
+    couch_util:with_db(DbName, fun get_purge_checkpoints/1);
+get_purge_checkpoints(Db) ->
+    FoldFun = fun(#doc{id = Id}, Acc) ->
+        case Id of
+            <<?LOCAL_DOC_PREFIX, "purge-mrview-", Sig/binary>> ->
+                {ok, Acc#{Sig => Id}};
+            _ ->
+                {stop, Acc}
+        end
+    end,
+    Opts = [{start_key, <<?LOCAL_DOC_PREFIX, "purge-mrview-">>}],
+    {ok, Signatures = #{}} = couch_db:fold_local_docs(Db, FoldFun, #{}, Opts),
+    Signatures.
+
+% Returns a map of `Sig => [FilePath, ...]` elements. Sig is a hex-encoded
+% binary, but FilePaths are lists (strings) as they are intended to be passed
+% to couch_file and file module functions.
+%
+get_index_files(DbName) when is_binary(DbName) ->
+    IdxDir = couch_index_util:index_dir(mrview, DbName),
+    WildcardPath = filename:join(IdxDir, "*"),
+    FoldFun = fun(F, Acc) ->
+        case verify_view_filename(F) of
+            true ->
+                Sig = ?l2b(get_signature_from_filename(F)),
+                maps:update_with(Sig, fun(Fs) -> [F | Fs] end, [F], Acc);
+            false ->
+                Acc
+        end
+    end,
+    lists:foldl(FoldFun, #{}, filelib:wildcard(WildcardPath));
+get_index_files(Db) ->
+    get_index_files(couch_db:name(Db)).
 
 get_view(Db, DDoc, ViewName, Args0) ->
     case get_view_index_state(Db, DDoc, ViewName, Args0) of
diff --git a/src/couch_mrview/test/eunit/couch_mrview_util_tests.erl b/src/couch_mrview/test/eunit/couch_mrview_util_tests.erl
index a495fd82c..cda80255c 100644
--- a/src/couch_mrview/test/eunit/couch_mrview_util_tests.erl
+++ b/src/couch_mrview/test/eunit/couch_mrview_util_tests.erl
@@ -13,8 +13,11 @@
 -module(couch_mrview_util_tests).
 
 -include_lib("couch/include/couch_eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
 -include_lib("couch_mrview/include/couch_mrview.hrl").
 
+-define(DDOC_ID, <<"_design/bar">>).
+
 couch_mrview_util_test_() ->
     [
         ?_assertEqual(0, validate_group_level(undefined, undefined)),
@@ -35,3 +38,139 @@ validate_group_level(Group, GroupLevel) ->
     Args0 = #mrargs{group = Group, group_level = GroupLevel, view_type = red},
     Args1 = couch_mrview_util:validate_args(Args0),
     Args1#mrargs.group_level.
+
+get_signature_from_filename_test() ->
+    Sig = "da817c3d3f7413c1a610f25635a0c521",
+    P1 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521.view",
+    P2 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521.compact.view",
+    P3 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521",
+    ?assertEqual(Sig, couch_mrview_util:get_signature_from_filename(P1)),
+    ?assertEqual(Sig, couch_mrview_util:get_signature_from_filename(P2)),
+    ?assertEqual(Sig, couch_mrview_util:get_signature_from_filename(P3)).
+
+verify_view_filename_test() ->
+    P1 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521.view",
+    P2 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521.compact.view",
+    P3 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521",
+    ?assert(couch_mrview_util:verify_view_filename(P1)),
+    ?assert(couch_mrview_util:verify_view_filename(P2)),
+    ?assertNot(couch_mrview_util:verify_view_filename(P3)),
+    ?assertNot(couch_mrview_util:verify_view_filename("")),
+    ?assertNot(couch_mrview_util:verify_view_filename("foo.view")).
+
+setup() ->
+    DbName = ?tempdb(),
+    ok = fabric:create_db(DbName, [?ADMIN_CTX, {q, 2}]),
+    DDoc = couch_mrview_test_util:ddoc(map),
+    {ok, _} = fabric:update_doc(DbName, DDoc, [?ADMIN_CTX]),
+    {ok, _} = fabric:query_view(DbName, <<"bar">>, <<"baz">>, #mrargs{}),
+    {ok, Db} = couch_mrview_test_util:init_db(?tempdb(), map),
+    {ok, _} = couch_mrview:query_view(Db, ?DDOC_ID, <<"baz">>),
+    {DbName, Db}.
+
+teardown({DbName, Db}) ->
+    couch_db:close(Db),
+    couch_server:delete(couch_db:name(Db), [?ADMIN_CTX]),
+    ok = fabric:delete_db(DbName, [?ADMIN_CTX]).
+
+get_signatures_test_() ->
+    {
+        setup,
+        fun() -> test_util:start_couch([fabric]) end,
+        fun test_util:stop_couch/1,
+        {
+            foreach,
+            fun setup/0,
+            fun teardown/1,
+            [
+                ?TDEF_FE(t_get_signatures_local),
+                ?TDEF_FE(t_get_signatures_clustered),
+                ?TDEF_FE(t_get_purge_checkpoints_local),
+                ?TDEF_FE(t_get_purge_checkpoints_clustered),
+                ?TDEF_FE(t_get_index_files_local),
+                ?TDEF_FE(t_get_index_files_clustered)
+            ]
+        }
+    }.
+
+t_get_signatures_local({_, Db}) ->
+    DbName = couch_db:name(Db),
+    Sigs = couch_mrview_util:get_signatures(DbName),
+    ?assert(is_map(Sigs)),
+    ?assertEqual(1, map_size(Sigs)),
+    [{Sig, true}] = maps:to_list(Sigs),
+    {ok, Info} = couch_mrview:get_info(Db, ?DDOC_ID),
+    ?assertEqual(proplists:get_value(signature, Info), Sig),
+
+    {ok, DDoc} = couch_db:open_doc(Db, ?DDOC_ID, [?ADMIN_CTX]),
+    Deleted = DDoc#doc{deleted = true, body = {[]}},
+    {ok, _} = couch_db:update_doc(Db, Deleted, []),
+    ?assertEqual(#{}, couch_mrview_util:get_signatures(DbName)).
+
+t_get_signatures_clustered({DbName, _Db}) ->
+    [Shard1, Shard2] = mem3:local_shards(DbName),
+    ShardName1 = mem3:name(Shard1),
+    ShardName2 = mem3:name(Shard2),
+    Sigs = couch_mrview_util:get_signatures(ShardName1),
+    ?assertEqual(Sigs, couch_mrview_util:get_signatures(ShardName2)),
+    ?assert(is_map(Sigs)),
+    ?assertEqual(1, map_size(Sigs)),
+    [{Sig, true}] = maps:to_list(Sigs),
+    {ok, Info} = couch_mrview:get_info(ShardName1, ?DDOC_ID),
+    ?assertEqual(proplists:get_value(signature, Info), Sig),
+
+    {ok, DDoc} = fabric:open_doc(DbName, ?DDOC_ID, [?ADMIN_CTX]),
+    Deleted = DDoc#doc{deleted = true, body = {[]}},
+    {ok, _} = fabric:update_doc(DbName, Deleted, [?ADMIN_CTX]),
+    ?assertEqual(#{}, couch_mrview_util:get_signatures(ShardName1)),
+    ?assertEqual(#{}, couch_mrview_util:get_signatures(ShardName2)).
+
+t_get_purge_checkpoints_local({_, Db}) ->
+    DbName = couch_db:name(Db),
+    Sigs = couch_mrview_util:get_purge_checkpoints(DbName),
+    ?assert(is_map(Sigs)),
+    ?assertEqual(1, map_size(Sigs)),
+    [{Sig, <<"_local/", _/binary>>}] = maps:to_list(Sigs),
+    {ok, Info} = couch_mrview:get_info(Db, ?DDOC_ID),
+    ?assertEqual(proplists:get_value(signature, Info), Sig).
+
+t_get_purge_checkpoints_clustered({DbName, _Db}) ->
+    {ok, _} = fabric:query_view(DbName, <<"bar">>, <<"baz">>, #mrargs{}),
+    [Shard1, Shard2] = mem3:local_shards(DbName),
+    ShardName1 = mem3:name(Shard1),
+    ShardName2 = mem3:name(Shard2),
+    Sigs1 = couch_mrview_util:get_purge_checkpoints(ShardName1),
+    Sigs2 = couch_mrview_util:get_purge_checkpoints(ShardName2),
+    ?assertEqual(lists:sort(maps:keys(Sigs1)), lists:sort(maps:keys(Sigs2))),
+    ?assert(is_map(Sigs1)),
+    ?assertEqual(1, map_size(Sigs1)),
+    [{Sig, <<"_local/", _/binary>>}] = maps:to_list(Sigs1),
+    {ok, Info} = couch_mrview:get_info(ShardName1, ?DDOC_ID),
+    ?assertEqual(proplists:get_value(signature, Info), Sig).
+
+t_get_index_files_local({_, Db}) ->
+    DbName = couch_db:name(Db),
+    SigFilesMap = couch_mrview_util:get_index_files(DbName),
+    ?assert(is_map(SigFilesMap)),
+    ?assertEqual(1, map_size(SigFilesMap)),
+    [{Sig, [File]}] = maps:to_list(SigFilesMap),
+    ?assertMatch({ok, _}, file:read_file_info(File)),
+    {ok, Info} = couch_mrview:get_info(Db, ?DDOC_ID),
+    ?assertEqual(proplists:get_value(signature, Info), Sig).
+
+t_get_index_files_clustered({DbName, _Db}) ->
+    {ok, _} = fabric:query_view(DbName, <<"bar">>, <<"baz">>, #mrargs{}),
+    [Shard1, Shard2] = mem3:local_shards(DbName),
+    ShardName1 = mem3:name(Shard1),
+    ShardName2 = mem3:name(Shard2),
+    SigFilesMap1 = couch_mrview_util:get_index_files(ShardName1),
+    SigFilesMap2 = couch_mrview_util:get_index_files(ShardName2),
+    SigKeys1 = lists:sort(maps:keys(SigFilesMap1)),
+    SigKeys2 = lists:sort(maps:keys(SigFilesMap2)),
+    ?assertEqual(SigKeys1, SigKeys2),
+    ?assert(is_map(SigFilesMap1)),
+    ?assertEqual(1, map_size(SigFilesMap1)),
+    [{Sig, [File]}] = maps:to_list(SigFilesMap1),
+    ?assertMatch({ok, _}, file:read_file_info(File)),
+    {ok, Info} = couch_mrview:get_info(ShardName1, ?DDOC_ID),
+    ?assertEqual(proplists:get_value(signature, Info), Sig).
diff --git a/src/fabric/src/fabric.erl b/src/fabric/src/fabric.erl
index e61521da0..acd7f1190 100644
--- a/src/fabric/src/fabric.erl
+++ b/src/fabric/src/fabric.erl
@@ -65,7 +65,6 @@
     cleanup_index_files/1,
     cleanup_index_files_all_nodes/1,
     dbname/1,
-    inactive_index_files/1,
     db_uuids/1
 ]).
 
@@ -586,58 +585,34 @@ cleanup_index_files() ->
 -spec cleanup_index_files(dbname()) -> ok.
 cleanup_index_files(DbName) ->
     try
-        lists:foreach(
-            fun(File) ->
-                file:delete(File)
-            end,
-            inactive_index_files(DbName)
-        )
+        ShardNames = [mem3:name(S) || S <- mem3:local_shards(dbname(DbName))],
+        cleanup_local_indices_and_purge_checkpoints(ShardNames)
     catch
-        error:Error ->
-            couch_log:error(
-                "~p:cleanup_index_files. Error: ~p",
-                [?MODULE, Error]
-            ),
+        error:database_does_not_exist ->
             ok
     end.
 
-%% @doc inactive index files for a specific db
--spec inactive_index_files(dbname()) -> ok.
-inactive_index_files(DbName) ->
-    {ok, DesignDocs} = fabric:design_docs(DbName),
-
-    ActiveSigs = maps:from_list(
-        lists:map(
-            fun(#doc{id = GroupId}) ->
-                {ok, Info} = fabric:get_view_group_info(DbName, GroupId),
-                {binary_to_list(couch_util:get_value(signature, Info)), nil}
-            end,
-            [couch_doc:from_json_obj(DD) || DD <- DesignDocs]
-        )
-    ),
-
-    FileList = lists:flatmap(
-        fun(#shard{name = ShardName}) ->
-            IndexDir = couch_index_util:index_dir(mrview, ShardName),
-            filelib:wildcard([IndexDir, "/*"])
-        end,
-        mem3:local_shards(dbname(DbName))
-    ),
+cleanup_local_indices_and_purge_checkpoints([]) ->
+    ok;
+cleanup_local_indices_and_purge_checkpoints([_ | _] = Dbs) ->
+    AllIndices = lists:map(fun couch_mrview_util:get_index_files/1, Dbs),
+    AllPurges = lists:map(fun couch_mrview_util:get_purge_checkpoints/1, Dbs),
+    Sigs = couch_mrview_util:get_signatures(hd(Dbs)),
+    ok = cleanup_purges(Sigs, AllPurges, Dbs),
+    ok = cleanup_indices(Sigs, AllIndices).
+
+cleanup_purges(Sigs, AllPurges, Dbs) ->
+    Fun = fun(DbPurges, Db) ->
+        couch_mrview_cleanup:cleanup_purges(Db, Sigs, DbPurges)
+    end,
+    lists:zipwith(Fun, AllPurges, Dbs),
+    ok.
 
-    if
-        ActiveSigs =:= [] ->
-            FileList;
-        true ->
-            %% <sig>.view and <sig>.compact.view where <sig> is in ActiveSigs
-            %% will be excluded from FileList because they are active view
-            %% files and should not be deleted.
-            lists:filter(
-                fun(FilePath) ->
-                    not maps:is_key(get_view_sig_from_filename(FilePath), ActiveSigs)
-                end,
-                FileList
-            )
-    end.
+cleanup_indices(Sigs, AllIndices) ->
+    Fun = fun(DbIndices) ->
+        couch_mrview_cleanup:cleanup_indices(Sigs, DbIndices)
+    end,
+    lists:foreach(Fun, AllIndices).
 
 %% @doc clean up index files for a specific db on all nodes
 -spec cleanup_index_files_all_nodes(dbname()) -> [reference()].
@@ -791,9 +766,6 @@ kl_to_record(KeyList, RecName) ->
 set_namespace(NS, #mrargs{extra = Extra} = Args) ->
     Args#mrargs{extra = [{namespace, NS} | Extra]}.
 
-get_view_sig_from_filename(FilePath) ->
-    filename:basename(filename:basename(FilePath, ".view"), ".compact").
-
 -ifdef(TEST).
 -include_lib("eunit/include/eunit.hrl").
 
diff --git a/src/fabric/test/eunit/fabric_tests.erl b/src/fabric/test/eunit/fabric_tests.erl
index c0e2b626b..9679806ef 100644
--- a/src/fabric/test/eunit/fabric_tests.erl
+++ b/src/fabric/test/eunit/fabric_tests.erl
@@ -12,48 +12,229 @@
 
 -module(fabric_tests).
 
+-include_lib("couch/include/couch_db.hrl").
 -include_lib("couch/include/couch_eunit.hrl").
 
 cleanup_index_files_test_() ->
     {
-        setup,
+        foreach,
         fun setup/0,
         fun teardown/1,
-        fun(Ctx) ->
-            [
-                t_cleanup_index_files(),
-                t_cleanup_index_files_with_existing_db(Ctx),
-                t_cleanup_index_files_with_deleted_db(Ctx)
-            ]
-        end
+        [
+            ?TDEF_FE(t_cleanup_index_files),
+            ?TDEF_FE(t_cleanup_index_files_with_existing_db),
+            ?TDEF_FE(t_cleanup_index_files_with_view_data),
+            ?TDEF_FE(t_cleanup_index_files_with_deleted_db),
+            ?TDEF_FE(t_cleanup_index_file_after_ddoc_update),
+            ?TDEF_FE(t_cleanup_index_file_after_ddoc_delete)
+        ]
     }.
 
 setup() ->
     Ctx = test_util:start_couch([fabric]),
-    % TempDb is deleted in the test "t_cleanup_index_files_with_deleted_db".
-    TempDb = ?tempdb(),
-    fabric:create_db(TempDb),
-    {Ctx, TempDb}.
+    DbName = ?tempdb(),
+    fabric:create_db(DbName, [{q, 1}]),
+    create_ddoc(DbName, <<"_design/foo">>, <<"bar">>),
+    {ok, _} = fabric:query_view(DbName, <<"foo">>, <<"bar">>),
+    create_ddoc(DbName, <<"_design/boo">>, <<"baz">>),
+    {ok, _} = fabric:query_view(DbName, <<"boo">>, <<"baz">>),
+    {Ctx, DbName}.
 
-teardown({Ctx, _TempDb}) ->
+teardown({Ctx, DbName}) ->
+    fabric:delete_db(DbName),
     test_util:stop_couch(Ctx).
 
-t_cleanup_index_files() ->
-    ?_assert(
-        lists:all(fun(Res) -> Res =:= ok end, fabric:cleanup_index_files())
+t_cleanup_index_files(_) ->
+    CheckFun = fun(Res) -> Res =:= ok end,
+    ?assert(lists:all(CheckFun, fabric:cleanup_index_files())).
+
+t_cleanup_index_files_with_existing_db({_, DbName}) ->
+    ?assertEqual(ok, fabric:cleanup_index_files(DbName)).
+
+t_cleanup_index_files_with_view_data({_, DbName}) ->
+    Sigs = sigs(DbName),
+    Indices = indices(DbName),
+    Purges = purges(DbName),
+    ok = fabric:cleanup_index_files(DbName),
+    % We haven't inadvertently removed any active index bits
+    ?assertEqual(Sigs, sigs(DbName)),
+    ?assertEqual(Indices, indices(DbName)),
+    ?assertEqual(Purges, purges(DbName)).
+
+t_cleanup_index_files_with_deleted_db(_) ->
+    SomeDb = ?tempdb(),
+    ?assertEqual(ok, fabric:cleanup_index_files(SomeDb)).
+
+t_cleanup_index_file_after_ddoc_update({_, DbName}) ->
+    ?assertEqual(
+        [
+            "4bcdf852098ff6b0578ddf472c320e9c.view",
+            "da817c3d3f7413c1a610f25635a0c521.view"
+        ],
+        indices(DbName)
+    ),
+    ?assertEqual(
+        [
+            <<"_local/purge-mrview-4bcdf852098ff6b0578ddf472c320e9c">>,
+            <<"_local/purge-mrview-da817c3d3f7413c1a610f25635a0c521">>
+        ],
+        purges(DbName)
+    ),
+
+    update_ddoc(DbName, <<"_design/foo">>, <<"bar1">>),
+    ok = fabric:cleanup_index_files(DbName),
+    {ok, _} = fabric:query_view(DbName, <<"foo">>, <<"bar1">>),
+
+    % One 4bc stays, da8 should be gone, and 9e3 is added
+    ?assertEqual(
+        [
+            "4bcdf852098ff6b0578ddf472c320e9c.view",
+            "9e355b0fee411b4257036b8fca56f263.view"
+        ],
+        indices(DbName)
+    ),
+    ?assertEqual(
+        [
+            <<"_local/purge-mrview-4bcdf852098ff6b0578ddf472c320e9c">>,
+            <<"_local/purge-mrview-9e355b0fee411b4257036b8fca56f263">>
+        ],
+        purges(DbName)
     ).
 
-t_cleanup_index_files_with_existing_db({_Ctx, TempDb}) ->
-    ?_assertEqual(ok, fabric:cleanup_index_files(TempDb)).
+t_cleanup_index_file_after_ddoc_delete({_, DbName}) ->
+    ?assertEqual(
+        [
+            "4bcdf852098ff6b0578ddf472c320e9c.view",
+            "da817c3d3f7413c1a610f25635a0c521.view"
+        ],
+        indices(DbName)
+    ),
+    ?assertEqual(
+        [
+            <<"_local/purge-mrview-4bcdf852098ff6b0578ddf472c320e9c">>,
+            <<"_local/purge-mrview-da817c3d3f7413c1a610f25635a0c521">>
+        ],
+        purges(DbName)
+    ),
+
+    delete_ddoc(DbName, <<"_design/foo">>),
+    ok = fabric:cleanup_index_files(DbName),
+
+    % 4bc stays the same, da8 should be gone
+    ?assertEqual(
+        [
+            "4bcdf852098ff6b0578ddf472c320e9c.view"
+        ],
+        indices(DbName)
+    ),
+    ?assertEqual(
+        [
+            <<"_local/purge-mrview-4bcdf852098ff6b0578ddf472c320e9c">>
+        ],
+        purges(DbName)
+    ),
+
+    delete_ddoc(DbName, <<"_design/boo">>),
+    ok = fabric:cleanup_index_files(DbName),
+
+    ?assertEqual([], indices(DbName)),
+    ?assertEqual([], purges(DbName)),
+
+    % cleaning a db with all deleted indices should still work
+    ok = fabric:cleanup_index_files(DbName),
 
-t_cleanup_index_files_with_deleted_db({_Ctx, TempDb}) ->
-    ?_test(
-        begin
-            fabric:delete_db(TempDb, []),
-            ?assertError(
-                database_does_not_exist,
-                fabric:inactive_index_files(TempDb)
+    ?assertEqual([], indices(DbName)),
+    ?assertEqual([], purges(DbName)).
+
+shard_names(DbName) ->
+    [mem3:name(S) || S <- mem3:local_shards(DbName)].
+
+% Sorted list of sigs
+%
+sigs(DbName) ->
+    case shard_names(DbName) of
+        [] ->
+            [];
+        [SomeDb | _] ->
+            Sigs = couch_mrview_util:get_signatures(SomeDb),
+            lists:sort(maps:keys(Sigs))
+    end.
+
+% Sorted list of index files
+%
+indices(DbName) ->
+    case shard_names(DbName) of
+        [] ->
+            [];
+        [_ | _] = Dbs ->
+            AllIndices = lists:map(fun couch_mrview_util:get_index_files/1, Dbs),
+            AsList = lists:sort(
+                lists:foldl(
+                    fun(Indices, Acc) ->
+                        maps:values(Indices) ++ Acc
+                    end,
+                    [],
+                    AllIndices
+                )
             ),
-            ?assertEqual(ok, fabric:cleanup_index_files(TempDb))
-        end
-    ).
+            % Keep only file names and extensions. Since we use q=1, we shouldn't
+            % have any duplicates
+            [filename:basename(F) || F <- AsList]
+    end.
+
+% Sorted list of purge checkpoint doc ids
+%
+purges(DbName) ->
+    case shard_names(DbName) of
+        [] ->
+            [];
+        [_ | _] = Dbs ->
+            AllPurges = lists:map(fun couch_mrview_util:get_purge_checkpoints/1, Dbs),
+            lists:sort(
+                lists:foldl(
+                    fun(Purges, Acc) ->
+                        maps:values(Purges) ++ Acc
+                    end,
+                    [],
+                    AllPurges
+                )
+            )
+    end.
+
+create_ddoc(DbName, DDName, ViewName) ->
+    DDoc = couch_doc:from_json_obj(
+        {[
+            {<<"_id">>, DDName},
+            {<<"language">>, <<"javascript">>},
+            {<<"views">>,
+                {[
+                    {ViewName,
+                        {[
+                            {<<"map">>, <<"function(doc) { emit(doc.value, null); }">>}
+                        ]}}
+                ]}}
+        ]}
+    ),
+    fabric:update_doc(DbName, DDoc, [?ADMIN_CTX]).
+
+update_ddoc(DbName, DDName, ViewName) ->
+    {ok, DDoc0} = fabric:open_doc(DbName, DDName, [?ADMIN_CTX]),
+    DDoc = DDoc0#doc{
+        body =
+            {[
+                {<<"language">>, <<"javascript">>},
+                {<<"views">>,
+                    {[
+                        {ViewName,
+                            {[
+                                {<<"map">>, <<"function(doc) { emit(doc.value, 1); }">>}
+                            ]}}
+                    ]}}
+            ]}
+    },
+    fabric:update_doc(DbName, DDoc, [?ADMIN_CTX]).
+
+delete_ddoc(DbName, DDName) ->
+    {ok, DDoc0} = fabric:open_doc(DbName, DDName, [?ADMIN_CTX]),
+    DDoc = DDoc0#doc{deleted = true, body = {[]}},
+    fabric:update_doc(DbName, DDoc, [?ADMIN_CTX]).


[couchdb] 01/02: Add warning comment about couch_db:get_design_docs/1

Posted by va...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch optimize-smoosh
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 3d04371b2fd93f72b91ff6cdface824c56b18ec2
Author: Nick Vatamaniuc <va...@gmail.com>
AuthorDate: Mon Nov 7 17:01:49 2022 -0500

    Add warning comment about couch_db:get_design_docs/1
    
    It turns out it returns different document types, ejson vs #full_doc_info{},
    depending on whether the db name starts with "shards/" or not (see the
    sketch below).
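    A minimal sketch of how a caller might normalize both shapes, mirroring
    the get_signatures/1 fold added in the companion commit. The module name
    ddoc_shapes_sketch and the helper to_doc/2 are hypothetical, not part of
    this commit:

        -module(ddoc_shapes_sketch).
        -export([to_doc/2]).

        -include_lib("couch/include/couch_db.hrl").

        %% Hypothetical helper: turn one element of a couch_db:get_design_docs/1
        %% result into a #doc{}. Clustered shard dbs return ejson objects, while
        %% node-local dbs return #full_doc_info{} records.
        to_doc(_Db, {[_ | _]} = EJsonDoc) ->
            couch_doc:from_json_obj(EJsonDoc);
        to_doc(Db, #full_doc_info{} = FDI) ->
            {ok, Doc} = couch_db:open_doc_int(Db, FDI, [ejson_body]),
            Doc.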
---
 src/couch/src/couch_db.erl | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl
index 969b93636..cb4936193 100644
--- a/src/couch/src/couch_db.erl
+++ b/src/couch/src/couch_db.erl
@@ -687,6 +687,11 @@ get_design_doc(#db{} = Db, DDocId0) ->
     DDocId = couch_util:normalize_ddoc_id(DDocId0),
     couch_db:open_doc_int(Db, DDocId, [ejson_body]).
 
+% Note: get_design_docs/1 for clustered db shards returns docs as ejson {[_ |
+% _]} and for simple node-local dbs it returns #full_doc_info{} records. To
+% obtain ejson docs in that case one would need to call
+% couch_db:open_doc_int(Db, FDI, [ejson_body]) for each one of them.
+%
 get_design_docs(#db{name = <<"shards/", _/binary>> = ShardDbName}) ->
     DbName = mem3:dbname(ShardDbName),
     {_, Ref} = spawn_monitor(fun() -> exit(fabric:design_docs(DbName)) end),