You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2022/11/10 22:51:57 UTC

[couchdb] branch main updated (21dfdf504 -> c5fb09480)

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git


    from 21dfdf504 Fixing whitspaces of config option admin_only_all_dbs (#4256)
     new a02c483cc Improve fabric index cleanup
     new c5fb09480 Update couch_mrview_debug with a few new functions

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/couch/src/couch_db.erl                         |   5 +
 src/couch/test/exunit/fabric_test.exs              | 101 ---------
 src/couch_mrview/src/couch_mrview_cleanup.erl      |  97 +++++----
 src/couch_mrview/src/couch_mrview_debug.erl        | 128 ++++++++++-
 src/couch_mrview/src/couch_mrview_util.erl         |  93 ++++++--
 .../test/eunit/couch_mrview_util_tests.erl         | 139 ++++++++++++
 src/fabric/src/fabric.erl                          |  74 ++-----
 src/fabric/test/eunit/fabric_tests.erl             | 237 ++++++++++++++++++---
 8 files changed, 626 insertions(+), 248 deletions(-)
 delete mode 100644 src/couch/test/exunit/fabric_test.exs


[couchdb] 01/02: Improve fabric index cleanup

Posted by va...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit a02c483ccf7a6f367eeb37325f520a66ae67d012
Author: Nick Vatamaniuc <va...@gmail.com>
AuthorDate: Mon Nov 7 22:42:14 2022 -0500

    Improve fabric index cleanup
    
     * Clean-up stale view purge checkpoints. Previously we didn't and purge
       progress could have stalled by keeping around inactive(lagging) lagging
       purge checkpoints.
    
     * couch_mrview_cleanup attempted to clean purge checkpoints but that didn't
       work for clustered databases, only for local ones. Nowadays most dbs
       are clustered so make sure those work as well.
    
     * DRY-out code from both fabric inactive index cleanup and
       couch_mrview_cleanup modules. Move some of the common code to
       couch_mrview_util module. couch_mrvew_cleanup is the only place in charge
       the cleanup logic now.
    
     * Consolidate and improve tests. Utility functions to get all index files,
       purge checkpoint and signatures are now tested with couch_mrview_util tests,
       and end-to-end fabric cleanup tests are in fabric_tests. Since fabirc_tests
       covers all the test scenarios from fabric_test.exs, remove fabric_test.exs
       so we don't have test duplicated and get same coverage.
---
 src/couch/src/couch_db.erl                         |   5 +
 src/couch/test/exunit/fabric_test.exs              | 101 ---------
 src/couch_mrview/src/couch_mrview_cleanup.erl      |  97 +++++----
 src/couch_mrview/src/couch_mrview_util.erl         |  93 ++++++--
 .../test/eunit/couch_mrview_util_tests.erl         | 139 ++++++++++++
 src/fabric/src/fabric.erl                          |  74 ++-----
 src/fabric/test/eunit/fabric_tests.erl             | 237 ++++++++++++++++++---
 7 files changed, 500 insertions(+), 246 deletions(-)

diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl
index 969b93636..cb4936193 100644
--- a/src/couch/src/couch_db.erl
+++ b/src/couch/src/couch_db.erl
@@ -687,6 +687,11 @@ get_design_doc(#db{} = Db, DDocId0) ->
     DDocId = couch_util:normalize_ddoc_id(DDocId0),
     couch_db:open_doc_int(Db, DDocId, [ejson_body]).
 
+% Note: get_design_docs/1 for clustered db shards returns docs as ejson {[_ |
+% _]} and for simple node-local dbs it returns #full_docs_info{} records. To
+% obtain ejson docs in that case would need to call couch_db:open_doc_int(Db,
+% FDI, [ejson_body]) for each one of them.
+%
 get_design_docs(#db{name = <<"shards/", _/binary>> = ShardDbName}) ->
     DbName = mem3:dbname(ShardDbName),
     {_, Ref} = spawn_monitor(fun() -> exit(fabric:design_docs(DbName)) end),
diff --git a/src/couch/test/exunit/fabric_test.exs b/src/couch/test/exunit/fabric_test.exs
deleted file mode 100644
index bdb84e9a2..000000000
--- a/src/couch/test/exunit/fabric_test.exs
+++ /dev/null
@@ -1,101 +0,0 @@
-defmodule Couch.Test.Fabric do
-  use Couch.Test.ExUnit.Case
-  alias Couch.Test.Utils
-
-  alias Couch.Test.Setup
-
-  alias Couch.Test.Setup.Step
-
-  import Couch.DBTest
-
-  import Utils
-
-  @admin {:user_ctx, user_ctx(roles: ["_admin"])}
-
-  def with_db(context, setup) do
-    setup =
-      setup
-      |> Setup.Common.with_db()
-      |> Setup.run()
-
-    context =
-      Map.merge(context, %{
-        db_name: setup |> Setup.get(:db) |> Step.Create.DB.name()
-      })
-
-    {context, setup}
-  end
-
-  describe "Fabric miscellaneous API" do
-    @describetag setup: &__MODULE__.with_db/2
-    test "Get inactive_index_files", ctx do
-      {:ok, _rev} = update_doc(ctx.db_name, %{"_id" => "doc1"})
-
-      design_doc = %{
-        "_id" => "_design/test",
-        "language" => "javascript",
-        "views" => %{
-          "view" => %{
-            "map" => "function(doc){emit(doc._id, doc._rev)}"
-          }
-        }
-      }
-
-      {:ok, rev1} = update_doc(ctx.db_name, design_doc)
-      wait_sig_update(ctx.db_name, "test", "")
-      prev_active = get_active_sig(ctx.db_name, "test")
-
-      updated_design_doc =
-        put_in(design_doc, ["views", "view", "map"], "function(doc){emit(doc._id, null)}")
-
-      {:ok, rev2} =
-        update_doc(
-          ctx.db_name,
-          Map.put(updated_design_doc, "_rev", rev1)
-        )
-
-      assert rev1 != rev2
-      wait_sig_update(ctx.db_name, "test", prev_active)
-
-      {:ok, info} = :fabric.get_view_group_info(ctx.db_name, "_design/test")
-      active = info[:signature]
-
-      files = Enum.map(:fabric.inactive_index_files(ctx.db_name), &List.to_string/1)
-
-      assert [] != files, "We should have some inactive"
-
-      assert not Enum.any?(files, fn
-               file_path -> String.contains?(file_path, active)
-             end),
-             "We are not suppose to return active views"
-
-      assert Enum.all?(files, fn
-               file_path -> String.contains?(file_path, prev_active)
-             end),
-             "We expect all files to contain previous active signature"
-    end
-  end
-
-  defp update_doc(db_name, body) do
-    json_body = :jiffy.decode(:jiffy.encode(body))
-
-    case :fabric.update_doc(db_name, json_body, [@admin]) do
-      {:ok, rev} ->
-        {:ok, :couch_doc.rev_to_str(rev)}
-
-      error ->
-        error
-    end
-  end
-
-  defp get_active_sig(db_name, ddoc_id) do
-    {:ok, info} = :fabric.get_view_group_info(db_name, "_design/#{ddoc_id}")
-    info[:signature]
-  end
-
-  defp wait_sig_update(db_name, ddoc_id, prev_active) do
-    retry_until(fn ->
-      get_active_sig(db_name, ddoc_id) != prev_active
-    end)
-  end
-end
diff --git a/src/couch_mrview/src/couch_mrview_cleanup.erl b/src/couch_mrview/src/couch_mrview_cleanup.erl
index 417605c55..5b5afbdce 100644
--- a/src/couch_mrview/src/couch_mrview_cleanup.erl
+++ b/src/couch_mrview/src/couch_mrview_cleanup.erl
@@ -12,57 +12,62 @@
 
 -module(couch_mrview_cleanup).
 
--export([run/1]).
+-export([
+    run/1,
+    cleanup_purges/3,
+    cleanup_indices/2
+]).
 
 -include_lib("couch/include/couch_db.hrl").
--include_lib("couch_mrview/include/couch_mrview.hrl").
 
 run(Db) ->
-    RootDir = couch_index_util:root_dir(),
-    DbName = couch_db:name(Db),
+    Indices = couch_mrview_util:get_index_files(Db),
+    Checkpoints = couch_mrview_util:get_purge_checkpoints(Db),
+    {ok, Db1} = couch_db:reopen(Db),
+    Sigs = couch_mrview_util:get_signatures(Db1),
+    ok = cleanup_purges(Db1, Sigs, Checkpoints),
+    ok = cleanup_indices(Sigs, Indices).
 
-    {ok, DesignDocs} = couch_db:get_design_docs(Db),
-    SigFiles = lists:foldl(
-        fun(DDocInfo, SFAcc) ->
-            {ok, DDoc} = couch_db:open_doc_int(Db, DDocInfo, [ejson_body]),
-            {ok, InitState} = couch_mrview_util:ddoc_to_mrst(DbName, DDoc),
-            Sig = InitState#mrst.sig,
-            IFName = couch_mrview_util:index_file(DbName, Sig),
-            CFName = couch_mrview_util:compaction_file(DbName, Sig),
-            [IFName, CFName | SFAcc]
-        end,
-        [],
-        [DD || DD <- DesignDocs, DD#full_doc_info.deleted == false]
-    ),
+cleanup_purges(DbName, Sigs, Checkpoints) when is_binary(DbName) ->
+    couch_util:with_db(DbName, fun(Db) ->
+        cleanup_purges(Db, Sigs, Checkpoints)
+    end);
+cleanup_purges(Db, #{} = Sigs, #{} = CheckpointsMap) ->
+    InactiveMap = maps:without(maps:keys(Sigs), CheckpointsMap),
+    InactiveCheckpoints = maps:values(InactiveMap),
+    DeleteFun = fun(DocId) -> delete_checkpoint(Db, DocId) end,
+    lists:foreach(DeleteFun, InactiveCheckpoints).
 
-    IdxDir = couch_index_util:index_dir(mrview, DbName),
-    DiskFiles = filelib:wildcard(filename:join(IdxDir, "*")),
+cleanup_indices(#{} = Sigs, #{} = IndexMap) ->
+    Fun = fun(_, Files) -> lists:foreach(fun delete_file/1, Files) end,
+    maps:map(Fun, maps:without(maps:keys(Sigs), IndexMap)),
+    ok.
 
-    % We need to delete files that have no ddoc.
-    ToDelete = DiskFiles -- SigFiles,
+delete_file(File) ->
+    RootDir = couch_index_util:root_dir(),
+    couch_log:debug("~p : deleting inactive index : ~s", [?MODULE, File]),
+    try
+        couch_file:delete(RootDir, File, [sync])
+    catch
+        Tag:Error ->
+            ErrLog = "~p : error deleting inactive index file ~s ~p:~p",
+            couch_log:error(ErrLog, [?MODULE, File, Tag, Error]),
+            ok
+    end.
 
-    lists:foreach(
-        fun(FN) ->
-            couch_log:debug("Deleting stale view file: ~s", [FN]),
-            couch_file:delete(RootDir, FN, [sync]),
-            case couch_mrview_util:verify_view_filename(FN) of
-                true ->
-                    Sig = couch_mrview_util:get_signature_from_filename(FN),
-                    DocId = couch_mrview_util:get_local_purge_doc_id(Sig),
-                    case couch_db:open_doc(Db, DocId, []) of
-                        {ok, LocalPurgeDoc} ->
-                            couch_db:update_doc(
-                                Db,
-                                LocalPurgeDoc#doc{deleted = true},
-                                [?ADMIN_CTX]
-                            );
-                        {not_found, _} ->
-                            ok
-                    end;
-                false ->
-                    ok
-            end
-        end,
-        ToDelete
-    ),
-    ok.
+delete_checkpoint(Db, DocId) ->
+    DbName = couch_db:name(Db),
+    LogMsg = "~p : deleting inactive purge checkpoint ~s : ~s",
+    couch_log:debug(LogMsg, [?MODULE, DbName, DocId]),
+    try couch_db:open_doc(Db, DocId, []) of
+        {ok, Doc = #doc{}} ->
+            Deleted = Doc#doc{deleted = true, body = {[]}},
+            couch_db:update_doc(Db, Deleted, [?ADMIN_CTX]);
+        {not_found, _} ->
+            ok
+    catch
+        Tag:Error ->
+            ErrLog = "~p : error deleting checkpoint ~s : ~s error: ~p:~p",
+            couch_log:error(ErrLog, [?MODULE, DbName, DocId, Tag, Error]),
+            ok
+    end.
diff --git a/src/couch_mrview/src/couch_mrview_util.erl b/src/couch_mrview/src/couch_mrview_util.erl
index 9e3d292ed..e1e75f34f 100644
--- a/src/couch_mrview/src/couch_mrview_util.erl
+++ b/src/couch_mrview/src/couch_mrview_util.erl
@@ -15,6 +15,7 @@
 -export([get_view/4, get_view_index_pid/4]).
 -export([get_local_purge_doc_id/1, get_value_from_options/2]).
 -export([verify_view_filename/1, get_signature_from_filename/1]).
+-export([get_signatures/1, get_purge_checkpoints/1, get_index_files/1]).
 -export([ddoc_to_mrst/2, init_state/4, reset_index/3]).
 -export([make_header/1]).
 -export([index_file/2, compaction_file/2, open_file/1]).
@@ -53,6 +54,11 @@
         true -> B
     end)
 ).
+-define(IS_HEX(C),
+    ((C >= $0 andalso C =< $9) orelse
+        (C >= $a andalso C =< $f) orelse
+        (C >= $A andalso C =< $F))
+).
 
 -include_lib("couch/include/couch_db.hrl").
 -include_lib("couch_mrview/include/couch_mrview.hrl").
@@ -70,31 +76,78 @@ get_value_from_options(Key, Options) ->
     end.
 
 verify_view_filename(FileName) ->
-    FilePathList = filename:split(FileName),
-    PureFN = lists:last(FilePathList),
-    case filename:extension(PureFN) of
+    case filename:extension(FileName) of
         ".view" ->
-            Sig = filename:basename(PureFN),
-            case
-                [
-                    Ch
-                 || Ch <- Sig,
-                    not (((Ch >= $0) and (Ch =< $9)) orelse
-                        ((Ch >= $a) and (Ch =< $f)) orelse
-                        ((Ch >= $A) and (Ch =< $F)))
-                ] == []
-            of
-                true -> true;
-                false -> false
-            end;
+            Sig = get_signature_from_filename(FileName),
+            lists:all(fun(C) -> ?IS_HEX(C) end, Sig);
         _ ->
             false
     end.
 
-get_signature_from_filename(FileName) ->
-    FilePathList = filename:split(FileName),
-    PureFN = lists:last(FilePathList),
-    filename:basename(PureFN, ".view").
+get_signature_from_filename(Path) ->
+    filename:basename(filename:basename(Path, ".view"), ".compact").
+
+% Returns map of `Sig => true` elements with all the active signatures.
+% Sig is a hex-encoded binary.
+%
+get_signatures(DbName) when is_binary(DbName) ->
+    couch_util:with_db(DbName, fun get_signatures/1);
+get_signatures(Db) ->
+    DbName = couch_db:name(Db),
+    % get_design_docs/1 returns ejson for clustered shards, and
+    % #full_doc_info{}'s for other cases.
+    {ok, DDocs} = couch_db:get_design_docs(Db),
+    FoldFun = fun
+        ({[_ | _]} = EJsonDoc, Acc) ->
+            Doc = couch_doc:from_json_obj(EJsonDoc),
+            {ok, Mrst} = ddoc_to_mrst(DbName, Doc),
+            Sig = couch_util:to_hex_bin(Mrst#mrst.sig),
+            Acc#{Sig => true};
+        (#full_doc_info{} = FDI, Acc) ->
+            {ok, Doc} = couch_db:open_doc_int(Db, FDI, [ejson_body]),
+            {ok, Mrst} = ddoc_to_mrst(DbName, Doc),
+            Sig = couch_util:to_hex_bin(Mrst#mrst.sig),
+            Acc#{Sig => true}
+    end,
+    lists:foldl(FoldFun, #{}, DDocs).
+
+% Returns a map of `Sig => DocId` elements for all the purge view
+% checkpoint docs. Sig is a hex-encoded binary.
+%
+get_purge_checkpoints(DbName) when is_binary(DbName) ->
+    couch_util:with_db(DbName, fun get_purge_checkpoints/1);
+get_purge_checkpoints(Db) ->
+    FoldFun = fun(#doc{id = Id}, Acc) ->
+        case Id of
+            <<?LOCAL_DOC_PREFIX, "purge-mrview-", Sig/binary>> ->
+                {ok, Acc#{Sig => Id}};
+            _ ->
+                {stop, Acc}
+        end
+    end,
+    Opts = [{start_key, <<?LOCAL_DOC_PREFIX, "purge-mrview-">>}],
+    {ok, Signatures = #{}} = couch_db:fold_local_docs(Db, FoldFun, #{}, Opts),
+    Signatures.
+
+% Returns a map of `Sig => [FilePath, ...]` elements. Sig is a hex-encoded
+% binary and FilePaths are lists as they intended to be passed to couch_file
+% and file module functions.
+%
+get_index_files(DbName) when is_binary(DbName) ->
+    IdxDir = couch_index_util:index_dir(mrview, DbName),
+    WildcardPath = filename:join(IdxDir, "*"),
+    FoldFun = fun(F, Acc) ->
+        case verify_view_filename(F) of
+            true ->
+                Sig = ?l2b(get_signature_from_filename(F)),
+                maps:update_with(Sig, fun(Fs) -> [F | Fs] end, [F], Acc);
+            false ->
+                Acc
+        end
+    end,
+    lists:foldl(FoldFun, #{}, filelib:wildcard(WildcardPath));
+get_index_files(Db) ->
+    get_index_files(couch_db:name(Db)).
 
 get_view(Db, DDoc, ViewName, Args0) ->
     case get_view_index_state(Db, DDoc, ViewName, Args0) of
diff --git a/src/couch_mrview/test/eunit/couch_mrview_util_tests.erl b/src/couch_mrview/test/eunit/couch_mrview_util_tests.erl
index a495fd82c..2562bb511 100644
--- a/src/couch_mrview/test/eunit/couch_mrview_util_tests.erl
+++ b/src/couch_mrview/test/eunit/couch_mrview_util_tests.erl
@@ -13,8 +13,11 @@
 -module(couch_mrview_util_tests).
 
 -include_lib("couch/include/couch_eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
 -include_lib("couch_mrview/include/couch_mrview.hrl").
 
+-define(DDOC_ID, <<"_design/bar">>).
+
 couch_mrview_util_test_() ->
     [
         ?_assertEqual(0, validate_group_level(undefined, undefined)),
@@ -35,3 +38,139 @@ validate_group_level(Group, GroupLevel) ->
     Args0 = #mrargs{group = Group, group_level = GroupLevel, view_type = red},
     Args1 = couch_mrview_util:validate_args(Args0),
     Args1#mrargs.group_level.
+
+get_signature_from_filename_test() ->
+    Sig = "da817c3d3f7413c1a610f25635a0c521",
+    P1 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521.view",
+    P2 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521.compact.view",
+    P3 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521",
+    ?assertEqual(Sig, couch_mrview_util:get_signature_from_filename(P1)),
+    ?assertEqual(Sig, couch_mrview_util:get_signature_from_filename(P2)),
+    ?assertEqual(Sig, couch_mrview_util:get_signature_from_filename(P3)).
+
+verify_view_filename_test() ->
+    P1 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521.view",
+    P2 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521.compact.view",
+    P3 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521",
+    ?assert(couch_mrview_util:verify_view_filename(P1)),
+    ?assert(couch_mrview_util:verify_view_filename(P2)),
+    ?assertNot(couch_mrview_util:verify_view_filename(P3)),
+    ?assertNot(couch_mrview_util:verify_view_filename("")),
+    ?assertNot(couch_mrview_util:verify_view_filename("foo.view")).
+
+setup() ->
+    DbName = ?tempdb(),
+    ok = fabric:create_db(DbName, [?ADMIN_CTX, {q, 2}]),
+    DDoc = couch_mrview_test_util:ddoc(map),
+    {ok, _} = fabric:update_doc(DbName, DDoc, [?ADMIN_CTX]),
+    {ok, _} = fabric:query_view(DbName, <<"bar">>, <<"baz">>, #mrargs{}),
+    {ok, Db} = couch_mrview_test_util:init_db(?tempdb(), map),
+    {ok, _} = couch_mrview:query_view(Db, ?DDOC_ID, <<"baz">>),
+    {DbName, Db}.
+
+teardown({DbName, Db}) ->
+    couch_db:close(Db),
+    couch_server:delete(couch_db:name(Db), [?ADMIN_CTX]),
+    ok = fabric:delete_db(DbName, [?ADMIN_CTX]).
+
+get_signatures_test_() ->
+    {
+        setup,
+        fun() -> test_util:start_couch([fabric]) end,
+        fun test_util:stop_couch/1,
+        {
+            foreach,
+            fun setup/0,
+            fun teardown/1,
+            [
+                ?TDEF_FE(t_get_signatures_local),
+                ?TDEF_FE(t_get_signatures_clustered),
+                ?TDEF_FE(t_get_purge_checkpoints_local),
+                ?TDEF_FE(t_get_purge_checkpoints_clustered),
+                ?TDEF_FE(t_get_index_files_local),
+                ?TDEF_FE(t_get_index_files_clustered)
+            ]
+        }
+    }.
+
+t_get_signatures_local({_, Db}) ->
+    DbName = couch_db:name(Db),
+    Sigs = couch_mrview_util:get_signatures(DbName),
+    ?assert(is_map(Sigs)),
+    ?assertEqual(1, map_size(Sigs)),
+    [{Sig, true}] = maps:to_list(Sigs),
+    {ok, Info} = couch_mrview:get_info(Db, ?DDOC_ID),
+    ?assertEqual(proplists:get_value(signature, Info), Sig),
+
+    {ok, DDoc} = couch_db:open_doc(Db, ?DDOC_ID, [?ADMIN_CTX]),
+    Deleted = DDoc#doc{deleted = true, body = {[]}},
+    {ok, _} = couch_db:update_doc(Db, Deleted, []),
+    ?assertEqual(#{}, couch_mrview_util:get_signatures(DbName)).
+
+t_get_signatures_clustered({DbName, _Db}) ->
+    [Shard1, Shard2] = mem3:local_shards(DbName),
+    ShardName1 = mem3:name(Shard1),
+    ShardName2 = mem3:name(Shard2),
+    Sigs = couch_mrview_util:get_signatures(ShardName1),
+    ?assertEqual(Sigs, couch_mrview_util:get_signatures(ShardName2)),
+    ?assert(is_map(Sigs)),
+    ?assertEqual(1, map_size(Sigs)),
+    [{Sig, true}] = maps:to_list(Sigs),
+    {ok, Info} = couch_mrview:get_info(ShardName1, ?DDOC_ID),
+    ?assertEqual(proplists:get_value(signature, Info), Sig),
+
+    {ok, DDoc} = fabric:open_doc(DbName, ?DDOC_ID, [?ADMIN_CTX]),
+    Deleted = DDoc#doc{deleted = true, body = {[]}},
+    {ok, _} = fabric:update_doc(DbName, Deleted, [?ADMIN_CTX]),
+    ?assertEqual(#{}, couch_mrview_util:get_signatures(ShardName1)),
+    ?assertEqual(#{}, couch_mrview_util:get_signatures(ShardName2)).
+
+t_get_purge_checkpoints_local({_, Db}) ->
+    DbName = couch_db:name(Db),
+    Checkpoints = couch_mrview_util:get_purge_checkpoints(DbName),
+    ?assert(is_map(Checkpoints)),
+    ?assertEqual(1, map_size(Checkpoints)),
+    [{Sig, <<"_local/", _/binary>>}] = maps:to_list(Checkpoints),
+    {ok, Info} = couch_mrview:get_info(Db, ?DDOC_ID),
+    ?assertEqual(proplists:get_value(signature, Info), Sig).
+
+t_get_purge_checkpoints_clustered({DbName, _Db}) ->
+    {ok, _} = fabric:query_view(DbName, <<"bar">>, <<"baz">>, #mrargs{}),
+    [Shard1, Shard2] = mem3:local_shards(DbName),
+    ShardName1 = mem3:name(Shard1),
+    ShardName2 = mem3:name(Shard2),
+    Sigs1 = couch_mrview_util:get_purge_checkpoints(ShardName1),
+    Sigs2 = couch_mrview_util:get_purge_checkpoints(ShardName2),
+    ?assertEqual(lists:sort(maps:keys(Sigs1)), lists:sort(maps:keys(Sigs2))),
+    ?assert(is_map(Sigs1)),
+    ?assertEqual(1, map_size(Sigs1)),
+    [{Sig, <<"_local/", _/binary>>}] = maps:to_list(Sigs1),
+    {ok, Info} = couch_mrview:get_info(ShardName1, ?DDOC_ID),
+    ?assertEqual(proplists:get_value(signature, Info), Sig).
+
+t_get_index_files_local({_, Db}) ->
+    DbName = couch_db:name(Db),
+    SigFilesMap = couch_mrview_util:get_index_files(DbName),
+    ?assert(is_map(SigFilesMap)),
+    ?assertEqual(1, map_size(SigFilesMap)),
+    [{Sig, [File]}] = maps:to_list(SigFilesMap),
+    ?assertMatch({ok, _}, file:read_file_info(File)),
+    {ok, Info} = couch_mrview:get_info(Db, ?DDOC_ID),
+    ?assertEqual(proplists:get_value(signature, Info), Sig).
+
+t_get_index_files_clustered({DbName, _Db}) ->
+    {ok, _} = fabric:query_view(DbName, <<"bar">>, <<"baz">>, #mrargs{}),
+    [Shard1, Shard2] = mem3:local_shards(DbName),
+    ShardName1 = mem3:name(Shard1),
+    ShardName2 = mem3:name(Shard2),
+    SigFilesMap1 = couch_mrview_util:get_index_files(ShardName1),
+    SigFilesMap2 = couch_mrview_util:get_index_files(ShardName2),
+    SigKeys1 = lists:sort(maps:keys(SigFilesMap1)),
+    SigKeys2 = lists:sort(maps:keys(SigFilesMap2)),
+    ?assertEqual(SigKeys1, SigKeys2),
+    ?assert(is_map(SigFilesMap1)),
+    ?assertEqual(1, map_size(SigFilesMap1)),
+    [{Sig, [File]}] = maps:to_list(SigFilesMap1),
+    ?assertMatch({ok, _}, file:read_file_info(File)),
+    {ok, Info} = couch_mrview:get_info(ShardName1, ?DDOC_ID),
+    ?assertEqual(proplists:get_value(signature, Info), Sig).
diff --git a/src/fabric/src/fabric.erl b/src/fabric/src/fabric.erl
index e61521da0..acd7f1190 100644
--- a/src/fabric/src/fabric.erl
+++ b/src/fabric/src/fabric.erl
@@ -65,7 +65,6 @@
     cleanup_index_files/1,
     cleanup_index_files_all_nodes/1,
     dbname/1,
-    inactive_index_files/1,
     db_uuids/1
 ]).
 
@@ -586,58 +585,34 @@ cleanup_index_files() ->
 -spec cleanup_index_files(dbname()) -> ok.
 cleanup_index_files(DbName) ->
     try
-        lists:foreach(
-            fun(File) ->
-                file:delete(File)
-            end,
-            inactive_index_files(DbName)
-        )
+        ShardNames = [mem3:name(S) || S <- mem3:local_shards(dbname(DbName))],
+        cleanup_local_indices_and_purge_checkpoints(ShardNames)
     catch
-        error:Error ->
-            couch_log:error(
-                "~p:cleanup_index_files. Error: ~p",
-                [?MODULE, Error]
-            ),
+        error:database_does_not_exist ->
             ok
     end.
 
-%% @doc inactive index files for a specific db
--spec inactive_index_files(dbname()) -> ok.
-inactive_index_files(DbName) ->
-    {ok, DesignDocs} = fabric:design_docs(DbName),
-
-    ActiveSigs = maps:from_list(
-        lists:map(
-            fun(#doc{id = GroupId}) ->
-                {ok, Info} = fabric:get_view_group_info(DbName, GroupId),
-                {binary_to_list(couch_util:get_value(signature, Info)), nil}
-            end,
-            [couch_doc:from_json_obj(DD) || DD <- DesignDocs]
-        )
-    ),
-
-    FileList = lists:flatmap(
-        fun(#shard{name = ShardName}) ->
-            IndexDir = couch_index_util:index_dir(mrview, ShardName),
-            filelib:wildcard([IndexDir, "/*"])
-        end,
-        mem3:local_shards(dbname(DbName))
-    ),
+cleanup_local_indices_and_purge_checkpoints([]) ->
+    ok;
+cleanup_local_indices_and_purge_checkpoints([_ | _] = Dbs) ->
+    AllIndices = lists:map(fun couch_mrview_util:get_index_files/1, Dbs),
+    AllPurges = lists:map(fun couch_mrview_util:get_purge_checkpoints/1, Dbs),
+    Sigs = couch_mrview_util:get_signatures(hd(Dbs)),
+    ok = cleanup_purges(Sigs, AllPurges, Dbs),
+    ok = cleanup_indices(Sigs, AllIndices).
+
+cleanup_purges(Sigs, AllPurges, Dbs) ->
+    Fun = fun(DbPurges, Db) ->
+        couch_mrview_cleanup:cleanup_purges(Db, Sigs, DbPurges)
+    end,
+    lists:zipwith(Fun, AllPurges, Dbs),
+    ok.
 
-    if
-        ActiveSigs =:= [] ->
-            FileList;
-        true ->
-            %% <sig>.view and <sig>.compact.view where <sig> is in ActiveSigs
-            %% will be excluded from FileList because they are active view
-            %% files and should not be deleted.
-            lists:filter(
-                fun(FilePath) ->
-                    not maps:is_key(get_view_sig_from_filename(FilePath), ActiveSigs)
-                end,
-                FileList
-            )
-    end.
+cleanup_indices(Sigs, AllIndices) ->
+    Fun = fun(DbIndices) ->
+        couch_mrview_cleanup:cleanup_indices(Sigs, DbIndices)
+    end,
+    lists:foreach(Fun, AllIndices).
 
 %% @doc clean up index files for a specific db on all nodes
 -spec cleanup_index_files_all_nodes(dbname()) -> [reference()].
@@ -791,9 +766,6 @@ kl_to_record(KeyList, RecName) ->
 set_namespace(NS, #mrargs{extra = Extra} = Args) ->
     Args#mrargs{extra = [{namespace, NS} | Extra]}.
 
-get_view_sig_from_filename(FilePath) ->
-    filename:basename(filename:basename(FilePath, ".view"), ".compact").
-
 -ifdef(TEST).
 -include_lib("eunit/include/eunit.hrl").
 
diff --git a/src/fabric/test/eunit/fabric_tests.erl b/src/fabric/test/eunit/fabric_tests.erl
index c0e2b626b..76400a9fe 100644
--- a/src/fabric/test/eunit/fabric_tests.erl
+++ b/src/fabric/test/eunit/fabric_tests.erl
@@ -12,48 +12,229 @@
 
 -module(fabric_tests).
 
+-include_lib("couch/include/couch_db.hrl").
 -include_lib("couch/include/couch_eunit.hrl").
 
 cleanup_index_files_test_() ->
     {
-        setup,
+        foreach,
         fun setup/0,
         fun teardown/1,
-        fun(Ctx) ->
-            [
-                t_cleanup_index_files(),
-                t_cleanup_index_files_with_existing_db(Ctx),
-                t_cleanup_index_files_with_deleted_db(Ctx)
-            ]
-        end
+        [
+            ?TDEF_FE(t_cleanup_index_files),
+            ?TDEF_FE(t_cleanup_index_files_with_existing_db),
+            ?TDEF_FE(t_cleanup_index_files_with_view_data),
+            ?TDEF_FE(t_cleanup_index_files_with_deleted_db),
+            ?TDEF_FE(t_cleanup_index_file_after_ddoc_update),
+            ?TDEF_FE(t_cleanup_index_file_after_ddoc_delete)
+        ]
     }.
 
 setup() ->
     Ctx = test_util:start_couch([fabric]),
-    % TempDb is deleted in the test "t_cleanup_index_files_with_deleted_db".
-    TempDb = ?tempdb(),
-    fabric:create_db(TempDb),
-    {Ctx, TempDb}.
+    DbName = ?tempdb(),
+    fabric:create_db(DbName, [{q, 1}]),
+    create_ddoc(DbName, <<"_design/foo">>, <<"bar">>),
+    {ok, _} = fabric:query_view(DbName, <<"foo">>, <<"bar">>),
+    create_ddoc(DbName, <<"_design/boo">>, <<"baz">>),
+    {ok, _} = fabric:query_view(DbName, <<"boo">>, <<"baz">>),
+    {Ctx, DbName}.
 
-teardown({Ctx, _TempDb}) ->
+teardown({Ctx, DbName}) ->
+    fabric:delete_db(DbName),
     test_util:stop_couch(Ctx).
 
-t_cleanup_index_files() ->
-    ?_assert(
-        lists:all(fun(Res) -> Res =:= ok end, fabric:cleanup_index_files())
+t_cleanup_index_files(_) ->
+    CheckFun = fun(Res) -> Res =:= ok end,
+    ?assert(lists:all(CheckFun, fabric:cleanup_index_files())).
+
+t_cleanup_index_files_with_existing_db({_, DbName}) ->
+    ?assertEqual(ok, fabric:cleanup_index_files(DbName)).
+
+t_cleanup_index_files_with_view_data({_, DbName}) ->
+    Sigs = sigs(DbName),
+    Indices = indices(DbName),
+    Purges = purges(DbName),
+    ok = fabric:cleanup_index_files(DbName),
+    % We haven't inadvertently removed any active index bits
+    ?assertEqual(Sigs, sigs(DbName)),
+    ?assertEqual(Indices, indices(DbName)),
+    ?assertEqual(Purges, purges(DbName)).
+
+t_cleanup_index_files_with_deleted_db(_) ->
+    SomeDb = ?tempdb(),
+    ?assertEqual(ok, fabric:cleanup_index_files(SomeDb)).
+
+t_cleanup_index_file_after_ddoc_update({_, DbName}) ->
+    ?assertEqual(
+        [
+            "4bcdf852098ff6b0578ddf472c320e9c.view",
+            "da817c3d3f7413c1a610f25635a0c521.view"
+        ],
+        indices(DbName)
+    ),
+    ?assertEqual(
+        [
+            <<"_local/purge-mrview-4bcdf852098ff6b0578ddf472c320e9c">>,
+            <<"_local/purge-mrview-da817c3d3f7413c1a610f25635a0c521">>
+        ],
+        purges(DbName)
+    ),
+
+    update_ddoc(DbName, <<"_design/foo">>, <<"bar1">>),
+    ok = fabric:cleanup_index_files(DbName),
+    {ok, _} = fabric:query_view(DbName, <<"foo">>, <<"bar1">>),
+
+    % One 4bc stays, da8 should  gone and 9e3 is added
+    ?assertEqual(
+        [
+            "4bcdf852098ff6b0578ddf472c320e9c.view",
+            "9e355b0fee411b4257036b8fca56f263.view"
+        ],
+        indices(DbName)
+    ),
+    ?assertEqual(
+        [
+            <<"_local/purge-mrview-4bcdf852098ff6b0578ddf472c320e9c">>,
+            <<"_local/purge-mrview-9e355b0fee411b4257036b8fca56f263">>
+        ],
+        purges(DbName)
     ).
 
-t_cleanup_index_files_with_existing_db({_Ctx, TempDb}) ->
-    ?_assertEqual(ok, fabric:cleanup_index_files(TempDb)).
+t_cleanup_index_file_after_ddoc_delete({_, DbName}) ->
+    ?assertEqual(
+        [
+            "4bcdf852098ff6b0578ddf472c320e9c.view",
+            "da817c3d3f7413c1a610f25635a0c521.view"
+        ],
+        indices(DbName)
+    ),
+    ?assertEqual(
+        [
+            <<"_local/purge-mrview-4bcdf852098ff6b0578ddf472c320e9c">>,
+            <<"_local/purge-mrview-da817c3d3f7413c1a610f25635a0c521">>
+        ],
+        purges(DbName)
+    ),
+
+    delete_ddoc(DbName, <<"_design/foo">>),
+    ok = fabric:cleanup_index_files(DbName),
+
+    % 4bc stays the same, da8 should be gone
+    ?assertEqual(
+        [
+            "4bcdf852098ff6b0578ddf472c320e9c.view"
+        ],
+        indices(DbName)
+    ),
+    ?assertEqual(
+        [
+            <<"_local/purge-mrview-4bcdf852098ff6b0578ddf472c320e9c">>
+        ],
+        purges(DbName)
+    ),
+
+    delete_ddoc(DbName, <<"_design/boo">>),
+    ok = fabric:cleanup_index_files(DbName),
+
+    ?assertEqual([], indices(DbName)),
+    ?assertEqual([], purges(DbName)),
+
+    % cleaning a db with all deleted indices should still work
+    ok = fabric:cleanup_index_files(DbName),
 
-t_cleanup_index_files_with_deleted_db({_Ctx, TempDb}) ->
-    ?_test(
-        begin
-            fabric:delete_db(TempDb, []),
-            ?assertError(
-                database_does_not_exist,
-                fabric:inactive_index_files(TempDb)
+    ?assertEqual([], indices(DbName)),
+    ?assertEqual([], purges(DbName)).
+
+shard_names(DbName) ->
+    [mem3:name(S) || S <- mem3:local_shards(DbName)].
+
+% Sorted list of sigs
+%
+sigs(DbName) ->
+    case shard_names(DbName) of
+        [] ->
+            [];
+        [SomeDb | _] ->
+            Sigs = couch_mrview_util:get_signatures(SomeDb),
+            lists:sort(maps:keys(Sigs))
+    end.
+
+% Sorted list of index files
+%
+indices(DbName) ->
+    case shard_names(DbName) of
+        [] ->
+            [];
+        [_ | _] = Dbs ->
+            AllIndices = lists:map(fun couch_mrview_util:get_index_files/1, Dbs),
+            AsList = lists:sort(
+                lists:foldl(
+                    fun(Indices, Acc) ->
+                        maps:values(Indices) ++ Acc
+                    end,
+                    [],
+                    AllIndices
+                )
             ),
-            ?assertEqual(ok, fabric:cleanup_index_files(TempDb))
-        end
-    ).
+            % Keep only file names and extensions. Since we use q=1, we shouldn't
+            % have any duplicates
+            [filename:basename(F) || F <- AsList]
+    end.
+
+% Sorted list of purge checkpoint doc ids
+%
+purges(DbName) ->
+    case shard_names(DbName) of
+        [] ->
+            [];
+        [_ | _] = Dbs ->
+            AllPurges = lists:map(fun couch_mrview_util:get_purge_checkpoints/1, Dbs),
+            lists:sort(
+                lists:foldl(
+                    fun(Purges, Acc) ->
+                        maps:values(Purges) ++ Acc
+                    end,
+                    [],
+                    AllPurges
+                )
+            )
+    end.
+
+create_ddoc(DbName, DDocId, ViewName) ->
+    DDoc = couch_doc:from_json_obj(
+        {[
+            {<<"_id">>, DDocId},
+            {<<"language">>, <<"javascript">>},
+            {<<"views">>,
+                {[
+                    {ViewName,
+                        {[
+                            {<<"map">>, <<"function(doc) { emit(doc.value, null); }">>}
+                        ]}}
+                ]}}
+        ]}
+    ),
+    fabric:update_doc(DbName, DDoc, [?ADMIN_CTX]).
+
+update_ddoc(DbName, DDocId, ViewName) ->
+    {ok, DDoc0} = fabric:open_doc(DbName, DDocId, [?ADMIN_CTX]),
+    DDoc = DDoc0#doc{
+        body =
+            {[
+                {<<"language">>, <<"javascript">>},
+                {<<"views">>,
+                    {[
+                        {ViewName,
+                            {[
+                                {<<"map">>, <<"function(doc) { emit(doc.value, 1); }">>}
+                            ]}}
+                    ]}}
+            ]}
+    },
+    fabric:update_doc(DbName, DDoc, [?ADMIN_CTX]).
+
+delete_ddoc(DbName, DDocId) ->
+    {ok, DDoc0} = fabric:open_doc(DbName, DDocId, [?ADMIN_CTX]),
+    DDoc = DDoc0#doc{deleted = true, body = {[]}},
+    fabric:update_doc(DbName, DDoc, [?ADMIN_CTX]).


[couchdb] 02/02: Update couch_mrview_debug with a few new functions

Posted by va...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit c5fb0948065ab9eb484152cd0a04e1e031154ffc
Author: Nick Vatamaniuc <va...@gmail.com>
AuthorDate: Tue Nov 8 14:48:28 2022 -0500

    Update couch_mrview_debug with a few new functions
    
    Add functions to inspect view signatures, index files and purge checkpoints.
---
 src/couch_mrview/src/couch_mrview_debug.erl | 128 +++++++++++++++++++++++++++-
 1 file changed, 126 insertions(+), 2 deletions(-)

diff --git a/src/couch_mrview/src/couch_mrview_debug.erl b/src/couch_mrview/src/couch_mrview_debug.erl
index 66f5d79d9..a4d144bba 100644
--- a/src/couch_mrview/src/couch_mrview_debug.erl
+++ b/src/couch_mrview/src/couch_mrview_debug.erl
@@ -28,7 +28,12 @@
     view_report/1,
     view_report/2,
     index_view_report/1,
-    index_view_report/2
+    index_view_report/2,
+    active_signatures/1,
+    index_files/1,
+    stale_index_files/1,
+    purge_checkpoints/1,
+    stale_purge_checkpoints/1
 ]).
 
 -include_lib("couch_mrview/include/couch_mrview.hrl").
@@ -41,7 +46,12 @@ help() ->
         index_view_state,
         index_report,
         view_report,
-        index_view_report
+        index_view_report,
+        active_signatures,
+        index_files,
+        stale_index_files,
+        purge_checkpoints,
+        stale_purge_checkpoints
     ].
 
 -spec help(Function :: atom()) -> ok.
@@ -267,6 +277,56 @@ help(index_view_report) ->
         |  reduce_funs  |                                                                                                _sum
         |  update_seq   |                                                                                                   3
 
+    ---
+    ", []);
+help(active_signatures) ->
+    io:format("
+    active_signatures(DbName)
+    -------------------------
+
+    DbName: Database name as list or binary
+
+    Returns the list of all view signatures.
+    ---
+    ", []);
+help(index_files) ->
+    io:format("
+    index_files(DbName)
+    -------------------------
+
+    DbName: Database name as list or binary
+
+    Returns the list of all view index files, both active and inactive.
+    ---
+    ", []);
+help(stale_index_files) ->
+    io:format("
+    stale_index_files(DbName)
+    -------------------------
+
+    DbName: Database name as list or binary
+
+    Returns the list of stale (inactive) index files.
+    ---
+    ", []);
+help(purge_checkpoints) ->
+    io:format("
+    purge_checkpoints(DbName)
+    -------------------------
+
+    DbName: Database name as list or binary
+
+    Returns the list of all the view purge checkpoint doc ids.
+    ---
+    ", []);
+help(stale_purge_checkpoints) ->
+    io:format("
+    stale_purge_checkpoints(DbName)
+    -------------------------
+
+    DbName: Database name as list or binary
+
+    Returns the list of stale (inactive) view purge checkpoint doc ids.
     ---
     ", []);
 help(Unknown) ->
@@ -432,6 +492,70 @@ convert_collator_versions_to_strings(State) ->
     ),
     maps:put(collator_versions, CollatorVersions, State).
 
+active_signatures(DbName) when is_list(DbName) ->
+    active_signatures(list_to_binary(DbName));
+active_signatures(DbName) when is_binary(DbName) ->
+    [binary_to_list(S) || S <- sigs(DbName)].
+
+index_files(DbName) when is_list(DbName) ->
+    index_files(list_to_binary(DbName));
+index_files(DbName) when is_binary(DbName) ->
+    ShardNames = shard_names(DbName),
+    Files = get_index_files(ShardNames, #{}),
+    [format_view_path(F) || F <- lists:sort(Files)].
+
+stale_index_files(DbName) when is_list(DbName) ->
+    index_files(list_to_binary(DbName));
+stale_index_files(DbName) when is_binary(DbName) ->
+    ShardNames = shard_names(DbName),
+    Files = get_index_files(ShardNames, get_sigs(ShardNames)),
+    [format_view_path(F) || F <- lists:sort(Files)].
+
+purge_checkpoints(DbName) when is_list(DbName) ->
+    purge_checkpoints(list_to_binary(DbName));
+purge_checkpoints(DbName) when is_binary(DbName) ->
+    ShardNames = shard_names(DbName),
+    Purges = get_purge_checkpoints(ShardNames, #{}),
+    [binary_to_list(DocId) || DocId <- lists:sort(Purges)].
+
+stale_purge_checkpoints(DbName) when is_list(DbName) ->
+    purge_checkpoints(list_to_binary(DbName));
+stale_purge_checkpoints(DbName) when is_binary(DbName) ->
+    ShardNames = shard_names(DbName),
+    Purges = get_purge_checkpoints(ShardNames, get_sigs(ShardNames)),
+    [binary_to_list(DocId) || DocId <- lists:sort(Purges)].
+
+get_purge_checkpoints([], #{}) ->
+    [];
+get_purge_checkpoints([_ | _] = Dbs, #{} = Sigs) ->
+    AllPurges = lists:map(fun couch_mrview_util:get_purge_checkpoints/1, Dbs),
+    Fun = fun(Purges, Acc) ->
+        Filtered = maps:without(maps:keys(Sigs), Purges),
+        maps:values(Filtered) ++ Acc
+    end,
+    lists:foldl(Fun, [], AllPurges).
+
+get_index_files([], #{}) ->
+    [];
+get_index_files([_ | _] = Dbs, #{} = Sigs) ->
+    AllIdxs = lists:map(fun couch_mrview_util:get_index_files/1, Dbs),
+    Fun = fun(Idxs, Acc) ->
+        Filtered = maps:without(maps:keys(Sigs), Idxs),
+        maps:values(Filtered) ++ Acc
+    end,
+    lists:foldl(Fun, [], AllIdxs).
+
+get_sigs([]) ->
+    #{};
+get_sigs([AnyShard | _]) ->
+    couch_mrview_util:get_signatures(AnyShard).
+
+shard_names(DbName) ->
+    [mem3:name(S) || S <- mem3:local_shards(DbName)].
+
+sigs(DbName) ->
+    lists:sort(maps:keys(get_sigs(shard_names(DbName)))).
+
 format_view_path(ViewFilePath) ->
     BaseDir = config:get("couchdb", "view_index_dir"),
     lists:flatten(string:replace(ViewFilePath, BaseDir ++ "/", "")).