You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by ji...@apache.org on 2020/04/21 11:22:20 UTC

[couchdb-couch-dbupdates] branch background-delete updated (f7435fb -> 8e4ab5b)

This is an automated email from the ASF dual-hosted git repository.

jiangphcn pushed a change to branch background-delete
in repository https://gitbox.apache.org/repos/asf/couchdb-couch-dbupdates.git.


 discard f7435fb  background deletion for soft-deleted database
     new 8e4ab5b  background deletion for soft-deleted database

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (f7435fb)
            \
             N -- N -- N   refs/heads/background-delete (8e4ab5b)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 include/couch_dbupdates.hrl   |  7 ++--
 src/couch_dbdelete_server.erl | 75 +++++++++++++++++++++++++++----------------
 src/couch_dbdelete_sup.erl    |  8 ++---
 3 files changed, 56 insertions(+), 34 deletions(-)


[couchdb-couch-dbupdates] 01/01: background deletion for soft-deleted database

Posted by ji...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jiangphcn pushed a commit to branch background-delete
in repository https://gitbox.apache.org/repos/asf/couchdb-couch-dbupdates.git

commit 8e4ab5b1400c0aa211bddb09b9d48e14cff5e65f
Author: jiangph <ji...@cn.ibm.com>
AuthorDate: Tue Apr 21 14:02:16 2020 +0800

    background deletion for soft-deleted database
    
    allow background job to delete soft-deleted database according to
    specified criteria to release space. Once database is hard-deleted,
    the data can't be fetched back.
---
 include/couch_dbupdates.hrl   |  24 ++++++
 src/couch_dbdelete_app.erl    |  31 +++++++
 src/couch_dbdelete_server.erl | 184 ++++++++++++++++++++++++++++++++++++++++++
 src/couch_dbdelete_sup.erl    |  61 ++++++++++++++
 src/couch_dbdelete_worker.erl |  51 ++++++++++++
 src/couch_dbupdates.app.src   |  14 +++-
 6 files changed, 363 insertions(+), 2 deletions(-)

diff --git a/include/couch_dbupdates.hrl b/include/couch_dbupdates.hrl
new file mode 100644
index 0000000..b72e5f6
--- /dev/null
+++ b/include/couch_dbupdates.hrl
@@ -0,0 +1,24 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+% jobs api
+-define(DB_DELETE_JOB_TYPE, <<"dbdelete">>).
+
+% maximum number of workers for background db deletion
+-define(MAX_WORKERS, 10).
+
+% settings for background deletion interval
+-define(DEFAULT_DELAY_SEC, 172800). % 48 hours
+-define(DEFAULT_RESOLUTION_SEC, 3600). % 1 hour
+
+% type conversion
+-define(btoi(V), binary_to_integer(V)).
diff --git a/src/couch_dbdelete_app.erl b/src/couch_dbdelete_app.erl
new file mode 100644
index 0000000..757b0af
--- /dev/null
+++ b/src/couch_dbdelete_app.erl
@@ -0,0 +1,31 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+
+-module(couch_dbdelete_app).
+
+
+-behaviour(application).
+
+
+-export([
+    start/2,
+    stop/1
+]).
+
+
+start(_StartType, _StartArgs) -> % application callback: boots the supervisor
+    couch_dbdelete_sup:start_link().
+
+
+stop(_State) -> % application callback: nothing to do on stop
+    ok.
diff --git a/src/couch_dbdelete_server.erl b/src/couch_dbdelete_server.erl
new file mode 100644
index 0000000..253f62b
--- /dev/null
+++ b/src/couch_dbdelete_server.erl
@@ -0,0 +1,184 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_dbdelete_server).
+
+
+-behaviour(gen_server).
+
+
+-export([
+    start_link/0,
+    process_loop/0
+]).
+
+
+-export([
+    init/1,
+    terminate/2,
+    handle_call/3,
+    handle_cast/2,
+    handle_info/2,
+    code_change/3
+]).
+
+
+-include("couch_dbupdates.hrl").
+
+
+start_link() -> % registers the server locally under the module name
+    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
+
+
+init(_) ->
+    % trap exits so linked worker/scanner deaths arrive as 'EXIT' messages
+    process_flag(trap_exit, true),
+    proc_lib:spawn_link(?MODULE, process_loop, []),
+    couch_jobs:set_type_timeout(?DB_DELETE_JOB_TYPE, 6),
+    State0 = #{workers => #{}, max_workers => max_workers()},
+    % fill the worker pool before reporting the server as started
+    State1 = spawn_workers(State0),
+    {ok, State1}.
+
+
+terminate(_, _St) -> % no explicit cleanup is performed
+    ok.
+
+
+handle_call(Msg, _From, St) -> % no calls are supported: reply and stop
+    {stop, {bad_call, Msg}, {bad_call, Msg}, St}.
+
+
+handle_cast(Msg, St) -> % no casts are supported: stop the server
+    {stop, {bad_cast, Msg}, St}.
+
+
+handle_info({'EXIT', Pid, Reason}, #{workers := Workers} = St) ->
+    % 'EXIT' arrives here because init/1 sets trap_exit. Only pids in the
+    % workers map are expected; any other linked exit stops the server.
+    case maps:is_key(Pid, Workers) of
+        true ->
+            % abnormal worker exits are logged; normal ones are routine
+            Reason =:= normal orelse couch_log:error(
+                "~p : worker process ~p exited with ~p", [?MODULE, Pid, Reason]),
+            {noreply, spawn_workers(St#{workers := maps:remove(Pid, Workers)})};
+        false ->
+            LogMsg = "~p : unknown process ~p exited with ~p",
+            couch_log:error(LogMsg, [?MODULE, Pid, Reason]),
+            {stop, {unknown_pid_exit, Pid}, St}
+    end;
+
+handle_info(Msg, St) ->
+    % no other messages are expected; treat them as fatal
+    {stop, {bad_info, Msg}, St}.
+
+
+code_change(_OldVsn, St, _Extra) -> % state is passed through unchanged
+    {ok, St}.
+
+
+process_loop() ->
+    % Scanner loop, spawned and linked by init/1; it never returns.
+    % Each pass enqueues deletion jobs unless the feature is switched off;
+    % the switch is re-read from config on every pass.
+    is_enabled() andalso process_delete(),
+    % Wait for the configured resolution before the next pass. The value is
+    % read again each time around.
+    Pause = resolution_msec(),
+    timer:sleep(Pause),
+    process_loop().
+
+
+process_delete() ->
+    % Enqueue a hard-delete job for every soft-deleted database whose
+    % deletion timestamp is older than the configured delay.
+    {ok, Infos} = fabric2_db:list_deleted_dbs_info(),
+    % Loop-invariant: compute the cutoff once so every database in this pass
+    % is judged against the same instant and config is read only once.
+    % delay_msec/0 yields seconds despite its name, matching now_sec/0.
+    Cutoff = now_sec() - delay_msec(),
+    lists:foreach(fun(Info) ->
+        DbName = proplists:get_value(db_name, Info),
+        DeletedWhen = proplists:get_value(timestamp, Info),
+        case Cutoff > timestamp_to_sec(DeletedWhen) of
+            true ->
+                JobId = job_id(DbName, DeletedWhen),
+                JobData = job_data(DbName, DeletedWhen),
+                % assert success; a failed enqueue crashes the scanner
+                ok = couch_jobs:add(undefined, ?DB_DELETE_JOB_TYPE,
+                    JobId, JobData);
+            false ->
+                ok
+        end
+    end, Infos).
+
+
+job_id(DbName, Timestamp) -> % <<"DbName-Timestamp">>; both must be binaries
+    <<DbName/binary, "-", Timestamp/binary>>.
+
+
+% Payload stored with the job; the worker later matches these fields under
+% the binary keys <<"db_name">> and <<"timestamp">>.
+job_data(DbName, Timestamp) ->
+    Fields = [{db_name, DbName}, {timestamp, Timestamp}],
+    maps:from_list(Fields).
+
+
+now_sec() ->
+    % UTC now in Gregorian seconds, the scale timestamp_to_sec/1 also uses
+    UtcNow = calendar:now_to_universal_time(os:timestamp()),
+    calendar:datetime_to_gregorian_seconds(UtcNow).
+
+
+timestamp_to_sec(TimeStamp) ->
+    % Convert a UTC timestamp binary into Gregorian seconds.
+    % The match is strict: exactly "YYYY-MM-DDTHH:MM:SSZ", so fractional
+    % seconds or a numeric UTC offset fail with badmatch.
+    <<Y:4/binary, $-, Mo:2/binary, $-, D:2/binary, $T,
+        H:2/binary, $:, Mi:2/binary, $:, S:2/binary, $Z>> = TimeStamp,
+    Date = {?btoi(Y), ?btoi(Mo), ?btoi(D)},
+    Time = {?btoi(H), ?btoi(Mi), ?btoi(S)},
+    % assumes the timestamp is UTC, as the trailing "Z" suggests
+    calendar:datetime_to_gregorian_seconds({Date, Time}).
+
+
+spawn_workers(St) ->
+    % Top the pool up to the limit, one linked worker per recursion step.
+    % Workers are keyed by pid so handle_info/2 can recognise their exits.
+    #{workers := Pool, max_workers := Limit} = St,
+    case maps:size(Pool) >= Limit of
+        true ->
+            % pool is already full; hand the state back untouched
+            St;
+        false ->
+            WorkerPid = couch_dbdelete_worker:spawn_link(),
+            Grown = Pool#{WorkerPid => true},
+            spawn_workers(St#{workers := Grown})
+    end.
+
+
+max_workers() -> % pool size, overridable via [couch] max_db_delete_workers
+    config:get_integer("couch", "max_db_delete_workers", ?MAX_WORKERS).
+
+
+is_enabled() -> % [couch] db_delete_enabled; defaults to true
+    config:get_boolean("couch", "db_delete_enabled", true).
+
+
+delay_msec() -> % NOTE: despite the name this returns seconds, not msec
+    config:get_integer("couch", "db_delete_delay_sec",
+        ?DEFAULT_DELAY_SEC).
+
+
+resolution_msec() -> % config is in seconds; callers sleep in milliseconds
+    timer:seconds(config:get_integer("couch", "db_delete_resolution_sec",
+        ?DEFAULT_RESOLUTION_SEC)).
diff --git a/src/couch_dbdelete_sup.erl b/src/couch_dbdelete_sup.erl
new file mode 100644
index 0000000..9c6e94f
--- /dev/null
+++ b/src/couch_dbdelete_sup.erl
@@ -0,0 +1,61 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+
+-module(couch_dbdelete_sup).
+
+
+-behaviour(supervisor).
+
+
+-export([
+    start_link/0
+]).
+
+
+-export([
+    init/1
+]).
+
+
+-include("couch_dbupdates.hrl").
+
+
+start_link() ->
+    % Mode depends on whether fabric2_node_types reports the
+    % background_db_deletion type; init/1 has one clause per mode.
+    Enabled = fabric2_node_types:is_type(background_db_deletion),
+    Arg = case Enabled of true -> normal; false -> deletion_disabled end,
+    supervisor:start_link({local, ?MODULE}, ?MODULE, Arg).
+
+
+init(normal) ->
+    % deletion enabled: supervise the single deletion server
+    ServerSpec = #{
+        id => couch_dbdelete_server,
+        start => {couch_dbdelete_server, start_link, []}
+    },
+    {ok, {flags(), [ServerSpec]}};
+
+init(deletion_disabled) ->
+    % disabled: no children; the job type timeout is still set in couch_jobs
+    couch_log:notice("~p : database background deletion disabled", [?MODULE]),
+    couch_jobs:set_type_timeout(?DB_DELETE_JOB_TYPE, 6),
+    {ok, {flags(), []}}.
+
+
+flags() ->
+    #{
+        strategy => one_for_one,
+        intensity => 1, % at most this many restarts ...
+        period => 5 % ... within this many seconds
+    }.
diff --git a/src/couch_dbdelete_worker.erl b/src/couch_dbdelete_worker.erl
new file mode 100644
index 0000000..88fe336
--- /dev/null
+++ b/src/couch_dbdelete_worker.erl
@@ -0,0 +1,51 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_dbdelete_worker).
+
+-export([
+    spawn_link/0
+]).
+
+
+-export([
+    init/0
+]).
+
+-ifdef(TEST).
+-compile(export_all).
+-compile(nowarn_export_all).
+-endif.
+
+-include("couch_dbupdates.hrl").
+-include_lib("couch/include/couch_db.hrl").
+-include_lib("fabric/include/fabric2.hrl").
+
+
+spawn_link() -> % linked proc_lib process that runs init/0
+    proc_lib:spawn_link(?MODULE, init, []).
+
+
+init() ->
+    % One job per process: accept, delete, mark finished, exit. The server
+    % sees the exit and spawns a replacement worker.
+    {ok, Job, JobData} = couch_jobs:accept(?DB_DELETE_JOB_TYPE, #{}),
+    #{<<"db_name">> := DbName, <<"timestamp">> := TimeStamp} = JobData,
+    % presumably deleted_at targets the soft-deleted copy -- confirm in fabric2_db
+    ok = fabric2_db:delete(DbName, [{deleted_at, TimeStamp}]),
+    % record the outcome alongside the original job data
+    Outcome = JobData#{
+        message => db_deleted,
+        reason => "Database was deleted"
+    },
+    couch_jobs:finish(undefined, Job, Outcome),
+    exit(normal).
diff --git a/src/couch_dbupdates.app.src b/src/couch_dbupdates.app.src
index a5597ba..48819be 100644
--- a/src/couch_dbupdates.app.src
+++ b/src/couch_dbupdates.app.src
@@ -2,10 +2,20 @@
  [
   {description, ""},
   {vsn, git},
-  {registered, []},
+  {mod, {couch_dbdelete_app, []}},
+  {registered,  [
+                 couch_dbdelete_sup,
+                 couch_dbdelete_server
+                ]},
   {applications, [
                   kernel,
-                  stdlib
+                  stdlib,
+                  erlfdb,
+                  couch_epi,
+                  couch_log,
+                  config,
+                  fabric,
+                  couch_jobs
                  ]},
   {env, []}
  ]}.