You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by ji...@apache.org on 2020/04/23 12:42:25 UTC

[couchdb] 01/01: background deletion for soft-deleted database

This is an automated email from the ASF dual-hosted git repository.

jiangphcn pushed a commit to branch background-db-deletion
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit ae4e9f0477141bd1f080bdd28d4dc2708090d5e5
Author: jiangph <ji...@cn.ibm.com>
AuthorDate: Thu Apr 23 20:41:40 2020 +0800

    background deletion for soft-deleted database
    
    Allow a background job to delete soft-deleted databases according to
    specified criteria in order to release space. Once a database is
    hard-deleted, its data can no longer be recovered.
---
 src/fabric/include/fabric2.hrl           |   9 ++
 src/fabric/src/fabric2_db_expiration.erl | 175 +++++++++++++++++++++++++++++++
 src/fabric/src/fabric2_sup.erl           |  11 ++
 3 files changed, 195 insertions(+)

diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl
index 587b4f8..2b34796 100644
--- a/src/fabric/include/fabric2.hrl
+++ b/src/fabric/include/fabric2.hrl
@@ -13,6 +13,7 @@
 
 -define(uint2bin(I), binary:encode_unsigned(I, little)).
 -define(bin2uint(I), binary:decode_unsigned(I, little)).
+-define(bin2int(V), binary_to_integer(V)).
 -define(METADATA_VERSION_KEY, <<16#FF, "/metadataVersion">>).
 
 % Prefix Definitions
@@ -78,3 +79,11 @@
 
 
 -define(BINARY_CHUNK_SIZE, 100000).
+
+% couch_jobs identifiers used by fabric2_db_expiration: the job type and the
+% job id it passes to couch_jobs:add/4.
+-define(DB_EXPIRATION_JOB_TYPE, <<"dbexpiration">>).
+-define(DB_EXPIRATION_JOB, <<"dbexpiration_job">>).
+
+% Defaults for background deletion of soft-deleted databases. They are used
+% when "db_expiration_retention_sec" / "db_expiration_batch" are not set in
+% the "couch" config section.
+-define(DEFAULT_RETENTION_SEC, 172800). % 48 hours
+-define(DEFAULT_EXPIRATION_BATCH, 100).
diff --git a/src/fabric/src/fabric2_db_expiration.erl b/src/fabric/src/fabric2_db_expiration.erl
new file mode 100644
index 0000000..5b1758a
--- /dev/null
+++ b/src/fabric/src/fabric2_db_expiration.erl
@@ -0,0 +1,175 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(fabric2_db_expiration).
+
+
+-behaviour(gen_server).
+
+
+-export([
+    start_link/0
+]).
+
+-export([
+    init/1,
+    handle_call/3,
+    handle_cast/2,
+    handle_info/2,
+    terminate/2,
+    code_change/3
+]).
+
+
+-include_lib("couch/include/couch_db.hrl").
+-include_lib("fabric/include/fabric2.hrl").
+
+
+% Starts the expiration gen_server linked to the caller and registers it
+% locally under the module name.
+start_link() ->
+    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
+
+
+% gen_server callbacks.
+%
+% init/1 must return quickly: the supervisor is blocked until it does, and an
+% exception raised here makes the child fail to start. The job registration
+% and the expiration scan are therefore deferred to handle_info(timeout, _)
+% by returning a zero timeout.
+init(_) ->
+    {ok, nil, 0}.
+
+
+terminate(_M, _St) ->
+    ok.
+
+
+% No synchronous API is offered; any call is treated as a programming error.
+handle_call(Msg, _From, St) ->
+    {stop, {bad_call, Msg}, {bad_call, Msg}, St}.
+
+
+% No asynchronous API is offered; any cast is treated as a programming error.
+handle_cast(Msg, St) ->
+    {stop, {bad_cast, Msg}, St}.
+
+
+% `timeout` is delivered once, right after init/1, because of the zero
+% timeout returned there. It performs the work that must not block startup.
+handle_info(timeout, St) ->
+    % One retry if couch_jobs answered with badarg on the first attempt.
+    % NOTE(review): the outcome of the second attempt is not checked, so the
+    % scan below runs even if the job could not be accepted.
+    case wait_couch_job() of
+        ok -> ok;
+        retry -> wait_couch_job()
+    end,
+    case is_enabled() of
+        true ->
+            process_expiration();
+        false ->
+            ok
+    end,
+    {noreply, St};
+
+% Any other message is unexpected and stops the server.
+handle_info(Msg, St) ->
+    {stop, {bad_info, Msg}, St}.
+
+
+code_change(_OldVsn, St, _Extra) ->
+    {ok, St}.
+
+
+% Registers the expiration job with couch_jobs and accepts it.
+%
+% Returns `ok` once a job of type ?DB_EXPIRATION_JOB_TYPE has been accepted,
+% or `retry` if any of the couch_jobs calls raises error:badarg
+% (NOTE(review): presumably because couch_jobs is not ready yet -- confirm).
+% Any other exception propagates to the caller, including the badmatch raised
+% when accept/2 returns something other than {ok, _, _}.
+wait_couch_job() ->
+    try
+        % NOTE(review): the unit of the 6 is not visible here -- presumably
+        % seconds; confirm against couch_jobs.
+        couch_jobs:set_type_timeout(?DB_EXPIRATION_JOB_TYPE, 6),
+        couch_jobs:add(undefined, ?DB_EXPIRATION_JOB_TYPE, ?DB_EXPIRATION_JOB, #{}),
+        {ok, _Job, _JobData} = couch_jobs:accept(?DB_EXPIRATION_JOB_TYPE,
+            #{max_sched_time => 1000}),
+        ok
+    catch
+        error:badarg ->
+            retry
+    end.
+
+
+% Walks the soft-deleted databases through fabric2_db:list_deleted_dbs_info/3.
+% Each row is handed to process_row/2, which deletes in batches while the
+% listing is in progress. On `complete`, the rows collected since the last
+% batch are flushed. They sit at the head of the accumulator, and the index
+% stored in the head entry says how many older entries were already handled.
+process_expiration() ->
+    Callback = fun
+        ({meta, _}, Acc) ->
+            {ok, Acc};
+        ({row, DbInfo}, Acc) ->
+            {ok, process_row(Acc, DbInfo)};
+        (complete, []) ->
+            {ok, []};
+        (complete, [{LastDelete, _, _} | _] = Acc) ->
+            Remaining = length(Acc) - LastDelete,
+            delete_dbs(lists:sublist(Acc, Remaining)),
+            {ok, Acc}
+    end,
+    {ok, _Infos} = fabric2_db:list_deleted_dbs_info(Callback, [], []).
+
+
+% Adds one soft-deleted database row to the accumulator, flushing the pending
+% rows to delete_dbs/1 first when a full batch has been collected.
+%
+% Acc is a list of {LastDelete, DbName, TimeStamp}, newest first. LastDelete
+% in the head entry is the number of older entries that were already passed
+% to delete_dbs/1. Flushed entries are dropped from the accumulator, so the
+% list never grows beyond one batch and the length/1 call stays cheap. New
+% entries therefore carry 0, which keeps the `complete` handler in
+% process_expiration/0 working unchanged (it flushes length(Acc) - 0 rows).
+%
+% `>=` is used instead of `==` because the batch size is re-read from config
+% on every row. If it shrinks mid-scan, an equality test would never fire
+% again for the rest of the scan.
+process_row(Acc, DbInfo) ->
+    DbName = proplists:get_value(db_name, DbInfo),
+    TimeStamp = proplists:get_value(timestamp, DbInfo),
+    case Acc of
+        [] ->
+            [{0, DbName, TimeStamp}];
+        [{LastDelete, _, _} | _] ->
+            Pending = length(Acc) - LastDelete,
+            case Pending >= expiration_batch() of
+                true ->
+                    delete_dbs(lists:sublist(Acc, Pending)),
+                    [{0, DbName, TimeStamp}];
+                false ->
+                    [{LastDelete, DbName, TimeStamp} | Acc]
+            end
+    end.
+
+
+% Hard-deletes every database in Infos whose soft-deletion timestamp is older
+% than the configured retention period. Entries are
+% {_, DbName, TimeStamp} tuples; younger entries are left alone.
+delete_dbs(Infos) ->
+    lists:foreach(fun maybe_delete_db/1, Infos).
+
+
+% Deletes one soft-deleted database instance if it has outlived retention.
+% The clock and the retention setting are read per entry.
+maybe_delete_db({_, DbName, TimeStamp}) ->
+    CurrentSec = now_sec(),
+    RetentionSec = retention_sec(),
+    Cutoff = CurrentSec - RetentionSec,
+    DeletedAtSec = timestamp_to_sec(TimeStamp),
+    if
+        Cutoff > DeletedAtSec ->
+            ok = fabric2_db:delete(DbName, [{deleted_at, TimeStamp}]);
+        true ->
+            ok
+    end.
+
+
+% Current OS time as gregorian seconds (universal time).
+now_sec() ->
+    UtcNow = calendar:now_to_universal_time(os:timestamp()),
+    calendar:datetime_to_gregorian_seconds(UtcNow).
+
+
+% Converts a timestamp binary of the exact form <<"YYYY-MM-DDThh:mm:ssZ">>
+% into gregorian seconds. Any other shape fails the binary match.
+timestamp_to_sec(TimeStamp) ->
+    <<Y:4/binary, $-, Mo:2/binary, $-, D:2/binary, $T,
+        H:2/binary, $:, Mi:2/binary, $:, S:2/binary, $Z>> = TimeStamp,
+    Date = {?bin2int(Y), ?bin2int(Mo), ?bin2int(D)},
+    Time = {?bin2int(H), ?bin2int(Mi), ?bin2int(S)},
+    calendar:datetime_to_gregorian_seconds({Date, Time}).
+
+
+% Whether background expiration is switched on: reads the boolean
+% "db_expiration_enabled" from the "couch" config section, defaulting to true.
+is_enabled() ->
+    config:get_boolean("couch", "db_expiration_enabled", true).
+
+
+% Retention period for soft-deleted databases, in seconds: reads
+% "db_expiration_retention_sec" from the "couch" config section, defaulting
+% to ?DEFAULT_RETENTION_SEC.
+retention_sec() ->
+    config:get_integer("couch", "db_expiration_retention_sec",
+        ?DEFAULT_RETENTION_SEC).
+
+
+% Number of rows collected before a batch is passed to delete_dbs/1: reads
+% "db_expiration_batch" from the "couch" config section, defaulting to
+% ?DEFAULT_EXPIRATION_BATCH.
+expiration_batch() ->
+    config:get_integer("couch", "db_expiration_batch",
+        ?DEFAULT_EXPIRATION_BATCH).
diff --git a/src/fabric/src/fabric2_sup.erl b/src/fabric/src/fabric2_sup.erl
index e8201b4..589b70a 100644
--- a/src/fabric/src/fabric2_sup.erl
+++ b/src/fabric/src/fabric2_sup.erl
@@ -24,6 +24,9 @@
 ]).
 
 
+-include_lib("fabric/include/fabric2.hrl").
+
+
 start_link(Args) ->
     supervisor:start_link({local, ?MODULE}, ?MODULE, Args).
 
@@ -55,6 +58,14 @@ init([]) ->
             5000,
             worker,
             [fabric2_index]
+        },
+        {
+            fabric2_db_expiration,
+            {fabric2_db_expiration, start_link, []},
+            permanent,
+            5000,
+            worker,
+            [fabric2_db_expiration]
         }
     ],
     ChildrenWithEpi = couch_epi:register_service(fabric2_epi, Children),