You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by rn...@apache.org on 2023/06/21 16:47:17 UTC

[couchdb] 01/01: WIP Introduce countermeasures as we run out of disk space

This is an automated email from the ASF dual-hosted git repository.

rnewson pushed a commit to branch out-of-disk-handler
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit ded3c5b6dd5d0be51e96e74c53fcd377a2aa908d
Author: Robert Newson <rn...@apache.org>
AuthorDate: Tue Jun 20 11:52:46 2023 +0100

    WIP Introduce countermeasures as we run out of disk space
---
 rel/reltool.config                    |   1 +
 src/chttpd/src/chttpd.erl             |   2 +
 src/couch/priv/stats_descriptions.cfg |   4 ++
 src/couch/src/couch_alarm_handler.erl | 127 ++++++++++++++++++++++++++++++++++
 src/couch/src/couch_app.erl           |   3 +
 src/fabric/src/fabric_doc_update.erl  |   2 +
 src/fabric/src/fabric_rpc.erl         |  36 ++++++----
 7 files changed, 160 insertions(+), 15 deletions(-)

diff --git a/rel/reltool.config b/rel/reltool.config
index 0355a0b07..ebb15bb83 100644
--- a/rel/reltool.config
+++ b/rel/reltool.config
@@ -87,6 +87,7 @@
     {app, sasl, [{incl_cond, include}]},
     {app, ssl, [{incl_cond, include}]},
     {app, stdlib, [{incl_cond, include}]},
+    {app, os_mon, [{incl_cond, include}]},
     {app, syntax_tools, [{incl_cond, include}]},
     {app, xmerl, [{incl_cond, include}]},
 
diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl
index 53abc731f..c8e6fdc97 100644
--- a/src/chttpd/src/chttpd.erl
+++ b/src/chttpd/src/chttpd.erl
@@ -1138,6 +1138,8 @@ error_info(timeout) ->
     >>};
 error_info({service_unavailable, Reason}) ->
     {503, <<"service unavailable">>, Reason};
+error_info({insufficient_storage, Reason}) ->
+    {507, <<"insufficient_storage">>, Reason};
 error_info({timeout, _Reason}) ->
     error_info(timeout);
 error_info({'EXIT', {Error, _Stack}}) ->
diff --git a/src/couch/priv/stats_descriptions.cfg b/src/couch/priv/stats_descriptions.cfg
index 6c0d4dad2..1983eed9b 100644
--- a/src/couch/priv/stats_descriptions.cfg
+++ b/src/couch/priv/stats_descriptions.cfg
@@ -266,6 +266,10 @@
     {type, counter},
     {desc, <<"number of HTTP 503 Service unavailable responses">>}
 ]}.
+{[couchdb, httpd_status_codes, 507], [
+    {type, counter},
+    {desc, <<"number of HTTP 507 Insufficient Storage responses">>}
+]}.
 {[couchdb, open_databases], [
     {type, counter},
     {desc,  <<"number of open databases">>}
diff --git a/src/couch/src/couch_alarm_handler.erl b/src/couch/src/couch_alarm_handler.erl
new file mode 100644
index 000000000..0bbb59ee6
--- /dev/null
+++ b/src/couch/src/couch_alarm_handler.erl
@@ -0,0 +1,127 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_alarm_handler).
+-behaviour(gen_event).
+
+% public api
+-export([
+    database_dir_almost_full/0,
+    view_index_dir_almost_full/0
+]).
+
+% gen_event callbacks
+-export([
+    init/1,
+    handle_event/2,
+    handle_call/2
+]).
+
+-include_lib("kernel/include/file.hrl").
+
+database_dir_almost_full() ->
+    <<DatabaseDirAlmostFull:1, _ViewIndexDirAlmostFull:1>> = get_almost_full(),
+    DatabaseDirAlmostFull == 1.
+
+view_index_dir_almost_full() ->
+    <<_DatabaseDirAlmostFull:1, ViewIndexDirAlmostFull:1>> = get_almost_full(),
+    ViewIndexDirAlmostFull == 1.
+
+init(_Args) ->
+    {ok, nil}.
+
+handle_event({set_alarm, Alarm}, St) ->
+    set_alarm(Alarm),
+    {ok, St};
+handle_event({clear_alarm, AlarmId}, St) ->
+    clear_alarm(AlarmId),
+    {ok, St};
+handle_event(_Event, St) ->
+    {ok, St}.
+
+handle_call(_Query, St) ->
+    {ok, {error, bad_query}, St}.
+
+set_alarm({{disk_almost_full, MntOn}, []}) ->
+    IsDatabaseDir = is_database_dir(MntOn),
+    IsViewIndexDir = is_view_index_dir(MntOn),
+    <<DatabaseDirAlmostFull:1, ViewIndexDirAlmostFull:1>> = get_almost_full(),
+    if
+        IsDatabaseDir andalso IsViewIndexDir ->
+            couch_log:warning("database_dir and view_index_dir almost full, activating countermeasures", []),
+            put_almost_full(1, 1);
+        IsDatabaseDir ->
+            couch_log:warning("database_dir almost full, activating countermeasures", []),
+            put_almost_full(1, ViewIndexDirAlmostFull);
+        IsViewIndexDir ->
+            couch_log:warning("view_index_dir almost full, activating countermeasures", []),
+            put_almost_full(DatabaseDirAlmostFull, 1);
+        true ->
+            ok
+    end;
+set_alarm(_Alarm) ->
+    ok.
+
+clear_alarm({disk_almost_full, MntOn}) ->
+    IsDatabaseDir = is_database_dir(MntOn),
+    IsViewIndexDir = is_view_index_dir(MntOn),
+    <<DatabaseDirAlmostFull:1, ViewIndexDirAlmostFull:1>> = get_almost_full(),
+    if
+        IsDatabaseDir andalso IsViewIndexDir ->
+            couch_log:warning("database_dir and view_index_dir no longer almost full, rescinding countermeasures", []),
+            erase_almost_full();
+        IsDatabaseDir ->
+            couch_log:warning("database_dir no longer almost full, rescinding countermeasures", []),
+            put_almost_full(0, ViewIndexDirAlmostFull);
+        IsViewIndexDir ->
+            couch_log:warning("view_index_dir no longer almost full, rescinding countermeasures", []),
+            put_almost_full(DatabaseDirAlmostFull, 0);
+        true ->
+            ok
+    end;
+clear_alarm(_AlarmId) ->
+    ok.
+
+is_database_dir(MntOn) ->
+    same_device(config:get("couchdb", "database_dir"), MntOn).
+
+is_view_index_dir(MntOn) ->
+    same_device(config:get("couchdb", "view_index_dir"), MntOn).
+
+same_device(DirA, DirB) ->
+    case {device_id(DirA), device_id(DirB)} of
+        {{ok, DeviceId}, {ok, DeviceId}} ->
+            true;
+        _Else ->
+            false
+    end.
+
+device_id(Dir) ->
+    case file:read_file_info(Dir) of
+        {ok, FileInfo} ->
+            {ok, {FileInfo#file_info.minor_device, FileInfo#file_info.major_device}};
+        {error, Reason} ->
+            {error, Reason}
+    end.
+
+
+get_almost_full() ->
+    persistent_term:get(key(), <<0:2>>).
+
+put_almost_full(DatabaseDirAlmostFull, ViewIndexDirAlmostFull) ->
+    persistent_term:put(key(), <<DatabaseDirAlmostFull:1, ViewIndexDirAlmostFull:1>>).
+
+erase_almost_full() ->
+    persistent_term:erase(key()).
+
+key() ->
+    {?MODULE, almost_full}.
diff --git a/src/couch/src/couch_app.erl b/src/couch/src/couch_app.erl
index 8cd8c8482..1887d1451 100644
--- a/src/couch/src/couch_app.erl
+++ b/src/couch/src/couch_app.erl
@@ -21,6 +21,9 @@
 ]).
 
 start(_Type, _) ->
+    %% register our alarm handler
+    gen_event:swap_handler(alarm_handler, {alarm_handler, swap}, {couch_alarm_handler, ok}),
+
     case couch_sup:start_link() of
         {ok, _} = Resp ->
             {Time, _} = statistics(wall_clock),
diff --git a/src/fabric/src/fabric_doc_update.erl b/src/fabric/src/fabric_doc_update.erl
index 77b424911..695ab07e9 100644
--- a/src/fabric/src/fabric_doc_update.erl
+++ b/src/fabric/src/fabric_doc_update.erl
@@ -112,6 +112,8 @@ handle_message({bad_request, Msg}, _, _) ->
     throw({bad_request, Msg});
 handle_message({forbidden, Msg}, _, _) ->
     throw({forbidden, Msg});
+handle_message({insufficient_storage, Msg}, _, _) ->
+    throw({insufficient_storage, Msg});
 handle_message({request_entity_too_large, Entity}, _, _) ->
     throw({request_entity_too_large, Entity}).
 
diff --git a/src/fabric/src/fabric_rpc.erl b/src/fabric/src/fabric_rpc.erl
index b781eea99..92f04e3fd 100644
--- a/src/fabric/src/fabric_rpc.erl
+++ b/src/fabric/src/fabric_rpc.erl
@@ -274,21 +274,27 @@ get_missing_revs(DbName, IdRevsList, Options) ->
     with_db(DbName, Options, {couch_db, get_missing_revs, [IdRevsList]}).
 
 update_docs(DbName, Docs0, Options) ->
-    {Docs1, Type} =
-        case couch_util:get_value(read_repair, Options) of
-            NodeRevs when is_list(NodeRevs) ->
-                Filtered = read_repair_filter(DbName, Docs0, NodeRevs, Options),
-                {Filtered, ?REPLICATED_CHANGES};
-            undefined ->
-                X =
-                    case proplists:get_value(?REPLICATED_CHANGES, Options) of
-                        true -> ?REPLICATED_CHANGES;
-                        _ -> ?INTERACTIVE_EDIT
-                    end,
-                {Docs0, X}
-        end,
-    Docs2 = make_att_readers(Docs1),
-    with_db(DbName, Options, {couch_db, update_docs, [Docs2, Options, Type]}).
+    %% reject the update with insufficient_storage while database_dir is almost full
+    case couch_alarm_handler:database_dir_almost_full() of
+        true ->
+            rexi:reply({insufficient_storage, <<"database_dir almost full">>});
+        false ->
+            {Docs1, Type} =
+                case couch_util:get_value(read_repair, Options) of
+                    NodeRevs when is_list(NodeRevs) ->
+                        Filtered = read_repair_filter(DbName, Docs0, NodeRevs, Options),
+                        {Filtered, ?REPLICATED_CHANGES};
+                    undefined ->
+                        X =
+                            case proplists:get_value(?REPLICATED_CHANGES, Options) of
+                                true -> ?REPLICATED_CHANGES;
+                                _ -> ?INTERACTIVE_EDIT
+                            end,
+                        {Docs0, X}
+                end,
+            Docs2 = make_att_readers(Docs1),
+            with_db(DbName, Options, {couch_db, update_docs, [Docs2, Options, Type]})
+    end.
 
 get_purge_seq(DbName, Options) ->
     with_db(DbName, Options, {couch_db, get_purge_seq, []}).