You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by rn...@apache.org on 2023/06/21 16:47:16 UTC

[couchdb] branch out-of-disk-handler updated (399ec31c1 -> ded3c5b6d)

This is an automated email from the ASF dual-hosted git repository.

rnewson pushed a change to branch out-of-disk-handler
in repository https://gitbox.apache.org/repos/asf/couchdb.git


 discard 399ec31c1 WIP Introduce countermeasures as we run out of disk space
     new ded3c5b6d WIP Introduce countermeasures as we run out of disk space

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (399ec31c1)
            \
             N -- N -- N   refs/heads/out-of-disk-handler (ded3c5b6d)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/couch/src/couch_alarm_handler.erl | 67 ++++++++++++++++++++++-------------
 1 file changed, 43 insertions(+), 24 deletions(-)


[couchdb] 01/01: WIP Introduce countermeasures as we run out of disk space

Posted by rn...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

rnewson pushed a commit to branch out-of-disk-handler
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit ded3c5b6dd5d0be51e96e74c53fcd377a2aa908d
Author: Robert Newson <rn...@apache.org>
AuthorDate: Tue Jun 20 11:52:46 2023 +0100

    WIP Introduce countermeasures as we run out of disk space
---
 rel/reltool.config                    |   1 +
 src/chttpd/src/chttpd.erl             |   2 +
 src/couch/priv/stats_descriptions.cfg |   4 ++
 src/couch/src/couch_alarm_handler.erl | 127 ++++++++++++++++++++++++++++++++++
 src/couch/src/couch_app.erl           |   3 +
 src/fabric/src/fabric_doc_update.erl  |   2 +
 src/fabric/src/fabric_rpc.erl         |  36 ++++++----
 7 files changed, 160 insertions(+), 15 deletions(-)

diff --git a/rel/reltool.config b/rel/reltool.config
index 0355a0b07..ebb15bb83 100644
--- a/rel/reltool.config
+++ b/rel/reltool.config
@@ -87,6 +87,7 @@
     {app, sasl, [{incl_cond, include}]},
     {app, ssl, [{incl_cond, include}]},
     {app, stdlib, [{incl_cond, include}]},
+    {app, os_mon, [{incl_cond, include}]},
     {app, syntax_tools, [{incl_cond, include}]},
     {app, xmerl, [{incl_cond, include}]},
 
diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl
index 53abc731f..c8e6fdc97 100644
--- a/src/chttpd/src/chttpd.erl
+++ b/src/chttpd/src/chttpd.erl
@@ -1138,6 +1138,8 @@ error_info(timeout) ->
     >>};
 error_info({service_unavailable, Reason}) ->
     {503, <<"service unavailable">>, Reason};
+error_info({insufficient_storage, Reason}) -> % reported as HTTP 507 with the given Reason
+    {507, <<"insufficient_storage">>, Reason};
 error_info({timeout, _Reason}) ->
     error_info(timeout);
 error_info({'EXIT', {Error, _Stack}}) ->
diff --git a/src/couch/priv/stats_descriptions.cfg b/src/couch/priv/stats_descriptions.cfg
index 6c0d4dad2..1983eed9b 100644
--- a/src/couch/priv/stats_descriptions.cfg
+++ b/src/couch/priv/stats_descriptions.cfg
@@ -266,6 +266,10 @@
     {type, counter},
     {desc, <<"number of HTTP 503 Service unavailable responses">>}
 ]}.
+{[couchdb, httpd_status_codes, 507], [
+    {type, counter},
+    {desc, <<"number of HTTP 507 Insufficient Storage responses">>}
+]}.
 {[couchdb, open_databases], [
     {type, counter},
     {desc,  <<"number of open databases">>}
diff --git a/src/couch/src/couch_alarm_handler.erl b/src/couch/src/couch_alarm_handler.erl
new file mode 100644
index 000000000..0bbb59ee6
--- /dev/null
+++ b/src/couch/src/couch_alarm_handler.erl
@@ -0,0 +1,127 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_alarm_handler).
+-behaviour(gen_event).
+
+% public api
+-export([
+    database_dir_almost_full/0,
+    view_index_dir_almost_full/0
+]).
+
+% gen_event callbacks
+-export([
+    init/1,
+    handle_event/2,
+    handle_call/2
+]).
+
+-include_lib("kernel/include/file.hrl").
+
+database_dir_almost_full() -> % true while the first (database_dir) flag bit is set
+    <<DbBit:1, _ViewBit:1>> = get_almost_full(),
+    DbBit =:= 1.
+
+view_index_dir_almost_full() -> % true while the second (view_index_dir) flag bit is set
+    <<_DbBit:1, ViewBit:1>> = get_almost_full(),
+    ViewBit =:= 1.
+
+init(_) -> % gen_event callback; the argument is ignored and the handler state is nil
+    {ok, nil}.
+
+handle_event({set_alarm, Alarm}, State) -> % hand the alarm to set_alarm/1
+    set_alarm(Alarm),
+    {ok, State};
+handle_event({clear_alarm, AlarmId}, State) -> % hand the alarm id to clear_alarm/1
+    clear_alarm(AlarmId),
+    {ok, State};
+handle_event(_Other, State) -> % any other event is ignored; state is returned unchanged
+    {ok, State}.
+
+handle_call(_Request, State) -> % every call is answered with {error, bad_query}
+    {ok, {error, bad_query}, State}.
+
+set_alarm({{disk_almost_full, MntOn}, []}) ->
+    % Set the flag of each configured dir that shares a device with MntOn; keep the other.
+    OnDevice = {is_database_dir(MntOn), is_view_index_dir(MntOn)},
+    <<DbBit:1, ViewBit:1>> = get_almost_full(),
+    case OnDevice of
+        {true, true} ->
+            couch_log:warning("database_dir and view_index_dir almost full, activating countermeasures", []),
+            put_almost_full(1, 1);
+        {true, false} ->
+            couch_log:warning("database_dir almost full, activating countermeasures", []),
+            put_almost_full(1, ViewBit);
+        {false, true} ->
+            couch_log:warning("view_index_dir almost full, activating countermeasures", []),
+            put_almost_full(DbBit, 1);
+        {false, false} ->
+            ok
+    end;
+set_alarm(_Other) ->
+    ok.
+
+clear_alarm({disk_almost_full, MntOn}) ->
+    % Reset the flag of each configured dir that shares a device with MntOn; keep the other.
+    OnDevice = {is_database_dir(MntOn), is_view_index_dir(MntOn)},
+    <<DbBit:1, ViewBit:1>> = get_almost_full(),
+    case OnDevice of
+        {true, true} ->
+            couch_log:warning("database_dir and view_index_dir no longer almost full, rescinding countermeasures", []),
+            erase_almost_full();
+        {true, false} ->
+            couch_log:warning("database_dir no longer almost full, rescinding countermeasures", []),
+            put_almost_full(0, ViewBit);
+        {false, true} ->
+            couch_log:warning("view_index_dir no longer almost full, rescinding countermeasures", []),
+            put_almost_full(DbBit, 0);
+        {false, false} ->
+            ok
+    end;
+clear_alarm(_Other) ->
+    ok.
+
+is_database_dir(Path) -> % true when Path shares a device with the configured database_dir
+    same_device(config:get("couchdb", "database_dir"), Path).
+
+is_view_index_dir(Path) -> % true when Path shares a device with the configured view_index_dir
+    same_device(config:get("couchdb", "view_index_dir"), Path).
+
+same_device(DirA, DirB) ->
+    % True only when device_id/1 succeeds for both paths with the same id.
+    % An {error, _} result on either side gives false.
+    IdA = device_id(DirA),
+    IdB = device_id(DirB),
+    % IdA must be an ok-tuple, and IdB must then be exactly the same term.
+    element(1, IdA) =:= ok andalso IdA =:= IdB.
+
+device_id(Dir) ->
+    % Returns {ok, {MinorDevice, MajorDevice}} from Dir's file info, or the read error.
+    case file:read_file_info(Dir) of
+        {ok, #file_info{minor_device = Minor, major_device = Major}} ->
+            {ok, {Minor, Major}};
+        {error, _Reason} = Error -> Error
+    end.
+
+
+get_almost_full() -> % stored 2-bit flags (database_dir, view_index_dir); <<0:2>> if none stored
+    persistent_term:get(key(), <<0:2>>).
+
+put_almost_full(DbBit, ViewBit) -> % stores both flags as one 2-bit bitstring, database_dir first
+    persistent_term:put(key(), <<DbBit:1, ViewBit:1>>).
+
+erase_almost_full() -> % drops the stored flags; get_almost_full/0 then returns its <<0:2>> default
+    persistent_term:erase(key()).
+
+key() -> % persistent_term key under which the almost-full flags are stored
+    {?MODULE, almost_full}.
diff --git a/src/couch/src/couch_app.erl b/src/couch/src/couch_app.erl
index 8cd8c8482..1887d1451 100644
--- a/src/couch/src/couch_app.erl
+++ b/src/couch/src/couch_app.erl
@@ -21,6 +21,9 @@
 ]).
 
 start(_Type, _) ->
+    %% register our alarm handler
+    gen_event:swap_handler(alarm_handler, {alarm_handler, swap}, {couch_alarm_handler, ok}),
+
     case couch_sup:start_link() of
         {ok, _} = Resp ->
             {Time, _} = statistics(wall_clock),
diff --git a/src/fabric/src/fabric_doc_update.erl b/src/fabric/src/fabric_doc_update.erl
index 77b424911..695ab07e9 100644
--- a/src/fabric/src/fabric_doc_update.erl
+++ b/src/fabric/src/fabric_doc_update.erl
@@ -112,6 +112,8 @@ handle_message({bad_request, Msg}, _, _) ->
     throw({bad_request, Msg});
 handle_message({forbidden, Msg}, _, _) ->
     throw({forbidden, Msg});
+handle_message({insufficient_storage, Msg}, _, _) ->
+    throw({insufficient_storage, Msg});
 handle_message({request_entity_too_large, Entity}, _, _) ->
     throw({request_entity_too_large, Entity}).
 
diff --git a/src/fabric/src/fabric_rpc.erl b/src/fabric/src/fabric_rpc.erl
index b781eea99..92f04e3fd 100644
--- a/src/fabric/src/fabric_rpc.erl
+++ b/src/fabric/src/fabric_rpc.erl
@@ -274,21 +274,27 @@ get_missing_revs(DbName, IdRevsList, Options) ->
     with_db(DbName, Options, {couch_db, get_missing_revs, [IdRevsList]}).
 
 update_docs(DbName, Docs0, Options) ->
-    {Docs1, Type} =
-        case couch_util:get_value(read_repair, Options) of
-            NodeRevs when is_list(NodeRevs) ->
-                Filtered = read_repair_filter(DbName, Docs0, NodeRevs, Options),
-                {Filtered, ?REPLICATED_CHANGES};
-            undefined ->
-                X =
-                    case proplists:get_value(?REPLICATED_CHANGES, Options) of
-                        true -> ?REPLICATED_CHANGES;
-                        _ -> ?INTERACTIVE_EDIT
-                    end,
-                {Docs0, X}
-        end,
-    Docs2 = make_att_readers(Docs1),
-    with_db(DbName, Options, {couch_db, update_docs, [Docs2, Options, Type]}).
+    %% only if there's room
+    case couch_alarm_handler:database_dir_almost_full() of
+        true ->
+            rexi:reply({insufficient_storage, <<"database_dir almost full">>});
+        false ->
+            {Docs1, Type} =
+                case couch_util:get_value(read_repair, Options) of
+                    NodeRevs when is_list(NodeRevs) ->
+                        Filtered = read_repair_filter(DbName, Docs0, NodeRevs, Options),
+                        {Filtered, ?REPLICATED_CHANGES};
+                    undefined ->
+                        X =
+                            case proplists:get_value(?REPLICATED_CHANGES, Options) of
+                                true -> ?REPLICATED_CHANGES;
+                                _ -> ?INTERACTIVE_EDIT
+                            end,
+                        {Docs0, X}
+                end,
+            Docs2 = make_att_readers(Docs1),
+            with_db(DbName, Options, {couch_db, update_docs, [Docs2, Options, Type]})
+    end.
 
 get_purge_seq(DbName, Options) ->
     with_db(DbName, Options, {couch_db, get_purge_seq, []}).