You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by rn...@apache.org on 2023/06/22 13:23:17 UTC

[couchdb] branch out-of-disk-handler updated (53c64d663 -> ff6aaf107)

This is an automated email from the ASF dual-hosted git repository.

rnewson pushed a change to branch out-of-disk-handler
in repository https://gitbox.apache.org/repos/asf/couchdb.git


 discard 53c64d663 WIP Introduce countermeasures as we run out of disk space
     new ff6aaf107 WIP Introduce countermeasures as we run out of disk space

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (53c64d663)
            \
             N -- N -- N   refs/heads/out-of-disk-handler (ff6aaf107)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/ken/src/ken.erl | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)


[couchdb] 01/01: WIP Introduce countermeasures as we run out of disk space

Posted by rn...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

rnewson pushed a commit to branch out-of-disk-handler
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit ff6aaf1076c3c2fa5c8194fd64ef605052102190
Author: Robert Newson <rn...@apache.org>
AuthorDate: Tue Jun 20 11:52:46 2023 +0100

    WIP Introduce countermeasures as we run out of disk space
---
 rel/reltool.config                    |   1 +
 src/chttpd/src/chttpd.erl             |   2 +
 src/couch/priv/stats_descriptions.cfg |   4 +
 src/couch/src/couch_alarm_handler.erl | 165 ++++++++++++++++++++++++++++++++++
 src/couch/src/couch_app.erl           |   3 +
 src/fabric/src/fabric_doc_update.erl  |   2 +
 src/fabric/src/fabric_rpc.erl         |  36 ++++----
 src/ken/src/ken.erl                   |   7 +-
 8 files changed, 204 insertions(+), 16 deletions(-)

diff --git a/rel/reltool.config b/rel/reltool.config
index 0355a0b07..ebb15bb83 100644
--- a/rel/reltool.config
+++ b/rel/reltool.config
@@ -87,6 +87,7 @@
     {app, sasl, [{incl_cond, include}]},
     {app, ssl, [{incl_cond, include}]},
     {app, stdlib, [{incl_cond, include}]},
+    {app, os_mon, [{incl_cond, include}]},
     {app, syntax_tools, [{incl_cond, include}]},
     {app, xmerl, [{incl_cond, include}]},
 
diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl
index 53abc731f..c8e6fdc97 100644
--- a/src/chttpd/src/chttpd.erl
+++ b/src/chttpd/src/chttpd.erl
@@ -1138,6 +1138,8 @@ error_info(timeout) ->
     >>};
 error_info({service_unavailable, Reason}) ->
     {503, <<"service unavailable">>, Reason};
+error_info({insufficient_storage, Reason}) ->
+    {507, <<"insufficient_storage">>, Reason};
 error_info({timeout, _Reason}) ->
     error_info(timeout);
 error_info({'EXIT', {Error, _Stack}}) ->
diff --git a/src/couch/priv/stats_descriptions.cfg b/src/couch/priv/stats_descriptions.cfg
index 6c0d4dad2..1983eed9b 100644
--- a/src/couch/priv/stats_descriptions.cfg
+++ b/src/couch/priv/stats_descriptions.cfg
@@ -266,6 +266,10 @@
     {type, counter},
     {desc, <<"number of HTTP 503 Service unavailable responses">>}
 ]}.
+{[couchdb, httpd_status_codes, 507], [
+    {type, counter},
+    {desc, <<"number of HTTP 507 Insufficient Storage responses">>}
+]}.
 {[couchdb, open_databases], [
     {type, counter},
     {desc,  <<"number of open databases">>}
diff --git a/src/couch/src/couch_alarm_handler.erl b/src/couch/src/couch_alarm_handler.erl
new file mode 100644
index 000000000..8a04fd105
--- /dev/null
+++ b/src/couch/src/couch_alarm_handler.erl
@@ -0,0 +1,165 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_alarm_handler).
+-behaviour(gen_event).
+
+% public api
+-export([
+    database_dir_almost_full/0,
+    view_index_dir_almost_full/0
+]).
+
+% gen_event callbacks
+-export([
+    init/1,
+    handle_event/2,
+    handle_call/2,
+    handle_info/2
+]).
+
+-include_lib("kernel/include/file.hrl").
+
+-record(st, {
+    timer
+}).
+
+%% First element of get_almost_full/0: the database_dir flag.
+database_dir_almost_full() ->
+    element(1, get_almost_full()).
+
+%% Second element of get_almost_full/0: the view_index_dir flag.
+view_index_dir_almost_full() ->
+    element(2, get_almost_full()).
+
+init(_Args) ->
+    {ok, #st{}}.
+
+handle_event({set_alarm, Alarm}, St) ->
+    {ok, set_alarm(Alarm, St)};
+handle_event({clear_alarm, AlarmId}, St) ->
+    {ok, clear_alarm(AlarmId, St)};
+handle_event(_Event, St) ->
+    {ok, St}.
+
+handle_call(_Query, St) ->
+    {ok, {error, bad_query}, St}.
+
+handle_info({disk_almost_full, DatabaseDirAlmostFull, ViewIndexDirAlmostFull}, St) ->
+    set_almost_full(DatabaseDirAlmostFull, ViewIndexDirAlmostFull),
+    {ok, St#st{timer = undefined}};
+handle_info(_Msg, St) ->
+    {ok, St}.
+
+set_alarm({{disk_almost_full, MntOn}, []}, #st{} = St) ->
+    IsDatabaseDir = is_database_dir(MntOn),
+    IsViewIndexDir = is_view_index_dir(MntOn),
+    {DatabaseDirAlmostFull, ViewIndexDirAlmostFull} = get_almost_full(),
+    if
+        IsDatabaseDir andalso IsViewIndexDir ->
+            couch_log:warning(
+                "database_dir and view_index_dir almost full", []
+            ),
+            start_timer(true, true, St);
+        IsDatabaseDir ->
+            couch_log:warning("database_dir almost full", []),
+            start_timer(true, ViewIndexDirAlmostFull, St);
+        IsViewIndexDir ->
+            couch_log:warning("view_index_dir almost full", []),
+            start_timer(DatabaseDirAlmostFull, true, St);
+        true ->
+            St
+    end;
+set_alarm(_Alarm, #st{} = St) ->
+    St.
+
+clear_alarm({disk_almost_full, MntOn}, #st{} = St) ->
+    IsDatabaseDir = is_database_dir(MntOn),
+    IsViewIndexDir = is_view_index_dir(MntOn),
+    {DatabaseDirAlmostFull, ViewIndexDirAlmostFull} = get_almost_full(),
+    if
+        IsDatabaseDir andalso IsViewIndexDir ->
+            couch_log:warning(
+                "database_dir and view_index_dir no longer almost full",
+                []
+            ),
+            start_timer(false, false, St);
+        IsDatabaseDir ->
+            couch_log:warning("database_dir no longer almost full", []),
+            start_timer(false, ViewIndexDirAlmostFull, St);
+        IsViewIndexDir ->
+            couch_log:warning(
+                "view_index_dir no longer almost full", []
+            ),
+            start_timer(DatabaseDirAlmostFull, false, St);
+        true ->
+            St
+    end;
+clear_alarm(_AlarmId, #st{} = St) ->
+    St.
+
+%% Delay the flag update by 5s; a newer request replaces any pending one.
+start_timer(DatabaseDirAlmostFull, ViewIndexDirAlmostFull, #st{timer = undefined} = St) ->
+    case timer:send_after(5000, {disk_almost_full, DatabaseDirAlmostFull, ViewIndexDirAlmostFull}) of
+        {ok, TRef} -> St#st{timer = TRef};
+        {error, Reason} ->  % no timer available: apply the flags right away
+            couch_log:warning("failed to delay for reason ~p", [Reason]),
+            set_almost_full(DatabaseDirAlmostFull, ViewIndexDirAlmostFull),
+            St
+    end;
+start_timer(DatabaseDirAlmostFull, ViewIndexDirAlmostFull, #st{timer = TRef} = St) ->
+    timer:cancel(TRef),
+    start_timer(DatabaseDirAlmostFull, ViewIndexDirAlmostFull, St#st{timer = undefined}).
+
+is_database_dir(MntOn) ->
+    same_device(config:get("couchdb", "database_dir"), MntOn).
+
+is_view_index_dir(MntOn) ->
+    same_device(config:get("couchdb", "view_index_dir"), MntOn).
+
+%% Both paths must stat successfully and report identical device ids.
+same_device(DirA, DirB) ->
+    IdA = device_id(DirA),
+    case device_id(DirB) of
+        {ok, _} = IdB -> IdA =:= IdB;
+        _ -> false
+    end.
+
+%% Returns {ok, {Minor, Major}} taken from the file_info of Dir, or the
+%% {error, Reason} tuple from file:read_file_info/1 unchanged.
+device_id(Dir) ->
+    case file:read_file_info(Dir) of
+        {ok, #file_info{minor_device = Minor, major_device = Major}} -> {ok, {Minor, Major}};
+        {error, _} = Error -> Error
+    end.
+
+get_almost_full() ->
+    <<DbBit:1, ViewBit:1>> = persistent_term:get(key(), <<0:2>>),
+    {DbBit =:= 1, ViewBit =:= 1}.
+
+%% keeping the term small so it fits in a single machine word
+%% is apparently better.
+set_almost_full(false, false) ->
+    couch_log:warning("Removing database_dir and view_index_dir full flag", []),
+    persistent_term:erase(key());
+set_almost_full(true, false) ->
+    couch_log:warning("Setting database_dir full flag", []),
+    persistent_term:put(key(), <<1:1, 0:1>>);
+set_almost_full(false, true) ->
+    couch_log:warning("Setting view_index_dir full flag", []),
+    persistent_term:put(key(), <<0:1, 1:1>>);
+set_almost_full(true, true) ->
+    couch_log:warning("Setting database_dir and view_index_dir full flag", []),
+    persistent_term:put(key(), <<1:1, 1:1>>).
+
+key() ->
+    {?MODULE, disk_almost_full}.
diff --git a/src/couch/src/couch_app.erl b/src/couch/src/couch_app.erl
index 8cd8c8482..1887d1451 100644
--- a/src/couch/src/couch_app.erl
+++ b/src/couch/src/couch_app.erl
@@ -21,6 +21,9 @@
 ]).
 
 start(_Type, _) ->
+    %% register our alarm handler
+    gen_event:swap_handler(alarm_handler, {alarm_handler, swap}, {couch_alarm_handler, ok}),
+
     case couch_sup:start_link() of
         {ok, _} = Resp ->
             {Time, _} = statistics(wall_clock),
diff --git a/src/fabric/src/fabric_doc_update.erl b/src/fabric/src/fabric_doc_update.erl
index 77b424911..695ab07e9 100644
--- a/src/fabric/src/fabric_doc_update.erl
+++ b/src/fabric/src/fabric_doc_update.erl
@@ -112,6 +112,8 @@ handle_message({bad_request, Msg}, _, _) ->
     throw({bad_request, Msg});
 handle_message({forbidden, Msg}, _, _) ->
     throw({forbidden, Msg});
+handle_message({insufficient_storage, Msg}, _, _) ->
+    throw({insufficient_storage, Msg});
 handle_message({request_entity_too_large, Entity}, _, _) ->
     throw({request_entity_too_large, Entity}).
 
diff --git a/src/fabric/src/fabric_rpc.erl b/src/fabric/src/fabric_rpc.erl
index b781eea99..92f04e3fd 100644
--- a/src/fabric/src/fabric_rpc.erl
+++ b/src/fabric/src/fabric_rpc.erl
@@ -274,21 +274,27 @@ get_missing_revs(DbName, IdRevsList, Options) ->
     with_db(DbName, Options, {couch_db, get_missing_revs, [IdRevsList]}).
 
 update_docs(DbName, Docs0, Options) ->
-    {Docs1, Type} =
-        case couch_util:get_value(read_repair, Options) of
-            NodeRevs when is_list(NodeRevs) ->
-                Filtered = read_repair_filter(DbName, Docs0, NodeRevs, Options),
-                {Filtered, ?REPLICATED_CHANGES};
-            undefined ->
-                X =
-                    case proplists:get_value(?REPLICATED_CHANGES, Options) of
-                        true -> ?REPLICATED_CHANGES;
-                        _ -> ?INTERACTIVE_EDIT
-                    end,
-                {Docs0, X}
-        end,
-    Docs2 = make_att_readers(Docs1),
-    with_db(DbName, Options, {couch_db, update_docs, [Docs2, Options, Type]}).
+    %% only if there's room
+    case couch_alarm_handler:database_dir_almost_full() of
+        true ->
+            rexi:reply({insufficient_storage, <<"database_dir almost full">>});
+        false ->
+            {Docs1, Type} =
+                case couch_util:get_value(read_repair, Options) of
+                    NodeRevs when is_list(NodeRevs) ->
+                        Filtered = read_repair_filter(DbName, Docs0, NodeRevs, Options),
+                        {Filtered, ?REPLICATED_CHANGES};
+                    undefined ->
+                        X =
+                            case proplists:get_value(?REPLICATED_CHANGES, Options) of
+                                true -> ?REPLICATED_CHANGES;
+                                _ -> ?INTERACTIVE_EDIT
+                            end,
+                        {Docs0, X}
+                end,
+            Docs2 = make_att_readers(Docs1),
+            with_db(DbName, Options, {couch_db, update_docs, [Docs2, Options, Type]})
+    end.
 
 get_purge_seq(DbName, Options) ->
     with_db(DbName, Options, {couch_db, get_purge_seq, []}).
diff --git a/src/ken/src/ken.erl b/src/ken/src/ken.erl
index 87a724ba1..9c26c60b7 100644
--- a/src/ken/src/ken.erl
+++ b/src/ken/src/ken.erl
@@ -18,7 +18,12 @@
 
 % Add a database shard to be indexed.
 add(DbName) ->
-    ken_server:add(DbName).
+    case couch_alarm_handler:view_index_dir_almost_full() of
+        true ->
+            ok;
+        false ->
+            ken_server:add(DbName)
+    end.
 
 % Remove all pending jobs for a database shard.
 remove(DbName) ->