You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by rn...@apache.org on 2023/06/21 16:47:17 UTC
[couchdb] 01/01: WIP Introduce countermeasures as we run out of disk space
This is an automated email from the ASF dual-hosted git repository.
rnewson pushed a commit to branch out-of-disk-handler
in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit ded3c5b6dd5d0be51e96e74c53fcd377a2aa908d
Author: Robert Newson <rn...@apache.org>
AuthorDate: Tue Jun 20 11:52:46 2023 +0100
WIP Introduce countermeasures as we run out of disk space
---
rel/reltool.config | 1 +
src/chttpd/src/chttpd.erl | 2 +
src/couch/priv/stats_descriptions.cfg | 4 ++
src/couch/src/couch_alarm_handler.erl | 127 ++++++++++++++++++++++++++++++++++
src/couch/src/couch_app.erl | 3 +
src/fabric/src/fabric_doc_update.erl | 2 +
src/fabric/src/fabric_rpc.erl | 36 ++++++----
7 files changed, 160 insertions(+), 15 deletions(-)
diff --git a/rel/reltool.config b/rel/reltool.config
index 0355a0b07..ebb15bb83 100644
--- a/rel/reltool.config
+++ b/rel/reltool.config
@@ -87,6 +87,7 @@
{app, sasl, [{incl_cond, include}]},
{app, ssl, [{incl_cond, include}]},
{app, stdlib, [{incl_cond, include}]},
+ {app, os_mon, [{incl_cond, include}]},
{app, syntax_tools, [{incl_cond, include}]},
{app, xmerl, [{incl_cond, include}]},
diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl
index 53abc731f..c8e6fdc97 100644
--- a/src/chttpd/src/chttpd.erl
+++ b/src/chttpd/src/chttpd.erl
@@ -1138,6 +1138,8 @@ error_info(timeout) ->
>>};
error_info({service_unavailable, Reason}) ->
{503, <<"service unavailable">>, Reason};
+error_info({insufficient_storage, Reason}) -> % maps to HTTP 507 Insufficient Storage
+    {507, <<"insufficient_storage">>, Reason};
error_info({timeout, _Reason}) ->
error_info(timeout);
error_info({'EXIT', {Error, _Stack}}) ->
diff --git a/src/couch/priv/stats_descriptions.cfg b/src/couch/priv/stats_descriptions.cfg
index 6c0d4dad2..1983eed9b 100644
--- a/src/couch/priv/stats_descriptions.cfg
+++ b/src/couch/priv/stats_descriptions.cfg
@@ -266,6 +266,10 @@
{type, counter},
{desc, <<"number of HTTP 503 Service unavailable responses">>}
]}.
+{[couchdb, httpd_status_codes, 507], [
+ {type, counter},
+ {desc, <<"number of HTTP 507 Insufficient Storage responses">>}
+]}.
{[couchdb, open_databases], [
{type, counter},
{desc, <<"number of open databases">>}
diff --git a/src/couch/src/couch_alarm_handler.erl b/src/couch/src/couch_alarm_handler.erl
new file mode 100644
index 000000000..0bbb59ee6
--- /dev/null
+++ b/src/couch/src/couch_alarm_handler.erl
@@ -0,0 +1,127 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_alarm_handler).
+-behaviour(gen_event).
+
+% public api
+-export([
+ database_dir_almost_full/0,
+ view_index_dir_almost_full/0
+]).
+
+% gen_event callbacks
+-export([
+ init/1,
+ handle_event/2,
+ handle_call/2
+]).
+
+-include_lib("kernel/include/file.hrl").
+
+database_dir_almost_full() ->
+    <<DbFlag:1, _ViewFlag:1>> = get_almost_full(),
+    DbFlag =:= 1. % the first of the two stored bits is the database_dir flag
+
+view_index_dir_almost_full() ->
+    <<_DbFlag:1, ViewFlag:1>> = get_almost_full(),
+    ViewFlag =:= 1. % the second of the two stored bits is the view_index_dir flag
+
+init(_Ignored) ->
+    {ok, nil}. % no handler state is kept; the flags are stored via persistent_term
+
+handle_event({set_alarm, Raised}, State) ->
+    set_alarm(Raised), % only disk_almost_full alarms change anything
+    {ok, State};
+handle_event({clear_alarm, ClearedId}, State) ->
+    clear_alarm(ClearedId),
+    {ok, State};
+handle_event(_Unhandled, State) ->
+    {ok, State}. % every other event is ignored
+
+handle_call(_Request, State) ->
+    {ok, {error, bad_query}, State}. % this handler answers no queries
+
+set_alarm({{disk_almost_full, MountPoint}, []}) ->
+    OnDbDevice = is_database_dir(MountPoint),
+    OnViewDevice = is_view_index_dir(MountPoint),
+    <<DbFlag:1, ViewFlag:1>> = get_almost_full(), % current flags, so the untouched one is kept
+    case {OnDbDevice, OnViewDevice} of
+        {true, true} ->
+            couch_log:warning("database_dir and view_index_dir almost full, activating countermeasures", []),
+            put_almost_full(1, 1);
+        {true, false} ->
+            couch_log:warning("database_dir almost full, activating countermeasures", []),
+            put_almost_full(1, ViewFlag);
+        {false, true} ->
+            couch_log:warning("view_index_dir almost full, activating countermeasures", []),
+            put_almost_full(DbFlag, 1);
+        {false, false} -> % the alarm is for a device holding neither directory
+            ok
+    end;
+set_alarm(_Alarm) -> % alarms of any other shape are ignored
+    ok.
+
+clear_alarm({disk_almost_full, MountPoint}) ->
+    OnDbDevice = is_database_dir(MountPoint),
+    OnViewDevice = is_view_index_dir(MountPoint),
+    <<DbFlag:1, ViewFlag:1>> = get_almost_full(), % current flags, so the untouched one is kept
+    case {OnDbDevice, OnViewDevice} of
+        {true, true} ->
+            couch_log:warning("database_dir and view_index_dir no longer almost full, rescinding countermeasures", []),
+            erase_almost_full();
+        {true, false} ->
+            couch_log:warning("database_dir no longer almost full, rescinding countermeasures", []),
+            put_almost_full(0, ViewFlag);
+        {false, true} ->
+            couch_log:warning("view_index_dir no longer almost full, rescinding countermeasures", []),
+            put_almost_full(DbFlag, 0);
+        {false, false} -> % the alarm was for a device holding neither directory
+            ok
+    end;
+clear_alarm(_AlarmId) -> % alarm ids of any other shape are ignored
+    ok.
+
+is_database_dir(MountPoint) -> % does MountPoint share a device with the configured database_dir?
+    same_device(config:get("couchdb", "database_dir"), MountPoint).
+
+is_view_index_dir(MountPoint) -> % does MountPoint share a device with the configured view_index_dir?
+    same_device(config:get("couchdb", "view_index_dir"), MountPoint).
+
+% Reports whether two paths resolve to the same device id.
+same_device(PathA, PathB) ->
+    ids_match(device_id(PathA), device_id(PathB)).
+
+% true only when both lookups succeeded and reported the same device id
+ids_match({ok, Id}, {ok, Id}) -> true;
+ids_match(_ResultA, _ResultB) -> false.
+
+device_id(Path) ->
+    case file:read_file_info(Path) of
+        {ok, #file_info{minor_device = Minor, major_device = Major}} ->
+            {ok, {Minor, Major}}; % the id is the {minor, major} device number pair
+        {error, _Reason} = Failure ->
+            Failure
+    end.
+
+
+get_almost_full() ->
+    persistent_term:get(key(), <<0:1, 0:1>>). % both flags read as clear when nothing is stored
+
+put_almost_full(DbFlag, ViewFlag) ->
+    persistent_term:put(key(), <<DbFlag:1, ViewFlag:1>>). % database_dir bit first, view_index_dir bit second
+
+erase_almost_full() ->
+ persistent_term:erase(key()). % removes the stored flag pair entirely
+
+key() ->
+ {?MODULE, almost_full}. % persistent_term key, scoped by this module's name
diff --git a/src/couch/src/couch_app.erl b/src/couch/src/couch_app.erl
index 8cd8c8482..1887d1451 100644
--- a/src/couch/src/couch_app.erl
+++ b/src/couch/src/couch_app.erl
@@ -21,6 +21,9 @@
]).
start(_Type, _) ->
+ %% register our alarm handler
+ gen_event:swap_handler(alarm_handler, {alarm_handler, swap}, {couch_alarm_handler, ok}),
+
case couch_sup:start_link() of
{ok, _} = Resp ->
{Time, _} = statistics(wall_clock),
diff --git a/src/fabric/src/fabric_doc_update.erl b/src/fabric/src/fabric_doc_update.erl
index 77b424911..695ab07e9 100644
--- a/src/fabric/src/fabric_doc_update.erl
+++ b/src/fabric/src/fabric_doc_update.erl
@@ -112,6 +112,8 @@ handle_message({bad_request, Msg}, _, _) ->
throw({bad_request, Msg});
handle_message({forbidden, Msg}, _, _) ->
throw({forbidden, Msg});
+handle_message({insufficient_storage, Msg}, _, _) ->
+ throw({insufficient_storage, Msg});
handle_message({request_entity_too_large, Entity}, _, _) ->
throw({request_entity_too_large, Entity}).
diff --git a/src/fabric/src/fabric_rpc.erl b/src/fabric/src/fabric_rpc.erl
index b781eea99..92f04e3fd 100644
--- a/src/fabric/src/fabric_rpc.erl
+++ b/src/fabric/src/fabric_rpc.erl
@@ -274,21 +274,27 @@ get_missing_revs(DbName, IdRevsList, Options) ->
with_db(DbName, Options, {couch_db, get_missing_revs, [IdRevsList]}).
update_docs(DbName, Docs0, Options) ->
- {Docs1, Type} =
- case couch_util:get_value(read_repair, Options) of
- NodeRevs when is_list(NodeRevs) ->
- Filtered = read_repair_filter(DbName, Docs0, NodeRevs, Options),
- {Filtered, ?REPLICATED_CHANGES};
- undefined ->
- X =
- case proplists:get_value(?REPLICATED_CHANGES, Options) of
- true -> ?REPLICATED_CHANGES;
- _ -> ?INTERACTIVE_EDIT
- end,
- {Docs0, X}
- end,
- Docs2 = make_att_readers(Docs1),
- with_db(DbName, Options, {couch_db, update_docs, [Docs2, Options, Type]}).
+ %% reply insufficient_storage instead of writing while couch_alarm_handler flags database_dir as almost full
+ case couch_alarm_handler:database_dir_almost_full() of
+ true ->
+ rexi:reply({insufficient_storage, <<"database_dir almost full">>});
+ false ->
+ {Docs1, Type} =
+ case couch_util:get_value(read_repair, Options) of
+ NodeRevs when is_list(NodeRevs) ->
+ Filtered = read_repair_filter(DbName, Docs0, NodeRevs, Options),
+ {Filtered, ?REPLICATED_CHANGES};
+ undefined ->
+ X =
+ case proplists:get_value(?REPLICATED_CHANGES, Options) of
+ true -> ?REPLICATED_CHANGES;
+ _ -> ?INTERACTIVE_EDIT
+ end,
+ {Docs0, X}
+ end,
+ Docs2 = make_att_readers(Docs1),
+ with_db(DbName, Options, {couch_db, update_docs, [Docs2, Options, Type]})
+ end.
get_purge_seq(DbName, Options) ->
with_db(DbName, Options, {couch_db, get_purge_seq, []}).