You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2021/12/02 22:11:18 UTC
[kudu] 01/02: [scripts] add hard memory limit for mini-cluster processes
This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 5932b9c35dd50df3412ad3a63aef27b6b41506e5
Author: Alexey Serbin <al...@apache.org>
AuthorDate: Tue Nov 30 16:55:09 2021 -0800
[scripts] add hard memory limit for mini-cluster processes
Before this patch, the start_kudu.sh script ran all the Kudu server
processes on the same node, yet every kudu-tserver and kudu-master
assumed it was the only Kudu process running there, so each of them
would automatically set its own hard memory limit to 80% of all the
memory available. Since there may be many such processes, the
mini-cluster could end up in an OOM condition.
This patch addresses the issue by dividing the memory available at
a node among the kudu-tserver processes the script starts, so that
together they use up to 80% of all available memory (the same threshold
is used by the auto-detection logic that picks the value of
--memory_limit_hard_bytes in Kudu servers). The kudu-master processes
are started without a hard memory limit on the assumption that they
usually don't consume a lot of memory; this leaves more memory for
the kudu-tserver processes. The script is meant to cover a limited
set of use cases automatically, and it's always possible to add flags
setting or overriding the limits chosen by the script for kudu-master and
kudu-tserver processes via the -M/--master-flags and -T/--tserver-flags
command line options, respectively.
I verified that the OOM condition that I saw when running a mini-cluster
in a GCP VM no longer happens with this patch.
Change-Id: I4de6c96fb0227554edbd5e69b29840f84e408326
Reviewed-on: http://gerrit.cloudera.org:8080/18059
Tested-by: Kudu Jenkins
Reviewed-by: Andrew Wong <aw...@cloudera.com>
Reviewed-by: Attila Bukor <ab...@apache.org>
---
src/kudu/scripts/start_kudu.sh | 58 ++++++++++++++++++++++++++++++++++++------
1 file changed, 50 insertions(+), 8 deletions(-)
diff --git a/src/kudu/scripts/start_kudu.sh b/src/kudu/scripts/start_kudu.sh
index db33d4e..cce9316 100755
--- a/src/kudu/scripts/start_kudu.sh
+++ b/src/kudu/scripts/start_kudu.sh
@@ -22,6 +22,7 @@
########################################################################
set -e
+set -o pipefail
ulimit -n 2048
function usage() {
@@ -166,10 +167,38 @@ function set_port_vars_and_print() {
echo " HTTP port $HTTP_PORT"
}
-pids=()
+# Print the flag that sets the hard memory limit for each of the Kudu server
+# processes sharing a node. A Kudu server can derive its hard memory limit
+# from the total amount of memory available on its own, but that provision
+# assumes it is the only Kudu server process running at the node. Since
+# several such processes are started side by side here, 80% of the node's
+# memory is divided among them: the only argument is the number of processes
+# to divide it among. An empty string is printed for an unrecognized OS.
+function get_memory_limit_hard_bytes_flag() {
+ local num_processes=$1
+ local mem_size_bytes=0
+ if [[ "$OSTYPE" =~ ^linux ]]; then
+ local mem_size_kb=$(grep -E '^MemTotal' /proc/meminfo | awk '{print $2}')
+ mem_size_bytes=$((mem_size_kb * 1024))
+ elif [[ "$OSTYPE" =~ ^darwin ]]; then
+ mem_size_bytes=$(sysctl hw.memsize | awk '{print $2}')
+ fi
+
+ # Do not set the limit for a non-recognized OS.
+ if [ $mem_size_bytes -eq 0 ]; then
+ echo ""
+ return
+ fi
-# Start master server function
+ # Allocate 80% of all available memory to be used by all the Kudu processes.
+ local mem_limit_bytes=$((mem_size_bytes * 4 / 5))
+ mem_limit_bytes=$((mem_limit_bytes / num_processes))
+ echo "--memory_limit_hard_bytes=$mem_limit_bytes"
+}
+
+pids=()
+# Start kudu-master process.
function start_master() {
create_dirs_and_set_vars $1
set_port_vars_and_print $1 $2 $3
@@ -183,14 +212,20 @@ function start_master() {
ARGS="$ARGS --unlock_unsafe_flags"
ARGS="$ARGS --webserver_port=$HTTP_PORT"
ARGS="$ARGS --webserver_interface=$IP"
- if [ -d "$WEBSERVER_DOC_ROOT" ]; then ARGS="$ARGS --webserver_doc_root=$WEBSERVER_DOC_ROOT"; fi
+ if [ -d "$WEBSERVER_DOC_ROOT" ]; then
+ ARGS="$ARGS --webserver_doc_root=$WEBSERVER_DOC_ROOT"
+ fi
+ # NOTE: a kudu-master process doesn't usually consume a lot of memory,
+ # so the memory hard limit isn't set for them; if kudu-master memory
+ # consumption becomes an issue, provide the necessary flags for
+ # kudu-master processes using the --master-flags/-M command line
+ # option.
ARGS="$ARGS $EXTRA_MASTER_FLAGS"
$ARGS &
pids+=($!)
}
-# Start tablet server function
-
+# Start kudu-tserver process.
function start_tserver() {
create_dirs_and_set_vars $1
set_port_vars_and_print $1 $2 $3
@@ -203,8 +238,16 @@ function start_tserver() {
ARGS="$ARGS --unlock_unsafe_flags"
ARGS="$ARGS --webserver_port=$HTTP_PORT"
ARGS="$ARGS --webserver_interface=$IP"
- if [ -d "$WEBSERVER_DOC_ROOT" ]; then ARGS="$ARGS --webserver_doc_root=$WEBSERVER_DOC_ROOT"; fi
ARGS="$ARGS --tserver_master_addrs=$4"
+ if [ -d "$WEBSERVER_DOC_ROOT" ]; then
+ ARGS="$ARGS --webserver_doc_root=$WEBSERVER_DOC_ROOT"
+ fi
+
+ # If applicable, set the memory hard limit.
+ local mem_limit_flag=$(get_memory_limit_hard_bytes_flag $NUM_TSERVERS)
+ if [ -n $mem_limit_flag ]; then
+ ARGS="$ARGS $mem_limit_flag"
+ fi
ARGS="$ARGS $EXTRA_TSERVER_FLAGS"
$ARGS &
pids+=($!)
@@ -235,6 +278,5 @@ for i in $(seq 0 $((NUM_TSERVERS - 1))); do
start_tserver tserver-$i $TSERVER_RPC_PORT $TSERVER_HTTP_PORT $MASTER_ADDRESSES
done
-# Show status of started processes
-
+# Show the status of the started processes.
ps -wwo args -p ${pids[@]}