You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2021/12/02 22:11:18 UTC
[kudu] 01/02: [scripts] add hard memory limit for mini-cluster processes

This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 5932b9c35dd50df3412ad3a63aef27b6b41506e5
Author: Alexey Serbin <al...@apache.org>
AuthorDate: Tue Nov 30 16:55:09 2021 -0800

    [scripts] add hard memory limit for mini-cluster processes
    
    Before this patch, the start_kudu.sh script ran all the Kudu server
    processes at the same node where every kudu-tserver and kudu-master
    assumed it was the only Kudu process running there, so it would
    automatically self-impose the hard limit to be 80% of all the memory
    available.  However, since there may be many of those, the mini-cluster
    could end up in an OOM condition.
    
    This patch addresses the issue by dividing all the available memory at
    a node between the kudu-tserver processes the script starts, so that
    together they use up to 80% of all available memory (the same threshold
    is used by the auto-detection logic on the amount of memory to consume
    for --memory_limit_hard_bytes in Kudu servers).  The kudu-master
    processes are started without imposing the hard memory limit, assuming
    they usually don't consume a lot of memory: that's to allocate more memory
    to kudu-tserver processes.  This script is supposed to cover a limited
    set of use cases automatically and it's always possible to add flags
    setting or overriding the limits set by the script for kudu-master and
    kudu-tserver processes via the -M/--master-flags and -T/--tserver-flags
    command line options correspondingly.
    
    I verified that the OOM condition that I saw when running a mini-cluster
    in a GCP VM no longer happens with this patch.
    
    Change-Id: I4de6c96fb0227554edbd5e69b29840f84e408326
    Reviewed-on: http://gerrit.cloudera.org:8080/18059
    Tested-by: Kudu Jenkins
    Reviewed-by: Andrew Wong <aw...@cloudera.com>
    Reviewed-by: Attila Bukor <ab...@apache.org>
---
 src/kudu/scripts/start_kudu.sh | 58 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 50 insertions(+), 8 deletions(-)

diff --git a/src/kudu/scripts/start_kudu.sh b/src/kudu/scripts/start_kudu.sh
index db33d4e..cce9316 100755
--- a/src/kudu/scripts/start_kudu.sh
+++ b/src/kudu/scripts/start_kudu.sh
@@ -22,6 +22,7 @@
 ########################################################################
 
 set -e
+set -o pipefail
 ulimit -n 2048
 
 function usage() {
@@ -166,10 +167,38 @@ function set_port_vars_and_print() {
   echo "  HTTP port $HTTP_PORT"
 }
 
-pids=()
+# Print the --memory_limit_hard_bytes flag for one of the $1 Kudu server
+# processes sharing a node: 80% of the node's total memory divided evenly
+# among them. Left alone, each process sets its hard memory limit based on
+# the total amount of memory available, as if it were the only Kudu server
+# process running at the node. Print an empty line, i.e. no flag, if the OS
+# isn't recognized or if the amount of memory or $1 isn't a positive integer,
+# so that no explicit limit is imposed in that case.
+function get_memory_limit_hard_bytes_flag() {
+  local num_processes=$1
+  local mem_size_bytes=0 mem_size_kb=0 valid='^[1-9][0-9]*$'
+  if [[ "$OSTYPE" =~ ^linux ]]; then
+    # MemTotal is reported in kB; a failed read leaves the size at zero.
+    mem_size_kb=$(awk '/^MemTotal/ {print $2}' /proc/meminfo) || mem_size_kb=0
+    mem_size_bytes=$((${mem_size_kb:-0} * 1024))
+  elif [[ "$OSTYPE" =~ ^darwin ]]; then
+    mem_size_bytes=$(sysctl hw.memsize | awk '{print $2}') || mem_size_bytes=0
+  fi
+
+  # Do not set the limit for a non-recognized OS or on non-numeric input.
+  if [[ ! "$mem_size_bytes" =~ $valid || ! "$num_processes" =~ $valid ]]; then
+    echo ""
+    return
+  fi
 
-# Start master server function
+  # Give each process an equal share of 80% of all the available memory.
+  local mem_limit_bytes=$((mem_size_bytes * 4 / 5 / num_processes))
+  echo "--memory_limit_hard_bytes=$mem_limit_bytes"
+}
+
+pids=()
 
+# Start kudu-master process.
 function start_master() {
   create_dirs_and_set_vars $1
   set_port_vars_and_print $1 $2 $3
@@ -183,14 +212,20 @@ function start_master() {
   ARGS="$ARGS --unlock_unsafe_flags"
   ARGS="$ARGS --webserver_port=$HTTP_PORT"
   ARGS="$ARGS --webserver_interface=$IP"
-  if [ -d "$WEBSERVER_DOC_ROOT" ]; then ARGS="$ARGS --webserver_doc_root=$WEBSERVER_DOC_ROOT"; fi
+  if [ -d "$WEBSERVER_DOC_ROOT" ]; then
+    ARGS="$ARGS --webserver_doc_root=$WEBSERVER_DOC_ROOT"
+  fi
+  # NOTE: a kudu-master process doesn't usually consume a lot of memory,
+  #       so the memory hard limit isn't set for them; if kudu-master memory
+  #       consumption becomes an issue, provide the necessary flags for
+  #       kudu-master processes using the --master-flags/-M command line
+  #       option.
   ARGS="$ARGS $EXTRA_MASTER_FLAGS"
   $ARGS &
   pids+=($!)
 }
 
-# Start tablet server function
-
+# Start kudu-tserver process.
 function start_tserver() {
   create_dirs_and_set_vars $1
   set_port_vars_and_print $1 $2 $3
@@ -203,8 +238,16 @@ function start_tserver() {
   ARGS="$ARGS --unlock_unsafe_flags"
   ARGS="$ARGS --webserver_port=$HTTP_PORT"
   ARGS="$ARGS --webserver_interface=$IP"
-  if [ -d "$WEBSERVER_DOC_ROOT" ]; then ARGS="$ARGS --webserver_doc_root=$WEBSERVER_DOC_ROOT"; fi
   ARGS="$ARGS --tserver_master_addrs=$4"
+  if [ -d "$WEBSERVER_DOC_ROOT" ]; then
+    ARGS="$ARGS --webserver_doc_root=$WEBSERVER_DOC_ROOT"
+  fi
+
+  # If applicable, set the memory hard limit.
+  local mem_limit_flag=$(get_memory_limit_hard_bytes_flag $NUM_TSERVERS)
+  if [ -n $mem_limit_flag ]; then
+    ARGS="$ARGS $mem_limit_flag"
+  fi
   ARGS="$ARGS $EXTRA_TSERVER_FLAGS"
   $ARGS &
   pids+=($!)
@@ -235,6 +278,5 @@ for i in $(seq 0 $((NUM_TSERVERS - 1))); do
   start_tserver tserver-$i $TSERVER_RPC_PORT $TSERVER_HTTP_PORT $MASTER_ADDRESSES
 done
 
-# Show status of started processes
-
+# Show the status of the started processes.
 ps -wwo args -p ${pids[@]}