You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@accumulo.apache.org by GitBox <gi...@apache.org> on 2022/01/11 20:16:11 UTC

[GitHub] [accumulo-testing] DomGarguilo commented on a change in pull request #184: Fix broken agitator scripts

DomGarguilo commented on a change in pull request #184:
URL: https://github.com/apache/accumulo-testing/pull/184#discussion_r782494051



##########
File path: bin/agitator
##########
@@ -30,6 +30,80 @@ Possible commands:
 EOF
 }
 
+# Starts an app-specific agitator: in an endless loop it sleeps, kills the app
+# on one or more hosts over ssh, sleeps again, and then restarts it on the
+# same hosts.
+# usage: start_app_agitator app_name kill_sleep_time restart_sleep_time min_kill max_kill start_cmd kill_cmd
+#   app_name            name handed to get_app_hosts and used in log output
+#   kill_sleep_time     minutes to sleep before each kill round
+#   restart_sleep_time  minutes to sleep between killing and restarting
+#   min_kill, max_kill  inclusive bounds on how many hosts are killed per round
+#   start_cmd, kill_cmd commands executed over ssh on every selected host
+# Requires that a list of hosts be set in $APP_HOSTS (read via get_app_hosts).
+# Exits with status 1 when no hosts are found or the kill bounds are invalid.
+function start_app_agitator() {
+  local app_name=$1
+  local kill_sleep_time=$2
+  local restart_sleep_time=$3
+  local min_kill=$4
+  local max_kill=$5
+  local start_cmd=$6
+  local kill_cmd=$7
+  local hosts_array=()
+  local num_hosts
+  local node_to_kill
+  local nodes_to_kill_array=()
+  local candidates=()
+  local num_to_kill
+  local idx
+  local pick
+  local T
+  local ENV_VARS="ACCUMULO_HOME=$ACCUMULO_HOME ZOOKEEPER_HOME=$ZOOKEEPER_HOME HADOOP_HOME=$HADOOP_HOME JAVA_HOME=$JAVA_HOME"
+
+  # Split the host list on any whitespace. Reading up to EOF (-d '') makes
+  # read return non-zero, hence the '|| true'; the array is still populated.
+  # This keeps a trailing newline from becoming part of the last host name.
+  IFS=$' \t\n' read -r -d '' -a hosts_array < <(get_app_hosts "$app_name") || true
+  num_hosts=${#hosts_array[@]}
+
+  if (( num_hosts == 0 )); then
+    echo "ERROR: No hosts were found in env for $app_name"
+    exit 1
+  fi
+  if (( max_kill > num_hosts )); then
+    echo "ERROR: Max kill $max_kill greater then number of hosts $num_hosts"
+    exit 1
+  fi
+  # Reject ranges that would cause a division by zero or an impossible pick.
+  if (( min_kill < 0 || max_kill < 1 || min_kill > max_kill )); then
+    echo "ERROR: Invalid kill range: min kill $min_kill, max kill $max_kill"
+    exit 1
+  fi
+
+  T="$(date +'%Y%m%d %H:%M:%S')"
+  echo "$T Starting $app_name agitation $kill_sleep_time $restart_sleep_time $min_kill $max_kill for ${hosts_array[*]}"
+  while true; do
+    T="$(date +'%Y%m%d %H:%M:%S')"
+    echo "$T Sleeping for $kill_sleep_time minutes"
+    sleep $((kill_sleep_time * 60))
+
+    T="$(date +'%Y%m%d %H:%M:%S')"
+    if (( max_kill == 1 )); then
+      # With a single kill allowed, the first listed host is always the target.
+      node_to_kill=${hosts_array[0]}
+      echo "$T Killing $app_name at $node_to_kill"
+      ssh "$node_to_kill" "$kill_cmd"
+    else
+      # Uniform pick from the inclusive range [min_kill, max_kill].
+      num_to_kill=$((min_kill + RANDOM % (max_kill - min_kill + 1)))
+      # Start every round from a fresh selection; a partial Fisher-Yates
+      # shuffle over a copy of the host list yields distinct hosts and is
+      # guaranteed to terminate because num_to_kill <= num_hosts.
+      nodes_to_kill_array=()
+      candidates=("${hosts_array[@]}")
+      for ((idx = 0; idx < num_to_kill; idx++)); do
+        pick=$((idx + RANDOM % (num_hosts - idx)))
+        node_to_kill=${candidates[pick]}
+        candidates[pick]=${candidates[idx]}
+        candidates[idx]=$node_to_kill
+        nodes_to_kill_array+=("$node_to_kill")
+      done
+      echo "$T Killing ${#nodes_to_kill_array[@]} $app_name nodes"
+      for node_to_kill in "${nodes_to_kill_array[@]}"; do
+        ssh "$node_to_kill" "$kill_cmd"
+      done
+    fi
+
+    T="$(date +'%Y%m%d %H:%M:%S')"
+    echo "$T Sleeping for $restart_sleep_time minutes."
+    sleep $((restart_sleep_time * 60))
+
+    T="$(date +'%Y%m%d %H:%M:%S')"
+    if (( max_kill == 1 )); then
+      # node_to_kill still names the host killed earlier in this round.
+      echo "$T Restarting $app_name at $node_to_kill"
+      ssh "$node_to_kill" "bash -c '${ENV_VARS} $start_cmd'"
+    else
+      # Restart exactly the hosts that were killed in this round.
+      for node_to_kill in "${nodes_to_kill_array[@]}"; do
+        echo "$T Restarting $app_name node at ${node_to_kill}"
+        ssh "$node_to_kill" "bash -c '${ENV_VARS} $start_cmd'"
+      done
+    fi
+  done
+}
+
 function start_agitator() {
   ## check that pssh is installed, falling back to parallel-ssh if needed
   ## make sure to export it, so it can be seen inside the agitator perl script

Review comment:
       Might need to remove/edit this comment to remove reference to the perl script.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@accumulo.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org