You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by od...@apache.org on 2013/08/12 19:08:04 UTC

git commit: AMBARI-2867. WARNING error in nagios alert for resourcemanager. (Vitaly Brodetskyi via odiachenko)

Updated Branches:
  refs/heads/trunk f547305d2 -> dd35a6d54


AMBARI-2867. WARNING error in nagios alert for resourcemanager. (Vitaly Brodetskyi via odiachenko)


Project: http://git-wip-us.apache.org/repos/asf/incubator-ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ambari/commit/dd35a6d5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ambari/tree/dd35a6d5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ambari/diff/dd35a6d5

Branch: refs/heads/trunk
Commit: dd35a6d54413a35bac26cd0503483b371a66e9be
Parents: f547305
Author: Oleksandr Diachenko <od...@hortonworks.com>
Authored: Mon Aug 12 20:07:37 2013 +0300
Committer: Oleksandr Diachenko <od...@hortonworks.com>
Committed: Mon Aug 12 20:07:37 2013 +0300

----------------------------------------------------------------------
 .../hdp-nagios/files/check_nodemanager_health.sh   |  4 ++--
 .../check_resourcemanager_nodes_percentage.sh      | 17 +++++++++++++----
 2 files changed, 15 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/dd35a6d5/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_nodemanager_health.sh
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_nodemanager_health.sh b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_nodemanager_health.sh
index ca13909..2a26f4e 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_nodemanager_health.sh
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_nodemanager_health.sh
@@ -23,10 +23,10 @@ HOST=$1
 PORT=$2
 NODEMANAGER_URL="http://$HOST:$PORT/ws/v1/node/info"
 export PATH="/usr/bin:$PATH"
-RESPONSE=`curl $NODEMANAGER_URL`
+RESPONSE=`curl -s $NODEMANAGER_URL`
 if [[ "$RESPONSE" == *'"nodeHealthy":true'* ]]; then 
   echo "OK: nodemanager healthy true";
   exit 0;
 fi
 echo "CRITICAL: nodemanager healthy false";
-exit 2;
\ No newline at end of file
+exit 2;

http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/dd35a6d5/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_resourcemanager_nodes_percentage.sh
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_resourcemanager_nodes_percentage.sh b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_resourcemanager_nodes_percentage.sh
index 48a2aae..cc899fa 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_resourcemanager_nodes_percentage.sh
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_resourcemanager_nodes_percentage.sh
@@ -26,18 +26,27 @@ NODE_STATUS=$3
 WARN_PERCENT=$4
 CRIT_PERCENT=$5
 NODES="Nodes"
+
 RESOURCEMANAGER_URL="http://$HOST:$PORT/ws/v1/cluster/metrics"
 export PATH="/usr/bin:$PATH"
-RESPONSE=`curl $RESOURCEMANAGER_URL`
-#code below is parsing RESPONSE that we get from resourcemanager api, for number between "totalNodes": and ','
-TOTAL_NODES_NUM=`echo "$RESPONSE" | sed -nre 's/^.*"totalNodes":([[:digit:]]+).*$/\1/gp'`
+RESPONSE=`curl -s $RESOURCEMANAGER_URL`
+
+#code below is parsing RESPONSE that we get from resourcemanager api, for number between "activeNodes": and ','
+ACTIVE_NODES=`echo "$RESPONSE" | sed -nre 's/^.*"activeNodes":([[:digit:]]+).*$/\1/gp'`
+LOST_NODES=`echo "$RESPONSE" | sed -nre 's/^.*"lostNodes":([[:digit:]]+).*$/\1/gp'`
+UNHEALTHY_NODES=`echo "$RESPONSE" | sed -nre 's/^.*"unhealthyNodes":([[:digit:]]+).*$/\1/gp'`
+DECOMMISSIONED_NODES=`echo "$RESPONSE" | sed -nre 's/^.*"decommissionedNodes":([[:digit:]]+).*$/\1/gp'`
+REBOOTED_NODES=`echo "$RESPONSE" | sed -nre 's/^.*"rebootedNodes":([[:digit:]]+).*$/\1/gp'`
+
+TOTAL_NODES_NUM=$(($ACTIVE_NODES+$LOST_NODES+$UNHEALTHY_NODES+$DECOMMISSIONED_NODES+$REBOOTED_NODES))
 NODES_NUM=`echo "$RESPONSE" | sed -nre "s/^.*\"$NODE_STATUS$NODES\":([[:digit:]]+).*$/\1/gp"`
 PERCENT=$(($NODES_NUM*100/$TOTAL_NODES_NUM))
+
 if [[ "$PERCENT" -lt "$WARN_PERCENT" ]]; then
   echo "OK: total:<$TOTAL_NODES_NUM>, affected:<$NODES_NUM>"
   exit 0;
 elif [[ "$PERCENT" -lt "$CRIT_PERCENT" ]]; then
-  echo "WARN: total:<$TOTAL_NODES_NUM>, affected:<$NODES_NUM>"
+  echo "WARNING: total:<$TOTAL_NODES_NUM>, affected:<$NODES_NUM>"
   exit 1;
 else 
   echo "CRITICAL: total:<$TOTAL_NODES_NUM>, affected:<$NODES_NUM>"