You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by od...@apache.org on 2013/08/12 19:08:04 UTC
git commit: AMBARI-2867. WARNING error in nagios alert for resourcemanager. (Vitaly Brodetskyi via odiachenko)
Updated Branches:
refs/heads/trunk f547305d2 -> dd35a6d54
AMBARI-2867. WARNING error in nagios alert for resourcemanager. (Vitaly Brodetskyi via odiachenko)
Project: http://git-wip-us.apache.org/repos/asf/incubator-ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ambari/commit/dd35a6d5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ambari/tree/dd35a6d5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ambari/diff/dd35a6d5
Branch: refs/heads/trunk
Commit: dd35a6d54413a35bac26cd0503483b371a66e9be
Parents: f547305
Author: Oleksandr Diachenko <od...@hortonworks.com>
Authored: Mon Aug 12 20:07:37 2013 +0300
Committer: Oleksandr Diachenko <od...@hortonworks.com>
Committed: Mon Aug 12 20:07:37 2013 +0300
----------------------------------------------------------------------
.../hdp-nagios/files/check_nodemanager_health.sh | 4 ++--
.../check_resourcemanager_nodes_percentage.sh | 17 +++++++++++++----
2 files changed, 15 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/dd35a6d5/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_nodemanager_health.sh
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_nodemanager_health.sh b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_nodemanager_health.sh
index ca13909..2a26f4e 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_nodemanager_health.sh
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_nodemanager_health.sh
@@ -23,10 +23,10 @@ HOST=$1
PORT=$2
NODEMANAGER_URL="http://$HOST:$PORT/ws/v1/node/info"
export PATH="/usr/bin:$PATH"
-RESPONSE=`curl $NODEMANAGER_URL`
+RESPONSE=`curl -s $NODEMANAGER_URL`
if [[ "$RESPONSE" == *'"nodeHealthy":true'* ]]; then
echo "OK: nodemanager healthy true";
exit 0;
fi
echo "CRITICAL: nodemanager healthy false";
-exit 2;
\ No newline at end of file
+exit 2;
http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/dd35a6d5/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_resourcemanager_nodes_percentage.sh
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_resourcemanager_nodes_percentage.sh b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_resourcemanager_nodes_percentage.sh
index 48a2aae..cc899fa 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_resourcemanager_nodes_percentage.sh
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_resourcemanager_nodes_percentage.sh
@@ -26,18 +26,27 @@ NODE_STATUS=$3
WARN_PERCENT=$4
CRIT_PERCENT=$5
NODES="Nodes"
+
RESOURCEMANAGER_URL="http://$HOST:$PORT/ws/v1/cluster/metrics"
export PATH="/usr/bin:$PATH"
-RESPONSE=`curl $RESOURCEMANAGER_URL`
-#code below is parsing RESPONSE that we get from resourcemanager api, for number between "totalNodes": and ','
-TOTAL_NODES_NUM=`echo "$RESPONSE" | sed -nre 's/^.*"totalNodes":([[:digit:]]+).*$/\1/gp'`
+RESPONSE=`curl -s $RESOURCEMANAGER_URL`
+
+#code below is parsing RESPONSE that we get from resourcemanager api, for number between "activeNodes": and ','
+ACTIVE_NODES=`echo "$RESPONSE" | sed -nre 's/^.*"activeNodes":([[:digit:]]+).*$/\1/gp'`
+LOST_NODES=`echo "$RESPONSE" | sed -nre 's/^.*"lostNodes":([[:digit:]]+).*$/\1/gp'`
+UNHEALTHY_NODES=`echo "$RESPONSE" | sed -nre 's/^.*"unhealthyNodes":([[:digit:]]+).*$/\1/gp'`
+DECOMMISSIONED_NODES=`echo "$RESPONSE" | sed -nre 's/^.*"decommissionedNodes":([[:digit:]]+).*$/\1/gp'`
+REBOOTED_NODES=`echo "$RESPONSE" | sed -nre 's/^.*"rebootedNodes":([[:digit:]]+).*$/\1/gp'`
+
+TOTAL_NODES_NUM=$(($ACTIVE_NODES+$LOST_NODES+$UNHEALTHY_NODES+$DECOMMISSIONED_NODES+$REBOOTED_NODES))
NODES_NUM=`echo "$RESPONSE" | sed -nre "s/^.*\"$NODE_STATUS$NODES\":([[:digit:]]+).*$/\1/gp"`
PERCENT=$(($NODES_NUM*100/$TOTAL_NODES_NUM))
+
if [[ "$PERCENT" -lt "$WARN_PERCENT" ]]; then
echo "OK: total:<$TOTAL_NODES_NUM>, affected:<$NODES_NUM>"
exit 0;
elif [[ "$PERCENT" -lt "$CRIT_PERCENT" ]]; then
- echo "WARN: total:<$TOTAL_NODES_NUM>, affected:<$NODES_NUM>"
+ echo "WARNING: total:<$TOTAL_NODES_NUM>, affected:<$NODES_NUM>"
exit 1;
else
echo "CRITICAL: total:<$TOTAL_NODES_NUM>, affected:<$NODES_NUM>"