You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by vg...@apache.org on 2012/05/15 03:10:47 UTC
svn commit: r1338497 - in /incubator/ambari/branches/ambari-186: CHANGES.txt
hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
mon_dashboard/src/addOns/nagios/scripts/nagios_alerts.php
Author: vgogate
Date: Tue May 15 01:10:46 2012
New Revision: 1338497
URL: http://svn.apache.org/viewvc?rev=1338497&view=rev
Log:
AMBARI-222. Remove the word alert from all the Nagios alerts descriptions by vgogate
Modified:
incubator/ambari/branches/ambari-186/CHANGES.txt
incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/nagios/scripts/nagios_alerts.php
Modified: incubator/ambari/branches/ambari-186/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/CHANGES.txt?rev=1338497&r1=1338496&r2=1338497&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/CHANGES.txt (original)
+++ incubator/ambari/branches/ambari-186/CHANGES.txt Tue May 15 01:10:46 2012
@@ -2,6 +2,8 @@ Ambari Change log
Release 0.x.x - unreleased
+ AMBARI-222. Remove the word alert from all the Nagios alerts descriptions. (vgogate)
+
AMBARI-221. Service fails to set its state to failed if a component fails to be acted upon (Hitesh via vgogate)
AMBARI-220. Alerts table semantic difference at different levels (vgogate)
Modified: incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb?rev=1338497&r1=1338496&r2=1338497&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb (original)
+++ incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb Tue May 15 01:10:46 2012
@@ -3,7 +3,7 @@
define service {
hostgroup_name nagios-server
use generic-service
- service_description NAGIOS::Nagios status log staleness alert
+ service_description NAGIOS::Nagios status log staleness
check_command check_nagios!10!/var/nagios/status.dat!/usr/bin/nagios
normal_check_interval 5
retry_check_interval 0.5
@@ -14,8 +14,8 @@ define service {
define service {
hostgroup_name nagios-server
use generic-service
- service_description HDFS::Percent DataNodes storage full alert
- check_command check_aggregate!"DATANODE::Storage full alert"!10%!30%
+ service_description HDFS::Percent DataNodes storage full
+ check_command check_aggregate!"DATANODE::Storage full"!10%!30%
normal_check_interval 2
retry_check_interval 1
max_check_attempts 1
@@ -24,8 +24,8 @@ define service {
define service {
hostgroup_name nagios-server
use generic-service
- service_description HDFS::Percent DataNodes down alert
- check_command check_aggregate!"DATANODE::Process down alert"!10%!30%
+ service_description HDFS::Percent DataNodes down
+ check_command check_aggregate!"DATANODE::Process down"!10%!30%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
@@ -35,8 +35,8 @@ define service {
define service {
hostgroup_name nagios-server
use generic-service
- service_description MAPREDUCE::Percent TaskTrackers down alert
- check_command check_aggregate!"TASKTRACKER::Process down alert"!10%!30%
+ service_description MAPREDUCE::Percent TaskTrackers down
+ check_command check_aggregate!"TASKTRACKER::Process down"!10%!30%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
@@ -47,8 +47,8 @@ define service {
define service {
hostgroup_name nagios-server
use generic-service
- service_description ZOOKEEPER::Percent zookeeper servers down alert
- check_command check_aggregate!"ZKSERVERS::ZKSERVERS Process down alert"!35%!70%
+ service_description ZOOKEEPER::Percent zookeeper servers down
+ check_command check_aggregate!"ZKSERVERS::ZKSERVERS Process down"!35%!70%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
@@ -60,8 +60,8 @@ define service {
define service {
hostgroup_name nagios-server
use generic-service
- service_description HBASE::Percent region servers down alert
- check_command check_aggregate!"REGIONSERVER::Process down alert"!10%!30%
+ service_description HBASE::Percent region servers down
+ check_command check_aggregate!"REGIONSERVER::Process down"!10%!30%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
@@ -74,7 +74,7 @@ define service {
define service {
hostgroup_name ganglia-server
use generic-service
- service_description GANGLIA::Ganglia [gmetad] Process down alert
+ service_description GANGLIA::Ganglia [gmetad] Process down
check_command check_tcp!8651!-w 1 -c 1
normal_check_interval 0.25
retry_check_interval 0.25
@@ -127,7 +127,7 @@ define service {
define service {
hostgroup_name namenode
use generic-service
- service_description NAMENODE::Namenode Web UI down alert
+ service_description NAMENODE::Namenode Web UI down
check_command check_webui!namenode
normal_check_interval 1
retry_check_interval 1
@@ -137,7 +137,7 @@ define service {
define service {
hostgroup_name namenode
use generic-service
- service_description NAMENODE::Namenode Edit logs directory status alert
+ service_description NAMENODE::Namenode Edit logs directory status
check_command check_name_dir_status!50070
normal_check_interval 0.5
retry_check_interval 0.5
@@ -147,7 +147,7 @@ define service {
define service {
hostgroup_name namenode
use generic-service
- service_description NAMENODE::Namenode Host CPU utilization alert
+ service_description NAMENODE::Namenode Host CPU utilization
check_command check_cpu!200%!250%
normal_check_interval 5
retry_check_interval 2
@@ -157,7 +157,7 @@ define service {
define service {
hostgroup_name namenode
use generic-service
- service_description NAMENODE::Namenode Process down alert
+ service_description NAMENODE::Namenode Process down
check_command check_tcp!8020!-w 1 -c 1
normal_check_interval 0.5
retry_check_interval 0.25
@@ -167,7 +167,7 @@ define service {
define service {
hostgroup_name namenode
use generic-service
- service_description HDFS::Corrupt/Missing blocks alert
+ service_description HDFS::Corrupt/Missing blocks
check_command check_hdfs_blocks!50070!0%!0%
normal_check_interval 2
retry_check_interval 1
@@ -177,7 +177,7 @@ define service {
define service {
hostgroup_name namenode
use generic-service
- service_description HDFS::HDFS Capacity utilization alert
+ service_description HDFS::HDFS Capacity utilization
check_command check_hdfs_capacity!50070!80%!90%
normal_check_interval 10
retry_check_interval 1
@@ -187,7 +187,7 @@ define service {
define service {
hostgroup_name namenode
use generic-service
- service_description HDFS::Namenode RPC Latency alert
+ service_description HDFS::Namenode RPC Latency
check_command check_rpcq_latency!NameNode!50070!3000!5000
normal_check_interval 5
retry_check_interval 1
@@ -200,7 +200,7 @@ define service {
define service {
hostgroup_name jobtracker
use generic-service
- service_description JOBTRACKER::JobTracker Web UI down alert
+ service_description JOBTRACKER::JobTracker Web UI down
check_command check_webui!jobtracker
normal_check_interval 1
retry_check_interval 1
@@ -210,7 +210,7 @@ define service {
define service {
hostgroup_name jobtracker
use generic-service
- service_description JOBTRACKER::JobHistory Web UI down alert
+ service_description JOBTRACKER::JobHistory Web UI down
check_command check_webui!jobhistory
normal_check_interval 1
retry_check_interval 1
@@ -220,7 +220,7 @@ define service {
define service {
hostgroup_name jobtracker
use generic-service
- service_description JOBTRACKER::Jobtracker CPU utilization alert
+ service_description JOBTRACKER::Jobtracker CPU utilization
check_command check_cpu!200%!250%
normal_check_interval 5
retry_check_interval 2
@@ -230,7 +230,7 @@ define service {
define service {
hostgroup_name jobtracker
use generic-service
- service_description JOBTRACKER::Jobtracker Process down alert
+ service_description JOBTRACKER::Jobtracker Process down
check_command check_tcp!50030!-w 1 -c 1
normal_check_interval 0.5
retry_check_interval 0.25
@@ -240,7 +240,7 @@ define service {
define service {
hostgroup_name jobtracker
use generic-service
- service_description MAPREDUCE::JobTracker RPC Latency alert
+ service_description MAPREDUCE::JobTracker RPC Latency
check_command check_rpcq_latency!JobTracker!50030!3000!5000
normal_check_interval 5
retry_check_interval 1
@@ -253,7 +253,7 @@ define service {
define service {
hostgroup_name slaves
use generic-service
- service_description DATANODE::Process down alert
+ service_description DATANODE::Process down
check_command check_tcp!50010!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
@@ -263,7 +263,7 @@ define service {
define service {
hostgroup_name slaves
use generic-service
- service_description DATANODE::Storage full alert
+ service_description DATANODE::Storage full
check_command check_datanode_storage!50075!90%!90%
normal_check_interval 5
retry_check_interval 1
@@ -274,7 +274,7 @@ define service {
define service {
hostgroup_name slaves
use generic-service
- service_description TASKTRACKER::Process down alert
+ service_description TASKTRACKER::Process down
check_command check_tcp!50060!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
@@ -287,7 +287,7 @@ define service {
define service {
hostgroup_name region-servers
use generic-service
- service_description REGIONSERVER::Process down alert
+ service_description REGIONSERVER::Process down
check_command check_tcp!60020!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
@@ -300,7 +300,7 @@ define service {
define service {
hostgroup_name zookeeper-servers
use generic-service
- service_description ZKSERVERS::ZKSERVERS Process down alert
+ service_description ZKSERVERS::ZKSERVERS Process down
check_command check_tcp!2181!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
@@ -313,7 +313,7 @@ define service {
define service {
hostgroup_name hbasemaster
use generic-service
- service_description HBASEMASTER::HBase Web UI down alert
+ service_description HBASEMASTER::HBase Web UI down
check_command check_webui!hbase
normal_check_interval 1
retry_check_interval 1
@@ -323,7 +323,7 @@ define service {
define service {
hostgroup_name hbasemaster
use generic-service
- service_description HBASEMASTER::HBaseMaster CPU utilization alert
+ service_description HBASEMASTER::HBaseMaster CPU utilization
check_command check_cpu!200%!250%
normal_check_interval 5
retry_check_interval 2
@@ -333,7 +333,7 @@ define service {
define service {
hostgroup_name hbasemaster
use generic-service
- service_description HBASEMASTER::HBaseMaster Process down alert
+ service_description HBASEMASTER::HBaseMaster Process down
check_command check_tcp!60000!-w 1 -c 1
normal_check_interval 0.5
retry_check_interval 0.25
@@ -346,7 +346,7 @@ define service {
define service {
hostgroup_name hiveserver
use generic-service
- service_description HIVE-METASTORE::HIVE-METASTORE status check alert
+ service_description HIVE-METASTORE::HIVE-METASTORE status check
check_command check_hive_metastore_status!9083
normal_check_interval 0.5
retry_check_interval 0.5
@@ -358,7 +358,7 @@ define service {
define service {
hostgroup_name oozie-server
use generic-service
- service_description OOZIE::Oozie status check alert
+ service_description OOZIE::Oozie status check
check_command check_oozie_status!11000!<%=scope.function_hdp_template_var("java32_home") %>
normal_check_interval 1
retry_check_interval 1
@@ -370,7 +370,7 @@ define service {
define service {
hostgroup_name templeton-server
use generic-service
- service_description TEMPLETON::Templeton status check alert
+ service_description TEMPLETON::Templeton status check
check_command check_templeton_status!50111!v1
normal_check_interval 1
retry_check_interval 0.5
Modified: incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/nagios/scripts/nagios_alerts.php
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/nagios/scripts/nagios_alerts.php?rev=1338497&r1=1338496&r2=1338497&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/nagios/scripts/nagios_alerts.php (original)
+++ incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/nagios/scripts/nagios_alerts.php Tue May 15 01:10:46 2012
@@ -91,13 +91,13 @@ function hdp_mon_generate_response( $res
define ("warn", "1");
define ("critical", "2");
- define ("HDFS_SERVICE_CHECK", "NAMENODE::Namenode Process down alert");
- define ("MAPREDUCE_SERVICE_CHECK", "JOBTRACKER::Jobtracker Process down alert");
- define ("HBASE_SERVICE_CHECK", "HBASEMASTER::HBaseMaster Process down alert");
- define ("ZOOKEEPER_SERVICE_CHECK", "ZOOKEEPER::Percent zookeeper servers down alert");
- define ("HIVE_METASTORE_SERVICE_CHECK", "HIVE-METASTORE::HIVE-METASTORE status check alert");
- define ("OOZIE_SERVICE_CHECK", "OOZIE::Oozie status check alert");
- define ("TEMPLETON_SERVICE_CHECK", "TEMPLETON::Templeton status check alert");
+ define ("HDFS_SERVICE_CHECK", "NAMENODE::Namenode Process down");
+ define ("MAPREDUCE_SERVICE_CHECK", "JOBTRACKER::Jobtracker Process down");
+ define ("HBASE_SERVICE_CHECK", "HBASEMASTER::HBaseMaster Process down");
+ define ("ZOOKEEPER_SERVICE_CHECK", "ZOOKEEPER::Percent zookeeper servers down");
+ define ("HIVE_METASTORE_SERVICE_CHECK", "HIVE-METASTORE::HIVE-METASTORE status check");
+ define ("OOZIE_SERVICE_CHECK", "OOZIE::Oozie status check");
+ define ("TEMPLETON_SERVICE_CHECK", "TEMPLETON::Templeton status check");
$status_file="/var/nagios/status.dat";
$q1="";