You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by vg...@apache.org on 2012/05/15 03:10:47 UTC

svn commit: r1338497 - in /incubator/ambari/branches/ambari-186: CHANGES.txt hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb mon_dashboard/src/addOns/nagios/scripts/nagios_alerts.php

Author: vgogate
Date: Tue May 15 01:10:46 2012
New Revision: 1338497

URL: http://svn.apache.org/viewvc?rev=1338497&view=rev
Log:
AMBARI-222. Remove the word "alert" from all Nagios alert descriptions. (vgogate)

Modified:
    incubator/ambari/branches/ambari-186/CHANGES.txt
    incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
    incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/nagios/scripts/nagios_alerts.php

Modified: incubator/ambari/branches/ambari-186/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/CHANGES.txt?rev=1338497&r1=1338496&r2=1338497&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/CHANGES.txt (original)
+++ incubator/ambari/branches/ambari-186/CHANGES.txt Tue May 15 01:10:46 2012
@@ -2,6 +2,8 @@ Ambari Change log
 
 Release 0.x.x - unreleased
 
+  AMBARI-222. Remove the word alert from all the Nagios alerts descriptions. (vgogate)
+
   AMBARI-221. Service fails to set its state to failed if a component fails to be acted upon (Hitesh via vgogate)
 
   AMBARI-220. Alerts table semantic difference at different levels (vgogate)

Modified: incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb?rev=1338497&r1=1338496&r2=1338497&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb (original)
+++ incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb Tue May 15 01:10:46 2012
@@ -3,7 +3,7 @@
 define service {        
         hostgroup_name          nagios-server        
         use                     generic-service
-        service_description     NAGIOS::Nagios status log staleness alert
+        service_description     NAGIOS::Nagios status log staleness
         check_command           check_nagios!10!/var/nagios/status.dat!/usr/bin/nagios
         normal_check_interval   5
         retry_check_interval    0.5
@@ -14,8 +14,8 @@ define service {        
 define service {
         hostgroup_name          nagios-server
         use                     generic-service
-        service_description     HDFS::Percent DataNodes storage full alert
-        check_command           check_aggregate!"DATANODE::Storage full alert"!10%!30%
+        service_description     HDFS::Percent DataNodes storage full
+        check_command           check_aggregate!"DATANODE::Storage full"!10%!30%
         normal_check_interval   2
         retry_check_interval    1 
         max_check_attempts      1
@@ -24,8 +24,8 @@ define service {
 define service {
         hostgroup_name          nagios-server
         use                     generic-service
-        service_description     HDFS::Percent DataNodes down alert
-        check_command           check_aggregate!"DATANODE::Process down alert"!10%!30%
+        service_description     HDFS::Percent DataNodes down
+        check_command           check_aggregate!"DATANODE::Process down"!10%!30%
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -35,8 +35,8 @@ define service {
 define service {
         hostgroup_name          nagios-server
         use                     generic-service
-        service_description     MAPREDUCE::Percent TaskTrackers down alert
-        check_command           check_aggregate!"TASKTRACKER::Process down alert"!10%!30%
+        service_description     MAPREDUCE::Percent TaskTrackers down
+        check_command           check_aggregate!"TASKTRACKER::Process down"!10%!30%
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -47,8 +47,8 @@ define service {
 define service {
         hostgroup_name          nagios-server
         use                     generic-service
-        service_description     ZOOKEEPER::Percent zookeeper servers down alert
-        check_command           check_aggregate!"ZKSERVERS::ZKSERVERS Process down alert"!35%!70%
+        service_description     ZOOKEEPER::Percent zookeeper servers down
+        check_command           check_aggregate!"ZKSERVERS::ZKSERVERS Process down"!35%!70%
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -60,8 +60,8 @@ define service {
 define service {
         hostgroup_name          nagios-server
         use                     generic-service
-        service_description     HBASE::Percent region servers down alert
-        check_command           check_aggregate!"REGIONSERVER::Process down alert"!10%!30%
+        service_description     HBASE::Percent region servers down
+        check_command           check_aggregate!"REGIONSERVER::Process down"!10%!30%
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -74,7 +74,7 @@ define service {
 define service {
         hostgroup_name          ganglia-server
         use                     generic-service
-        service_description     GANGLIA::Ganglia [gmetad] Process down alert
+        service_description     GANGLIA::Ganglia [gmetad] Process down
         check_command           check_tcp!8651!-w 1 -c 1
         normal_check_interval   0.25
         retry_check_interval    0.25
@@ -127,7 +127,7 @@ define service {
 define service {
         hostgroup_name          namenode
         use                     generic-service
-        service_description     NAMENODE::Namenode Web UI down alert
+        service_description     NAMENODE::Namenode Web UI down
         check_command           check_webui!namenode
         normal_check_interval   1
         retry_check_interval    1
@@ -137,7 +137,7 @@ define service {
 define service {
         hostgroup_name          namenode
         use                     generic-service
-        service_description     NAMENODE::Namenode Edit logs directory status alert
+        service_description     NAMENODE::Namenode Edit logs directory status
         check_command           check_name_dir_status!50070
         normal_check_interval   0.5
         retry_check_interval    0.5
@@ -147,7 +147,7 @@ define service {
 define service {        
         hostgroup_name          namenode        
         use                     generic-service
-        service_description     NAMENODE::Namenode Host CPU utilization alert
+        service_description     NAMENODE::Namenode Host CPU utilization
         check_command           check_cpu!200%!250%
         normal_check_interval   5
         retry_check_interval    2 
@@ -157,7 +157,7 @@ define service {        
 define service {
         hostgroup_name          namenode
         use                     generic-service
-        service_description     NAMENODE::Namenode Process down alert
+        service_description     NAMENODE::Namenode Process down
         check_command           check_tcp!8020!-w 1 -c 1
         normal_check_interval   0.5
         retry_check_interval    0.25
@@ -167,7 +167,7 @@ define service {
 define service {
         hostgroup_name          namenode
         use                     generic-service
-        service_description     HDFS::Corrupt/Missing blocks alert
+        service_description     HDFS::Corrupt/Missing blocks
         check_command           check_hdfs_blocks!50070!0%!0%
         normal_check_interval   2
         retry_check_interval    1 
@@ -177,7 +177,7 @@ define service {
 define service {
         hostgroup_name          namenode
         use                     generic-service
-        service_description     HDFS::HDFS Capacity utilization alert
+        service_description     HDFS::HDFS Capacity utilization
         check_command           check_hdfs_capacity!50070!80%!90%
         normal_check_interval   10
         retry_check_interval    1 
@@ -187,7 +187,7 @@ define service {
 define service {
         hostgroup_name          namenode
         use                     generic-service
-        service_description     HDFS::Namenode RPC Latency alert
+        service_description     HDFS::Namenode RPC Latency
         check_command           check_rpcq_latency!NameNode!50070!3000!5000
         normal_check_interval   5
         retry_check_interval    1 
@@ -200,7 +200,7 @@ define service {
 define service {
         hostgroup_name          jobtracker
         use                     generic-service
-        service_description     JOBTRACKER::JobTracker Web UI down alert
+        service_description     JOBTRACKER::JobTracker Web UI down
         check_command           check_webui!jobtracker
         normal_check_interval   1
         retry_check_interval    1
@@ -210,7 +210,7 @@ define service {
 define service {
         hostgroup_name          jobtracker
         use                     generic-service
-        service_description     JOBTRACKER::JobHistory Web UI down alert
+        service_description     JOBTRACKER::JobHistory Web UI down
         check_command           check_webui!jobhistory
         normal_check_interval   1
         retry_check_interval    1
@@ -220,7 +220,7 @@ define service {
 define service {
         hostgroup_name          jobtracker
         use                     generic-service
-        service_description     JOBTRACKER::Jobtracker CPU utilization alert
+        service_description     JOBTRACKER::Jobtracker CPU utilization
         check_command           check_cpu!200%!250%
         normal_check_interval   5
         retry_check_interval    2 
@@ -230,7 +230,7 @@ define service {
 define service {
         hostgroup_name          jobtracker
         use                     generic-service
-        service_description     JOBTRACKER::Jobtracker Process down alert
+        service_description     JOBTRACKER::Jobtracker Process down
         check_command           check_tcp!50030!-w 1 -c 1
         normal_check_interval   0.5
         retry_check_interval    0.25
@@ -240,7 +240,7 @@ define service {
 define service {
         hostgroup_name          jobtracker
         use                     generic-service
-        service_description     MAPREDUCE::JobTracker RPC Latency alert
+        service_description     MAPREDUCE::JobTracker RPC Latency
         check_command           check_rpcq_latency!JobTracker!50030!3000!5000
         normal_check_interval   5
         retry_check_interval    1 
@@ -253,7 +253,7 @@ define service {
 define service {
         hostgroup_name          slaves
         use                     generic-service
-        service_description     DATANODE::Process down alert
+        service_description     DATANODE::Process down
         check_command           check_tcp!50010!-w 1 -c 1
         normal_check_interval   1
         retry_check_interval    0.5
@@ -263,7 +263,7 @@ define service {
 define service {
         hostgroup_name          slaves
         use                     generic-service
-        service_description     DATANODE::Storage full alert
+        service_description     DATANODE::Storage full
         check_command           check_datanode_storage!50075!90%!90%
         normal_check_interval   5
         retry_check_interval    1
@@ -274,7 +274,7 @@ define service {
 define service {
         hostgroup_name          slaves
         use                     generic-service
-        service_description     TASKTRACKER::Process down alert
+        service_description     TASKTRACKER::Process down
         check_command           check_tcp!50060!-w 1 -c 1
         normal_check_interval   1
         retry_check_interval    0.5
@@ -287,7 +287,7 @@ define service {
 define service {
         hostgroup_name          region-servers
         use                     generic-service
-        service_description     REGIONSERVER::Process down alert
+        service_description     REGIONSERVER::Process down
         check_command           check_tcp!60020!-w 1 -c 1
         normal_check_interval   1
         retry_check_interval    0.5
@@ -300,7 +300,7 @@ define service {
 define service {
         hostgroup_name          zookeeper-servers
         use                     generic-service
-        service_description     ZKSERVERS::ZKSERVERS Process down alert
+        service_description     ZKSERVERS::ZKSERVERS Process down
         check_command           check_tcp!2181!-w 1 -c 1
         normal_check_interval   1
         retry_check_interval    0.5
@@ -313,7 +313,7 @@ define service {
 define service {
         hostgroup_name          hbasemaster
         use                     generic-service
-        service_description     HBASEMASTER::HBase Web UI down alert
+        service_description     HBASEMASTER::HBase Web UI down
         check_command           check_webui!hbase
         normal_check_interval   1
         retry_check_interval    1
@@ -323,7 +323,7 @@ define service {
 define service {
         hostgroup_name          hbasemaster
         use                     generic-service
-        service_description     HBASEMASTER::HBaseMaster CPU utilization alert
+        service_description     HBASEMASTER::HBaseMaster CPU utilization
         check_command           check_cpu!200%!250%
         normal_check_interval   5
         retry_check_interval    2 
@@ -333,7 +333,7 @@ define service {
 define service {
         hostgroup_name          hbasemaster
         use                     generic-service
-        service_description     HBASEMASTER::HBaseMaster Process down alert
+        service_description     HBASEMASTER::HBaseMaster Process down
         check_command           check_tcp!60000!-w 1 -c 1
         normal_check_interval   0.5
         retry_check_interval    0.25
@@ -346,7 +346,7 @@ define service {
 define service {
         hostgroup_name          hiveserver
         use                     generic-service
-        service_description     HIVE-METASTORE::HIVE-METASTORE status check alert
+        service_description     HIVE-METASTORE::HIVE-METASTORE status check
         check_command           check_hive_metastore_status!9083
         normal_check_interval   0.5
         retry_check_interval    0.5
@@ -358,7 +358,7 @@ define service {
 define service {
         hostgroup_name          oozie-server
         use                     generic-service
-        service_description     OOZIE::Oozie status check alert
+        service_description     OOZIE::Oozie status check
         check_command           check_oozie_status!11000!<%=scope.function_hdp_template_var("java32_home") %>
         normal_check_interval   1
         retry_check_interval    1
@@ -370,7 +370,7 @@ define service {
 define service {
         hostgroup_name          templeton-server
         use                     generic-service
-        service_description     TEMPLETON::Templeton status check alert
+        service_description     TEMPLETON::Templeton status check
         check_command           check_templeton_status!50111!v1
         normal_check_interval   1
         retry_check_interval    0.5

Modified: incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/nagios/scripts/nagios_alerts.php
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/nagios/scripts/nagios_alerts.php?rev=1338497&r1=1338496&r2=1338497&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/nagios/scripts/nagios_alerts.php (original)
+++ incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/nagios/scripts/nagios_alerts.php Tue May 15 01:10:46 2012
@@ -91,13 +91,13 @@ function hdp_mon_generate_response( $res
   define ("warn", "1");
   define ("critical", "2");
 
-  define ("HDFS_SERVICE_CHECK", "NAMENODE::Namenode Process down alert");
-  define ("MAPREDUCE_SERVICE_CHECK", "JOBTRACKER::Jobtracker Process down alert");
-  define ("HBASE_SERVICE_CHECK", "HBASEMASTER::HBaseMaster Process down alert");
-  define ("ZOOKEEPER_SERVICE_CHECK", "ZOOKEEPER::Percent zookeeper servers down alert");
-  define ("HIVE_METASTORE_SERVICE_CHECK", "HIVE-METASTORE::HIVE-METASTORE status check alert");
-  define ("OOZIE_SERVICE_CHECK", "OOZIE::Oozie status check alert");
-  define ("TEMPLETON_SERVICE_CHECK", "TEMPLETON::Templeton status check alert");
+  define ("HDFS_SERVICE_CHECK", "NAMENODE::Namenode Process down");
+  define ("MAPREDUCE_SERVICE_CHECK", "JOBTRACKER::Jobtracker Process down");
+  define ("HBASE_SERVICE_CHECK", "HBASEMASTER::HBaseMaster Process down");
+  define ("ZOOKEEPER_SERVICE_CHECK", "ZOOKEEPER::Percent zookeeper servers down");
+  define ("HIVE_METASTORE_SERVICE_CHECK", "HIVE-METASTORE::HIVE-METASTORE status check");
+  define ("OOZIE_SERVICE_CHECK", "OOZIE::Oozie status check");
+  define ("TEMPLETON_SERVICE_CHECK", "TEMPLETON::Templeton status check");
 
   $status_file="/var/nagios/status.dat";
   $q1="";