You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by ra...@apache.org on 2012/05/17 22:11:03 UTC

svn commit: r1339838 - in /incubator/ambari/branches/ambari-186: CHANGES.txt hmc/puppet/modules/hdp-nagios/templates/hadoop-hosts.cfg.erb hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb

Author: ramya
Date: Thu May 17 20:11:02 2012
New Revision: 1339838

URL: http://svn.apache.org/viewvc?rev=1339838&view=rev
Log:
AMBARI-270. Specify the notification intervals and options for Alerts. Contributed by Suhas

Modified:
    incubator/ambari/branches/ambari-186/CHANGES.txt
    incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-hosts.cfg.erb
    incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb

Modified: incubator/ambari/branches/ambari-186/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/CHANGES.txt?rev=1339838&r1=1339837&r2=1339838&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/CHANGES.txt (original)
+++ incubator/ambari/branches/ambari-186/CHANGES.txt Thu May 17 20:11:02 2012
@@ -2,6 +2,9 @@ Ambari Change log
 
 Release 0.x.x - unreleased
 
+  AMBARI-270. Specify the notification intervals and options for Alerts 
+  (vgogate via ramya)
+
   AMBARI-300. Change the status message (success/error) location so that it 
   shows below the page summary box, rather than above, more better visibility 
   (Yusaku Sako via ramya)

Modified: incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-hosts.cfg.erb
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-hosts.cfg.erb?rev=1339838&r1=1339837&r2=1339838&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-hosts.cfg.erb (original)
+++ incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-hosts.cfg.erb Thu May 17 20:11:02 2012
@@ -7,6 +7,10 @@ define host {
         check_interval         0.25
         retry_interval         0.25
         max_check_attempts     4
+        notifications_enabled     1
+        first_notification_delay  0     # Send notification soon after change in the hard state
+        notification_interval     0     # Send the notification once
+        notification_options      d,u,r
 }
 
 <%end%>

Modified: incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb?rev=1339838&r1=1339837&r2=1339838&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb (original)
+++ incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb Thu May 17 20:11:02 2012
@@ -1,8 +1,16 @@
 # NAGIOS SERVER Check (status log update)
 <%if scope.function_hdp_nagios_members_exist('nagios-server')-%>
+define service {
+        name                            hadoop-service
+        use                             generic-service
+        notification_options            w,u,c
+        first_notification_delay        0
+        notification_interval           0     # Send the notification once
+}
+
 define service {        
         hostgroup_name          nagios-server        
-        use                     generic-service
+        use                     hadoop-service
         service_description     NAGIOS::Nagios status log staleness
         check_command           check_nagios!10!/var/nagios/status.dat!/usr/bin/nagios
         normal_check_interval   5
@@ -13,7 +21,7 @@ define service {        
 # NAGIOS SERVER HDFS Checks
 define service {
         hostgroup_name          nagios-server
-        use                     generic-service
+        use                     hadoop-service
         service_description     HDFS::Percent DataNodes storage full
         check_command           check_aggregate!"DATANODE::Storage full"!10%!30%
         normal_check_interval   2
@@ -23,7 +31,7 @@ define service {
 
 define service {
         hostgroup_name          nagios-server
-        use                     generic-service
+        use                     hadoop-service
         service_description     HDFS::Percent DataNodes down
         check_command           check_aggregate!"DATANODE::Process down"!10%!30%
         normal_check_interval   0.5
@@ -34,7 +42,7 @@ define service {
 # NAGIOS SERVER MAPREDUCE Checks
 define service {
         hostgroup_name          nagios-server
-        use                     generic-service
+        use                     hadoop-service
         service_description     MAPREDUCE::Percent TaskTrackers down
         check_command           check_aggregate!"TASKTRACKER::Process down"!10%!30%
         normal_check_interval   0.5
@@ -46,7 +54,7 @@ define service {
 <%if scope.function_hdp_nagios_members_exist('zookeeper-servers')-%>
 define service {
         hostgroup_name          nagios-server
-        use                     generic-service
+        use                     hadoop-service
         service_description     ZOOKEEPER::Percent zookeeper servers down
         check_command           check_aggregate!"ZKSERVERS::ZKSERVERS Process down"!35%!70%
         normal_check_interval   0.5
@@ -59,7 +67,7 @@ define service {
 <%if scope.function_hdp_nagios_members_exist('hbasemaster')-%>
 define service {
         hostgroup_name          nagios-server
-        use                     generic-service
+        use                     hadoop-service
         service_description     HBASE::Percent region servers down
         check_command           check_aggregate!"REGIONSERVER::Process down"!10%!30%
         normal_check_interval   0.5
@@ -73,7 +81,7 @@ define service {
 <%if scope.function_hdp_nagios_members_exist('ganglia-server')-%>
 define service {
         hostgroup_name          ganglia-server
-        use                     generic-service
+        use                     hadoop-service
         service_description     GANGLIA::Ganglia [gmetad] Process down
         check_command           check_tcp!8651!-w 1 -c 1
         normal_check_interval   0.25
@@ -83,7 +91,7 @@ define service {
 
 define service {
         hostgroup_name          ganglia-server
-        use                     generic-service
+        use                     hadoop-service
         service_description     GANGLIA::Ganglia collector [gmond] Process down alert for slaves
         check_command           check_tcp!8660!-w 1 -c 1
         normal_check_interval   0.25
@@ -93,7 +101,7 @@ define service {
 
 define service {
         hostgroup_name          ganglia-server
-        use                     generic-service
+        use                     hadoop-service
         service_description     GANGLIA::Ganglia collector [gmond] Process down alert for namenode
         check_command           check_tcp!8661!-w 1 -c 1
         normal_check_interval   0.25
@@ -103,7 +111,7 @@ define service {
 
 define service {
         hostgroup_name          ganglia-server
-        use                     generic-service
+        use                     hadoop-service
         service_description     GANGLIA::Ganglia collector [gmond] Process down alert for jobtracker
         check_command           check_tcp!8662!-w 1 -c 1
         normal_check_interval   0.25
@@ -113,7 +121,7 @@ define service {
 
 define service {
         hostgroup_name          ganglia-server
-        use                     generic-service
+        use                     hadoop-service
         service_description     GANGLIA::Ganglia collector [gmond] Process down alert for hbasemaster
         check_command           check_tcp!8663!-w 1 -c 1
         normal_check_interval   0.25
@@ -126,7 +134,7 @@ define service {
 # Secondary namenode checks
 define service {
         hostgroup_name          snamenode
-        use                     generic-service
+        use                     hadoop-service
         service_description     NAMENODE::Secondary Namenode Process down
         check_command           check_tcp!50090!-w 1 -c 1
         normal_check_interval   0.5
@@ -138,7 +146,7 @@ define service {
 # HDFS Checks
 define service {
         hostgroup_name          namenode
-        use                     generic-service
+        use                     hadoop-service
         service_description     NAMENODE::Namenode Web UI down
         check_command           check_webui!namenode
         normal_check_interval   1
@@ -148,7 +156,7 @@ define service {
 
 define service {
         hostgroup_name          namenode
-        use                     generic-service
+        use                     hadoop-service
         service_description     NAMENODE::Namenode Edit logs directory status
         check_command           check_name_dir_status!50070
         normal_check_interval   0.5
@@ -158,7 +166,7 @@ define service {
 
 define service {        
         hostgroup_name          namenode        
-        use                     generic-service
+        use                     hadoop-service
         service_description     NAMENODE::Namenode Host CPU utilization
         check_command           check_cpu!200%!250%
         normal_check_interval   5
@@ -168,7 +176,7 @@ define service {        
 
 define service {
         hostgroup_name          namenode
-        use                     generic-service
+        use                     hadoop-service
         service_description     NAMENODE::Namenode Process down
         check_command           check_tcp!8020!-w 1 -c 1
         normal_check_interval   0.5
@@ -178,7 +186,7 @@ define service {
 
 define service {
         hostgroup_name          namenode
-        use                     generic-service
+        use                     hadoop-service
         service_description     HDFS::Corrupt/Missing blocks
         check_command           check_hdfs_blocks!50070!0%!0%
         normal_check_interval   2
@@ -188,7 +196,7 @@ define service {
 
 define service {
         hostgroup_name          namenode
-        use                     generic-service
+        use                     hadoop-service
         service_description     HDFS::HDFS Capacity utilization
         check_command           check_hdfs_capacity!50070!80%!90%
         normal_check_interval   10
@@ -198,7 +206,7 @@ define service {
 
 define service {
         hostgroup_name          namenode
-        use                     generic-service
+        use                     hadoop-service
         service_description     HDFS::Namenode RPC Latency
         check_command           check_rpcq_latency!NameNode!50070!3000!5000
         normal_check_interval   5
@@ -211,7 +219,7 @@ define service {
 <%if scope.function_hdp_nagios_members_exist('jobtracker')-%>
 define service {
         hostgroup_name          jobtracker
-        use                     generic-service
+        use                     hadoop-service
         service_description     JOBTRACKER::JobTracker Web UI down
         check_command           check_webui!jobtracker
         normal_check_interval   1
@@ -221,7 +229,7 @@ define service {
 
 define service {
         hostgroup_name          jobtracker
-        use                     generic-service
+        use                     hadoop-service
         service_description     JOBTRACKER::JobHistory Web UI down
         check_command           check_webui!jobhistory
         normal_check_interval   1
@@ -231,7 +239,7 @@ define service {
 
 define service {
         hostgroup_name          jobtracker
-        use                     generic-service
+        use                     hadoop-service
         service_description     JOBTRACKER::Jobtracker CPU utilization
         check_command           check_cpu!200%!250%
         normal_check_interval   5
@@ -241,7 +249,7 @@ define service {
 
 define service {
         hostgroup_name          jobtracker
-        use                     generic-service
+        use                     hadoop-service
         service_description     JOBTRACKER::Jobtracker Process down
         check_command           check_tcp!50030!-w 1 -c 1
         normal_check_interval   0.5
@@ -251,7 +259,7 @@ define service {
 
 define service {
         hostgroup_name          jobtracker
-        use                     generic-service
+        use                     hadoop-service
         service_description     MAPREDUCE::JobTracker RPC Latency
         check_command           check_rpcq_latency!JobTracker!50030!3000!5000
         normal_check_interval   5
@@ -264,33 +272,36 @@ define service {
 <%if scope.function_hdp_nagios_members_exist('slaves')-%>
 define service {
         hostgroup_name          slaves
-        use                     generic-service
+        use                     hadoop-service
         service_description     DATANODE::Process down
         check_command           check_tcp!50010!-w 1 -c 1
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
+        notifications_enabled   0
 }
 
 define service {
         hostgroup_name          slaves
-        use                     generic-service
+        use                     hadoop-service
         service_description     DATANODE::Storage full
         check_command           check_datanode_storage!50075!90%!90%
         normal_check_interval   5
         retry_check_interval    1
         max_check_attempts      2
+        notifications_enabled   0
 }
 
 # MAPREDUCE::TASKTRACKER Checks 
 define service {
         hostgroup_name          slaves
-        use                     generic-service
+        use                     hadoop-service
         service_description     TASKTRACKER::Process down
         check_command           check_tcp!50060!-w 1 -c 1
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
+        notifications_enabled   0
 }
 <%end-%>
 
@@ -298,12 +309,13 @@ define service {
 # HBASE::REGIONSERVER Checks
 define service {
         hostgroup_name          region-servers
-        use                     generic-service
+        use                     hadoop-service
         service_description     REGIONSERVER::Process down
         check_command           check_tcp!60020!-w 1 -c 1
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
+        notifications_enabled   0
 }
 <%end-%>
 
@@ -311,7 +323,7 @@ define service {
 # ZOOKEEPER Checks
 define service {
         hostgroup_name          zookeeper-servers
-        use                     generic-service
+        use                     hadoop-service
         service_description     ZKSERVERS::ZKSERVERS Process down
         check_command           check_tcp!2181!-w 1 -c 1
         normal_check_interval   1
@@ -324,7 +336,7 @@ define service {
 # HBASE Checks
 define service {
         hostgroup_name          hbasemaster
-        use                     generic-service
+        use                     hadoop-service
         service_description     HBASEMASTER::HBase Web UI down
         check_command           check_webui!hbase
         normal_check_interval   1
@@ -334,7 +346,7 @@ define service {
 
 define service {
         hostgroup_name          hbasemaster
-        use                     generic-service
+        use                     hadoop-service
         service_description     HBASEMASTER::HBaseMaster CPU utilization
         check_command           check_cpu!200%!250%
         normal_check_interval   5
@@ -344,7 +356,7 @@ define service {
 
 define service {
         hostgroup_name          hbasemaster
-        use                     generic-service
+        use                     hadoop-service
         service_description     HBASEMASTER::HBaseMaster Process down
         check_command           check_tcp!60000!-w 1 -c 1
         normal_check_interval   0.5
@@ -357,7 +369,7 @@ define service {
 # HIVE Metastore check
 define service {
         hostgroup_name          hiveserver
-        use                     generic-service
+        use                     hadoop-service
         service_description     HIVE-METASTORE::HIVE-METASTORE status check
         check_command           check_hive_metastore_status!9083
         normal_check_interval   0.5
@@ -369,7 +381,7 @@ define service {
 # Oozie check
 define service {
         hostgroup_name          oozie-server
-        use                     generic-service
+        use                     hadoop-service
         service_description     OOZIE::Oozie status check
         check_command           check_oozie_status!11000!<%=scope.function_hdp_template_var("java32_home") %>
         normal_check_interval   1
@@ -381,7 +393,7 @@ define service {
 # Templeton check
 define service {
         hostgroup_name          templeton-server
-        use                     generic-service
+        use                     hadoop-service
         service_description     TEMPLETON::Templeton status check
         check_command           check_templeton_status!50111!v1
         normal_check_interval   1