You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by vi...@apache.org on 2012/06/07 03:13:48 UTC

svn commit: r1347251 - in /incubator/ambari/branches/ambari-186: ./ hmc/puppet/modules/hdp-nagios/templates/ mon_dashboard/src/addOns/ganglia/graph.d/ mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/ mon_dashboard/src/dataServices/ganglia/g...

Author: vikram
Date: Thu Jun  7 01:13:48 2012
New Revision: 1347251

URL: http://svn.apache.org/viewvc?rev=1347251&view=rev
Log:
AMBARI-444, 445. Nagios checks send kicks to the agent that trigger a run. Configure Nagios to send email notifications when slaves go down (Contributed by Suhas)

Modified:
    incubator/ambari/branches/ambari-186/CHANGES.txt
    incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb
    incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
    incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/ganglia/graph.d/hdp_mon_hdfs_io_report.json
    incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/ganglia/graph.d/hdp_mon_jobtracker_mapreduce_report.json
    incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/all.json
    incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/custom/hdp.json
    incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/mapreduce/custom/hdp.json

Modified: incubator/ambari/branches/ambari-186/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/CHANGES.txt?rev=1347251&r1=1347250&r2=1347251&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/CHANGES.txt (original)
+++ incubator/ambari/branches/ambari-186/CHANGES.txt Thu Jun  7 01:13:48 2012
@@ -6,6 +6,8 @@ characters wide.
 
 Release 0.1.x - unreleased
 
+  AMBARI-444, 445. Nagios checks send kicks to the agent that trigger a run. Configure nagios to send email notifications when slaves go down (Suhas via Vikram)
+
   AMBARI-443. Nagios start fails on reinstall. Fix for previous patch. (Ramya via Vikram)
 
   AMBARI-443. Nagios start fails on reinstall (Ramya via Vikram)

Modified: incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb?rev=1347251&r1=1347250&r2=1347251&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb (original)
+++ incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb Thu Jun  7 01:13:48 2012
@@ -51,11 +51,6 @@ define command{
        }
 
 define command{
-        command_name    check_puppet_agent_status
-        command_line    $USER1$/check_puppet_agent_status.php -h $HOSTADDRESS$
-       }
-
-define command{
         command_name    check_oozie_status
         command_line    $USER1$/check_oozie_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$
        }

Modified: incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb?rev=1347251&r1=1347250&r2=1347251&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb (original)
+++ incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb Thu Jun  7 01:13:48 2012
@@ -13,8 +13,8 @@ define service {        
         use                     hadoop-service
         service_description     PUPPET::Puppet agent down
         servicegroups           PUPPET
-        check_command           check_puppet_agent_status
-        normal_check_interval   0.25
+        check_command           check_tcp!8139!-w 1 -c 1
+        normal_check_interval   1
         retry_check_interval    0.25
         max_check_attempts      4
 }
@@ -316,7 +316,6 @@ define service {
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
-        notifications_enabled   0
 }
 
 define service {
@@ -328,7 +327,6 @@ define service {
         normal_check_interval   5
         retry_check_interval    1
         max_check_attempts      2
-        notifications_enabled   0
 }
 
 # MAPREDUCE::TASKTRACKER Checks 
@@ -341,7 +339,6 @@ define service {
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
-        notifications_enabled   0
 }
 <%end-%>
 
@@ -356,7 +353,6 @@ define service {
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
-        notifications_enabled   0
 }
 <%end-%>
 

Modified: incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/ganglia/graph.d/hdp_mon_hdfs_io_report.json
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/ganglia/graph.d/hdp_mon_hdfs_io_report.json?rev=1347251&r1=1347250&r2=1347251&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/ganglia/graph.d/hdp_mon_hdfs_io_report.json (original)
+++ incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/ganglia/graph.d/hdp_mon_hdfs_io_report.json Thu Jun  7 01:13:48 2012
@@ -2,7 +2,7 @@
    "report_name" : "hdfs_io_report",
    "report_type" : "standard",
    "title" : "HDFS I/O",
-   "vertical_label" : "Bytes",
+   "vertical_label" : "Bytes/Sec",
    "series" : [
       { "metric": "dfs.datanode.bytes_written", "color": "ff0000", "label": "Bytes Written", "line_width": "2", "type": "line" },
       { "metric": "dfs.datanode.bytes_read", "color": "0000ff", "label": "Bytes Read", "line_width": "2", "type": "line" }

Modified: incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/ganglia/graph.d/hdp_mon_jobtracker_mapreduce_report.json
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/ganglia/graph.d/hdp_mon_jobtracker_mapreduce_report.json?rev=1347251&r1=1347250&r2=1347251&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/ganglia/graph.d/hdp_mon_jobtracker_mapreduce_report.json (original)
+++ incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/ganglia/graph.d/hdp_mon_jobtracker_mapreduce_report.json Thu Jun  7 01:13:48 2012
@@ -1,8 +1,8 @@
 {
    "report_name" : "jobtracker_mapreduce_report",
    "report_type" : "standard",
-   "title" : "Map/Reduce Stats",
-   "vertical_label" : "Number Of Maps/Reduces",
+   "title" : "Waiting Map/Reduce tasks",
+   "vertical_label" : "Number of Tasks",
    "series" : [
       { "metric": "mapred.jobtracker.waiting_maps", "color": "ff0000", "label": "Waiting Maps", 
         "line_width": "2", "type": "stack" },

Modified: incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/all.json
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/all.json?rev=1347251&r1=1347250&r2=1347251&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/all.json (original)
+++ incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/all.json Thu Jun  7 01:13:48 2012
@@ -1,25 +1,25 @@
 {
     "Global": [
         {
-            "description": "Key load metrics, aggregated across the slaves in the grid", 
+            "description": "Key load metrics, aggregated across the slave nodes", 
             "link": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/?c=%GridSlavesClusterName%", 
             "title": "Load Report", 
             "url": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/graph.php?c=%GridSlavesClusterName%&g=load_report"
         }, 
         {
-            "description": "Key memory metrics, aggregated across the slaves in the grid", 
+            "description": "Key memory metrics, aggregated across the slave nodes", 
             "link": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/?c=%GridSlavesClusterName%", 
             "title": "Memory Report", 
             "url": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/graph.php?c=%GridSlavesClusterName%&g=mem_report"
         }, 
         {
-            "description": "Key CPU metrics, aggregated across the slaves in the grid", 
+            "description": "Key CPU metrics, aggregated across the slave nodes", 
             "link": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/?c=%GridSlavesClusterName%", 
             "title": "CPU Report", 
             "url": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/graph.php?c=%GridSlavesClusterName%&g=cpu_report"
         }, 
         {
-            "description": "Key network I/O metrics, aggregated across the slaves in the grid", 
+            "description": "Key network I/O metrics, aggregated across the slave nodes", 
             "link": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/?c=%GridSlavesClusterName%", 
             "title": "Network I/O Report", 
             "url": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/graph.php?c=%GridSlavesClusterName%&g=network_report"

Modified: incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/custom/hdp.json
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/custom/hdp.json?rev=1347251&r1=1347250&r2=1347251&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/custom/hdp.json (original)
+++ incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/custom/hdp.json Thu Jun  7 01:13:48 2012
@@ -19,9 +19,9 @@
             "url": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/graph.php?c=%JobTrackerClusterName%&g=hdp_mon_jobtracker_reduce_slot_report"
         }, 
         {
-            "description": "Waiting Maps and Reduces, to give a feel for a combined MapReduce backlog", 
+            "description": "Map and Reduce tasks waiting to be scheduled, indicating load on the Map/Reduce cluster", 
             "link": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/?c=%JobTrackerClusterName%", 
-            "title": "MapReduce Backlog", 
+            "title": "Waiting Map/Reduce tasks", 
             "url": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/graph.php?c=%JobTrackerClusterName%&g=hdp_mon_jobtracker_mapreduce_report"
         }
     ]

Modified: incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/mapreduce/custom/hdp.json
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/mapreduce/custom/hdp.json?rev=1347251&r1=1347250&r2=1347251&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/mapreduce/custom/hdp.json (original)
+++ incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/mapreduce/custom/hdp.json Thu Jun  7 01:13:48 2012
@@ -1,23 +1,23 @@
 {
     "Global": [
         {
-            "description": "Jobs submitted to the JobTracker", 
+            "description": "Number of Map/Reduce jobs submitted to the JobTracker", 
             "link": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/?c=%JobTrackerClusterName%", 
-            "title": "JobTracker Jobs: Submitted", 
+            "title": "Map/Reduce Jobs: Submitted", 
             "url":
               "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/graph.php?c=%JobTrackerClusterName%&g=hdp_mon_jobtracker_jobs_submitted_report"
         }, 
         {
-            "description": "Jobs running on the JobTracker", 
+            "description": "Number of jobs currently running on the cluster", 
             "link": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/?c=%JobTrackerClusterName%", 
-            "title": "JobTracker Jobs: Running", 
+            "title": "Map/Reduce Jobs: Running", 
             "url":
               "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/graph.php?c=%JobTrackerClusterName%&g=hdp_mon_jobtracker_jobs_running_report"
         }, 
         {
-            "description": "Jobs completed on the JobTracker, to give a feel for cluster-wide job throughput", 
+            "description": "Number of Map/Reduce jobs completed, to give a feel for cluster-wide job throughput", 
             "link": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/?c=%JobTrackerClusterName%", 
-            "title": "JobTracker Jobs: Completed", 
+            "title": "Map/Reduce Jobs: Completed", 
             "url":
               "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/graph.php?c=%JobTrackerClusterName%&g=hdp_mon_jobtracker_jobs_completed_report"
         },