You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by vi...@apache.org on 2012/06/07 03:13:48 UTC
svn commit: r1347251 - in /incubator/ambari/branches/ambari-186: ./
hmc/puppet/modules/hdp-nagios/templates/
mon_dashboard/src/addOns/ganglia/graph.d/
mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/
mon_dashboard/src/dataServices/ganglia/g...
Author: vikram
Date: Thu Jun 7 01:13:48 2012
New Revision: 1347251
URL: http://svn.apache.org/viewvc?rev=1347251&view=rev
Log:
AMBARI-444, 445. Nagios checks send kicks to the agent that trigger a run. Configure nagios to send email notifications when slaves go down (Contributed by Suhas)
Modified:
incubator/ambari/branches/ambari-186/CHANGES.txt
incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb
incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/ganglia/graph.d/hdp_mon_hdfs_io_report.json
incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/ganglia/graph.d/hdp_mon_jobtracker_mapreduce_report.json
incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/all.json
incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/custom/hdp.json
incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/mapreduce/custom/hdp.json
Modified: incubator/ambari/branches/ambari-186/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/CHANGES.txt?rev=1347251&r1=1347250&r2=1347251&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/CHANGES.txt (original)
+++ incubator/ambari/branches/ambari-186/CHANGES.txt Thu Jun 7 01:13:48 2012
@@ -6,6 +6,8 @@ characters wide.
Release 0.1.x - unreleased
+ AMBARI-444, 445. Nagios checks send kicks to the agent that trigger a run. Configure nagios to send email notifications when slaves go down (Suhas via Vikram)
+
AMBARI-443. Nagios start fails on reinstall. Fix for previous patch. (Ramya via Vikram)
AMBARI-443. Nagios start fails on reinstall (Ramya via Vikram)
Modified: incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb?rev=1347251&r1=1347250&r2=1347251&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb (original)
+++ incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb Thu Jun 7 01:13:48 2012
@@ -51,11 +51,6 @@ define command{
}
define command{
- command_name check_puppet_agent_status
- command_line $USER1$/check_puppet_agent_status.php -h $HOSTADDRESS$
- }
-
-define command{
command_name check_oozie_status
command_line $USER1$/check_oozie_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$
}
Modified: incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb?rev=1347251&r1=1347250&r2=1347251&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb (original)
+++ incubator/ambari/branches/ambari-186/hmc/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb Thu Jun 7 01:13:48 2012
@@ -13,8 +13,8 @@ define service {
use hadoop-service
service_description PUPPET::Puppet agent down
servicegroups PUPPET
- check_command check_puppet_agent_status
- normal_check_interval 0.25
+ check_command check_tcp!8139!-w 1 -c 1
+ normal_check_interval 1
retry_check_interval 0.25
max_check_attempts 4
}
@@ -316,7 +316,6 @@ define service {
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
- notifications_enabled 0
}
define service {
@@ -328,7 +327,6 @@ define service {
normal_check_interval 5
retry_check_interval 1
max_check_attempts 2
- notifications_enabled 0
}
# MAPREDUCE::TASKTRACKER Checks
@@ -341,7 +339,6 @@ define service {
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
- notifications_enabled 0
}
<%end-%>
@@ -356,7 +353,6 @@ define service {
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
- notifications_enabled 0
}
<%end-%>
Modified: incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/ganglia/graph.d/hdp_mon_hdfs_io_report.json
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/ganglia/graph.d/hdp_mon_hdfs_io_report.json?rev=1347251&r1=1347250&r2=1347251&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/ganglia/graph.d/hdp_mon_hdfs_io_report.json (original)
+++ incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/ganglia/graph.d/hdp_mon_hdfs_io_report.json Thu Jun 7 01:13:48 2012
@@ -2,7 +2,7 @@
"report_name" : "hdfs_io_report",
"report_type" : "standard",
"title" : "HDFS I/O",
- "vertical_label" : "Bytes",
+ "vertical_label" : "Bytes/Sec",
"series" : [
{ "metric": "dfs.datanode.bytes_written", "color": "ff0000", "label": "Bytes Written", "line_width": "2", "type": "line" },
{ "metric": "dfs.datanode.bytes_read", "color": "0000ff", "label": "Bytes Read", "line_width": "2", "type": "line" }
Modified: incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/ganglia/graph.d/hdp_mon_jobtracker_mapreduce_report.json
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/ganglia/graph.d/hdp_mon_jobtracker_mapreduce_report.json?rev=1347251&r1=1347250&r2=1347251&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/ganglia/graph.d/hdp_mon_jobtracker_mapreduce_report.json (original)
+++ incubator/ambari/branches/ambari-186/mon_dashboard/src/addOns/ganglia/graph.d/hdp_mon_jobtracker_mapreduce_report.json Thu Jun 7 01:13:48 2012
@@ -1,8 +1,8 @@
{
"report_name" : "jobtracker_mapreduce_report",
"report_type" : "standard",
- "title" : "Map/Reduce Stats",
- "vertical_label" : "Number Of Maps/Reduces",
+ "title" : "Waiting Map/Reduce tasks",
+ "vertical_label" : "Number of Tasks",
"series" : [
{ "metric": "mapred.jobtracker.waiting_maps", "color": "ff0000", "label": "Waiting Maps",
"line_width": "2", "type": "stack" },
Modified: incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/all.json
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/all.json?rev=1347251&r1=1347250&r2=1347251&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/all.json (original)
+++ incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/all.json Thu Jun 7 01:13:48 2012
@@ -1,25 +1,25 @@
{
"Global": [
{
- "description": "Key load metrics, aggregated across the slaves in the grid",
+ "description": "Key load metrics, aggregated across the slave nodes",
"link": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/?c=%GridSlavesClusterName%",
"title": "Load Report",
"url": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/graph.php?c=%GridSlavesClusterName%&g=load_report"
},
{
- "description": "Key memory metrics, aggregated across the slaves in the grid",
+ "description": "Key memory metrics, aggregated across the slave nodes",
"link": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/?c=%GridSlavesClusterName%",
"title": "Memory Report",
"url": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/graph.php?c=%GridSlavesClusterName%&g=mem_report"
},
{
- "description": "Key CPU metrics, aggregated across the slaves in the grid",
+ "description": "Key CPU metrics, aggregated across the slave nodes",
"link": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/?c=%GridSlavesClusterName%",
"title": "CPU Report",
"url": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/graph.php?c=%GridSlavesClusterName%&g=cpu_report"
},
{
- "description": "Key network I/O metrics, aggregated across the slaves in the grid",
+ "description": "Key network I/O metrics, aggregated across the slave nodes",
"link": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/?c=%GridSlavesClusterName%",
"title": "Network I/O Report",
"url": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/graph.php?c=%GridSlavesClusterName%&g=network_report"
Modified: incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/custom/hdp.json
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/custom/hdp.json?rev=1347251&r1=1347250&r2=1347251&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/custom/hdp.json (original)
+++ incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/dashboard/custom/hdp.json Thu Jun 7 01:13:48 2012
@@ -19,9 +19,9 @@
"url": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/graph.php?c=%JobTrackerClusterName%&g=hdp_mon_jobtracker_reduce_slot_report"
},
{
- "description": "Waiting Maps and Reduces, to give a feel for a combined MapReduce backlog",
+ "description": "Map and Reduce tasks waiting to be scheduled, indicating load on the Map/Reduce cluster",
"link": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/?c=%JobTrackerClusterName%",
- "title": "MapReduce Backlog",
+ "title": "Waiting Map/Reduce tasks",
"url": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/graph.php?c=%JobTrackerClusterName%&g=hdp_mon_jobtracker_mapreduce_report"
}
]
Modified: incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/mapreduce/custom/hdp.json
URL: http://svn.apache.org/viewvc/incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/mapreduce/custom/hdp.json?rev=1347251&r1=1347250&r2=1347251&view=diff
==============================================================================
--- incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/mapreduce/custom/hdp.json (original)
+++ incubator/ambari/branches/ambari-186/mon_dashboard/src/dataServices/ganglia/graph_info/mapreduce/custom/hdp.json Thu Jun 7 01:13:48 2012
@@ -1,23 +1,23 @@
{
"Global": [
{
- "description": "Jobs submitted to the JobTracker",
+ "description": "Number of Map/Reduce jobs submitted to the JobTracker",
"link": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/?c=%JobTrackerClusterName%",
- "title": "JobTracker Jobs: Submitted",
+ "title": "Map/Reduce Jobs: Submitted",
"url":
"http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/graph.php?c=%JobTrackerClusterName%&g=hdp_mon_jobtracker_jobs_submitted_report"
},
{
- "description": "Jobs running on the JobTracker",
+ "description": "Number of jobs currently running on the cluster",
"link": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/?c=%JobTrackerClusterName%",
- "title": "JobTracker Jobs: Running",
+ "title": "Map/Reduce Jobs: Running",
"url":
"http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/graph.php?c=%JobTrackerClusterName%&g=hdp_mon_jobtracker_jobs_running_report"
},
{
- "description": "Jobs completed on the JobTracker, to give a feel for cluster-wide job throughput",
+ "description": "Number of Map/Reduce jobs completed, to give a feel for cluster-wide job throughput",
"link": "http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/?c=%JobTrackerClusterName%",
- "title": "JobTracker Jobs: Completed",
+ "title": "Map/Reduce Jobs: Completed",
"url":
"http://%GangliaWebHostName%:%GangliaWebPort%/ganglia/graph.php?c=%JobTrackerClusterName%&g=hdp_mon_jobtracker_jobs_completed_report"
},