You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by ma...@apache.org on 2013/11/23 00:02:41 UTC
[1/2] git commit: AMBARI-3681. Provide TVI rules for service start for SLA quantification. (Artem Baranchuk via mahadev)
Updated Branches:
refs/heads/trunk 87bf327a7 -> f071cc6c0
AMBARI-3681. Provide TVI rules for service start for SLA quantification. (Artem Baranchuk via mahadev)
Project: http://git-wip-us.apache.org/repos/asf/incubator-ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ambari/commit/37565def
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ambari/tree/37565def
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ambari/diff/37565def
Branch: refs/heads/trunk
Commit: 37565defb6a417607d7b911a462928e6b771893b
Parents: 87bf327
Author: Mahadev Konar <ma...@apache.org>
Authored: Fri Nov 22 14:55:43 2013 -0800
Committer: Mahadev Konar <ma...@apache.org>
Committed: Fri Nov 22 14:55:43 2013 -0800
----------------------------------------------------------------------
.../hdp-nagios/templates/contacts.cfg.erb | 25 ++-
.../templates/hadoop-commands.cfg.erb | 10 ++
.../templates/hadoop-services.cfg.erb | 8 +-
.../src/addOns/nagios/plugins/sys_logger.py | 95 ++++++++---
.../test/nagios/plugins/test_sys_logger.py | 167 +++++++++++++++++++
5 files changed, 271 insertions(+), 34 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/37565def/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/contacts.cfg.erb
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/contacts.cfg.erb b/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/contacts.cfg.erb
index e967457..ee6f09e 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/contacts.cfg.erb
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/contacts.cfg.erb
@@ -49,14 +49,29 @@
# template which is defined elsewhere.
define contact{
- contact_name <%=scope.function_hdp_template_var("nagios_web_login")%> ; Short name of user
- use generic-contact ; Inherit default values from generic-contact template (defined above)
- alias Nagios Admin ; Full name of user
+ contact_name <%=scope.function_hdp_template_var("nagios_web_login")%> ; Short name of user
+ use generic-contact ; Inherit default values from generic-contact template (defined above)
+ alias Nagios Admin ; Full name of user
email <%=scope.function_hdp_template_var("nagios_contact")%> ; <<***** CHANGE THIS TO YOUR EMAIL ADDRESS ******
}
-
+# Contact which writes all Nagios alerts to the system logger.
+define contact{
+ contact_name sys_logger ; Short name of user
+ use generic-contact ; Inherit default values from generic-contact template (defined above)
+ alias System Logger ; Full name of user
+ host_notifications_enabled 1
+ service_notifications_enabled 1
+ service_notification_period 24x7
+ host_notification_period 24x7
+ service_notification_options w,u,c,r,s
+ host_notification_options d,u,r,s
+ can_submit_commands 1
+ retain_status_information 1
+ service_notification_commands service_sys_logger
+ host_notification_commands host_sys_logger
+ }
###############################################################################
###############################################################################
@@ -72,5 +87,5 @@ define contact{
define contactgroup {
contactgroup_name admins
alias Nagios Administrators
- members <%=scope.function_hdp_template_var("nagios_web_login")%>
+ members <%=scope.function_hdp_template_var("nagios_web_login")%>,sys_logger
}
http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/37565def/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb b/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb
index ca1585b..e49199b 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb
@@ -102,3 +102,13 @@ define command{
command_name check_nodemanager_health
command_line $USER1$/check_nodemanager_health.sh $HOSTADDRESS$ $ARG1$
}
+
+define command{
+ command_name host_sys_logger
+ command_line $USER1$/sys_logger.py $HOSTSTATETYPE$ $HOSTATTEMPT$ $HOSTSTATE$ "Host::Ping" "Event Host=$HOSTADDRESS$($HOSTSTATE$), $HOSTOUTPUT$ $LONGHOSTOUTPUT$"
+ }
+
+define command{
+ command_name service_sys_logger
+ command_line $USER1$/sys_logger.py $SERVICESTATETYPE$ $SERVICEATTEMPT$ $SERVICESTATE$ "$SERVICEDESC$" "Event Host=$HOSTADDRESS$ Service Description=$SERVICEDESC$($SERVICESTATE$), $SERVICEOUTPUT$ $LONGSERVICEOUTPUT$"
+ }
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/37565def/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb b/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
index 58238ae..41c74ef 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
@@ -25,9 +25,13 @@
define service {
name hadoop-service
use generic-service
- notification_options w,u,c
+ notification_options w,u,c,r,f,s
first_notification_delay 0
- notification_interval 0 # Send the notification once
+ notification_interval 0 # Send the notification once
+ contact_groups admins
+ notifications_enabled 1
+ event_handler_enabled 1
+ register 0
}
define service {
http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/37565def/contrib/addons/src/addOns/nagios/plugins/sys_logger.py
----------------------------------------------------------------------
diff --git a/contrib/addons/src/addOns/nagios/plugins/sys_logger.py b/contrib/addons/src/addOns/nagios/plugins/sys_logger.py
index 7b716c5..4e2e50f 100644
--- a/contrib/addons/src/addOns/nagios/plugins/sys_logger.py
+++ b/contrib/addons/src/addOns/nagios/plugins/sys_logger.py
@@ -21,37 +21,78 @@ severities = {'UP':'OK', 'DOWN':'Critical', 'UNREACHABLE':'Critical', 'OK':'OK',
'WARNING':'Warning', 'UNKNOWN':'Warning', 'CRITICAL':'Critical'}
# List of services which can result in events at the Degraded severity
-degraded_alert_services = ['HBASEMASTER::HBase Master CPU utilization',
- 'HDFS::NameNode RPC latency',
- 'MAPREDUCE::JobTracker RPC latency',
- 'JOBTRACKER::JobTracker CPU utilization']
+degraded_alert_services = ['HBASEMASTER::HBaseMaster CPU utilization',
+ 'HDFS::Namenode RPC Latency',
+ 'MAPREDUCE::JobTracker RPC Latency',
+ 'JOBTRACKER::Jobtracker CPU utilization']
# List of services which can result in events at the Fatal severity
-fatal_alert_services = ['NAMENODE::NameNode process down']
+fatal_alert_services = ['NAMENODE::Namenode Process down',
+ 'NAMENODE::NameNode process']
# dictionary of service->msg_id mappings
-msg_ids = {'Host::Ping':'host_down', 'HBASEMASTER::HBase Master CPU utilization':'master_cpu_utilization',
- 'HDFS::HDFS capacity utilization':'hdfs_percent_capacity', 'HDFS::Corrupt/Missing blocks':'hdfs_block',
- 'NAMENODE::NameNode edit logs directory status':'namenode_edit_log_write', 'HDFS::Percent DataNodes down':'datanode_down',
- 'DATANODE::DataNode process down':'datanode_process_down', 'HDFS::Percent DataNodes storage full':'datanodes_percent_storage_full',
- 'NAMENODE::NameNode process down':'namenode_process_down', 'HDFS::NameNode RPC latency':'namenode_rpc_latency',
- 'DATANODE::DataNode storage full':'datanodes_storage_full', 'JOBTRACKER::JobTracker process down':'jobtracker_process_down',
- 'MAPREDUCE::JobTracker RPC latency':'jobtracker_rpc_latency', 'MAPREDUCE::Percent TaskTrackers down':'tasktrackers_down',
- 'TASKTRACKER::TaskTracker process down':'tasktracker_process_down', 'HBASEMASTER::HBase Master process down':'hbasemaster_process_down',
- 'REGIONSERVER::RegionServer process down':'regionserver_process_down', 'HBASE::Percent RegionServers down':'regionservers_down',
- 'HIVE-METASTORE::Hive Metastore status check':'hive_metastore_process_down', 'ZOOKEEPER::Percent ZooKeeper Servers down':'zookeepers_down',
- 'ZOOKEEPER::ZooKeeper Server process down':'zookeeper_process_down', 'OOZIE::Oozie Server status check':'oozie_down',
- 'WEBHCAT::WebHCat Server status check':'templeton_down', 'PUPPET::Puppet agent down':'puppet_down',
- 'NAGIOS::Nagios status log staleness':'nagios_status_log_stale', 'GANGLIA::Ganglia [gmetad] process down':'ganglia_process_down',
- 'GANGLIA::Ganglia Collector [gmond] process down alert for HBase Master':'ganglia_collector_process_down',
- 'GANGLIA::Ganglia Collector [gmond] process down alert for JobTracker':'ganglia_collector_process_down',
- 'GANGLIA::Ganglia Collector [gmond] process down alert for NameNode':'ganglia_collector_process_down',
- 'GANGLIA::Ganglia Collector [gmond] process down alert for slaves':'ganglia_collector_process_down',
- 'NAMENODE::Secondary NameNode process down':'secondary_namenode_process_down',
- 'JOBTRACKER::JobTracker CPU utilization':'jobtracker_cpu_utilization',
- 'HBASEMASTER::HBase Master Web UI down':'hbase_ui_down', 'NAMENODE::NameNode Web UI down':'namenode_ui_down',
- 'JOBTRACKER::JobHistory Web UI down':'jobhistory_ui_down', 'JOBTRACKER::JobTracker Web UI down':'jobtracker_ui_down'}
-
+msg_ids = {'Host::Ping':'host_down',
+ 'HBASEMASTER::HBaseMaster CPU utilization':'master_cpu_utilization',
+ 'HDFS::HDFS Capacity utilization':'hdfs_percent_capacity',
+ 'HDFS::Corrupt/Missing blocks':'hdfs_block',
+ 'NAMENODE::Namenode Edit logs directory status':'namenode_edit_log_write',
+ 'HDFS::Percent DataNodes down':'datanode_down',
+ 'DATANODE::Process down':'datanode_process_down',
+ 'HDFS::Percent DataNodes storage full':'datanodes_percent_storage_full',
+ 'NAMENODE::Namenode Process down':'namenode_process_down',
+ 'HDFS::Namenode RPC Latency':'namenode_rpc_latency',
+ 'DATANODE::Storage full':'datanodes_storage_full',
+ 'JOBTRACKER::Jobtracker Process down':'jobtracker_process_down',
+ 'MAPREDUCE::JobTracker RPC Latency':'jobtracker_rpc_latency',
+ 'MAPREDUCE::Percent TaskTrackers down':'tasktrackers_down',
+ 'TASKTRACKER::Process down':'tasktracker_process_down',
+ 'HBASEMASTER::HBaseMaster Process down':'hbasemaster_process_down',
+ 'REGIONSERVER::Process down':'regionserver_process_down',
+ 'HBASE::Percent region servers down':'regionservers_down',
+ 'HIVE-METASTORE::HIVE-METASTORE status check':'hive_metastore_process_down',
+ 'ZOOKEEPER::Percent zookeeper servers down':'zookeepers_down',
+ 'ZKSERVERS::ZKSERVERS Process down':'zookeeper_process_down',
+ 'OOZIE::Oozie status check':'oozie_down',
+ 'TEMPLETON::Templeton status check':'templeton_down',
+ 'PUPPET::Puppet agent down':'puppet_down',
+ 'NAGIOS::Nagios status log staleness':'nagios_status_log_stale',
+ 'GANGLIA::Ganglia [gmetad] Process down':'ganglia_process_down',
+ 'GANGLIA::Ganglia collector [gmond] Process down alert for hbasemaster':'ganglia_collector_process_down',
+ 'GANGLIA::Ganglia collector [gmond] Process down alert for jobtracker':'ganglia_collector_process_down',
+ 'GANGLIA::Ganglia collector [gmond] Process down alert for namenode':'ganglia_collector_process_down',
+ 'GANGLIA::Ganglia collector [gmond] Process down alert for slaves':'ganglia_collector_process_down',
+ 'NAMENODE::Secondary Namenode Process down':'secondary_namenode_process_down',
+ 'JOBTRACKER::Jobtracker CPU utilization':'jobtracker_cpu_utilization',
+ 'HBASEMASTER::HBase Web UI down':'hbase_ui_down',
+ 'NAMENODE::Namenode Web UI down':'namenode_ui_down',
+ 'JOBTRACKER::JobHistory Web UI down':'jobhistory_ui_down',
+ 'JOBTRACKER::JobTracker Web UI down':'jobtracker_ui_down',
+
+ # Ambari Nagios service check descriptions
+ 'DATANODE::DataNode process':'datanode_process',
+ 'NAMENODE::NameNode process':'namenode_process',
+ 'NAMENODE::Secondary NameNode process':'secondary_namenode_process',
+ 'JOURNALNODE::JournalNode process':'journalnode_process',
+ 'ZOOKEEPER::ZooKeeper Server process':'zookeeper_server_process',
+ 'JOBTRACKER::JobTracker process':'jobtracker_process',
+ 'TASKTRACKER::TaskTracker process':'tasktracker_process',
+ 'GANGLIA::Ganglia Server process':'ganglia_server_process',
+ 'GANGLIA::Ganglia Monitor process for Slaves':'ganglia_monitor_process',
+ 'GANGLIA::Ganglia Monitor process for NameNode':'ganglia_monitor_process',
+ 'GANGLIA::Ganglia Monitor process for JobTracker':'ganglia_monitor_process',
+ 'GANGLIA::Ganglia Monitor process for HBase Master':'ganglia_monitor_process',
+ 'GANGLIA::Ganglia Monitor process for ResourceManager':'ganglia_monitor_process',
+ 'GANGLIA::Ganglia Monitor process for HistoryServer':'ganglia_monitor_process',
+ 'HBASEMASTER::HBase Master process':'hbase_master_process',
+ 'REGIONSERVER::RegionServer process':'regionserver_process',
+ 'NAGIOS::Nagios status log freshness':'nagios_process',
+ 'FLUME::Flume Agent process':'flume_agent_process',
+ 'OOZIE::Oozie Server status':'oozie_server_process',
+ 'HIVE-METASTORE::Hive Metastore status':'hive_metastore_process',
+ 'WEBHCAT::WebHCat Server status':'webhcat_server_process',
+ 'RESOURCEMANAGER::ResourceManager process':'resourcemanager_process',
+ 'NODEMANAGER::NodeManager process':'nodemanager_process',
+ 'JOBHISTORY::HistoryServer process':'historyserver_process'}
# Determine the severity of the TVI alert based on the Nagios alert state.
def determine_severity(state, service):
http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/37565def/contrib/addons/test/nagios/plugins/test_sys_logger.py
----------------------------------------------------------------------
diff --git a/contrib/addons/test/nagios/plugins/test_sys_logger.py b/contrib/addons/test/nagios/plugins/test_sys_logger.py
index 7f9abff..eb7a8fe 100644
--- a/contrib/addons/test/nagios/plugins/test_sys_logger.py
+++ b/contrib/addons/test/nagios/plugins/test_sys_logger.py
@@ -408,5 +408,172 @@ test('Hadoop_JobTracker_UI_Down:OK',
'OK: Hadoop: jobtracker_ui_down_ok# SERVICE MSG',
'HARD', '1', 'OK', 'JOBTRACKER::JobTracker Web UI down', 'SERVICE MSG')
+# Tests for ambari nagios service check
+test('DataNode_process',
+ 'Critical: Hadoop: datanode_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'DATANODE::DataNode process', 'SERVICE MSG')
+test('DataNode_process:OK',
+ 'OK: Hadoop: datanode_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'DATANODE::DataNode process', 'SERVICE MSG')
+
+test('NameNode_process',
+ 'Fatal: Hadoop: namenode_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'NAMENODE::NameNode process', 'SERVICE MSG')
+test('NameNode_process:WARNING',
+ 'Fatal: Hadoop: namenode_process# SERVICE MSG',
+ 'HARD', '1', 'WARNING', 'NAMENODE::NameNode process', 'SERVICE MSG')
+test('NameNode_process:OK',
+ 'OK: Hadoop: namenode_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'NAMENODE::NameNode process', 'SERVICE MSG')
+
+test('Secondary_NameNode_process',
+ 'Critical: Hadoop: secondary_namenode_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'NAMENODE::Secondary NameNode process', 'SERVICE MSG')
+test('Secondary_NameNode_process:OK',
+ 'OK: Hadoop: secondary_namenode_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'NAMENODE::Secondary NameNode process', 'SERVICE MSG')
+
+test('JournalNode_process',
+ 'Critical: Hadoop: journalnode_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'JOURNALNODE::JournalNode process', 'SERVICE MSG')
+test('JournalNode_process:OK',
+ 'OK: Hadoop: journalnode_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'JOURNALNODE::JournalNode process', 'SERVICE MSG')
+
+test('ZooKeeper_Server_process',
+ 'Critical: Hadoop: zookeeper_server_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'ZOOKEEPER::ZooKeeper Server process', 'SERVICE MSG')
+test('ZooKeeper_Server_process:OK',
+ 'OK: Hadoop: zookeeper_server_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'ZOOKEEPER::ZooKeeper Server process', 'SERVICE MSG')
+
+test('JobTracker_process',
+ 'Critical: Hadoop: jobtracker_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'JOBTRACKER::JobTracker process', 'SERVICE MSG')
+test('JobTracker_process:OK',
+ 'OK: Hadoop: jobtracker_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'JOBTRACKER::JobTracker process', 'SERVICE MSG')
+
+test('TaskTracker_process',
+ 'Critical: Hadoop: tasktracker_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'TASKTRACKER::TaskTracker process', 'SERVICE MSG')
+test('TaskTracker_process:OK',
+ 'OK: Hadoop: tasktracker_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'TASKTRACKER::TaskTracker process', 'SERVICE MSG')
+
+test('Ganglia_Server_process',
+ 'Critical: Hadoop: ganglia_server_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'GANGLIA::Ganglia Server process', 'SERVICE MSG')
+test('Ganglia_Server_process:OK',
+ 'OK: Hadoop: ganglia_server_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'GANGLIA::Ganglia Server process', 'SERVICE MSG')
+
+test('Ganglia_Monitor_process_for_Slaves',
+ 'Critical: Hadoop: ganglia_monitor_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'GANGLIA::Ganglia Monitor process for Slaves', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_Slaves:OK',
+ 'OK: Hadoop: ganglia_monitor_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'GANGLIA::Ganglia Monitor process for Slaves', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_NameNode',
+ 'Critical: Hadoop: ganglia_monitor_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'GANGLIA::Ganglia Monitor process for NameNode', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_NameNode:OK',
+ 'OK: Hadoop: ganglia_monitor_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'GANGLIA::Ganglia Monitor process for NameNode', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_JobTracker',
+ 'Critical: Hadoop: ganglia_monitor_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'GANGLIA::Ganglia Monitor process for JobTracker', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_JobTracker:OK',
+ 'OK: Hadoop: ganglia_monitor_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'GANGLIA::Ganglia Monitor process for JobTracker', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_HBase_Master',
+ 'Critical: Hadoop: ganglia_monitor_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'GANGLIA::Ganglia Monitor process for HBase Master', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_HBase_Master:OK',
+ 'OK: Hadoop: ganglia_monitor_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'GANGLIA::Ganglia Monitor process for HBase Master', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_ResourceManager',
+ 'Critical: Hadoop: ganglia_monitor_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'GANGLIA::Ganglia Monitor process for ResourceManager', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_ResourceManager:OK',
+ 'OK: Hadoop: ganglia_monitor_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'GANGLIA::Ganglia Monitor process for ResourceManager', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_HistoryServer',
+ 'Critical: Hadoop: ganglia_monitor_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'GANGLIA::Ganglia Monitor process for HistoryServer', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_HistoryServer:OK',
+ 'OK: Hadoop: ganglia_monitor_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'GANGLIA::Ganglia Monitor process for HistoryServer', 'SERVICE MSG')
+
+test('HBase_Master_process',
+ 'Critical: Hadoop: hbase_master_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'HBASEMASTER::HBase Master process', 'SERVICE MSG')
+test('HBase_Master_process:OK',
+ 'OK: Hadoop: hbase_master_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'HBASEMASTER::HBase Master process', 'SERVICE MSG')
+
+test('RegionServer_process',
+ 'Critical: Hadoop: regionserver_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'REGIONSERVER::RegionServer process', 'SERVICE MSG')
+test('RegionServer_process:OK',
+ 'OK: Hadoop: regionserver_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'REGIONSERVER::RegionServer process', 'SERVICE MSG')
+
+test('Nagios_status_log_freshness',
+ 'Critical: Hadoop: nagios_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'NAGIOS::Nagios status log freshness', 'SERVICE MSG')
+test('Nagios_status_log_freshness:OK',
+ 'OK: Hadoop: nagios_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'NAGIOS::Nagios status log freshness', 'SERVICE MSG')
+
+test('Flume_Agent_process',
+ 'Critical: Hadoop: flume_agent_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'FLUME::Flume Agent process', 'SERVICE MSG')
+test('Flume_Agent_process:OK',
+ 'OK: Hadoop: flume_agent_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'FLUME::Flume Agent process', 'SERVICE MSG')
+
+test('Oozie_Server_status',
+ 'Critical: Hadoop: oozie_server_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'OOZIE::Oozie Server status', 'SERVICE MSG')
+test('Oozie_Server_status:OK',
+ 'OK: Hadoop: oozie_server_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'OOZIE::Oozie Server status', 'SERVICE MSG')
+
+test('Hive_Metastore_status',
+ 'Critical: Hadoop: hive_metastore_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'HIVE-METASTORE::Hive Metastore status', 'SERVICE MSG')
+test('Hive_Metastore_status:OK',
+ 'OK: Hadoop: hive_metastore_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'HIVE-METASTORE::Hive Metastore status', 'SERVICE MSG')
+
+test('WebHCat_Server_status',
+ 'Critical: Hadoop: webhcat_server_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'WEBHCAT::WebHCat Server status', 'SERVICE MSG')
+test('WebHCat_Server_status:OK',
+ 'OK: Hadoop: webhcat_server_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'WEBHCAT::WebHCat Server status', 'SERVICE MSG')
+
+test('ResourceManager_process',
+ 'Critical: Hadoop: resourcemanager_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'RESOURCEMANAGER::ResourceManager process', 'SERVICE MSG')
+test('ResourceManager_process:OK',
+ 'OK: Hadoop: resourcemanager_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'RESOURCEMANAGER::ResourceManager process', 'SERVICE MSG')
+
+test('NodeManager_process',
+ 'Critical: Hadoop: nodemanager_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'NODEMANAGER::NodeManager process', 'SERVICE MSG')
+test('NodeManager_process:OK',
+ 'OK: Hadoop: nodemanager_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'NODEMANAGER::NodeManager process', 'SERVICE MSG')
+
+test('HistoryServer_process',
+ 'Critical: Hadoop: historyserver_process# SERVICE MSG',
+ 'HARD', '1', 'CRITICAL', 'JOBHISTORY::HistoryServer process', 'SERVICE MSG')
+test('HistoryServer_process:OK',
+ 'OK: Hadoop: historyserver_process_ok# SERVICE MSG',
+ 'HARD', '1', 'OK', 'JOBHISTORY::HistoryServer process', 'SERVICE MSG')
+
summary()
[2/2] git commit: AMBARI-3814. Enable data locality in YARN. (mahadev)
Posted by ma...@apache.org.
AMBARI-3814. Enable data locality in YARN. (mahadev)
Project: http://git-wip-us.apache.org/repos/asf/incubator-ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ambari/commit/f071cc6c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ambari/tree/f071cc6c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ambari/diff/f071cc6c
Branch: refs/heads/trunk
Commit: f071cc6c0fe10581fd982f015cf868c6ab0a201e
Parents: 37565de
Author: Mahadev Konar <ma...@apache.org>
Authored: Fri Nov 22 15:02:29 2013 -0800
Committer: Mahadev Konar <ma...@apache.org>
Committed: Fri Nov 22 15:02:29 2013 -0800
----------------------------------------------------------------------
.../2.0.6/services/YARN/configuration/capacity-scheduler.xml | 8 ++++++++
.../2.0.8/services/YARN/configuration/capacity-scheduler.xml | 8 ++++++++
2 files changed, 16 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/f071cc6c/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/configuration/capacity-scheduler.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/configuration/capacity-scheduler.xml b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/configuration/capacity-scheduler.xml
index ccfb779..4a19779 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/configuration/capacity-scheduler.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/configuration/capacity-scheduler.xml
@@ -116,5 +116,13 @@
No description
</description>
</property>
+ <property>
+ <name>yarn.scheduler.capacity.node-locality-delay</name>
+ <value>40</value>
+ <description>
+ No description
+ </description>
+ </property>
+
</configuration>
http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/f071cc6c/ambari-server/src/main/resources/stacks/HDP/2.0.8/services/YARN/configuration/capacity-scheduler.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.8/services/YARN/configuration/capacity-scheduler.xml b/ambari-server/src/main/resources/stacks/HDP/2.0.8/services/YARN/configuration/capacity-scheduler.xml
index ccfb779..4a19779 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.8/services/YARN/configuration/capacity-scheduler.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.8/services/YARN/configuration/capacity-scheduler.xml
@@ -116,5 +116,13 @@
No description
</description>
</property>
+ <property>
+ <name>yarn.scheduler.capacity.node-locality-delay</name>
+ <value>40</value>
+ <description>
+ No description
+ </description>
+ </property>
+
</configuration>