You are viewing a plain text version of this content. The canonical link for it was not preserved in this copy.
Posted to commits@ambari.apache.org by ma...@apache.org on 2013/11/23 00:02:41 UTC

[1/2] git commit: AMBARI-3681. Provide TVI rules for service start for SLA quantification. (Artem Baranchuk via mahadev)

Updated Branches:
  refs/heads/trunk 87bf327a7 -> f071cc6c0


AMBARI-3681. Provide TVI rules for service start for SLA quantification. (Artem Baranchuk via mahadev)


Project: http://git-wip-us.apache.org/repos/asf/incubator-ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ambari/commit/37565def
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ambari/tree/37565def
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ambari/diff/37565def

Branch: refs/heads/trunk
Commit: 37565defb6a417607d7b911a462928e6b771893b
Parents: 87bf327
Author: Mahadev Konar <ma...@apache.org>
Authored: Fri Nov 22 14:55:43 2013 -0800
Committer: Mahadev Konar <ma...@apache.org>
Committed: Fri Nov 22 14:55:43 2013 -0800

----------------------------------------------------------------------
 .../hdp-nagios/templates/contacts.cfg.erb       |  25 ++-
 .../templates/hadoop-commands.cfg.erb           |  10 ++
 .../templates/hadoop-services.cfg.erb           |   8 +-
 .../src/addOns/nagios/plugins/sys_logger.py     |  95 ++++++++---
 .../test/nagios/plugins/test_sys_logger.py      | 167 +++++++++++++++++++
 5 files changed, 271 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/37565def/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/contacts.cfg.erb
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/contacts.cfg.erb b/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/contacts.cfg.erb
index e967457..ee6f09e 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/contacts.cfg.erb
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/contacts.cfg.erb
@@ -49,14 +49,29 @@
 # template which is defined elsewhere.
 
 define contact{
-        contact_name                    <%=scope.function_hdp_template_var("nagios_web_login")%>		; Short name of user
-	use				generic-contact		; Inherit default values from generic-contact template (defined above)
-        alias                           Nagios Admin		; Full name of user
+        contact_name    <%=scope.function_hdp_template_var("nagios_web_login")%>    ; Short name of user
+        use             generic-contact                                             ; Inherit default values from generic-contact template (defined above)
+        alias           Nagios Admin                                                ; Full name of user
 
         email                           <%=scope.function_hdp_template_var("nagios_contact")%>	; <<***** CHANGE THIS TO YOUR EMAIL ADDRESS ******
         }
 
-
+# Contact which writes all Nagios alerts to the system logger.
+define contact{
+        contact_name                    sys_logger         ; Short name of user
+        use                             generic-contact    ; Inherit default values from generic-contact template (defined above)
+        alias                           System Logger      ; Full name of user
+        host_notifications_enabled      1
+        service_notifications_enabled   1
+        service_notification_period     24x7
+        host_notification_period        24x7
+        service_notification_options    w,u,c,r,s
+        host_notification_options       d,u,r,s
+        can_submit_commands             1
+        retain_status_information       1
+        service_notification_commands   service_sys_logger
+        host_notification_commands      host_sys_logger
+        }
 
 ###############################################################################
 ###############################################################################
@@ -72,5 +87,5 @@ define contact{
 define contactgroup {
         contactgroup_name       admins
         alias                   Nagios Administrators
-        members                 <%=scope.function_hdp_template_var("nagios_web_login")%>
+        members                 <%=scope.function_hdp_template_var("nagios_web_login")%>,sys_logger
 }

http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/37565def/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb b/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb
index ca1585b..e49199b 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb
@@ -102,3 +102,13 @@ define command{
         command_name    check_nodemanager_health
         command_line    $USER1$/check_nodemanager_health.sh $HOSTADDRESS$ $ARG1$
        }
+
+define command{
+        command_name    host_sys_logger
+        command_line    $USER1$/sys_logger.py $HOSTSTATETYPE$ $HOSTATTEMPT$ $HOSTSTATE$ "Host::Ping" "Event Host=$HOSTADDRESS$($HOSTSTATE$), $HOSTOUTPUT$ $LONGHOSTOUTPUT$"
+       }
+
+define command{
+        command_name    service_sys_logger
+        command_line    $USER1$/sys_logger.py $SERVICESTATETYPE$ $SERVICEATTEMPT$ $SERVICESTATE$ "$SERVICEDESC$" "Event Host=$HOSTADDRESS$ Service Description=$SERVICEDESC$($SERVICESTATE$), $SERVICEOUTPUT$ $LONGSERVICEOUTPUT$"
+       }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/37565def/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb b/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
index 58238ae..41c74ef 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
@@ -25,9 +25,13 @@
 define service {
         name                            hadoop-service
         use                             generic-service
-        notification_options            w,u,c
+        notification_options            w,u,c,r,f,s
         first_notification_delay        0
-        notification_interval           0     # Send the notification once
+        notification_interval           0                 # Send the notification once
+        contact_groups                  admins
+        notifications_enabled           1
+        event_handler_enabled           1
+        register                        0
 }
 
 define service {        

http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/37565def/contrib/addons/src/addOns/nagios/plugins/sys_logger.py
----------------------------------------------------------------------
diff --git a/contrib/addons/src/addOns/nagios/plugins/sys_logger.py b/contrib/addons/src/addOns/nagios/plugins/sys_logger.py
index 7b716c5..4e2e50f 100644
--- a/contrib/addons/src/addOns/nagios/plugins/sys_logger.py
+++ b/contrib/addons/src/addOns/nagios/plugins/sys_logger.py
@@ -21,37 +21,78 @@ severities = {'UP':'OK', 'DOWN':'Critical', 'UNREACHABLE':'Critical', 'OK':'OK',
               'WARNING':'Warning', 'UNKNOWN':'Warning', 'CRITICAL':'Critical'}
 
 # List of services which can result in events at the Degraded severity
-degraded_alert_services = ['HBASEMASTER::HBase Master CPU utilization',
-                           'HDFS::NameNode RPC latency',
-                           'MAPREDUCE::JobTracker RPC latency',
-                           'JOBTRACKER::JobTracker CPU utilization']
+degraded_alert_services = ['HBASEMASTER::HBaseMaster CPU utilization',
+                           'HDFS::Namenode RPC Latency',
+                           'MAPREDUCE::JobTracker RPC Latency',
+                           'JOBTRACKER::Jobtracker CPU utilization']
 
 # List of services which can result in events at the Fatal severity
-fatal_alert_services = ['NAMENODE::NameNode process down']
+fatal_alert_services = ['NAMENODE::Namenode Process down',
+                        'NAMENODE::NameNode process']
 
 # dictionary of service->msg_id mappings
-msg_ids = {'Host::Ping':'host_down', 'HBASEMASTER::HBase Master CPU utilization':'master_cpu_utilization',
-           'HDFS::HDFS capacity utilization':'hdfs_percent_capacity', 'HDFS::Corrupt/Missing blocks':'hdfs_block',
-           'NAMENODE::NameNode edit logs directory status':'namenode_edit_log_write', 'HDFS::Percent DataNodes down':'datanode_down',
-           'DATANODE::DataNode process down':'datanode_process_down', 'HDFS::Percent DataNodes storage full':'datanodes_percent_storage_full',
-           'NAMENODE::NameNode process down':'namenode_process_down', 'HDFS::NameNode RPC latency':'namenode_rpc_latency',
-           'DATANODE::DataNode storage full':'datanodes_storage_full', 'JOBTRACKER::JobTracker process down':'jobtracker_process_down',
-           'MAPREDUCE::JobTracker RPC latency':'jobtracker_rpc_latency', 'MAPREDUCE::Percent TaskTrackers down':'tasktrackers_down',
-           'TASKTRACKER::TaskTracker process down':'tasktracker_process_down', 'HBASEMASTER::HBase Master process down':'hbasemaster_process_down',
-           'REGIONSERVER::RegionServer process down':'regionserver_process_down', 'HBASE::Percent RegionServers down':'regionservers_down',
-           'HIVE-METASTORE::Hive Metastore status check':'hive_metastore_process_down', 'ZOOKEEPER::Percent ZooKeeper Servers down':'zookeepers_down',
-           'ZOOKEEPER::ZooKeeper Server process down':'zookeeper_process_down', 'OOZIE::Oozie Server status check':'oozie_down',
-           'WEBHCAT::WebHCat Server status check':'templeton_down', 'PUPPET::Puppet agent down':'puppet_down',
-           'NAGIOS::Nagios status log staleness':'nagios_status_log_stale', 'GANGLIA::Ganglia [gmetad] process down':'ganglia_process_down',
-           'GANGLIA::Ganglia Collector [gmond] process down alert for HBase Master':'ganglia_collector_process_down',
-           'GANGLIA::Ganglia Collector [gmond] process down alert for JobTracker':'ganglia_collector_process_down',
-           'GANGLIA::Ganglia Collector [gmond] process down alert for NameNode':'ganglia_collector_process_down',
-           'GANGLIA::Ganglia Collector [gmond] process down alert for slaves':'ganglia_collector_process_down',
-           'NAMENODE::Secondary NameNode process down':'secondary_namenode_process_down',
-           'JOBTRACKER::JobTracker CPU utilization':'jobtracker_cpu_utilization',
-           'HBASEMASTER::HBase Master Web UI down':'hbase_ui_down', 'NAMENODE::NameNode Web UI down':'namenode_ui_down',
-           'JOBTRACKER::JobHistory Web UI down':'jobhistory_ui_down', 'JOBTRACKER::JobTracker Web UI down':'jobtracker_ui_down'}
-
+msg_ids = {'Host::Ping':'host_down',
+           'HBASEMASTER::HBaseMaster CPU utilization':'master_cpu_utilization',
+           'HDFS::HDFS Capacity utilization':'hdfs_percent_capacity',
+           'HDFS::Corrupt/Missing blocks':'hdfs_block',
+           'NAMENODE::Namenode Edit logs directory status':'namenode_edit_log_write',
+           'HDFS::Percent DataNodes down':'datanode_down',
+           'DATANODE::Process down':'datanode_process_down',
+           'HDFS::Percent DataNodes storage full':'datanodes_percent_storage_full',
+           'NAMENODE::Namenode Process down':'namenode_process_down',
+           'HDFS::Namenode RPC Latency':'namenode_rpc_latency',
+           'DATANODE::Storage full':'datanodes_storage_full',
+           'JOBTRACKER::Jobtracker Process down':'jobtracker_process_down',
+           'MAPREDUCE::JobTracker RPC Latency':'jobtracker_rpc_latency',
+           'MAPREDUCE::Percent TaskTrackers down':'tasktrackers_down',
+           'TASKTRACKER::Process down':'tasktracker_process_down',
+           'HBASEMASTER::HBaseMaster Process down':'hbasemaster_process_down',
+           'REGIONSERVER::Process down':'regionserver_process_down',
+           'HBASE::Percent region servers down':'regionservers_down',
+           'HIVE-METASTORE::HIVE-METASTORE status check':'hive_metastore_process_down',
+           'ZOOKEEPER::Percent zookeeper servers down':'zookeepers_down',
+           'ZKSERVERS::ZKSERVERS Process down':'zookeeper_process_down',
+           'OOZIE::Oozie status check':'oozie_down',
+           'TEMPLETON::Templeton status check':'templeton_down',
+           'PUPPET::Puppet agent down':'puppet_down',
+           'NAGIOS::Nagios status log staleness':'nagios_status_log_stale',
+           'GANGLIA::Ganglia [gmetad] Process down':'ganglia_process_down',
+           'GANGLIA::Ganglia collector [gmond] Process down alert for hbasemaster':'ganglia_collector_process_down',
+           'GANGLIA::Ganglia collector [gmond] Process down alert for jobtracker':'ganglia_collector_process_down',
+           'GANGLIA::Ganglia collector [gmond] Process down alert for namenode':'ganglia_collector_process_down',
+           'GANGLIA::Ganglia collector [gmond] Process down alert for slaves':'ganglia_collector_process_down',
+           'NAMENODE::Secondary Namenode Process down':'secondary_namenode_process_down',
+           'JOBTRACKER::Jobtracker CPU utilization':'jobtracker_cpu_utilization',
+           'HBASEMASTER::HBase Web UI down':'hbase_ui_down',
+           'NAMENODE::Namenode Web UI down':'namenode_ui_down',
+           'JOBTRACKER::JobHistory Web UI down':'jobhistory_ui_down',
+           'JOBTRACKER::JobTracker Web UI down':'jobtracker_ui_down',
+
+           # Ambari Nagios service check descriptions
+           'DATANODE::DataNode process':'datanode_process',
+           'NAMENODE::NameNode process':'namenode_process',
+           'NAMENODE::Secondary NameNode process':'secondary_namenode_process',
+           'JOURNALNODE::JournalNode process':'journalnode_process',
+           'ZOOKEEPER::ZooKeeper Server process':'zookeeper_server_process',
+           'JOBTRACKER::JobTracker process':'jobtracker_process',
+           'TASKTRACKER::TaskTracker process':'tasktracker_process',
+           'GANGLIA::Ganglia Server process':'ganglia_server_process',
+           'GANGLIA::Ganglia Monitor process for Slaves':'ganglia_monitor_process',
+           'GANGLIA::Ganglia Monitor process for NameNode':'ganglia_monitor_process',
+           'GANGLIA::Ganglia Monitor process for JobTracker':'ganglia_monitor_process',
+           'GANGLIA::Ganglia Monitor process for HBase Master':'ganglia_monitor_process',
+           'GANGLIA::Ganglia Monitor process for ResourceManager':'ganglia_monitor_process',
+           'GANGLIA::Ganglia Monitor process for HistoryServer':'ganglia_monitor_process',
+           'HBASEMASTER::HBase Master process':'hbase_master_process',
+           'REGIONSERVER::RegionServer process':'regionserver_process',
+           'NAGIOS::Nagios status log freshness':'nagios_process',
+           'FLUME::Flume Agent process':'flume_agent_process',
+           'OOZIE::Oozie Server status':'oozie_server_process',
+           'HIVE-METASTORE::Hive Metastore status':'hive_metastore_process',
+           'WEBHCAT::WebHCat Server status':'webhcat_server_process',
+           'RESOURCEMANAGER::ResourceManager process':'resourcemanager_process',
+           'NODEMANAGER::NodeManager process':'nodemanager_process',
+           'JOBHISTORY::HistoryServer process':'historyserver_process'}
 
 # Determine the severity of the TVI alert based on the Nagios alert state.
 def determine_severity(state, service):

http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/37565def/contrib/addons/test/nagios/plugins/test_sys_logger.py
----------------------------------------------------------------------
diff --git a/contrib/addons/test/nagios/plugins/test_sys_logger.py b/contrib/addons/test/nagios/plugins/test_sys_logger.py
index 7f9abff..eb7a8fe 100644
--- a/contrib/addons/test/nagios/plugins/test_sys_logger.py
+++ b/contrib/addons/test/nagios/plugins/test_sys_logger.py
@@ -408,5 +408,172 @@ test('Hadoop_JobTracker_UI_Down:OK',
     'OK: Hadoop: jobtracker_ui_down_ok# SERVICE MSG',
     'HARD', '1', 'OK', 'JOBTRACKER::JobTracker Web UI down', 'SERVICE MSG')
 
+# Tests for ambari nagios service check
+test('DataNode_process',
+     'Critical: Hadoop: datanode_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'DATANODE::DataNode process', 'SERVICE MSG')
+test('DataNode_process:OK',
+     'OK: Hadoop: datanode_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'DATANODE::DataNode process', 'SERVICE MSG')
+
+test('NameNode_process',
+     'Fatal: Hadoop: namenode_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'NAMENODE::NameNode process', 'SERVICE MSG')
+test('NameNode_process:WARNING',
+     'Fatal: Hadoop: namenode_process# SERVICE MSG',
+     'HARD', '1', 'WARNING', 'NAMENODE::NameNode process', 'SERVICE MSG')
+test('NameNode_process:OK',
+     'OK: Hadoop: namenode_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'NAMENODE::NameNode process', 'SERVICE MSG')
+
+test('Secondary_NameNode_process',
+     'Critical: Hadoop: secondary_namenode_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'NAMENODE::Secondary NameNode process', 'SERVICE MSG')
+test('Secondary_NameNode_process:OK',
+     'OK: Hadoop: secondary_namenode_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'NAMENODE::Secondary NameNode process', 'SERVICE MSG')
+
+test('JournalNode_process',
+     'Critical: Hadoop: journalnode_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'JOURNALNODE::JournalNode process', 'SERVICE MSG')
+test('JournalNode_process:OK',
+     'OK: Hadoop: journalnode_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'JOURNALNODE::JournalNode process', 'SERVICE MSG')
+
+test('ZooKeeper_Server_process',
+     'Critical: Hadoop: zookeeper_server_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'ZOOKEEPER::ZooKeeper Server process', 'SERVICE MSG')
+test('ZooKeeper_Server_process:OK',
+     'OK: Hadoop: zookeeper_server_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'ZOOKEEPER::ZooKeeper Server process', 'SERVICE MSG')
+
+test('JobTracker_process',
+     'Critical: Hadoop: jobtracker_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'JOBTRACKER::JobTracker process', 'SERVICE MSG')
+test('JobTracker_process:OK',
+     'OK: Hadoop: jobtracker_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'JOBTRACKER::JobTracker process', 'SERVICE MSG')
+
+test('TaskTracker_process',
+     'Critical: Hadoop: tasktracker_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'TASKTRACKER::TaskTracker process', 'SERVICE MSG')
+test('TaskTracker_process:OK',
+     'OK: Hadoop: tasktracker_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'TASKTRACKER::TaskTracker process', 'SERVICE MSG')
+
+test('Ganglia_Server_process',
+     'Critical: Hadoop: ganglia_server_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'GANGLIA::Ganglia Server process', 'SERVICE MSG')
+test('Ganglia_Server_process:OK',
+     'OK: Hadoop: ganglia_server_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'GANGLIA::Ganglia Server process', 'SERVICE MSG')
+
+test('Ganglia_Monitor_process_for_Slaves',
+     'Critical: Hadoop: ganglia_monitor_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'GANGLIA::Ganglia Monitor process for Slaves', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_Slaves:OK',
+     'OK: Hadoop: ganglia_monitor_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'GANGLIA::Ganglia Monitor process for Slaves', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_NameNode',
+     'Critical: Hadoop: ganglia_monitor_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'GANGLIA::Ganglia Monitor process for NameNode', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_NameNode:OK',
+     'OK: Hadoop: ganglia_monitor_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'GANGLIA::Ganglia Monitor process for NameNode', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_JobTracker',
+     'Critical: Hadoop: ganglia_monitor_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'GANGLIA::Ganglia Monitor process for JobTracker', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_JobTracker:OK',
+     'OK: Hadoop: ganglia_monitor_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'GANGLIA::Ganglia Monitor process for JobTracker', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_HBase_Master',
+     'Critical: Hadoop: ganglia_monitor_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'GANGLIA::Ganglia Monitor process for HBase Master', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_HBase_Master:OK',
+     'OK: Hadoop: ganglia_monitor_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'GANGLIA::Ganglia Monitor process for HBase Master', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_ResourceManager',
+     'Critical: Hadoop: ganglia_monitor_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'GANGLIA::Ganglia Monitor process for ResourceManager', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_ResourceManager:OK',
+     'OK: Hadoop: ganglia_monitor_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'GANGLIA::Ganglia Monitor process for ResourceManager', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_HistoryServer',
+     'Critical: Hadoop: ganglia_monitor_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'GANGLIA::Ganglia Monitor process for HistoryServer', 'SERVICE MSG')
+test('Ganglia_Monitor_process_for_HistoryServer:OK',
+     'OK: Hadoop: ganglia_monitor_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'GANGLIA::Ganglia Monitor process for HistoryServer', 'SERVICE MSG')
+
+test('HBase_Master_process',
+     'Critical: Hadoop: hbase_master_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'HBASEMASTER::HBase Master process', 'SERVICE MSG')
+test('HBase_Master_process:OK',
+     'OK: Hadoop: hbase_master_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'HBASEMASTER::HBase Master process', 'SERVICE MSG')
+
+test('RegionServer_process',
+     'Critical: Hadoop: regionserver_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'REGIONSERVER::RegionServer process', 'SERVICE MSG')
+test('RegionServer_process:OK',
+     'OK: Hadoop: regionserver_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'REGIONSERVER::RegionServer process', 'SERVICE MSG')
+
+test('Nagios_status_log_freshness',
+     'Critical: Hadoop: nagios_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'NAGIOS::Nagios status log freshness', 'SERVICE MSG')
+test('Nagios_status_log_freshness:OK',
+     'OK: Hadoop: nagios_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'NAGIOS::Nagios status log freshness', 'SERVICE MSG')
+
+test('Flume_Agent_process',
+     'Critical: Hadoop: flume_agent_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'FLUME::Flume Agent process', 'SERVICE MSG')
+test('Flume_Agent_process:OK',
+     'OK: Hadoop: flume_agent_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'FLUME::Flume Agent process', 'SERVICE MSG')
+
+test('Oozie_Server_status',
+     'Critical: Hadoop: oozie_server_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'OOZIE::Oozie Server status', 'SERVICE MSG')
+test('Oozie_Server_status:OK',
+     'OK: Hadoop: oozie_server_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'OOZIE::Oozie Server status', 'SERVICE MSG')
+
+test('Hive_Metastore_status',
+     'Critical: Hadoop: hive_metastore_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'HIVE-METASTORE::Hive Metastore status', 'SERVICE MSG')
+test('Hive_Metastore_status:OK',
+     'OK: Hadoop: hive_metastore_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'HIVE-METASTORE::Hive Metastore status', 'SERVICE MSG')
+
+test('WebHCat_Server_status',
+     'Critical: Hadoop: webhcat_server_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'WEBHCAT::WebHCat Server status', 'SERVICE MSG')
+test('WebHCat_Server_status:OK',
+     'OK: Hadoop: webhcat_server_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'WEBHCAT::WebHCat Server status', 'SERVICE MSG')
+
+test('ResourceManager_process',
+     'Critical: Hadoop: resourcemanager_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'RESOURCEMANAGER::ResourceManager process', 'SERVICE MSG')
+test('ResourceManager_process:OK',
+     'OK: Hadoop: resourcemanager_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'RESOURCEMANAGER::ResourceManager process', 'SERVICE MSG')
+
+test('NodeManager_process',
+     'Critical: Hadoop: nodemanager_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'NODEMANAGER::NodeManager process', 'SERVICE MSG')
+test('NodeManager_process:OK',
+     'OK: Hadoop: nodemanager_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'NODEMANAGER::NodeManager process', 'SERVICE MSG')
+
+test('HistoryServer_process',
+     'Critical: Hadoop: historyserver_process# SERVICE MSG',
+     'HARD', '1', 'CRITICAL', 'JOBHISTORY::HistoryServer process', 'SERVICE MSG')
+test('HistoryServer_process:OK',
+     'OK: Hadoop: historyserver_process_ok# SERVICE MSG',
+     'HARD', '1', 'OK', 'JOBHISTORY::HistoryServer process', 'SERVICE MSG')
+
 summary()
 


[2/2] git commit: AMBARI-3814. Enable data locality in YARN. (mahadev)

Posted by ma...@apache.org.
AMBARI-3814. Enable data locality in YARN. (mahadev)


Project: http://git-wip-us.apache.org/repos/asf/incubator-ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ambari/commit/f071cc6c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ambari/tree/f071cc6c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ambari/diff/f071cc6c

Branch: refs/heads/trunk
Commit: f071cc6c0fe10581fd982f015cf868c6ab0a201e
Parents: 37565de
Author: Mahadev Konar <ma...@apache.org>
Authored: Fri Nov 22 15:02:29 2013 -0800
Committer: Mahadev Konar <ma...@apache.org>
Committed: Fri Nov 22 15:02:29 2013 -0800

----------------------------------------------------------------------
 .../2.0.6/services/YARN/configuration/capacity-scheduler.xml | 8 ++++++++
 .../2.0.8/services/YARN/configuration/capacity-scheduler.xml | 8 ++++++++
 2 files changed, 16 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/f071cc6c/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/configuration/capacity-scheduler.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/configuration/capacity-scheduler.xml b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/configuration/capacity-scheduler.xml
index ccfb779..4a19779 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/configuration/capacity-scheduler.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/configuration/capacity-scheduler.xml
@@ -116,5 +116,13 @@
       No description
     </description>
   </property>
+  <property>
+    <name>yarn.scheduler.capacity.node-locality-delay</name>
+    <value>40</value>
+    <description>
+      No description
+    </description>
+  </property>
+
 
 </configuration>

http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/f071cc6c/ambari-server/src/main/resources/stacks/HDP/2.0.8/services/YARN/configuration/capacity-scheduler.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.8/services/YARN/configuration/capacity-scheduler.xml b/ambari-server/src/main/resources/stacks/HDP/2.0.8/services/YARN/configuration/capacity-scheduler.xml
index ccfb779..4a19779 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.8/services/YARN/configuration/capacity-scheduler.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.8/services/YARN/configuration/capacity-scheduler.xml
@@ -116,5 +116,13 @@
       No description
     </description>
   </property>
+  <property>
+    <name>yarn.scheduler.capacity.node-locality-delay</name>
+    <value>40</value>
+    <description>
+      No description
+    </description>
+  </property>
+
 
 </configuration>