You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by nc...@apache.org on 2014/07/09 23:45:47 UTC

git commit: AMBARI-6421. Add capability to report alert data from agents to Nagios (ncole)

Repository: ambari
Updated Branches:
  refs/heads/trunk 3b62427a4 -> b94eb716b


AMBARI-6421. Add capability to report alert data from agents to Nagios (ncole)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/b94eb716
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/b94eb716
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/b94eb716

Branch: refs/heads/trunk
Commit: b94eb716ba3228be6d39f0fc9009f8e63d146c89
Parents: 3b62427
Author: Nate Cole <nc...@hortonworks.com>
Authored: Tue Jul 8 13:24:43 2014 -0400
Committer: Nate Cole <nc...@hortonworks.com>
Committed: Wed Jul 9 16:21:48 2014 -0400

----------------------------------------------------------------------
 .../ambari/server/agent/HeartbeatMonitor.java   | 41 ++++++++-
 .../ambari/server/agent/NagiosAlertCommand.java | 44 ++++++++++
 .../nagios/NagiosPropertyProvider.java          | 11 ---
 .../NAGIOS/package/files/check_ambari_alerts.py | 83 ++++++++++++++++++
 .../NAGIOS/package/scripts/nagios_server.py     | 20 ++---
 .../package/scripts/nagios_server_config.py     |  1 +
 .../NAGIOS/package/scripts/nagios_service.py    | 36 +++++++-
 .../services/NAGIOS/package/scripts/params.py   |  4 +-
 .../NAGIOS/package/scripts/status_params.py     |  3 +
 .../package/templates/hadoop-commands.cfg.j2    |  5 ++
 .../templates/hadoop-servicegroups.cfg.j2       |  7 ++
 .../package/templates/hadoop-services.cfg.j2    | 15 ++++
 .../server/agent/TestHeartbeatMonitor.java      | 92 ++++++++++++++++++++
 .../nagios/NagiosPropertyProviderTest.java      | 78 -----------------
 .../stacks/2.0.6/NAGIOS/test_nagios_server.py   |  5 ++
 15 files changed, 340 insertions(+), 105 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java
index 5fa4062..959ee5a 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java
@@ -17,26 +17,51 @@
  */
 package org.apache.ambari.server.agent;
 
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.COMMAND_TIMEOUT;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.GLOBAL;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.HOOKS_FOLDER;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.JDK_LOCATION;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.SCRIPT;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.SCRIPT_TYPE;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.SERVICE_PACKAGE_FOLDER;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.STACK_NAME;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.STACK_VERSION;
+
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 
-import com.google.inject.Injector;
 import org.apache.ambari.server.AmbariException;
 import org.apache.ambari.server.actionmanager.ActionManager;
 import org.apache.ambari.server.api.services.AmbariMetaInfo;
 import org.apache.ambari.server.configuration.Configuration;
 import org.apache.ambari.server.controller.AmbariManagementController;
 import org.apache.ambari.server.controller.MaintenanceStateHelper;
-import org.apache.ambari.server.state.*;
+import org.apache.ambari.server.state.Alert;
+import org.apache.ambari.server.state.Cluster;
+import org.apache.ambari.server.state.Clusters;
+import org.apache.ambari.server.state.CommandScriptDefinition;
+import org.apache.ambari.server.state.ComponentInfo;
+import org.apache.ambari.server.state.Config;
+import org.apache.ambari.server.state.ConfigHelper;
+import org.apache.ambari.server.state.Host;
+import org.apache.ambari.server.state.HostState;
+import org.apache.ambari.server.state.Service;
+import org.apache.ambari.server.state.ServiceComponent;
+import org.apache.ambari.server.state.ServiceComponentHost;
+import org.apache.ambari.server.state.ServiceInfo;
+import org.apache.ambari.server.state.StackId;
+import org.apache.ambari.server.state.StackInfo;
+import org.apache.ambari.server.state.State;
 import org.apache.ambari.server.state.fsm.InvalidStateTransitionException;
 import org.apache.ambari.server.state.host.HostHeartbeatLostEvent;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 
-import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.*;
+import com.google.inject.Injector;
 
 /**
  * Monitors the node state and heartbeats.
@@ -251,6 +276,16 @@ public class HeartbeatMonitor implements Runnable {
     }
 
     StatusCommand statusCmd = new StatusCommand();
+    if (sch.getServiceComponentName().equals("NAGIOS_SERVER")) {
+      // NAGIOS_SERVER gets a NagiosAlertCommand carrying the cluster's alerts, when there are any
+
+      Collection<Alert> alerts = cluster.getAlerts();
+      if (null != alerts && alerts.size() > 0) {
+        statusCmd = new NagiosAlertCommand();
+        ((NagiosAlertCommand) statusCmd).setAlerts(alerts);
+      }
+    }
+
     statusCmd.setClusterName(cluster.getClusterName());
     statusCmd.setServiceName(serviceName);
     statusCmd.setComponentName(componentName);

http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/java/org/apache/ambari/server/agent/NagiosAlertCommand.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/NagiosAlertCommand.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/NagiosAlertCommand.java
new file mode 100644
index 0000000..f8e2f26
--- /dev/null
+++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/NagiosAlertCommand.java
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.ambari.server.agent;
+
+import java.util.Collection;
+
+import org.apache.ambari.server.state.Alert;
+
+/**
+ * Specialized status command that carries alert data used to update Nagios.
+ */
+public class NagiosAlertCommand extends StatusCommand {
+  private Collection<Alert> alerts = null;
+  
+  /**
+   * @param alertData the alerts to send with this command
+   */
+  public void setAlerts(Collection<Alert> alertData) {
+    alerts = alertData;
+  }
+  
+  /**
+   * @return the alerts
+   */
+  public Collection<Alert> getAlerts() {
+    return alerts;
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java
index ce1dc2f..7c40ed1 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java
@@ -21,7 +21,6 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashSet;
@@ -50,7 +49,6 @@ import org.apache.ambari.server.controller.spi.Request;
 import org.apache.ambari.server.controller.spi.Resource;
 import org.apache.ambari.server.controller.spi.SystemException;
 import org.apache.ambari.server.controller.utilities.StreamProvider;
-import org.apache.ambari.server.state.Alert;
 import org.apache.ambari.server.state.Cluster;
 import org.apache.ambari.server.state.Clusters;
 import org.apache.ambari.server.state.Service;
@@ -372,15 +370,6 @@ public class NagiosPropertyProvider extends BaseProvider implements PropertyProv
       if (!hosts.isEmpty())
         nagiosHost = hosts.keySet().iterator().next();
       
-      // !!! use the cluster to retrieve alerts that are not from Nagios, but
-      // from agents themselves.
-      Collection<Alert> currentAlerts = cluster.getAlerts();
-      if (null != currentAlerts) {
-        for (Alert alert : currentAlerts) {
-          results.add(new NagiosAlert(alert));
-        }
-      }
-      
     } catch (AmbariException e) {
       LOG.debug("Cannot find a nagios service.  Skipping alerts.");
     }

http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_ambari_alerts.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_ambari_alerts.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_ambari_alerts.py
new file mode 100644
index 0000000..912a209
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_ambari_alerts.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+#
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
+
+import os
+import optparse
+import json
+import traceback
+
+def main():
+
+  parser = optparse.OptionParser()
+
+  parser.add_option("-H", "--host", dest="host", default="localhost", help="NameNode host")
+  parser.add_option("-n", "--name", dest="alert_name", help="Alert name to check")
+  parser.add_option("-f", "--file", dest="alert_file", help="File containing the alert structure")
+
+  (options, args) = parser.parse_args()
+
+  if options.alert_name is None:
+    print "Alert name is required (--name or -n)"
+    exit(-1)
+
+  if options.alert_file is None:
+    print "Alert file is required (--file or -f)"
+    exit(-1)
+
+  if not os.path.exists(options.alert_file):
+    print "Alert file must exist"
+    exit(-1)
+
+  try:
+    with open(options.alert_file, 'r') as f:
+      data = json.load(f)
+
+      first = True
+      buf = ''
+
+      for_hosts = data[options.alert_name]
+      if for_hosts.has_key(options.host):
+        for host_entry in for_hosts[options.host]:
+          alert_state = host_entry['state']
+          alert_text = host_entry['text']
+          if alert_state == 'CRITICAL':
+            print str(alert_text)
+            exit(2)
+          elif alert_state == 'WARNING':
+            print str(alert_text)
+            exit(1)
+          else:
+            if not first:
+              buf = buf + ', '
+            buf = buf + alert_text
+            first = False
+
+      print buf
+      exit(0)
+      
+  except Exception:
+    traceback.print_exc()
+    exit(3)
+
+if __name__ == "__main__":
+  main()
+

http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server.py
index ed7cebc..da35b34 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server.py
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server.py
@@ -24,6 +24,7 @@ import sys
 from resource_management import *
 from nagios import nagios
 from nagios_service import nagios_service
+from nagios_service import update_active_alerts
 
          
 class NagiosServer(Script):
@@ -59,21 +60,20 @@ class NagiosServer(Script):
     import status_params
     env.set_params(status_params)
     check_process_status(status_params.nagios_pid_file)
+
+    # check for alert structures
+    update_active_alerts()
+
     
 def remove_conflicting_packages():  
-  Package( 'hdp_mon_nagios_addons',
-    action = "remove"
-  )
+  Package('hdp_mon_nagios_addons', action = "remove")
 
-  Package( 'nagios-plugins',
-    action = "remove"
-  )
+  Package('nagios-plugins', action = "remove")
   
   if System.get_instance().os_family in ["redhat","suse"]:
-    Execute( "rpm -e --allmatches --nopostun nagios",
-      path    = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
-      ignore_failures = True 
-    )
+    Execute("rpm -e --allmatches --nopostun nagios",
+      path  = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
+      ignore_failures = True)
 
 def update_ignorable(params):
   if not params.config.has_key('passiveInfo'):

http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py
index 564f78e..40a1bb6 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py
@@ -69,6 +69,7 @@ def nagios_server_config():
   nagios_server_check( 'hdp_nagios_init.php')
   nagios_server_check( 'check_checkpoint_time.py' )
   nagios_server_check( 'sys_logger.py' )
+  nagios_server_check( 'check_ambari_alerts.py' )
 
 def nagios_server_configfile(
   name,

http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_service.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_service.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_service.py
index 8f47702..b7f512b 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_service.py
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_service.py
@@ -20,6 +20,7 @@ Ambari Agent
 
 """
 
+import json
 import os
 import signal
 
@@ -66,4 +67,37 @@ def nagios_service(action='start'): # start or stop
     if isfile( nagios_pid_file ):   
       Execute(format("rm -f {nagios_pid_file}"))
         
-  MonitorWebserver("restart")
\ No newline at end of file
+  MonitorWebserver("restart")
+
+def update_active_alerts():
+  import status_params
+
+  alerts = None
+  if 'alerts' in status_params.config and status_params.config['alerts'] is not None:
+    alerts = status_params.config['alerts']
+
+  if alerts is None:
+    return
+
+  output = {}
+
+  for a in alerts:
+    alert_name = a['name']
+    alert_text = a['text']
+    alert_state = a['state']
+    alert_host = a['host']
+    if not output.has_key(alert_name):
+      output[alert_name] = {}
+
+    if not output[alert_name].has_key(alert_host):
+      output[alert_name][alert_host] = []
+
+    host_items = output[alert_name][alert_host]
+    alert_out = {}
+    alert_out['state'] = alert_state
+    alert_out['text'] = alert_text
+    host_items.append(alert_out)
+
+  with open(os.path.join(status_params.nagios_var_dir, 'ambari.json'), 'w') as f:
+    json.dump(output, f)
+

http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py
index 136255d..edb9b04 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py
@@ -34,8 +34,8 @@ else:
 
 conf_dir = format("/etc/{nagios_service_name}")
 nagios_obj_dir = format("{conf_dir}/objects")
-nagios_var_dir = "/var/nagios"
-nagios_rw_dir = "/var/nagios/rw"
+nagios_var_dir = status_params.nagios_var_dir
+nagios_rw_dir = status_params.nagios_rw_dir
 
 if System.get_instance().os_family == "debian":
   host_template = "generic-host"

http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/status_params.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/status_params.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/status_params.py
index 33b35fe..11d4aa9 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/status_params.py
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/status_params.py
@@ -24,3 +24,6 @@ config = Script.get_config()
 
 nagios_pid_dir = "/var/run/nagios"
 nagios_pid_file = format("{nagios_pid_dir}/nagios.pid")
+
+nagios_var_dir = "/var/nagios"
+nagios_rw_dir = "/var/nagios/rw"

http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2
index caf0ff4..bc5b2df 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2
@@ -149,3 +149,8 @@ define command{
         command_name check_tcp_wrapper_sasl
         command_line  $USER1$/check_wrapper.sh $USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ $ARG2$ -s \"$ARG3$\"
        }
+
+define command{
+        command_name check_ambari
+        command_line $USER1$/check_wrapper.sh /var/lib/ambari-agent/ambari-python-wrap $USER1$/check_ambari_alerts.py -H $HOSTADDRESS$ -f $ARG1$ -n $ARG2$
+       }

http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2 b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2
index 3173072..00f0740 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2
@@ -110,3 +110,10 @@ define servicegroup {
   alias  FALCON Checks
 }
 {% endif %}
+
+{%if hostgroup_defs['flume-servers'] %}
+define servicegroup {
+  servicegroup_name  FLUME
+  alias  FLUME Checks
+}
+{% endif %}

http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2 b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2
index aa626bf..2b2df1e 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2
@@ -777,3 +777,18 @@ define service {
         max_check_attempts      3
 }
 {% endif %}
+
+{% if hostgroup_defs['flume-servers'] %}
+# FLUME Checks
+define service {
+        hostgroup_name          flume-servers
+        use                     hadoop-service
+        service_description     FLUME::Flume Agent process
+        servicegroups           FLUME
+        check_command           check_ambari!/var/nagios/ambari.json!flume_agent
+        normal_check_interval   1
+        retry_check_interval    0.5
+        max_check_attempts      3
+}
+{% endif %}
+

http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java b/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java
index 65d5a96..856baee 100644
--- a/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java
+++ b/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java
@@ -25,6 +25,7 @@ import static org.mockito.Mockito.atLeast;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.verify;
 
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -37,6 +38,8 @@ import org.apache.ambari.server.actionmanager.ActionManager;
 import org.apache.ambari.server.api.services.AmbariMetaInfo;
 import org.apache.ambari.server.orm.GuiceJpaInitializer;
 import org.apache.ambari.server.orm.InMemoryDefaultTestModule;
+import org.apache.ambari.server.state.Alert;
+import org.apache.ambari.server.state.AlertState;
 import org.apache.ambari.server.state.Cluster;
 import org.apache.ambari.server.state.Clusters;
 import org.apache.ambari.server.state.Config;
@@ -501,4 +504,93 @@ public class TestHeartbeatMonitor {
           sch.getState());
     }
   }
+  
+  @Test
+  public void testStateCommandsWithAlertsGeneration() throws AmbariException, InterruptedException,
+          InvalidStateTransitionException {
+    Clusters clusters = injector.getInstance(Clusters.class);
+    
+    clusters.addHost(hostname1);
+    setOsFamily(clusters.getHost(hostname1), "redhat", "6.3");
+    clusters.getHost(hostname1).persist();
+    
+    clusters.addHost(hostname2);
+    setOsFamily(clusters.getHost(hostname2), "redhat", "6.3");
+    clusters.getHost(hostname2).persist();
+    clusters.addCluster(clusterName);
+    
+    Cluster cluster = clusters.getCluster(clusterName);
+    cluster.setDesiredStackVersion(new StackId("HDP-2.0.7"));
+    Set<String> hostNames = new HashSet<String>(){{
+      add(hostname1);
+      add(hostname2);
+    }};
+    
+    clusters.mapHostsToCluster(hostNames, clusterName);
+    
+    Service hdfs = cluster.addService(serviceName);
+    Service nagios = cluster.addService("NAGIOS");
+    
+    hdfs.persist();
+    nagios.persist();
+    
+    hdfs.addServiceComponent(Role.DATANODE.name()).persist();
+    hdfs.getServiceComponent(Role.DATANODE.name()).addServiceComponentHost(hostname1).persist();
+    hdfs.addServiceComponent(Role.NAMENODE.name()).persist();
+    hdfs.getServiceComponent(Role.NAMENODE.name()).addServiceComponentHost(hostname1).persist();
+    hdfs.addServiceComponent(Role.SECONDARY_NAMENODE.name()).persist();
+    hdfs.getServiceComponent(Role.SECONDARY_NAMENODE.name()).addServiceComponentHost(hostname1).persist();
+    nagios.addServiceComponent(Role.NAGIOS_SERVER.name()).persist();
+    nagios.getServiceComponent(Role.NAGIOS_SERVER.name()).addServiceComponentHost(hostname1).persist();
+
+    hdfs.getServiceComponent(Role.DATANODE.name()).getServiceComponentHost(hostname1).setState(State.INSTALLED);
+    hdfs.getServiceComponent(Role.NAMENODE.name()).getServiceComponentHost(hostname1).setState(State.INSTALLED);
+    hdfs.getServiceComponent(Role.SECONDARY_NAMENODE.name()).getServiceComponentHost(hostname1).setState(State.INSTALLED);
+    nagios.getServiceComponent(Role.NAGIOS_SERVER.name()).getServiceComponentHost(hostname1).setState(State.INSTALLED);
+    
+
+    Alert alert = new Alert("datanode_madeup", null, "HDFS", "DATANODE",
+     hostname1, AlertState.CRITICAL);
+    cluster.addAlerts(Collections.singleton(alert));
+        
+    ActionQueue aq = new ActionQueue();
+    ActionManager am = mock(ActionManager.class);
+    HeartbeatMonitor hm = new HeartbeatMonitor(clusters, aq, am,
+      heartbeatMonitorWakeupIntervalMS, injector);
+    HeartBeatHandler handler = new HeartBeatHandler(clusters, aq, am, injector);
+    Register reg = new Register();
+    reg.setHostname(hostname1);
+    reg.setResponseId(12);
+    reg.setTimestamp(System.currentTimeMillis() - 300);
+    reg.setAgentVersion(ambariMetaInfo.getServerVersion());
+    HostInfo hi = new HostInfo();
+    hi.setOS("Centos5");
+    reg.setHardwareProfile(hi);
+    handler.handleRegistration(reg);
+
+    HeartBeat hb = new HeartBeat();
+    hb.setHostname(hostname1);
+    hb.setNodeStatus(new HostStatus(HostStatus.Status.HEALTHY, "cool"));
+    hb.setTimestamp(System.currentTimeMillis());
+    hb.setResponseId(12);
+    handler.handleHeartBeat(hb);
+
+    List<StatusCommand> cmds = hm.generateStatusCommands(hostname1);
+    assertEquals("HeartbeatMonitor should generate StatusCommands for host1", 4, cmds.size());
+    assertEquals("HDFS", cmds.get(0).getServiceName());
+
+    boolean  containsNAGIOSStatus = false;
+    for (StatusCommand cmd : cmds) {
+      if (cmd.getComponentName().equals(Role.NAGIOS_SERVER.name())) {
+        containsNAGIOSStatus = true;
+        assertTrue(cmd.getClass().equals(NagiosAlertCommand.class));
+        assertEquals(1, ((NagiosAlertCommand) cmd).getAlerts().size());
+      }
+      
+    }
+    assertTrue(containsNAGIOSStatus);
+    
+    cmds = hm.generateStatusCommands(hostname2);
+    assertTrue("HeartbeatMonitor should not generate StatusCommands for host2 because it has no services", cmds.isEmpty());
+  }  
 }

http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/test/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProviderTest.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProviderTest.java b/ambari-server/src/test/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProviderTest.java
index 92d3c40..cc45ef0 100644
--- a/ambari-server/src/test/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProviderTest.java
+++ b/ambari-server/src/test/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProviderTest.java
@@ -38,7 +38,6 @@ import org.apache.ambari.server.controller.spi.Resource;
 import org.apache.ambari.server.controller.spi.TemporalInfo;
 import org.apache.ambari.server.controller.utilities.PropertyHelper;
 import org.apache.ambari.server.state.Alert;
-import org.apache.ambari.server.state.AlertState;
 import org.apache.ambari.server.state.Cluster;
 import org.apache.ambari.server.state.Clusters;
 import org.apache.ambari.server.state.Service;
@@ -578,81 +577,4 @@ public class NagiosPropertyProviderTest {
     }
   }
 
-  @Test
-  public void testNagiosServiceAlertsWithAgentAlerts() throws Exception {
-    Injector inj = Guice.createInjector(new GuiceModule());
-    
-    Clusters clusters = inj.getInstance(Clusters.class);
-    Cluster cluster = createMock(Cluster.class);
-    
-    Alert alert = new Alert("ganglia_madeup", null, "GANGLIA", "GANGLIA_MYSTERY",
-        "h1", AlertState.CRITICAL);
-    
-    expect(cluster.getAlerts()).andReturn(Collections.singleton(alert)).anyTimes();
-    expect(clusters.getCluster("c1")).andReturn(cluster);
-
-    Service nagiosService = createMock(Service.class);
-    expect(cluster.getService("NAGIOS")).andReturn(nagiosService);
-    
-    ServiceComponent nagiosServiceComponent = createMock(ServiceComponent.class);
-    expect(nagiosService.getServiceComponent("NAGIOS_SERVER")).andReturn(nagiosServiceComponent);
-    
-    ServiceComponentHost nagiosScHost = createMock(ServiceComponentHost.class);
-    Map<String, ServiceComponentHost> map1 = new HashMap<String, ServiceComponentHost>();
-    map1.put(HOST, nagiosScHost);
-    expect(nagiosServiceComponent.getServiceComponentHosts()).andReturn(map1);
-    
-    replay(clusters, cluster, nagiosService, nagiosServiceComponent);
-
-    
-    TestStreamProvider streamProvider = new TestStreamProvider("nagios_alerts.txt");
-
-    NagiosPropertyProvider npp = new NagiosPropertyProvider(Resource.Type.Service,
-        streamProvider,
-        "ServiceInfo/cluster_name",
-        "ServiceInfo/service_name");
-    npp.forceReset();
-    NagiosPropertyProvider.init(inj);
-    
-    Resource resource = new ResourceImpl(Resource.Type.Service);
-    resource.setProperty("ServiceInfo/cluster_name", "c1");
-    resource.setProperty("ServiceInfo/service_name", "GANGLIA");
-    
-    // request with an empty set should get all supported properties
-    Request request = PropertyHelper.getReadRequest(Collections.<String>emptySet(), new HashMap<String, TemporalInfo>());
-
-    Set<Resource> set = npp.populateResources(Collections.singleton(resource), request, null);
-    Assert.assertEquals(1, set.size());
-    
-    Resource res = set.iterator().next();
-    
-    Map<String, Map<String, Object>> values = res.getPropertiesMap();
-    
-    Assert.assertTrue(values.containsKey("alerts"));
-    Assert.assertTrue(values.containsKey("alerts/summary"));
-    Assert.assertTrue(values.get("alerts").containsKey("detail"));
-    Assert.assertTrue(List.class.isInstance(values.get("alerts").get("detail")));
-    
-    List<?> list = (List<?>) values.get("alerts").get("detail");
-    // removed an additional one
-    Assert.assertEquals(Integer.valueOf(5), Integer.valueOf(list.size()));
-    for (Object o : list) {
-      Assert.assertTrue(Map.class.isInstance(o));
-      Map<?, ?> map = (Map<?, ?>) o;
-      Assert.assertTrue(map.containsKey("service_name"));
-      String serviceName = map.get("service_name").toString();
-      Assert.assertEquals(serviceName, "GANGLIA");
-    }
-    
-    Map<String, Object> summary = values.get("alerts/summary");
-    Assert.assertTrue(summary.containsKey("OK"));
-    Assert.assertTrue(summary.containsKey("WARNING"));
-    Assert.assertTrue(summary.containsKey("CRITICAL"));
-    Assert.assertTrue(summary.containsKey("PASSIVE"));
-    
-    Assert.assertEquals(Integer.valueOf(3), summary.get("OK"));
-    Assert.assertEquals(Integer.valueOf(0), summary.get("WARNING"));
-    Assert.assertEquals(Integer.valueOf(1), summary.get("CRITICAL"));
-    Assert.assertEquals(Integer.valueOf(1), summary.get("PASSIVE"));
-  }  
 }

http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py b/ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py
index 51f89df..f51169c 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py
@@ -268,6 +268,11 @@ class TestNagiosServer(RMFTestCase):
         content = StaticFile('sys_logger.py'),
         mode = 0755,
     )
+    self.assertResourceCalled('File',
+                              '/usr/lib64/nagios/plugins/check_ambari_alerts.py',
+                              content=StaticFile('check_ambari_alerts.py'),
+                              mode=0755
+    )
     self.assertResourceCalled('Execute',
                               'htpasswd2 -c -b  /etc/nagios/htpasswd.users nagiosadmin \'!`"\'"\'"\' 1\'',
                               not_if="grep nagiosadmin /etc/nagios/htpasswd.users"