You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by nc...@apache.org on 2014/07/09 23:45:47 UTC
git commit: AMBARI-6421. Add capability to report alert data from
agents to Nagios (ncole)
Repository: ambari
Updated Branches:
refs/heads/trunk 3b62427a4 -> b94eb716b
AMBARI-6421. Add capability to report alert data from agents to Nagios (ncole)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/b94eb716
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/b94eb716
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/b94eb716
Branch: refs/heads/trunk
Commit: b94eb716ba3228be6d39f0fc9009f8e63d146c89
Parents: 3b62427
Author: Nate Cole <nc...@hortonworks.com>
Authored: Tue Jul 8 13:24:43 2014 -0400
Committer: Nate Cole <nc...@hortonworks.com>
Committed: Wed Jul 9 16:21:48 2014 -0400
----------------------------------------------------------------------
.../ambari/server/agent/HeartbeatMonitor.java | 41 ++++++++-
.../ambari/server/agent/NagiosAlertCommand.java | 44 ++++++++++
.../nagios/NagiosPropertyProvider.java | 11 ---
.../NAGIOS/package/files/check_ambari_alerts.py | 83 ++++++++++++++++++
.../NAGIOS/package/scripts/nagios_server.py | 20 ++---
.../package/scripts/nagios_server_config.py | 1 +
.../NAGIOS/package/scripts/nagios_service.py | 36 +++++++-
.../services/NAGIOS/package/scripts/params.py | 4 +-
.../NAGIOS/package/scripts/status_params.py | 3 +
.../package/templates/hadoop-commands.cfg.j2 | 5 ++
.../templates/hadoop-servicegroups.cfg.j2 | 7 ++
.../package/templates/hadoop-services.cfg.j2 | 15 ++++
.../server/agent/TestHeartbeatMonitor.java | 92 ++++++++++++++++++++
.../nagios/NagiosPropertyProviderTest.java | 78 -----------------
.../stacks/2.0.6/NAGIOS/test_nagios_server.py | 5 ++
15 files changed, 340 insertions(+), 105 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java
index 5fa4062..959ee5a 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java
@@ -17,26 +17,51 @@
*/
package org.apache.ambari.server.agent;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.COMMAND_TIMEOUT;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.GLOBAL;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.HOOKS_FOLDER;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.JDK_LOCATION;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.SCRIPT;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.SCRIPT_TYPE;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.SERVICE_PACKAGE_FOLDER;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.STACK_NAME;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.STACK_VERSION;
+
import java.util.ArrayList;
+import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
-import com.google.inject.Injector;
import org.apache.ambari.server.AmbariException;
import org.apache.ambari.server.actionmanager.ActionManager;
import org.apache.ambari.server.api.services.AmbariMetaInfo;
import org.apache.ambari.server.configuration.Configuration;
import org.apache.ambari.server.controller.AmbariManagementController;
import org.apache.ambari.server.controller.MaintenanceStateHelper;
-import org.apache.ambari.server.state.*;
+import org.apache.ambari.server.state.Alert;
+import org.apache.ambari.server.state.Cluster;
+import org.apache.ambari.server.state.Clusters;
+import org.apache.ambari.server.state.CommandScriptDefinition;
+import org.apache.ambari.server.state.ComponentInfo;
+import org.apache.ambari.server.state.Config;
+import org.apache.ambari.server.state.ConfigHelper;
+import org.apache.ambari.server.state.Host;
+import org.apache.ambari.server.state.HostState;
+import org.apache.ambari.server.state.Service;
+import org.apache.ambari.server.state.ServiceComponent;
+import org.apache.ambari.server.state.ServiceComponentHost;
+import org.apache.ambari.server.state.ServiceInfo;
+import org.apache.ambari.server.state.StackId;
+import org.apache.ambari.server.state.StackInfo;
+import org.apache.ambari.server.state.State;
import org.apache.ambari.server.state.fsm.InvalidStateTransitionException;
import org.apache.ambari.server.state.host.HostHeartbeatLostEvent;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.*;
+import com.google.inject.Injector;
/**
* Monitors the node state and heartbeats.
@@ -251,6 +276,16 @@ public class HeartbeatMonitor implements Runnable {
}
StatusCommand statusCmd = new StatusCommand();
+ if (sch.getServiceComponentName().equals("NAGIOS_SERVER")) {
+ // this requires special treatment
+
+ Collection<Alert> alerts = cluster.getAlerts();
+ if (null != alerts && alerts.size() > 0) {
+ statusCmd = new NagiosAlertCommand();
+ ((NagiosAlertCommand) statusCmd).setAlerts(alerts);
+ }
+ }
+
statusCmd.setClusterName(cluster.getClusterName());
statusCmd.setServiceName(serviceName);
statusCmd.setComponentName(componentName);
http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/java/org/apache/ambari/server/agent/NagiosAlertCommand.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/NagiosAlertCommand.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/NagiosAlertCommand.java
new file mode 100644
index 0000000..f8e2f26
--- /dev/null
+++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/NagiosAlertCommand.java
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.ambari.server.agent;
+
+import java.util.Collection;
+
+import org.apache.ambari.server.state.Alert;
+
+/**
+ * Specialized {@link StatusCommand} that additionally carries alert data intended for Nagios.
+ */
+public class NagiosAlertCommand extends StatusCommand {
+ private Collection<Alert> alerts = null;
+
+ /**
+ * @param alertData the alerts to carry with this command; the collection is stored by reference, not copied
+ */
+ public void setAlerts(Collection<Alert> alertData) {
+ alerts = alertData;
+ }
+
+ /**
+ * @return the alerts last passed to {@link #setAlerts}, or {@code null} if none were set
+ */
+ public Collection<Alert> getAlerts() {
+ return alerts;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java
index ce1dc2f..7c40ed1 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java
@@ -21,7 +21,6 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
@@ -50,7 +49,6 @@ import org.apache.ambari.server.controller.spi.Request;
import org.apache.ambari.server.controller.spi.Resource;
import org.apache.ambari.server.controller.spi.SystemException;
import org.apache.ambari.server.controller.utilities.StreamProvider;
-import org.apache.ambari.server.state.Alert;
import org.apache.ambari.server.state.Cluster;
import org.apache.ambari.server.state.Clusters;
import org.apache.ambari.server.state.Service;
@@ -372,15 +370,6 @@ public class NagiosPropertyProvider extends BaseProvider implements PropertyProv
if (!hosts.isEmpty())
nagiosHost = hosts.keySet().iterator().next();
- // !!! use the cluster to retrieve alerts that are not from Nagios, but
- // from agents themselves.
- Collection<Alert> currentAlerts = cluster.getAlerts();
- if (null != currentAlerts) {
- for (Alert alert : currentAlerts) {
- results.add(new NagiosAlert(alert));
- }
- }
-
} catch (AmbariException e) {
LOG.debug("Cannot find a nagios service. Skipping alerts.");
}
http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_ambari_alerts.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_ambari_alerts.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_ambari_alerts.py
new file mode 100644
index 0000000..912a209
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_ambari_alerts.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+#
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
+
+import os
+import optparse
+import json
+import traceback
+
+def main():
+
+ parser = optparse.OptionParser()
+
+ parser.add_option("-H", "--host", dest="host", default="localhost", help="NameNode host")
+ parser.add_option("-n", "--name", dest="alert_name", help="Alert name to check")
+ parser.add_option("-f", "--file", dest="alert_file", help="File containing the alert structure")
+
+ (options, args) = parser.parse_args()
+
+ if options.alert_name is None:
+ print "Alert name is required (--name or -n)"
+ exit(-1)
+
+ if options.alert_file is None:
+ print "Alert file is required (--file or -f)"
+ exit(-1)
+
+ if not os.path.exists(options.alert_file):
+ print "Alert file must exist"
+ exit(-1)
+
+ try:
+ with open(options.alert_file, 'r') as f:
+ data = json.load(f)
+
+ first = True
+ buf = ''
+
+ for_hosts = data[options.alert_name]
+ if for_hosts.has_key(options.host):
+ for host_entry in for_hosts[options.host]:
+ alert_state = host_entry['state']
+ alert_text = host_entry['text']
+ if alert_state == 'CRITICAL':
+ print str(alert_text)
+ exit(2)
+ elif alert_state == 'WARNING':
+ print str(alert_text)
+ exit(1)
+ else:
+ if not first:
+ buf = buf + ', '
+ buf = buf + alert_text
+ first = False
+
+ print buf
+ exit(0)
+
+ except Exception:
+ traceback.print_exc()
+ exit(3)
+
+if __name__ == "__main__":
+ main()
+
http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server.py
index ed7cebc..da35b34 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server.py
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server.py
@@ -24,6 +24,7 @@ import sys
from resource_management import *
from nagios import nagios
from nagios_service import nagios_service
+from nagios_service import update_active_alerts
class NagiosServer(Script):
@@ -59,21 +60,20 @@ class NagiosServer(Script):
import status_params
env.set_params(status_params)
check_process_status(status_params.nagios_pid_file)
+
+ # check for alert structures
+ update_active_alerts()
+
def remove_conflicting_packages():
- Package( 'hdp_mon_nagios_addons',
- action = "remove"
- )
+ Package('hdp_mon_nagios_addons', action = "remove")
- Package( 'nagios-plugins',
- action = "remove"
- )
+ Package('nagios-plugins', action = "remove")
if System.get_instance().os_family in ["redhat","suse"]:
- Execute( "rpm -e --allmatches --nopostun nagios",
- path = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
- ignore_failures = True
- )
+ Execute("rpm -e --allmatches --nopostun nagios",
+ path = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
+ ignore_failures = True)
def update_ignorable(params):
if not params.config.has_key('passiveInfo'):
http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py
index 564f78e..40a1bb6 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py
@@ -69,6 +69,7 @@ def nagios_server_config():
nagios_server_check( 'hdp_nagios_init.php')
nagios_server_check( 'check_checkpoint_time.py' )
nagios_server_check( 'sys_logger.py' )
+ nagios_server_check( 'check_ambari_alerts.py' )
def nagios_server_configfile(
name,
http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_service.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_service.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_service.py
index 8f47702..b7f512b 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_service.py
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_service.py
@@ -20,6 +20,7 @@ Ambari Agent
"""
+import json
import os
import signal
@@ -66,4 +67,37 @@ def nagios_service(action='start'): # start or stop
if isfile( nagios_pid_file ):
Execute(format("rm -f {nagios_pid_file}"))
- MonitorWebserver("restart")
\ No newline at end of file
+ MonitorWebserver("restart")
+
+def update_active_alerts():
+ import status_params
+
+ alerts = None
+ if 'alerts' in status_params.config and status_params.config['alerts'] is not None:
+ alerts = status_params.config['alerts']
+
+ if alerts is None:
+ return
+
+ output = {}
+
+ for a in alerts:
+ alert_name = a['name']
+ alert_text = a['text']
+ alert_state = a['state']
+ alert_host = a['host']
+ if not output.has_key(alert_name):
+ output[alert_name] = {}
+
+ if not output[alert_name].has_key(alert_host):
+ output[alert_name][alert_host] = []
+
+ host_items = output[alert_name][alert_host]
+ alert_out = {}
+ alert_out['state'] = alert_state
+ alert_out['text'] = alert_text
+ host_items.append(alert_out)
+
+ with open(os.path.join(status_params.nagios_var_dir, 'ambari.json'), 'w') as f:
+ json.dump(output, f)
+
http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py
index 136255d..edb9b04 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py
@@ -34,8 +34,8 @@ else:
conf_dir = format("/etc/{nagios_service_name}")
nagios_obj_dir = format("{conf_dir}/objects")
-nagios_var_dir = "/var/nagios"
-nagios_rw_dir = "/var/nagios/rw"
+nagios_var_dir = status_params.nagios_var_dir
+nagios_rw_dir = status_params.nagios_rw_dir
if System.get_instance().os_family == "debian":
host_template = "generic-host"
http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/status_params.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/status_params.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/status_params.py
index 33b35fe..11d4aa9 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/status_params.py
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/status_params.py
@@ -24,3 +24,6 @@ config = Script.get_config()
nagios_pid_dir = "/var/run/nagios"
nagios_pid_file = format("{nagios_pid_dir}/nagios.pid")
+
+nagios_var_dir = "/var/nagios"
+nagios_rw_dir = "/var/nagios/rw"
http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2
index caf0ff4..bc5b2df 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2
@@ -149,3 +149,8 @@ define command{
command_name check_tcp_wrapper_sasl
command_line $USER1$/check_wrapper.sh $USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ $ARG2$ -s \"$ARG3$\"
}
+
+define command{
+ command_name check_ambari
+ command_line $USER1$/check_wrapper.sh /var/lib/ambari-agent/ambari-python-wrap $USER1$/check_ambari_alerts.py -H $HOSTADDRESS$ -f $ARG1$ -n $ARG2$
+ }
http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2 b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2
index 3173072..00f0740 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2
@@ -110,3 +110,10 @@ define servicegroup {
alias FALCON Checks
}
{% endif %}
+
+{%if hostgroup_defs['flume-servers'] %}
+define servicegroup {
+ servicegroup_name FLUME
+ alias FLUME Checks
+}
+{% endif %}
http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2 b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2
index aa626bf..2b2df1e 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2
@@ -777,3 +777,18 @@ define service {
max_check_attempts 3
}
{% endif %}
+
+{% if hostgroup_defs['flume-servers'] %}
+# FLUME Checks
+define service {
+ hostgroup_name flume-servers
+ use hadoop-service
+ service_description FLUME::Flume Agent process
+ servicegroups FLUME
+ check_command check_ambari!/var/nagios/ambari.json!flume_agent
+ normal_check_interval 1
+ retry_check_interval 0.5
+ max_check_attempts 3
+}
+{% endif %}
+
http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java b/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java
index 65d5a96..856baee 100644
--- a/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java
+++ b/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java
@@ -25,6 +25,7 @@ import static org.mockito.Mockito.atLeast;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
+import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -37,6 +38,8 @@ import org.apache.ambari.server.actionmanager.ActionManager;
import org.apache.ambari.server.api.services.AmbariMetaInfo;
import org.apache.ambari.server.orm.GuiceJpaInitializer;
import org.apache.ambari.server.orm.InMemoryDefaultTestModule;
+import org.apache.ambari.server.state.Alert;
+import org.apache.ambari.server.state.AlertState;
import org.apache.ambari.server.state.Cluster;
import org.apache.ambari.server.state.Clusters;
import org.apache.ambari.server.state.Config;
@@ -501,4 +504,93 @@ public class TestHeartbeatMonitor {
sch.getState());
}
}
+
+ /**
+ * Checks that, when the cluster holds an alert, the status command generated for
+ * NAGIOS_SERVER is a {@link NagiosAlertCommand} carrying that alert, and that a host
+ * with no components gets no status commands.
+ */
+ @Test
+ public void testStateCommandsWithAlertsGeneration() throws AmbariException, InterruptedException,
+ InvalidStateTransitionException {
+ Clusters clusters = injector.getInstance(Clusters.class);
+
+ // two hosts are added to the cluster, but components are only placed on hostname1
+ clusters.addHost(hostname1);
+ setOsFamily(clusters.getHost(hostname1), "redhat", "6.3");
+ clusters.getHost(hostname1).persist();
+
+ clusters.addHost(hostname2);
+ setOsFamily(clusters.getHost(hostname2), "redhat", "6.3");
+ clusters.getHost(hostname2).persist();
+ clusters.addCluster(clusterName);
+
+ Cluster cluster = clusters.getCluster(clusterName);
+ cluster.setDesiredStackVersion(new StackId("HDP-2.0.7"));
+ Set<String> hostNames = new HashSet<String>(){{
+ add(hostname1);
+ add(hostname2);
+ }};
+
+ clusters.mapHostsToCluster(hostNames, clusterName);
+
+ Service hdfs = cluster.addService(serviceName);
+ Service nagios = cluster.addService("NAGIOS");
+
+ hdfs.persist();
+ nagios.persist();
+
+ // three HDFS components plus NAGIOS_SERVER, all on hostname1 and all INSTALLED
+ hdfs.addServiceComponent(Role.DATANODE.name()).persist();
+ hdfs.getServiceComponent(Role.DATANODE.name()).addServiceComponentHost(hostname1).persist();
+ hdfs.addServiceComponent(Role.NAMENODE.name()).persist();
+ hdfs.getServiceComponent(Role.NAMENODE.name()).addServiceComponentHost(hostname1).persist();
+ hdfs.addServiceComponent(Role.SECONDARY_NAMENODE.name()).persist();
+ hdfs.getServiceComponent(Role.SECONDARY_NAMENODE.name()).addServiceComponentHost(hostname1).persist();
+ nagios.addServiceComponent(Role.NAGIOS_SERVER.name()).persist();
+ nagios.getServiceComponent(Role.NAGIOS_SERVER.name()).addServiceComponentHost(hostname1).persist();
+
+ hdfs.getServiceComponent(Role.DATANODE.name()).getServiceComponentHost(hostname1).setState(State.INSTALLED);
+ hdfs.getServiceComponent(Role.NAMENODE.name()).getServiceComponentHost(hostname1).setState(State.INSTALLED);
+ hdfs.getServiceComponent(Role.SECONDARY_NAMENODE.name()).getServiceComponentHost(hostname1).setState(State.INSTALLED);
+ nagios.getServiceComponent(Role.NAGIOS_SERVER.name()).getServiceComponentHost(hostname1).setState(State.INSTALLED);
+
+
+ // a single CRITICAL alert is stored on the cluster
+ Alert alert = new Alert("datanode_madeup", null, "HDFS", "DATANODE",
+ hostname1, AlertState.CRITICAL);
+ cluster.addAlerts(Collections.singleton(alert));
+
+ ActionQueue aq = new ActionQueue();
+ ActionManager am = mock(ActionManager.class);
+ HeartbeatMonitor hm = new HeartbeatMonitor(clusters, aq, am,
+ heartbeatMonitorWakeupIntervalMS, injector);
+ HeartBeatHandler handler = new HeartBeatHandler(clusters, aq, am, injector);
+ // register hostname1 through the handler before sending its heartbeat
+ Register reg = new Register();
+ reg.setHostname(hostname1);
+ reg.setResponseId(12);
+ reg.setTimestamp(System.currentTimeMillis() - 300);
+ reg.setAgentVersion(ambariMetaInfo.getServerVersion());
+ HostInfo hi = new HostInfo();
+ hi.setOS("Centos5");
+ reg.setHardwareProfile(hi);
+ handler.handleRegistration(reg);
+
+ HeartBeat hb = new HeartBeat();
+ hb.setHostname(hostname1);
+ hb.setNodeStatus(new HostStatus(HostStatus.Status.HEALTHY, "cool"));
+ hb.setTimestamp(System.currentTimeMillis());
+ hb.setResponseId(12);
+ handler.handleHeartBeat(hb);
+
+ List<StatusCommand> cmds = hm.generateStatusCommands(hostname1);
+ // four commands expected, matching the four components placed on hostname1
+ assertEquals("HeartbeatMonitor should generate StatusCommands for host1", 4, cmds.size());
+ assertEquals("HDFS", cmds.get(0).getServiceName());
+
+ boolean containsNAGIOSStatus = false;
+ for (StatusCommand cmd : cmds) {
+ if (cmd.getComponentName().equals(Role.NAGIOS_SERVER.name())) {
+ containsNAGIOSStatus = true;
+ // the Nagios command must be the alert-carrying subclass and hold the one alert added above
+ assertTrue(cmd.getClass().equals(NagiosAlertCommand.class));
+ assertEquals(1, ((NagiosAlertCommand) cmd).getAlerts().size());
+ }
+
+ }
+ assertTrue(containsNAGIOSStatus);
+
+ cmds = hm.generateStatusCommands(hostname2);
+ assertTrue("HeartbeatMonitor should not generate StatusCommands for host2 because it has no services", cmds.isEmpty());
+ }
}
http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/test/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProviderTest.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProviderTest.java b/ambari-server/src/test/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProviderTest.java
index 92d3c40..cc45ef0 100644
--- a/ambari-server/src/test/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProviderTest.java
+++ b/ambari-server/src/test/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProviderTest.java
@@ -38,7 +38,6 @@ import org.apache.ambari.server.controller.spi.Resource;
import org.apache.ambari.server.controller.spi.TemporalInfo;
import org.apache.ambari.server.controller.utilities.PropertyHelper;
import org.apache.ambari.server.state.Alert;
-import org.apache.ambari.server.state.AlertState;
import org.apache.ambari.server.state.Cluster;
import org.apache.ambari.server.state.Clusters;
import org.apache.ambari.server.state.Service;
@@ -578,81 +577,4 @@ public class NagiosPropertyProviderTest {
}
}
- @Test
- public void testNagiosServiceAlertsWithAgentAlerts() throws Exception {
- Injector inj = Guice.createInjector(new GuiceModule());
-
- Clusters clusters = inj.getInstance(Clusters.class);
- Cluster cluster = createMock(Cluster.class);
-
- Alert alert = new Alert("ganglia_madeup", null, "GANGLIA", "GANGLIA_MYSTERY",
- "h1", AlertState.CRITICAL);
-
- expect(cluster.getAlerts()).andReturn(Collections.singleton(alert)).anyTimes();
- expect(clusters.getCluster("c1")).andReturn(cluster);
-
- Service nagiosService = createMock(Service.class);
- expect(cluster.getService("NAGIOS")).andReturn(nagiosService);
-
- ServiceComponent nagiosServiceComponent = createMock(ServiceComponent.class);
- expect(nagiosService.getServiceComponent("NAGIOS_SERVER")).andReturn(nagiosServiceComponent);
-
- ServiceComponentHost nagiosScHost = createMock(ServiceComponentHost.class);
- Map<String, ServiceComponentHost> map1 = new HashMap<String, ServiceComponentHost>();
- map1.put(HOST, nagiosScHost);
- expect(nagiosServiceComponent.getServiceComponentHosts()).andReturn(map1);
-
- replay(clusters, cluster, nagiosService, nagiosServiceComponent);
-
-
- TestStreamProvider streamProvider = new TestStreamProvider("nagios_alerts.txt");
-
- NagiosPropertyProvider npp = new NagiosPropertyProvider(Resource.Type.Service,
- streamProvider,
- "ServiceInfo/cluster_name",
- "ServiceInfo/service_name");
- npp.forceReset();
- NagiosPropertyProvider.init(inj);
-
- Resource resource = new ResourceImpl(Resource.Type.Service);
- resource.setProperty("ServiceInfo/cluster_name", "c1");
- resource.setProperty("ServiceInfo/service_name", "GANGLIA");
-
- // request with an empty set should get all supported properties
- Request request = PropertyHelper.getReadRequest(Collections.<String>emptySet(), new HashMap<String, TemporalInfo>());
-
- Set<Resource> set = npp.populateResources(Collections.singleton(resource), request, null);
- Assert.assertEquals(1, set.size());
-
- Resource res = set.iterator().next();
-
- Map<String, Map<String, Object>> values = res.getPropertiesMap();
-
- Assert.assertTrue(values.containsKey("alerts"));
- Assert.assertTrue(values.containsKey("alerts/summary"));
- Assert.assertTrue(values.get("alerts").containsKey("detail"));
- Assert.assertTrue(List.class.isInstance(values.get("alerts").get("detail")));
-
- List<?> list = (List<?>) values.get("alerts").get("detail");
- // removed an additional one
- Assert.assertEquals(Integer.valueOf(5), Integer.valueOf(list.size()));
- for (Object o : list) {
- Assert.assertTrue(Map.class.isInstance(o));
- Map<?, ?> map = (Map<?, ?>) o;
- Assert.assertTrue(map.containsKey("service_name"));
- String serviceName = map.get("service_name").toString();
- Assert.assertEquals(serviceName, "GANGLIA");
- }
-
- Map<String, Object> summary = values.get("alerts/summary");
- Assert.assertTrue(summary.containsKey("OK"));
- Assert.assertTrue(summary.containsKey("WARNING"));
- Assert.assertTrue(summary.containsKey("CRITICAL"));
- Assert.assertTrue(summary.containsKey("PASSIVE"));
-
- Assert.assertEquals(Integer.valueOf(3), summary.get("OK"));
- Assert.assertEquals(Integer.valueOf(0), summary.get("WARNING"));
- Assert.assertEquals(Integer.valueOf(1), summary.get("CRITICAL"));
- Assert.assertEquals(Integer.valueOf(1), summary.get("PASSIVE"));
- }
}
http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py b/ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py
index 51f89df..f51169c 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py
@@ -268,6 +268,11 @@ class TestNagiosServer(RMFTestCase):
content = StaticFile('sys_logger.py'),
mode = 0755,
)
+ self.assertResourceCalled('File',
+ '/usr/lib64/nagios/plugins/check_ambari_alerts.py',
+ content=StaticFile('check_ambari_alerts.py'),
+ mode=0755
+ )
self.assertResourceCalled('Execute',
'htpasswd2 -c -b /etc/nagios/htpasswd.users nagiosadmin \'!`"\'"\'"\' 1\'',
not_if="grep nagiosadmin /etc/nagios/htpasswd.users"