You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by ds...@apache.org on 2014/12/04 18:28:06 UTC
ambari git commit: AMBARI-8544 Integrate AMS with Ambari Alerting System (dsen)
Repository: ambari
Updated Branches:
refs/heads/trunk 46325c563 -> 21e3d2a73
AMBARI-8544 Integrate AMS with Ambari Alerting System (dsen)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/21e3d2a7
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/21e3d2a7
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/21e3d2a7
Branch: refs/heads/trunk
Commit: 21e3d2a73ba2048292e42b5e01c1d95d9d5896f8
Parents: 46325c5
Author: Dmytro Sen <ds...@apache.org>
Authored: Thu Dec 4 19:27:40 2014 +0200
Committer: Dmytro Sen <ds...@apache.org>
Committed: Thu Dec 4 19:27:40 2014 +0200
----------------------------------------------------------------------
.../stacks/HDP/2.2/services/AMS/alerts.json | 139 +++++++++++++++++++
.../files/alert_ambari_metrics_monitor.py | 80 +++++++++++
2 files changed, 219 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/21e3d2a7/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/alerts.json b/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/alerts.json
new file mode 100644
index 0000000..0a1e469
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/alerts.json
@@ -0,0 +1,139 @@
+{
+ "AMS": {
+ "service": [
+ {
+ "name": "ams_metric_monitor_process_percent",
+ "label": "Percent AMS Metric Monitors Available",
+ "interval": 1,
+ "scope": "SERVICE",
+ "enabled": true,
+ "source": {
+ "type": "AGGREGATE",
+ "alert_name": "ams_metric_monitor_process",
+ "reporting": {
+ "ok": {
+ "text": "affected: [{1}], total: [{0}]"
+ },
+ "warning": {
+ "text": "affected: [{1}], total: [{0}]",
+ "value": 0.1
+ },
+ "critical": {
+ "text": "affected: [{1}], total: [{0}]",
+ "value": 0.3
+ }
+ }
+ }
+ }
+ ],
+ "METRIC_COLLECTOR": [
+ {
+ "name": "ams_metric_collector_process",
+ "label": "AMS Metric Collector Process",
+ "interval": 1,
+ "scope": "ANY",
+ "enabled": true,
+ "source": {
+ "type": "PORT",
+ "uri": "8188",
+ "default_port": 8188,
+ "reporting": {
+ "ok": {
+ "text": "TCP OK - {0:.4f} response on port {1}"
+ },
+ "critical": {
+ "text": "Connection failed: {0} to {1}:{2}"
+ }
+ }
+ }
+ },
+ {
+ "name": "ams_metric_collector_hbase_master_process",
+ "label": "AMS Metric Collector HBase Master Process",
+ "interval": 1,
+ "scope": "ANY",
+ "source": {
+ "type": "PORT",
+ "uri": "{{ams-hbase-site/hbase.master.info.port}}",
+ "default_port": 61310,
+ "reporting": {
+ "ok": {
+ "text": "TCP OK - {0:.4f} response on port {1}"
+ },
+ "critical": {
+ "text": "Connection failed: {0} to {1}:{2}"
+ }
+ }
+ }
+ },
+ {
+ "name": "ams_metric_collector_hbase_master_cpu",
+ "label": "AMS Metric Collector HBase Master CPU Utilization",
+ "interval": 5,
+ "scope": "ANY",
+ "enabled": true,
+ "source": {
+ "type": "METRIC",
+ "uri": {
+ "http": "{{ams-hbase-site/hbase.master.info.port}}",
+ "https": "{{ams-hbase-site/hbase.master.info.port}}",
+ "https_property": "{{cluster-env/security_enabled}}",
+ "https_property_value": "true",
+ "default_port": 61310
+ },
+ "reporting": {
+ "ok": {
+ "text": "{1} CPU, load {0:.1%}"
+ },
+ "warning": {
+ "text": "{1} CPU, load {0:.1%}",
+ "value": 200
+ },
+ "critical": {
+ "text": "{1} CPU, load {0:.1%}",
+ "value": 250
+ }
+ },
+ "jmx": {
+ "property_list": [
+ "java.lang:type=OperatingSystem/SystemCpuLoad",
+ "java.lang:type=OperatingSystem/AvailableProcessors"
+ ],
+ "value": "{0} * 100"
+ }
+ }
+ },
+ {
+ "name": "ams_metric_collector_zookeeper_server_process",
+ "label": "AMS Metric Collector ZooKeeper Server Process",
+ "interval": 1,
+ "scope": "ANY",
+ "source": {
+ "type": "PORT",
+ "uri": "{{ams-hbase-site/hbase.zookeeper.property.clientPort}}",
+ "default_port": 61181,
+ "reporting": {
+ "ok": {
+ "text": "TCP OK - {0:.4f} response on port {1}"
+ },
+ "critical": {
+ "text": "Connection failed: {0} to {1}:{2}"
+ }
+ }
+ }
+ }
+ ],
+ "METRIC_MONITOR": [
+ {
+ "name": "ams_metric_monitor_process",
+ "label": "AMS Metric Monitor Status",
+ "interval": 1,
+ "scope": "ANY",
+ "source": {
+ "type": "SCRIPT",
+ "path": "HDP/2.2/services/AMS/package/files/alert_ambari_metrics_monitor.py"
+ }
+ }
+ ]
+ }
+}
http://git-wip-us.apache.org/repos/asf/ambari/blob/21e3d2a7/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/package/files/alert_ambari_metrics_monitor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/package/files/alert_ambari_metrics_monitor.py b/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/package/files/alert_ambari_metrics_monitor.py
new file mode 100644
index 0000000..5841267
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/package/files/alert_ambari_metrics_monitor.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import socket
+
+from resource_management.libraries.functions.check_process_status import check_process_status
+from resource_management.core.exceptions import ComponentIsNotRunning
+
# Alert states this script can report back from execute().
RESULT_CODE_OK = "OK"
RESULT_CODE_CRITICAL = "CRITICAL"
RESULT_CODE_UNKNOWN = "UNKNOWN"

# PID file whose process is checked to decide whether the monitor is running.
AMS_MONITOR_PID_PATH = "/var/run/ambari-metrics-monitor/ambari-metrics-monitor.pid"
+
+
def get_tokens():
    """
    Lists the {{site/property}} tokens used to build the dictionary that is
    passed into execute.

    This script declares no tokens, so the returned tuple is empty.
    """
    no_tokens = tuple()
    return no_tokens
+
+
def is_monitor_process_live(pid_file):
    """
    Reports whether the AMS monitor identified by the given PID file is running.

    :param pid_file: the PID file of the monitor to check
    :return: True if the monitor is running, False otherwise
    """
    try:
        check_process_status(pid_file)
    except ComponentIsNotRunning:
        # Only this exception is treated as "not running"; anything else
        # propagates to the caller unchanged.
        return False

    return True
+
+
def execute(parameters=None, host_name=None):
    """
    Checks the AMS monitor process and returns a tuple containing the result
    code and a list holding one pre-formatted result label.

    Keyword arguments:
    parameters (dictionary): a mapping of parameter key to value
    host_name (string): the name of this host where the alert is running
    """
    # NOTE(review): parameters is never read past this check, yet a missing
    # value still yields UNKNOWN; kept as-is to preserve behaviour.
    if parameters is None:
        return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])

    # Fall back to this machine's fully qualified name when no host is given.
    reported_host = socket.getfqdn() if host_name is None else host_name

    if is_monitor_process_live(AMS_MONITOR_PID_PATH):
        state = RESULT_CODE_OK
        template = 'Ambari Monitor is running on {0}'
    else:
        state = RESULT_CODE_CRITICAL
        template = 'Ambari Monitor is NOT running on {0}'

    return (state, [template.format(reported_host)])