You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by ds...@apache.org on 2014/12/04 18:28:06 UTC

ambari git commit: AMBARI-8544 Integrate AMS with Ambari Alerting System (dsen)

Repository: ambari
Updated Branches:
  refs/heads/trunk 46325c563 -> 21e3d2a73


AMBARI-8544 Integrate AMS with Ambari Alerting System (dsen)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/21e3d2a7
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/21e3d2a7
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/21e3d2a7

Branch: refs/heads/trunk
Commit: 21e3d2a73ba2048292e42b5e01c1d95d9d5896f8
Parents: 46325c5
Author: Dmytro Sen <ds...@apache.org>
Authored: Thu Dec 4 19:27:40 2014 +0200
Committer: Dmytro Sen <ds...@apache.org>
Committed: Thu Dec 4 19:27:40 2014 +0200

----------------------------------------------------------------------
 .../stacks/HDP/2.2/services/AMS/alerts.json     | 139 +++++++++++++++++++
 .../files/alert_ambari_metrics_monitor.py       |  80 +++++++++++
 2 files changed, 219 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/21e3d2a7/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/alerts.json b/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/alerts.json
new file mode 100644
index 0000000..0a1e469
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/alerts.json
@@ -0,0 +1,139 @@
+{
+  "AMS": {
+    "service": [
+      {
+        "name": "ams_metric_monitor_process_percent",
+        "label": "Percent AMS Metric Monitors Available",
+        "interval": 1,
+        "scope": "SERVICE",
+        "enabled": true,
+        "source": {
+          "type": "AGGREGATE",
+          "alert_name": "ams_metric_monitor_process",
+          "reporting": {
+            "ok": {
+              "text": "affected: [{1}], total: [{0}]"
+            },
+            "warning": {
+              "text": "affected: [{1}], total: [{0}]",
+              "value": 0.1
+            },
+            "critical": {
+              "text": "affected: [{1}], total: [{0}]",
+              "value": 0.3
+            }
+          }
+        }
+      }
+    ],
+    "METRIC_COLLECTOR": [
+      {
+        "name": "ams_metric_collector_process",
+        "label": "AMS Metric Collector Process",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "PORT",
+          "uri": "8188",
+          "default_port": 8188,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} to {1}:{2}"
+            }
+          }
+        }
+      },
+      {
+        "name": "ams_metric_collector_hbase_master_process",
+        "label": "AMS Metric Collector HBase Master Process",
+        "interval": 1,
+        "scope": "ANY",
+        "source": {
+          "type": "PORT",
+          "uri": "{{ams-hbase-site/hbase.master.info.port}}",
+          "default_port": 61310,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} to {1}:{2}"
+            }
+          }
+        }
+      },
+      {
+        "name": "ams_metric_collector_hbase_master_cpu",
+        "label": "AMS Metric Collector HBase Master CPU Utilization",
+        "interval": 5,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "METRIC",
+          "uri": {
+            "http": "{{ams-hbase-site/hbase.master.info.port}}",
+            "https": "{{ams-hbase-site/hbase.master.info.port}}",
+            "https_property": "{{cluster-env/security_enabled}}",
+            "https_property_value": "true",
+            "default_port": 61310
+          },
+          "reporting": {
+            "ok": {
+              "text": "{1} CPU, load {0:.1%}"
+            },
+            "warning": {
+              "text": "{1} CPU, load {0:.1%}",
+              "value": 200
+            },
+            "critical": {
+              "text": "{1} CPU, load {0:.1%}",
+              "value": 250
+            }
+          },
+          "jmx": {
+            "property_list": [
+              "java.lang:type=OperatingSystem/SystemCpuLoad",
+              "java.lang:type=OperatingSystem/AvailableProcessors"
+            ],
+            "value": "{0} * 100"
+          }
+        }
+      },
+      {
+        "name": "ams_metric_collector_zookeeper_server_process",
+        "label": "AMS Metric Collector ZooKeeper Server Process",
+        "interval": 1,
+        "scope": "ANY",
+        "source": {
+          "type": "PORT",
+          "uri": "{{ams-hbase-site/hbase.zookeeper.property.clientPort}}",
+          "default_port": 61181,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} to {1}:{2}"
+            }
+          }
+        }
+      }
+    ],
+    "METRIC_MONITOR": [
+      {
+        "name": "ams_metric_monitor_process",
+        "label": "AMS Metric Monitor Status",
+        "interval": 1,
+        "scope": "ANY",
+        "source": {
+          "type": "SCRIPT",
+          "path": "HDP/2.2/services/AMS/package/files/alert_ambari_metrics_monitor.py"
+        }
+      }
+    ]
+  }
+}

http://git-wip-us.apache.org/repos/asf/ambari/blob/21e3d2a7/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/package/files/alert_ambari_metrics_monitor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/package/files/alert_ambari_metrics_monitor.py b/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/package/files/alert_ambari_metrics_monitor.py
new file mode 100644
index 0000000..5841267
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/package/files/alert_ambari_metrics_monitor.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import socket
+
+from resource_management.libraries.functions.check_process_status import check_process_status
+from resource_management.core.exceptions import ComponentIsNotRunning
+
+RESULT_CODE_OK = 'OK'
+RESULT_CODE_CRITICAL = 'CRITICAL'
+RESULT_CODE_UNKNOWN = 'UNKNOWN'
+
+AMS_MONITOR_PID_PATH = '/var/run/ambari-metrics-monitor/ambari-metrics-monitor.pid'
+
+
+def get_tokens():
+  """
+  Returns a tuple of tokens in the format {{site/property}} that will be used
+  to build the dictionary passed into execute
+  """
+  return ()
+
+
+def is_monitor_process_live(pid_file):
+  """
+  Gets whether the AMS monitor represented by the specified file is running.
+  :param pid_file: the PID file of the monitor to check
+  :return: True if the monitor is running, False otherwise
+  """
+  live = False
+
+  try:
+    check_process_status(pid_file)
+    live = True
+  except ComponentIsNotRunning:
+    pass
+
+  return live
+
+
+def execute(parameters=None, host_name=None):
+  """
+  Returns a tuple containing the result code and a pre-formatted result label
+
+  Keyword arguments:
+  parameters (dictionary): a mapping of parameter key to value
+  host_name (string): the name of this host where the alert is running
+  """
+
+  if parameters is None:
+    return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])
+
+  if host_name is None:
+    host_name = socket.getfqdn()
+
+  ams_monitor_process_running = is_monitor_process_live(AMS_MONITOR_PID_PATH)
+
+  alert_state = RESULT_CODE_OK if ams_monitor_process_running else RESULT_CODE_CRITICAL
+
+  alert_label = 'Ambari Monitor is running on {0}' if ams_monitor_process_running else 'Ambari Monitor is NOT running on {0}'
+  alert_label = alert_label.format(host_name)
+
+  return (alert_state, [alert_label])