You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by ja...@apache.org on 2016/02/26 00:46:06 UTC
ambari git commit: AMBARI-15105: Add alerts for HAWQ components
status (bhuvnesh2703 via jaoki)
Repository: ambari
Updated Branches:
refs/heads/trunk 8fb17ab09 -> 29115e81e
AMBARI-15105: Add alerts for HAWQ components status (bhuvnesh2703 via jaoki)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/29115e81
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/29115e81
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/29115e81
Branch: refs/heads/trunk
Commit: 29115e81ee2ba6643c7725903300f070c4ba2ea5
Parents: 8fb17ab
Author: Jun Aoki <ja...@apache.org>
Authored: Thu Feb 25 15:45:58 2016 -0800
Committer: Jun Aoki <ja...@apache.org>
Committed: Thu Feb 25 15:45:58 2016 -0800
----------------------------------------------------------------------
.../common-services/HAWQ/2.0.0/alerts.json | 93 +++++++++++-
.../package/alerts/alert_component_status.py | 76 ++++++++++
.../2.3/HAWQ/test_alert_component_status.py | 141 +++++++++++++++++++
ambari-web/app/views/main/dashboard/widgets.js | 2 +-
4 files changed, 310 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/29115e81/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/alerts.json b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/alerts.json
index 3119a0c..14ad6d7 100644
--- a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/alerts.json
+++ b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/alerts.json
@@ -1,5 +1,32 @@
{
"HAWQ": {
+ "service": [
+ {
+ "name": "hawq_segment_process_percent",
+ "label": "Percent HAWQ Segments Available",
+ "description": "This alert is triggered if the number of down HAWQ Segments in the cluster is greater than the configured critical threshold.",
+ "interval": 1,
+ "scope": "SERVICE",
+ "enabled": true,
+ "source": {
+ "type": "AGGREGATE",
+ "alert_name": "hawq_segment_process",
+ "reporting": {
+ "ok": {
+ "text": "affected: [{1}], total: [{0}]"
+ },
+ "warning": {
+ "text": "affected: [{1}], total: [{0}]",
+ "value": 0.1
+ },
+ "critical": {
+ "text": "affected: [{1}], total: [{0}]",
+ "value": 0.3
+ }
+ }
+ }
+ }
+ ],
"HAWQMASTER": [
{
"name": "hawqstandby_sync_status",
@@ -13,7 +40,71 @@
"path": "HAWQ/2.0.0/package/alerts/alert_sync_status.py",
"parameters": []
}
+ },
+ {
+ "name": "hawq_master_process",
+ "label": "HAWQ Master Process",
+ "description": "This alert is triggered if the HAWQ Master process cannot be confirmed to be up and listening on the network.",
+ "interval": 1,
+ "scope": "ANY",
+ "source": {
+ "type": "SCRIPT",
+ "path": "HAWQ/2.0.0/package/alerts/alert_component_status.py",
+ "parameters": [
+ {
+ "name": "component_name",
+ "display_name": "Component Name",
+ "value": "master",
+ "type": "STRING",
+ "description": "This text string indicates if it is a Master, Standby or Segment"
+ }
+ ]
+ }
+ }
+ ],
+ "HAWQSEGMENT": [
+ {
+ "name": "hawq_segment_process",
+ "label": "HAWQ Segment Process",
+ "description": "This host-level alert is triggered if the HAWQ Segment process cannot be confirmed to be up and listening on the network.",
+ "interval": 1,
+ "scope": "HOST",
+ "source": {
+ "type": "SCRIPT",
+ "path": "HAWQ/2.0.0/package/alerts/alert_component_status.py",
+ "parameters": [
+ {
+ "name": "component_name",
+ "display_name": "Component Name",
+ "value": "segment",
+ "type": "STRING",
+ "description": "This text string indicates if it is a Master, Standby or Segment"
+ }
+ ]
+ }
+ }
+ ],
+ "HAWQSTANDBY": [
+ {
+ "name": "hawq_standby_process",
+ "label": "HAWQ Standby Process",
+ "description": "This alert is triggered if the HAWQ Standby process cannot be confirmed to be up and listening on the network.",
+ "interval": 1,
+ "scope": "ANY",
+ "source": {
+ "type": "SCRIPT",
+ "path": "HAWQ/2.0.0/package/alerts/alert_component_status.py",
+ "parameters": [
+ {
+ "name": "component_name",
+ "display_name": "Component Name",
+ "value": "standby",
+ "type": "STRING",
+ "description": "This text string indicates if it is a Master, Standby or Segment"
+ }
+ ]
+ }
}
]
}
-}
\ No newline at end of file
+}
http://git-wip-us.apache.org/repos/asf/ambari/blob/29115e81/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/package/alerts/alert_component_status.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/package/alerts/alert_component_status.py b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/package/alerts/alert_component_status.py
new file mode 100644
index 0000000..9ca9ac6
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/package/alerts/alert_component_status.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from resource_management.core.shell import call
+
+HAWQMASTER_PORT = '{{hawq-site/hawq_master_address_port}}'
+HAWQSEGMENT_PORT = '{{hawq-site/hawq_segment_address_port}}'
+HAWQSTANDBY_ADDRESS = '{{hawq-site/hawq_standby_address_host}}'
+
+RESULT_STATE_OK = 'OK'
+RESULT_STATE_UNKNOWN = 'UNKNOWN'
+RESULT_STATE_SKIPPED = 'SKIPPED'
+RESULT_STATE_CRITICAL = 'CRITICAL'
+
+COMPONENT_PROCESS_MAP = {
+ "segment": "postgres",
+ "master": "postgres",
+ "standby": "gpsyncmaster"
+ }
+
+
+
+def get_tokens():
+ """
+ Returns a tuple of tokens in the format {{site/property}} that will be used to build the dictionary passed into execute
+ """
+ return (HAWQMASTER_PORT, HAWQSEGMENT_PORT, HAWQSTANDBY_ADDRESS)
+
+
+def execute(configurations={}, parameters={}, host_name=None):
+ """
+ Returns a tuple containing the result code and a pre-formatted result label
+
+ Keyword arguments:
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
+ host_name (string): the name of this host where the alert is running
+ """
+
+ if configurations is None:
+ return (RESULT_STATE_UNKNOWN, ['There were no configurations supplied to the script.'])
+
+ component = parameters['component_name']
+ # Identify port of the process
+ port = configurations[HAWQSEGMENT_PORT] if component == "segment" else configurations[HAWQMASTER_PORT]
+
+ component_name = component.capitalize()
+ is_running = is_component_running(port, COMPONENT_PROCESS_MAP[component])
+ if is_running:
+ return (RESULT_STATE_OK, ['HAWQ {0} is running'.format(component_name)])
+ else:
+ return (RESULT_STATE_CRITICAL, ['HAWQ {0} is not running'.format(component_name)])
+
+def is_component_running(port, process):
+ """
+ Check if the process is running on the specified port
+ """
+ cmd = "netstat -tupln | egrep ':{0}\s' | egrep {1}".format(port, process)
+ rc, op= call(cmd, timeout=60)
+ return rc == 0
http://git-wip-us.apache.org/repos/asf/ambari/blob/29115e81/ambari-server/src/test/python/stacks/2.3/HAWQ/test_alert_component_status.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.3/HAWQ/test_alert_component_status.py b/ambari-server/src/test/python/stacks/2.3/HAWQ/test_alert_component_status.py
new file mode 100644
index 0000000..b2e1d4d
--- /dev/null
+++ b/ambari-server/src/test/python/stacks/2.3/HAWQ/test_alert_component_status.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+# System imports
+import os
+import sys
+
+from mock.mock import patch
+
+# Local imports
+from stacks.utils.RMFTestCase import *
+
+COMMON_SERVICES_ALERTS_DIR = "HAWQ/2.0.0/package/alerts"
+
+file_path = os.path.dirname(os.path.abspath(__file__))
+file_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(file_path)))))
+file_path = os.path.join(file_path, "main", "resources", "common-services", COMMON_SERVICES_ALERTS_DIR)
+
+WORKING_CONFIGS = {
+ "{{hawq-site/hawq_master_address_port}}": "5432",
+ "{{hawq-site/hawq_segment_address_port}}": "40000",
+ "{{hawq-site/hawq_standby_address_host}}": "c6402.ambari.apache.org"
+ }
+
+class TestAlertComponentStatus(RMFTestCase):
+
+ def setUp(self):
+ """
+ Import the class under test.
+ Because the class is present in a different folder, append its dir to the system path.
+ Also, shorten the import name and make it a global so the test functions can access it.
+ :return:
+ """
+ sys.path.append(file_path)
+ global alert_component_status
+ import alert_component_status
+
+ def test_missing_configs(self):
+ """
+ Check that the status is UNKNOWN when configs are missing.
+ """
+ configs = None
+ [status, messages] = alert_component_status.execute(configurations=configs)
+ self.assertEqual(status, alert_component_status.RESULT_STATE_UNKNOWN)
+ self.assertTrue(messages is not None and len(messages) == 1)
+ self.assertEqual(messages[0], 'There were no configurations supplied to the script.')
+
+ @patch("alert_component_status.is_component_running")
+ def test_hawq_master_ok(self, is_component_running_mock):
+ """
+ Test that the status is OK when HAWQ Master is up
+ """
+ # Mock calls
+ is_component_running_mock.return_value = True
+
+ [status, messages] = alert_component_status.execute(configurations=WORKING_CONFIGS, parameters={'component_name': 'master'})
+ self.assertEqual(status, alert_component_status.RESULT_STATE_OK)
+ self.assertTrue(messages is not None and len(messages) == 1)
+ self.assertEqual(messages[0], 'HAWQ Master is running')
+
+ @patch("alert_component_status.is_component_running")
+ def test_hawq_master_critical(self, is_component_running_mock):
+ """
+ Test that the status is CRITICAL when HAWQ Master is down
+ """
+ # Mock calls
+ is_component_running_mock.return_value = False
+
+ [status, messages] = alert_component_status.execute(configurations=WORKING_CONFIGS, parameters={'component_name': 'master'})
+ self.assertEqual(status, alert_component_status.RESULT_STATE_CRITICAL)
+ self.assertTrue(messages is not None and len(messages) == 1)
+ self.assertEqual(messages[0], 'HAWQ Master is not running')
+
+ @patch("alert_component_status.is_component_running")
+ def test_hawq_standby_ok(self, is_component_running_mock):
+ """
+ Test that the status is OK when HAWQ Standby is up
+ """
+ # Mock calls
+ is_component_running_mock.return_value = True
+
+ [status, messages] = alert_component_status.execute(configurations=WORKING_CONFIGS, parameters={'component_name': 'standby'})
+ self.assertEqual(status, alert_component_status.RESULT_STATE_OK)
+ self.assertTrue(messages is not None and len(messages) == 1)
+ self.assertEqual(messages[0], 'HAWQ Standby is running')
+
+ @patch("alert_component_status.is_component_running")
+ def test_hawq_standby_critical(self, is_component_running_mock):
+ """
+ Test that the status is CRITICAL when HAWQ Standby is down
+ """
+ # Mock calls
+ is_component_running_mock.return_value = False
+
+ [status, messages] = alert_component_status.execute(configurations=WORKING_CONFIGS, parameters={'component_name': 'standby'})
+ self.assertEqual(status, alert_component_status.RESULT_STATE_CRITICAL)
+ self.assertTrue(messages is not None and len(messages) == 1)
+ self.assertEqual(messages[0], 'HAWQ Standby is not running')
+
+ @patch("alert_component_status.is_component_running")
+ def test_hawq_segment_ok(self, is_component_running_mock):
+ """
+ Test that the status is OK when HAWQ Segment is up
+ """
+ # Mock calls
+ is_component_running_mock.return_value = True
+
+ [status, messages] = alert_component_status.execute(configurations=WORKING_CONFIGS, parameters={'component_name': 'segment'})
+ self.assertEqual(status, alert_component_status.RESULT_STATE_OK)
+ self.assertTrue(messages is not None and len(messages) == 1)
+ self.assertEqual(messages[0], 'HAWQ Segment is running')
+
+ @patch("alert_component_status.is_component_running")
+ def test_hawq_segment_critical(self, is_component_running_mock):
+ """
+ Test that the status is CRITICAL when HAWQ Segment is down
+ """
+ # Mock calls
+ is_component_running_mock.return_value = False
+
+ [status, messages] = alert_component_status.execute(configurations=WORKING_CONFIGS, parameters={'component_name': 'segment'})
+ self.assertEqual(status, alert_component_status.RESULT_STATE_CRITICAL)
+ self.assertTrue(messages is not None and len(messages) == 1)
+ self.assertEqual(messages[0], 'HAWQ Segment is not running')
http://git-wip-us.apache.org/repos/asf/ambari/blob/29115e81/ambari-web/app/views/main/dashboard/widgets.js
----------------------------------------------------------------------
diff --git a/ambari-web/app/views/main/dashboard/widgets.js b/ambari-web/app/views/main/dashboard/widgets.js
index c6723be..04be7bb 100644
--- a/ambari-web/app/views/main/dashboard/widgets.js
+++ b/ambari-web/app/views/main/dashboard/widgets.js
@@ -478,7 +478,7 @@ App.MainDashboardWidgetsView = Em.View.extend(App.UserPref, App.LocalStorage, Ap
visible: [],
hidden: [],
threshold: {1: [80, 90], 2: [85, 95], 3: [90, 95], 4: [80, 90], 5: [1000, 3000], 6: [], 7: [], 8: [], 9: [], 10: [], 11: [], 12: [], 13: [70, 90], 14: [150, 250], 15: [3, 10], 16: [],
- 17: [70, 90], 18: [], 19: [50, 75], 20: [50, 75], 21: [85, 95], 22: [85, 95], 23: [], 24: [80, 90]} // id:[thresh1, thresh2]
+ 17: [70, 90], 18: [], 19: [50, 75], 20: [50, 75], 21: [85, 95], 22: [85, 95], 23: [], 24: [70, 90]} // id:[thresh1, thresh2]
}),
/**