You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by jo...@apache.org on 2015/08/04 16:40:38 UTC
ambari git commit: AMBARI-12622 - Malformed Alert Data Can Prevent
Alerts From Reporting (jonathanhurley)
Repository: ambari
Updated Branches:
refs/heads/trunk b5bd53a10 -> 5dbb0e1b8
AMBARI-12622 - Malformed Alert Data Can Prevent Alerts From Reporting (jonathanhurley)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/5dbb0e1b
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/5dbb0e1b
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/5dbb0e1b
Branch: refs/heads/trunk
Commit: 5dbb0e1b87a8720a1e20920190a552193ed90bf4
Parents: b5bd53a
Author: Jonathan Hurley <jh...@hortonworks.com>
Authored: Mon Aug 3 11:20:52 2015 -0400
Committer: Jonathan Hurley <jh...@hortonworks.com>
Committed: Tue Aug 4 10:40:31 2015 -0400
----------------------------------------------------------------------
.../ambari_agent/AlertSchedulerHandler.py | 45 ++++---
.../python/ambari_agent/alerts/base_alert.py | 38 ++++--
.../python/ambari_agent/alerts/metric_alert.py | 2 +-
.../src/test/python/ambari_agent/TestAlerts.py | 134 +++++++++++++++++++
4 files changed, 190 insertions(+), 29 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/5dbb0e1b/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py b/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py
index c41397d..cddee57 100644
--- a/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py
+++ b/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py
@@ -260,28 +260,33 @@ class AlertSchedulerHandler():
converts the json that represents all aspects of a definition
and makes an object that extends BaseAlert that is used for individual
"""
- source = json_definition['source']
- source_type = source.get('type', '')
-
- if logger.isEnabledFor(logging.DEBUG):
- logger.debug("[AlertScheduler] Creating job type {0} with {1}".format(source_type, str(json_definition)))
-
alert = None
- if source_type == AlertSchedulerHandler.TYPE_METRIC:
- alert = MetricAlert(json_definition, source, self.config)
- elif source_type == AlertSchedulerHandler.TYPE_PORT:
- alert = PortAlert(json_definition, source)
- elif source_type == AlertSchedulerHandler.TYPE_SCRIPT:
- source['stacks_directory'] = self.stacks_dir
- source['common_services_directory'] = self.common_services_dir
- source['host_scripts_directory'] = self.host_scripts_dir
- alert = ScriptAlert(json_definition, source, self.config)
- elif source_type == AlertSchedulerHandler.TYPE_WEB:
- alert = WebAlert(json_definition, source, self.config)
-
- if alert is not None:
- alert.set_cluster(clusterName, hostName)
+ try:
+ source = json_definition['source']
+ source_type = source.get('type', '')
+
+ if logger.isEnabledFor(logging.DEBUG):
+ logger.debug("[AlertScheduler] Creating job type {0} with {1}".format(source_type, str(json_definition)))
+
+
+ if source_type == AlertSchedulerHandler.TYPE_METRIC:
+ alert = MetricAlert(json_definition, source, self.config)
+ elif source_type == AlertSchedulerHandler.TYPE_PORT:
+ alert = PortAlert(json_definition, source)
+ elif source_type == AlertSchedulerHandler.TYPE_SCRIPT:
+ source['stacks_directory'] = self.stacks_dir
+ source['common_services_directory'] = self.common_services_dir
+ source['host_scripts_directory'] = self.host_scripts_dir
+ alert = ScriptAlert(json_definition, source, self.config)
+ elif source_type == AlertSchedulerHandler.TYPE_WEB:
+ alert = WebAlert(json_definition, source, self.config)
+
+ if alert is not None:
+ alert.set_cluster(clusterName, hostName)
+
+ except Exception,exception:
+ logger.exception("[AlertScheduler] Unable to load an invalid alert definition. It will be skipped.")
return alert
http://git-wip-us.apache.org/repos/asf/ambari/blob/5dbb0e1b/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py b/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py
index 9151796..fd6b03c 100644
--- a/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py
+++ b/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py
@@ -145,23 +145,45 @@ class BaseAlert(object):
if logger.isEnabledFor(logging.DEBUG):
logger.debug("[Alert][{0}] result = {1}".format(self.get_name(), str(res)))
-
+
data = {}
data['name'] = self._get_alert_meta_value_safely('name')
data['label'] = self._get_alert_meta_value_safely('label')
- data['state'] = res[0]
- data['text'] = res_base_text.format(*res[1])
+ data['uuid'] = self._get_alert_meta_value_safely('uuid')
data['cluster'] = self.cluster_name
data['service'] = self._get_alert_meta_value_safely('serviceName')
data['component'] = self._get_alert_meta_value_safely('componentName')
data['timestamp'] = long(time.time() * 1000)
- data['uuid'] = self._get_alert_meta_value_safely('uuid')
data['enabled'] = self._get_alert_meta_value_safely('enabled')
- if logger.isEnabledFor(logging.DEBUG):
- logger.debug("[Alert][{0}] text = {1}".format(self.get_name(), data['text']))
-
- self.collector.put(self.cluster_name, data)
+ try:
+ data['state'] = res[0]
+
+ # * is the splat operator, which flattens a collection into positional arguments
+ # flatten the array and then try formatting it
+ try:
+ data['text'] = res_base_text.format(*res[1])
+ except ValueError, value_error:
+ logger.warn("[Alert][{0}] - {1}".format(self.get_name(), str(value_error)))
+
+ # if there is a ValueError, it's probably because the text doesn't match the type of
+ # positional arguemtns (ie {0:d} with a float)
+ res_base_text = res_base_text.replace("d}", "s}")
+ data_as_strings = map(str, res[1])
+ data['text'] = res_base_text.format(*data_as_strings)
+
+ if logger.isEnabledFor(logging.DEBUG):
+ logger.debug("[Alert][{0}] text = {1}".format(self.get_name(), data['text']))
+ except Exception, exception:
+ logger.exception("[Alert][{0}] - The alert's data is not properly formatted".format(self.get_name()))
+
+ # if there's a problem with getting the data returned from collect() then mark this
+ # alert as UNKNOWN
+ data['state'] = self.RESULT_UNKNOWN
+ data['text'] = "There is a problem with the alert definition: {0}".format(str(exception))
+ finally:
+ # put the alert into the collector so it can be collected on the next run
+ self.collector.put(self.cluster_name, data)
def _get_configuration_value(self, key):
http://git-wip-us.apache.org/repos/asf/ambari/blob/5dbb0e1b/ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py b/ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py
index 6bcdacd..aa4ad75 100644
--- a/ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py
+++ b/ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py
@@ -96,7 +96,7 @@ class MetricAlert(BaseAlert):
collect_result = self.RESULT_UNKNOWN
value_list.append('HTTP {0} response (metrics unavailable)'.format(str(http_code)))
elif not jmx_property_values and http_code not in [200, 307]:
- raise Exception("[Alert][{0}] Unable to get json from jmx response!".format(self.get_name()))
+ raise Exception("[Alert][{0}] Unable to extract JSON from JMX response".format(self.get_name()))
else:
value_list.extend(jmx_property_values)
check_value = self.metric_info.calculate(value_list)
http://git-wip-us.apache.org/repos/asf/ambari/blob/5dbb0e1b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/ambari_agent/TestAlerts.py b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
index 7b64445..dab717d 100644
--- a/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
+++ b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
@@ -1037,6 +1037,82 @@ class TestAlerts(TestCase):
self.assertEquals('(Unit Tests) OK: 10 5 2.0', alerts[0]['text'])
+
+ @patch.object(socket.socket,"connect")
+ def test_alert_definition_value_error_conversion(self, socket_connect_mock):
+ """
+ Tests that an alert definition with text that doesn't match the type of positional arguments
+ can recover and retry the ValueError.
+ :param socket_connect_mock:
+ :return:
+ """
+ definition_json = self._get_alert_definition_with_value_error_text()
+
+ configuration = {'hdfs-site' :
+ { 'my-key': 'c6401.ambari.apache.org:2181'}
+ }
+
+ collector = AlertCollector()
+ cluster_configuration = self.__get_cluster_configuration()
+ self.__update_cluster_configuration(cluster_configuration, configuration)
+
+ alert = PortAlert(definition_json, definition_json['source'])
+ alert.set_helpers(collector, cluster_configuration)
+ alert.set_cluster("c1", "c6402.ambari.apache.org")
+
+ # use a URI that has commas to verify that we properly parse it
+ alert.set_helpers(collector, cluster_configuration)
+ alert.set_cluster("c1", "c6401.ambari.apache.org")
+ self.assertEquals(6, alert.interval())
+
+ # the collect should catch the invalid text in the definition
+ # ValueError: Unknown format code 'd' for object of type 'float'
+ alert.collect()
+
+ alerts = collector.alerts()
+ self.assertEquals(0, len(collector.alerts()))
+
+ self.assertEquals('OK', alerts[0]['state'])
+ self.assertTrue('(Unit Tests) TCP OK' in alerts[0]['text'])
+
+
+ @patch.object(socket.socket,"connect")
+ def test_alert_definition_too_many_positional_arguments(self, socket_connect_mock):
+ """
+ Tests that an alert definition with too many arguments produces an alert to collect after the
+ exceptioin is raised.
+ :param socket_connect_mock:
+ :return:
+ """
+ definition_json = self._get_alert_definition_with_too_many_positional_arguments()
+
+ configuration = {'hdfs-site' :
+ { 'my-key': 'c6401.ambari.apache.org:2181'}
+ }
+
+ collector = AlertCollector()
+ cluster_configuration = self.__get_cluster_configuration()
+ self.__update_cluster_configuration(cluster_configuration, configuration)
+
+ alert = PortAlert(definition_json, definition_json['source'])
+ alert.set_helpers(collector, cluster_configuration)
+ alert.set_cluster("c1", "c6402.ambari.apache.org")
+
+ # use a URI that has commas to verify that we properly parse it
+ alert.set_helpers(collector, cluster_configuration)
+ alert.set_cluster("c1", "c6401.ambari.apache.org")
+ self.assertEquals(6, alert.interval())
+
+ # the collect should catch the invalid text in the definition
+ # ValueError: Unknown format code 'd' for object of type 'float'
+ alert.collect()
+
+ alerts = collector.alerts()
+ self.assertEquals(0, len(collector.alerts()))
+
+ self.assertEquals('UNKNOWN', alerts[0]['state'])
+ self.assertTrue('There is a problem with the alert definition' in alerts[0]['text'])
+
def __get_cluster_configuration(self):
"""
Gets an instance of the cluster cache where the file read and write
@@ -1251,6 +1327,64 @@ class TestAlerts(TestCase):
}
}
+ def _get_alert_definition_with_value_error_text(self):
+ return { "name": "namenode_process",
+ "service": "HDFS",
+ "component": "NAMENODE",
+ "label": "NameNode process",
+ "interval": 6,
+ "scope": "host",
+ "enabled": True,
+ "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
+ "source": {
+ "type": "PORT",
+ "uri": "{{hdfs-site/my-key}}",
+ "default_port": 50070,
+ "reporting": {
+ "ok": {
+ "text": "(Unit Tests) TCP OK {0:.4d}"
+ },
+ "warning": {
+ "text": "(Unit Tests) TCP Warning {0:.4d}",
+ "value": 1.5
+ },
+ "critical": {
+ "text": "(Unit Tests) TCP Critical {0:.4d}",
+ "value": 5.0
+ }
+ }
+ }
+ }
+
+ def _get_alert_definition_with_too_many_positional_arguments(self):
+ return { "name": "namenode_process",
+ "service": "HDFS",
+ "component": "NAMENODE",
+ "label": "NameNode process",
+ "interval": 6,
+ "scope": "host",
+ "enabled": True,
+ "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
+ "source": {
+ "type": "PORT",
+ "uri": "{{hdfs-site/my-key}}",
+ "default_port": 50070,
+ "reporting": {
+ "ok": {
+ "text": "Bad Syntax Going To Mess You Up {0:.4d} {1} {2} {3} {4}"
+ },
+ "warning": {
+ "text": "Bad Syntax Going To Mess You Up {0:.4d} {1} {2} {3} {4}",
+ "value": 1.5
+ },
+ "critical": {
+ "text": "Bad Syntax Going To Mess You Up {0:.4d} {1} {2} {3} {4}",
+ "value": 5.0
+ }
+ }
+ }
+ }
+
class MockAlert(BaseAlert):
"""
Mock class for testing