You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@ambari.apache.org by "Andrew Onischuk (JIRA)" <ji...@apache.org> on 2018/05/17 07:01:00 UTC
[jira] [Created] (AMBARI-23872) New Alert JSON Is Invalid When Sent
To Agents
Andrew Onischuk created AMBARI-23872:
----------------------------------------
Summary: New Alert JSON Is Invalid When Sent To Agents
Key: AMBARI-23872
URL: https://issues.apache.org/jira/browse/AMBARI-23872
Project: Ambari
Issue Type: Bug
Reporter: Andrew Onischuk
Assignee: Andrew Onischuk
Fix For: 2.7.0
Attachments: AMBARI-23872.patch
STR:
* Set up a simple cluster with HDFS
* Attempt to create a new Alert:
POST http://{{ambari-server}}:8080/api/v1/clusters/c1/alert_definitions
{
"AlertDefinition": {
"component_name": "NAMENODE",
"description": "This service-level alert is triggered if the total number of volume failures across the cluster is greater than the configured critical threshold.",
"enabled": true,
"help_url": null,
"ignore_host": false,
"interval": 2,
"label": "NameNode Volume Failures",
"name": "namenode_volume_failures",
"scope": "ANY",
"service_name": "HDFS",
"source": {
"jmx": {
"property_list": [
"Hadoop:service=NameNode,name=FSNamesystemState/VolumeFailuresTotal"
],
"value": "{0}"
},
"reporting": {
"ok": {
"text": "There are {0} volume failures"
},
"warning": {
"text": "There are {0} volume failures",
"value": 1
},
"critical": {
"text": "There are {0} volume failures",
"value": 1
},
"units": "Volume(s)"
},
"type": "METRIC",
"uri": {
"http": "{{hdfs-site/dfs.namenode.http-address}}",
"https": "{{hdfs-site/dfs.namenode.https-address}}",
"https_property": "{{hdfs-site/dfs.http.policy}}",
"https_property_value": "HTTPS_ONLY",
"kerberos_keytab": "{{hdfs-site/dfs.web.authentication.kerberos.keytab}}",
"kerberos_principal": "{{hdfs-site/dfs.web.authentication.kerberos.principal}}",
"default_port": 0,
"connection_timeout": 5,
"high_availability": {
"nameservice": "{{hdfs-site/dfs.internal.nameservices}}",
"alias_key": "{{hdfs-site/dfs.ha.namenodes.{{ha-nameservice}}}}",
"http_pattern": "{{hdfs-site/dfs.namenode.http-address.{{ha-nameservice}}.{{alias}}}}",
"https_pattern": "{{hdfs-site/dfs.namenode.https-address.{{ha-nameservice}}.{{alias}}}}"
}
}
}
}
}
This alert will not be scheduled on the agent correctly:
ERROR 2018-05-16 20:11:55,186 AlertSchedulerHandler.py:307 - [AlertScheduler] Unable to load an invalid alert definition. It will be skipped.
Traceback (most recent call last):
File "/usr/lib/ambari-agent/lib/ambari_agent/AlertSchedulerHandler.py", line 287, in __json_to_callable
alert = MetricAlert(json_definition, source, self.config)
File "/usr/lib/ambari-agent/lib/ambari_agent/alerts/metric_alert.py", line 52, in __init__
self.metric_info = JmxMetric(alert_source_meta['jmx'])
File "/usr/lib/ambari-agent/lib/ambari_agent/alerts/metric_alert.py", line 288, in __init__
self.property_list = jmx_info['property_list']
KeyError: 'property_list'
Looking at `/var/lib/ambari-agent/cache/cluster_cache/alerts.json`, we can see
that `property_list` was changed into `propertyList`, so the agent's lookup of
`property_list` fails with the `KeyError` shown above.
"name": "namenode_volume_failures",
"componentName": "NAMENODE",
"description": "This service-level alert is triggered if the total number of volume failures across the cluster is greater than the configured critical threshold.",
"interval": 2,
"clusterId": 2,
"label": "NameNode Volume Failures",
"ignore_host": false,
"source": {
"jmx": {
"urlSuffix": "/jmx",
"propertyList": [
"Hadoop:service=NameNode,name=FSNamesystemState/VolumeFailuresTotal"
],
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)