You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by ad...@apache.org on 2018/04/10 08:29:15 UTC

[ambari] branch trunk updated: AMBARI-23445. Refactor metrics_alert to support multiple nameservices (#904)

This is an automated email from the ASF dual-hosted git repository.

adoroszlai pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/ambari.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 09c959d  AMBARI-23445. Refactor metrics_alert to support multiple nameservices (#904)
09c959d is described below

commit 09c959dba7622d0428931e87262f35a580fa3c14
Author: majorendre <34...@users.noreply.github.com>
AuthorDate: Tue Apr 10 10:29:12 2018 +0200

    AMBARI-23445. Refactor metrics_alert to support multiple nameservices (#904)
---
 .../libraries/functions/namenode_ha_utils.py       |  30 +++++-
 .../package/alerts/alert_metrics_deviation.py      |  20 +++-
 .../2.0.6/HDFS/test_alert_metrics_deviation.py     | 112 +++++++++++++++++++--
 3 files changed, 150 insertions(+), 12 deletions(-)

diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/namenode_ha_utils.py b/ambari-common/src/main/python/resource_management/libraries/functions/namenode_ha_utils.py
index 4d51e69..0d2cd3f 100644
--- a/ambari-common/src/main/python/resource_management/libraries/functions/namenode_ha_utils.py
+++ b/ambari-common/src/main/python/resource_management/libraries/functions/namenode_ha_utils.py
@@ -27,7 +27,8 @@ from resource_management.libraries.functions.hdfs_utils import is_https_enabled_
 
 
 __all__ = ["get_namenode_states", "get_active_namenode",
-           "get_property_for_active_namenodes", "get_property_for_active_namenode", "get_nameservices"]
+           "get_property_for_active_namenodes", "get_property_for_active_namenode", "get_nameservices",
+           "get_name_service_by_hostname"]
 
 HDFS_NN_STATE_ACTIVE = 'active'
 HDFS_NN_STATE_STANDBY = 'standby'
@@ -355,3 +356,30 @@ def get_initial_active_namenodes(hadoop_env):
     return frozenset(setting.split(','))
 
   return frozenset()
+
+
+def get_name_service_by_hostname(hdfs_site, host_name):
+  """
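+  Finds the name service which the name node on host_name belongs to in an HA or federated setup.
+  :param hdfs_site: the hdfs config; the 'dfs.internal.nameservices' key must be present
+  :param host_name: the host name of the name node, can be None if there is only 1 name service
+  :return: the name service, or None if no name node rpc-address host of any name service equals host_name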
+  """
+  # There has to be a name service - we are in HA at least
+  name_services_string = hdfs_site['dfs.internal.nameservices']
+  if not name_services_string:
+    raise Fail('Not a HA setup')
+  name_services = name_services_string.split(',')
+  if len(name_services) == 1:
+    return name_services[0]
+
+  if not host_name:
+    raise ValueError('Host name required when using namenode federation')
+
+  for ns in name_services:
+    ha_name_nodes = hdfs_site['dfs.ha.namenodes.{0}'.format(ns)].split(',')
+    for nn in ha_name_nodes:
+      nn_rpc_port = hdfs_site['dfs.namenode.rpc-address.{0}.{1}'.format(ns,nn)]
+      nn_rpc = nn_rpc_port.split(':')[0]
+      if nn_rpc == host_name:
+        return ns
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py
index 3f8eb2e..fe2e9fc 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py
@@ -33,6 +33,7 @@ from ambari_commons.aggregate_functions import sample_standard_deviation, mean
 from resource_management.libraries.functions.curl_krb_request import curl_krb_request
 from resource_management.libraries.functions.curl_krb_request import DEFAULT_KERBEROS_KINIT_TIMER_MS
 from resource_management.libraries.functions.curl_krb_request import KERBEROS_KINIT_TIMER_PARAMETER
+from resource_management.libraries.functions.namenode_ha_utils import get_name_service_by_hostname
 from ambari_commons.ambari_metrics_helper import select_metric_collector_for_sink
 from ambari_agent.AmbariConfig import AmbariConfig
 
@@ -233,7 +234,7 @@ def execute(configurations={}, parameters={}, host_name=None):
 
     kinit_timer_ms = parameters.get(KERBEROS_KINIT_TIMER_PARAMETER, DEFAULT_KERBEROS_KINIT_TIMER_MS)
 
-    name_service = configurations[NAMESERVICE_KEY]
+    name_service = get_name_service_by_hostname(hdfs_site, host_name)
 
     # look for dfs.ha.namenodes.foo
     nn_unique_ids_key = 'dfs.ha.namenodes.' + name_service
@@ -276,8 +277,7 @@ def execute(configurations={}, parameters={}, host_name=None):
 
             state = _get_ha_state_from_json(state_response)
           else:
-            state_response = get_jmx(jmx_uri, connection_timeout)
-            state = _get_ha_state_from_json(state_response)
+            state = _get_state_from_jmx(jmx_uri, connection_timeout)
 
           if state == HDFS_NN_STATE_ACTIVE:
             active_namenodes.append(namenode)
@@ -319,13 +319,14 @@ def execute(configurations={}, parameters={}, host_name=None):
                           metric_truststore_ca_certs)
   metric_collector_https_enabled = str(configurations[AMS_HTTP_POLICY]) == "HTTPS_ONLY"
 
+  _ssl_version = _get_ssl_version()
   try:
     conn = network.get_http_connection(
       collector_host,
       int(collector_port),
       metric_collector_https_enabled,
       ca_certs,
-      ssl_version=AmbariConfig.get_resolved_config().get_force_https_protocol_value()
+      ssl_version=_ssl_version
     )
     conn.request("GET", AMS_METRICS_GET_URL % encoded_get_metrics_parameters)
     response = conn.getresponse()
@@ -427,6 +428,16 @@ def valid_collector_webapp_address(webapp_address):
   return False
 
 
+def _get_state_from_jmx(jmx_uri, connection_timeout):
+  state_response = get_jmx(jmx_uri, connection_timeout)
+  state = _get_ha_state_from_json(state_response)
+  return state
+
+
+def _get_ssl_version():
+  return AmbariConfig.get_resolved_config().get_force_https_protocol_value()
+
+
 def get_jmx(query, connection_timeout):
   response = None
 
@@ -484,3 +495,4 @@ def _coerce_to_integer(value):
     return int(value)
   except ValueError:
     return int(float(value))
+
diff --git a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_metrics_deviation.py b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_metrics_deviation.py
index 09e8886..22f6d2b 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_metrics_deviation.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_metrics_deviation.py
@@ -39,6 +39,14 @@ RESULT_STATE_CRITICAL = "CRITICAL"
 RESULT_STATE_UNKNOWN = "UNKNOWN"
 RESULT_STATE_SKIPPED = "SKIPPED"
 
+def dummy_get_state_from_jmx(jmx_uri, connection_timeout):
+  if 'c6401' in jmx_uri or 'c6403' in jmx_uri:
+    return 'active'
+  return 'standby'
+
+def dummy_get_ssl_version():
+  return 3
+
 class TestAlertMetricsDeviation(RMFTestCase):
 
   def setUp(self):
@@ -127,7 +135,94 @@ class TestAlertMetricsDeviation(RMFTestCase):
     }
     self.make_alert_tests(configs, conn_mock)
 
-  def make_alert_tests(self, configs, conn_mock):
+  @patch.object(ambari_metrics_helper, 'get_metric_collectors_from_properties_file', new = MagicMock(return_value='c6401.ambari.apache.org:6188'))
+  @patch("httplib.HTTPConnection")
+  def test_alert_ha(self, conn_mock):
+    configs = {
+      '{{hdfs-site/dfs.namenode.https-address}}': 'c6401.ambari.apache.org:50470',
+      '{{hdfs-site/dfs.http.policy}}': 'HTTP_ONLY',
+      '{{ams-site/timeline.metrics.service.webapp.address}}': '0.0.0.0:6188',
+      '{{ams-site/timeline.metrics.service.http.policy}}' : 'HTTP_ONLY',
+      '{{hdfs-site/dfs.namenode.http-address}}': 'c6401.ambari.apache.org:50070',
+      '{{cluster-env/security_enabled}}': 'false',
+      '{{cluster-env/smokeuser}}': 'ambari-qa',
+      '{{hdfs-site/dfs.internal.nameservices}}': 'ns',
+      '{{hdfs-site}}': {
+        'dfs.internal.nameservices': 'ns',
+        'dfs.nameservices': 'ns',
+        'dfs.ha.namenodes.ns': 'nn1,nn2',
+        'dfs.datanode.address.ns.nn1': '0.0.0.0:50010',
+        'dfs.namenode.rpc-address.ns.nn1': 'c6401.ambari.apache.org:8020',
+        'dfs.namenode.https-address.ns.nn1': 'c6401.ambari.apache.org:50470',
+        'dfs.namenode.http-address.ns.nn1': 'c6401.ambari.apache.org:50070',
+        'dfs.datanode.https.address.ns.nn1': '0.0.0.0:50475',
+        'dfs.namenode.secondary.http-address.ns.nn1': 'c6401.ambari.apache.org:50090',
+        'dfs.datanode.http.address.ns.nn1': '0.0.0.0:50075',
+        'dfs.namenode.rpc-address.ns.nn2': 'c6402.ambari.apache.org:8020',
+        'dfs.namenode.https-address.ns.nn2': 'c6402.ambari.apache.org:50470',
+        'dfs.namenode.http-address.ns.nn2': 'c6402.ambari.apache.org:50070',
+        'dfs.datanode.https.address.ns.nn2': 'c6402.ambari.apache.org:50475',
+        'dfs.namenode.secondary.http-address.ns.nn2': 'c6402.ambari.apache.org:50090',
+        'dfs.datanode.http.address.ns.nn2': 'c6402.ambari.apache.org:50075',
+        'dfs.http.policy': 'HTTP_ONLY',
+        'dfs.journalnode.https-address': '0.0.0.0:8481',
+        'dfs.journalnode.http-address': '0.0.0.0:8480',
+      }
+    }
+    self.make_alert_tests(configs, conn_mock)
+
+  @patch.object(ambari_metrics_helper, 'get_metric_collectors_from_properties_file', new = MagicMock(return_value='c6401.ambari.apache.org:6188'))
+  @patch("httplib.HTTPConnection")
+  def test_alert_federation(self, conn_mock):
+    configs = {
+      '{{hdfs-site/dfs.namenode.https-address}}': 'c6401.ambari.apache.org:50470',
+      '{{hdfs-site/dfs.http.policy}}': 'HTTP_ONLY',
+      '{{ams-site/timeline.metrics.service.webapp.address}}': '0.0.0.0:6188',
+      '{{ams-site/timeline.metrics.service.http.policy}}' : 'HTTP_ONLY',
+      '{{hdfs-site/dfs.namenode.http-address}}': 'c6401.ambari.apache.org:50070',
+      '{{cluster-env/security_enabled}}': 'false',
+      '{{cluster-env/smokeuser}}': 'ambari-qa',
+      '{{hdfs-site/dfs.internal.nameservices}}': 'ns1,ns2',
+      '{{hdfs-site}}': {
+        'dfs.internal.nameservices': 'ns1,ns2',
+        'dfs.nameservices': 'ns1,ns2',
+        'dfs.ha.namenodes.ns1': 'nn1,nn2',
+        'dfs.datanode.address.ns1.nn1': 'c6401.ambari.apache.org:50010',
+        'dfs.namenode.rpc-address.ns1.nn1': 'c6401.ambari.apache.org:8020',
+        'dfs.namenode.https-address.ns1.nn1': 'c6401.ambari.apache.org:50470',
+        'dfs.namenode.http-address.ns1.nn1': 'c6401.ambari.apache.org:50070',
+        'dfs.datanode.https.address.ns1.nn1': 'c6401.ambari.apache.org:50475',
+        'dfs.namenode.secondary.http-address.ns1.nn1': 'c6401.ambari.apache.org:50090',
+        'dfs.datanode.http.address.ns1.nn1': '0.0.0.0:50075',
+        'dfs.namenode.rpc-address.ns1.nn2': 'c6402.ambari.apache.org:8020',
+        'dfs.namenode.https-address.ns1.nn2': 'c6402.ambari.apache.org:50470',
+        'dfs.namenode.http-address.ns1.nn2': 'c6402.ambari.apache.org:50070',
+        'dfs.datanode.https.address.ns1.nn2': 'c6402.ambari.apache.org:50475',
+        'dfs.namenode.secondary.http-address.ns1.nn2': 'c6402.ambari.apache.org:50090',
+        'dfs.datanode.http.address.ns1.nn2': 'c6402.ambari.apache.org:50075',
+
+        'dfs.ha.namenodes.ns2': 'nn3,nn4',
+        'dfs.namenode.rpc-address.ns2.nn3': 'c6403.ambari.apache.org:8020',
+        'dfs.namenode.https-address.ns2.nn3': 'c6403.ambari.apache.org:50470',
+        'dfs.namenode.http-address.ns2.nn3': 'c6403.ambari.apache.org:50070',
+        'dfs.datanode.https.address.ns2.nn3': 'c6403.ambari.apache.org:50475',
+        'dfs.namenode.secondary.http-address.ns2.nn3': 'c6403.ambari.apache.org:50090',
+        'dfs.datanode.http.address.ns2.nn3': '0.0.0.0:50075',
+        'dfs.namenode.rpc-address.ns2.nn4': 'c6404.ambari.apache.org:8020',
+        'dfs.namenode.https-address.ns2.nn4': 'c6404.ambari.apache.org:50470',
+        'dfs.namenode.http-address.ns2.nn4': 'c6404.ambari.apache.org:50070',
+        'dfs.datanode.https.address.ns2.nn4': 'c6404.ambari.apache.org:50475',
+        'dfs.namenode.secondary.http-address.ns2.nn4': 'c6404.ambari.apache.org:50090',
+        'dfs.datanode.http.address.ns2.nn4': 'c6404.ambari.apache.org:50075',
+        'dfs.http.policy': 'HTTP_ONLY',
+        'dfs.journalnode.https-address': '0.0.0.0:8481',
+        'dfs.journalnode.http-address': '0.0.0.0:8480',
+      }
+    }
+    self.make_alert_tests(configs, conn_mock, 'c6401.ambari.apache.org')
+
+
+  def make_alert_tests(self, configs, conn_mock, _host_name = None):
     connection = MagicMock()
     response = MagicMock()
     response.status = 200
@@ -135,29 +230,32 @@ class TestAlertMetricsDeviation(RMFTestCase):
     conn_mock.return_value = connection
     response.read.return_value = '{"metrics":[{"metricname":"metric1","metrics":{"1459966360838":1,"1459966370838":3}}]}'
 
+    alert._get_state_from_jmx = dummy_get_state_from_jmx
+    alert._get_ssl_version = dummy_get_ssl_version
+
     # OK, but no datapoints above the minimum threshold
-    [status, messages] = alert.execute(configurations=configs, parameters=parameters)
+    [status, messages] = alert.execute(configurations=configs, parameters=parameters, host_name=_host_name)
     self.assertEqual(status, RESULT_STATE_OK)
     self.assertTrue(messages is not None and len(messages) == 1)
     self.assertEquals('There were no data points above the minimum threshold of 30 seconds',messages[0])
 
     # Unable to calculate the standard deviation for 1 data point
     response.read.return_value = '{"metrics":[{"metricname":"metric1","metrics":{"1459966360838":40000}}]}'
-    [status, messages] = alert.execute(configurations=configs, parameters=parameters)
+    [status, messages] = alert.execute(configurations=configs, parameters=parameters, host_name=_host_name)
     self.assertEqual(status, RESULT_STATE_SKIPPED)
     self.assertTrue(messages is not None and len(messages) == 1)
     self.assertEquals('There are not enough data points to calculate the standard deviation (1 sampled)', messages[0])
 
     # OK
     response.read.return_value = '{"metrics":[{"metricname":"metric1","metrics":{"1459966360838":40000,"1459966370838":50000}}]}'
-    [status, messages] = alert.execute(configurations=configs, parameters=parameters)
+    [status, messages] = alert.execute(configurations=configs, parameters=parameters, host_name=_host_name)
     self.assertEqual(status, RESULT_STATE_OK)
     self.assertTrue(messages is not None and len(messages) == 1)
     self.assertEquals('The variance for this alert is 7071ms which is within 100% of the 45000ms average (45000ms is the limit)', messages[0])
 
     # Warning
     response.read.return_value = '{"metrics":[{"metricname":"metric1","metrics":{"1459966360838":40000,"1459966370838":1000000}}]}'
-    [status, messages] = alert.execute(configurations=configs, parameters=parameters)
+    [status, messages] = alert.execute(configurations=configs, parameters=parameters, host_name=_host_name)
     self.assertEqual(status, RESULT_STATE_WARNING)
     self.assertTrue(messages is not None and len(messages) == 1)
     self.assertEquals('The variance for this alert is 678823ms which is 131% of the 520000ms average (520000ms is the limit)', messages[0])
@@ -165,14 +263,14 @@ class TestAlertMetricsDeviation(RMFTestCase):
     # HTTP request to AMS failed
     response.read.return_value = ''
     response.status = 501
-    [status, messages] = alert.execute(configurations=configs, parameters=parameters)
+    [status, messages] = alert.execute(configurations=configs, parameters=parameters, host_name=_host_name)
     self.assertEqual(status, RESULT_STATE_UNKNOWN)
     self.assertTrue(messages is not None and len(messages) == 1)
     self.assertEquals('Unable to retrieve metrics from the Ambari Metrics service.', messages[0])
 
     # Unable to connect to AMS
     conn_mock.side_effect = Exception('Unable to connect to AMS')
-    [status, messages] = alert.execute(configurations=configs, parameters=parameters)
+    [status, messages] = alert.execute(configurations=configs, parameters=parameters, host_name=_host_name)
     self.assertEqual(status, RESULT_STATE_UNKNOWN)
     self.assertTrue(messages is not None and len(messages) == 1)
     self.assertEquals('Unable to retrieve metrics from the Ambari Metrics service.', messages[0])

-- 
To stop receiving notification emails like this one, please contact
adoroszlai@apache.org.