You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by al...@apache.org on 2016/12/01 17:45:06 UTC

ambari git commit: AMBARI-18929. Yarn service check fails when either resource manager is down in HA enabled cluster (Weiwei Yang via alejandro)

Repository: ambari
Updated Branches:
  refs/heads/trunk 6100be638 -> 88e0c29e0


AMBARI-18929. Yarn service check fails when either resource manager is down in HA enabled cluster (Weiwei Yang via alejandro)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/88e0c29e
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/88e0c29e
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/88e0c29e

Branch: refs/heads/trunk
Commit: 88e0c29e0617f05c0ecb72a75e74b2bb3def6bac
Parents: 6100be6
Author: Alejandro Fernandez <af...@hortonworks.com>
Authored: Thu Dec 1 09:45:56 2016 -0800
Committer: Alejandro Fernandez <af...@hortonworks.com>
Committed: Thu Dec 1 09:45:56 2016 -0800

----------------------------------------------------------------------
 .../2.1.0.2.0/package/scripts/service_check.py  |  66 +++++++----
 .../2.0.6/YARN/test_yarn_service_check.py       | 111 ++++++++++---------
 2 files changed, 100 insertions(+), 77 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/88e0c29e/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service_check.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service_check.py b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service_check.py
index c0bd480..b934767 100644
--- a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service_check.py
+++ b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service_check.py
@@ -130,34 +130,56 @@ class ServiceCheckDefault(ServiceCheck):
       if "application" in item:
         application_name = item
 
-    for rm_webapp_address in params.rm_webapp_addresses_list:
-      info_app_url = params.scheme + "://" + rm_webapp_address + "/ws/v1/cluster/apps/" + application_name
+    # Find out the active RM from RM list
+    # Raise an exception if the active rm cannot be determined
+    active_rm_webapp_address = self.get_active_rm_webapp_address()
+    Logger.info("Active Resource Manager web app address is : " + active_rm_webapp_address);
 
-      get_app_info_cmd = "curl --negotiate -u : -ks --location-trusted --connect-timeout " + CURL_CONNECTION_TIMEOUT + " " + info_app_url
+    # Verify job state from active resource manager via rest api
+    info_app_url = params.scheme + "://" + active_rm_webapp_address + "/ws/v1/cluster/apps/" + application_name
+    get_app_info_cmd = "curl --negotiate -u : -ks --location-trusted --connect-timeout " + CURL_CONNECTION_TIMEOUT + " " + info_app_url
 
-      return_code, stdout, _ = get_user_call_output(get_app_info_cmd,
-                                            user=params.smokeuser,
-                                            path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
-                                            )
+    return_code, stdout, _ = get_user_call_output(get_app_info_cmd,
+                                                  user=params.smokeuser,
+                                                  path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
+                                                  )
 
-      # Handle HDP<2.2.8.1 where RM doesn't do automatic redirection from standby to active
-      if stdout.startswith("This is standby RM. Redirecting to the current active RM:"):
-        Logger.info(format("Skipped checking of {rm_webapp_address} since returned '{stdout}'"))
-        continue
+    try:
+      json_response = json.loads(stdout)
+    except Exception as e:
+      raise Fail(format("Response from YARN API was not a valid JSON. Response: {stdout}"))
 
-      try:
-        json_response = json.loads(stdout)
-      except Exception as e:
-        raise Fail(format("Response from YARN API was not a valid JSON. Response: {stdout}"))
-      
-      if json_response is None or 'app' not in json_response or \
-              'state' not in json_response['app'] or 'finalStatus' not in json_response['app']:
-        raise Fail("Application " + app_url + " returns invalid data.")
-
-      if json_response['app']['state'] != "FINISHED" or json_response['app']['finalStatus'] != "SUCCEEDED":
-        raise Fail("Application " + app_url + " state/status is not valid. Should be FINISHED/SUCCEEDED.")
+    if json_response is None or 'app' not in json_response or \
+            'state' not in json_response['app'] or 'finalStatus' not in json_response['app']:
+      raise Fail("Application " + app_url + " returns invalid data.")
 
+    if json_response['app']['state'] != "FINISHED" or json_response['app']['finalStatus'] != "SUCCEEDED":
+      raise Fail("Application " + app_url + " state/status is not valid. Should be FINISHED/SUCCEEDED.")
 
+  def get_active_rm_webapp_address(self):
+    import params
+    active_rm_webapp_address = None
+    rm_webapp_addresses = params.rm_webapp_addresses_list
+    if rm_webapp_addresses is not None and len(rm_webapp_addresses) > 0:
+      for rm_webapp_address in rm_webapp_addresses:
+        rm_state_url = params.scheme + "://" + rm_webapp_address + "/ws/v1/cluster/info"
+        get_cluster_info_cmd = "curl --negotiate -u : -ks --location-trusted --connect-timeout " + CURL_CONNECTION_TIMEOUT + " " + rm_state_url
+        try:
+          return_code, stdout, _ = get_user_call_output(get_cluster_info_cmd,
+                                                        user=params.smokeuser,
+                                                        path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
+                                                        )
+          json_response = json.loads(stdout)
+          if json_response is not None and 'clusterInfo' in json_response \
+            and json_response['clusterInfo']['haState'] == "ACTIVE":
+              active_rm_webapp_address = rm_webapp_address
+              break
+        except Exception as e:
+          Logger.warning(format("Cluster info is not available from calling {get_cluster_info_cmd}"))
+
+    if active_rm_webapp_address is None:
+      raise Fail('Resource Manager state is not available. Failed to determine the active Resource Manager web application address from {0}'.format(','.join(rm_webapp_addresses)));
+    return active_rm_webapp_address
 
 if __name__ == "__main__":
   ServiceCheck().execute()

http://git-wip-us.apache.org/repos/asf/ambari/blob/88e0c29e/ambari-server/src/test/python/stacks/2.0.6/YARN/test_yarn_service_check.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/YARN/test_yarn_service_check.py b/ambari-server/src/test/python/stacks/2.0.6/YARN/test_yarn_service_check.py
index bb671aa..fe7456d 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/YARN/test_yarn_service_check.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/YARN/test_yarn_service_check.py
@@ -22,11 +22,11 @@ import re
 from mock.mock import MagicMock, call, patch
 from stacks.utils.RMFTestCase import *
 
-curl_call = MagicMock(return_value=(0, "{ \"app\": {\"state\": \"FINISHED\",\"finalStatus\": \"SUCCEEDED\"}}",''))
+curl_returns = [(0, "{\"clusterInfo\":{\"id\": \"1471586271500\",\"haState\": \"ACTIVE\"}}",''),
+                         (0, "{\"app\":{\"state\": \"FINISHED\",\"finalStatus\":\"SUCCEEDED\"}}",'')]
 
 @patch("platform.linux_distribution", new = MagicMock(return_value="Linux"))
 @patch("sys.executable", new = '/usr/bin/python2.6')
-@patch("resource_management.libraries.functions.get_user_call_output.get_user_call_output", new = curl_call)
 class TestServiceCheck(RMFTestCase):
   COMMON_SERVICES_PACKAGE_DIR = "YARN/2.1.0.2.0/package"
   STACK_VERSION = "2.0.6"
@@ -38,32 +38,32 @@ class TestServiceCheck(RMFTestCase):
     re_search_mock.return_value = m
     m.group.return_value = "http://c6402.ambari.apache.org:8088/proxy/application_1429699682952_0010/"
 
-    self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/service_check.py",
-                          classname="ServiceCheck",
-                          command="service_check",
-                          config_file="default.json",
-                          stack_version = self.STACK_VERSION,
-                          target = RMFTestCase.TARGET_COMMON_SERVICES,
-                          checked_call_mocks = [(0, "some test text, appTrackingUrl=http:"
-                                "//c6402.ambari.apache.org:8088/proxy/application_1429885383763_0001/, some test text")]
-    )
-    self.assertResourceCalled('HdfsResource', '/user/ambari-qa',
-                              immutable_paths = self.DEFAULT_IMMUTABLE_PATHS,
-                              security_enabled = False,
-                              hadoop_bin_dir = '/usr/bin',
-                              keytab = UnknownConfigurationMock(),
-                              kinit_path_local = '/usr/bin/kinit',
-                              user = 'hdfs',
-                              dfs_type = '',
-                              mode = 0770,
-                              owner = 'ambari-qa',
-                              action = ['create_on_execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore', hdfs_site=self.getConfig()['configurations']['hdfs-site'], principal_name=UnknownConfigurationMock(), default_fs='hdfs://c6401.ambari.apache.org:8020',
-                              hadoop_conf_dir = '/etc/hadoop/conf',
-                              type = 'directory',
-                              )
-    self.assertCurlCallForwardsCredentialsOnRedirect()
-    self.assertNoMoreResources()
-
+    with patch("resource_management.libraries.functions.get_user_call_output.get_user_call_output", side_effect = curl_returns) as mock_curl:
+        self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/service_check.py",
+                           classname="ServiceCheck",
+                           command="service_check",
+                           config_file="default.json",
+                           stack_version = self.STACK_VERSION,
+                           target = RMFTestCase.TARGET_COMMON_SERVICES,
+                           checked_call_mocks = [(0, "some test text, appTrackingUrl=http:"
+                                                  "//c6402.ambari.apache.org:8088/proxy/application_1429885383763_0001/, some test text")]
+                           )
+        self.assertResourceCalled('HdfsResource', '/user/ambari-qa',
+                                  immutable_paths = self.DEFAULT_IMMUTABLE_PATHS,
+                                  security_enabled = False,
+                                  hadoop_bin_dir = '/usr/bin',
+                                  keytab = UnknownConfigurationMock(),
+                                  kinit_path_local = '/usr/bin/kinit',
+                                  user = 'hdfs',
+                                  dfs_type = '',
+                                  mode = 0770,
+                                  owner = 'ambari-qa',
+                                  action = ['create_on_execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore', hdfs_site=self.getConfig()['configurations']['hdfs-site'], principal_name=UnknownConfigurationMock(), default_fs='hdfs://c6401.ambari.apache.org:8020',
+                                  hadoop_conf_dir = '/etc/hadoop/conf',
+                                  type = 'directory',
+                                  )
+        self.assertCurlCallForwardsCredentialsOnRedirect(mock_curl_call = mock_curl)
+        self.assertNoMoreResources()
 
   @patch("re.search")
   def test_service_check_secured(self, re_search_mock):
@@ -71,31 +71,32 @@ class TestServiceCheck(RMFTestCase):
     re_search_mock.return_value = m
     m.group.return_value = "http://c6402.ambari.apache.org:8088/proxy/application_1429699682952_0010/"
 
-    self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/service_check.py",
-                          classname="ServiceCheck",
-                          command="service_check",
-                          config_file="secured.json",
-                          stack_version = self.STACK_VERSION,
-                          target = RMFTestCase.TARGET_COMMON_SERVICES,
-                          checked_call_mocks = [(0, "some test text, appTrackingUrl=http:"
-                               "//c6402.ambari.apache.org:8088/proxy/application_1429885383763_0001/, some test text")]
-    )
-    self.assertResourceCalled('HdfsResource', '/user/ambari-qa',
-                              immutable_paths = self.DEFAULT_IMMUTABLE_PATHS,
-                              security_enabled = True,
-                              hadoop_bin_dir = '/usr/bin',
-                              keytab = '/etc/security/keytabs/hdfs.headless.keytab',
-                              kinit_path_local = '/usr/bin/kinit',
-                              user = 'hdfs',
-                              dfs_type = '',
-                              mode = 0770,
-                              owner = 'ambari-qa',
-                              action = ['create_on_execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore', hdfs_site=self.getConfig()['configurations']['hdfs-site'], principal_name='hdfs', default_fs='hdfs://c6401.ambari.apache.org:8020',
-                              hadoop_conf_dir = '/etc/hadoop/conf',
-                              type = 'directory',
-                              )
-    self.assertCurlCallForwardsCredentialsOnRedirect()
-    self.assertNoMoreResources()
+    with patch("resource_management.libraries.functions.get_user_call_output.get_user_call_output", side_effect = curl_returns) as mock_curl:
+        self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/service_check.py",
+                           classname="ServiceCheck",
+                           command="service_check",
+                           config_file="secured.json",
+                           stack_version = self.STACK_VERSION,
+                           target = RMFTestCase.TARGET_COMMON_SERVICES,
+                           checked_call_mocks = [(0, "some test text, appTrackingUrl=http:"
+                                                  "//c6402.ambari.apache.org:8088/proxy/application_1429885383763_0001/, some test text")]
+                           )
+        self.assertResourceCalled('HdfsResource', '/user/ambari-qa',
+                                  immutable_paths = self.DEFAULT_IMMUTABLE_PATHS,
+                                  security_enabled = True,
+                                  hadoop_bin_dir = '/usr/bin',
+                                  keytab = '/etc/security/keytabs/hdfs.headless.keytab',
+                                  kinit_path_local = '/usr/bin/kinit',
+                                  user = 'hdfs',
+                                  dfs_type = '',
+                                  mode = 0770,
+                                  owner = 'ambari-qa',
+                                  action = ['create_on_execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore', hdfs_site=self.getConfig()['configurations']['hdfs-site'], principal_name='hdfs', default_fs='hdfs://c6401.ambari.apache.org:8020',
+                                  hadoop_conf_dir = '/etc/hadoop/conf',
+                                  type = 'directory',
+                                  )
+        self.assertCurlCallForwardsCredentialsOnRedirect(mock_curl_call = mock_curl)
+        self.assertNoMoreResources()
 
-  def assertCurlCallForwardsCredentialsOnRedirect(self):
-    self.assertIn('--location-trusted', curl_call.call_args[0][0])
\ No newline at end of file
+  def assertCurlCallForwardsCredentialsOnRedirect(self, mock_curl_call):
+    self.assertIn('--location-trusted', mock_curl_call.call_args[0][0])