You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by al...@apache.org on 2016/12/01 17:45:06 UTC
ambari git commit: AMBARI-18929. Yarn service check fails when either
resource manager is down in HA enabled cluster (Weiwei Yang via alejandro)
Repository: ambari
Updated Branches:
refs/heads/trunk 6100be638 -> 88e0c29e0
AMBARI-18929. Yarn service check fails when either resource manager is down in HA enabled cluster (Weiwei Yang via alejandro)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/88e0c29e
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/88e0c29e
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/88e0c29e
Branch: refs/heads/trunk
Commit: 88e0c29e0617f05c0ecb72a75e74b2bb3def6bac
Parents: 6100be6
Author: Alejandro Fernandez <af...@hortonworks.com>
Authored: Thu Dec 1 09:45:56 2016 -0800
Committer: Alejandro Fernandez <af...@hortonworks.com>
Committed: Thu Dec 1 09:45:56 2016 -0800
----------------------------------------------------------------------
.../2.1.0.2.0/package/scripts/service_check.py | 66 +++++++----
.../2.0.6/YARN/test_yarn_service_check.py | 111 ++++++++++---------
2 files changed, 100 insertions(+), 77 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/88e0c29e/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service_check.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service_check.py b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service_check.py
index c0bd480..b934767 100644
--- a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service_check.py
+++ b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service_check.py
@@ -130,34 +130,56 @@ class ServiceCheckDefault(ServiceCheck):
if "application" in item:
application_name = item
- for rm_webapp_address in params.rm_webapp_addresses_list:
- info_app_url = params.scheme + "://" + rm_webapp_address + "/ws/v1/cluster/apps/" + application_name
+ # Find out the active RM from RM list
+ # Raise an exception if the active rm cannot be determined
+ active_rm_webapp_address = self.get_active_rm_webapp_address()
+ Logger.info("Active Resource Manager web app address is : " + active_rm_webapp_address);
- get_app_info_cmd = "curl --negotiate -u : -ks --location-trusted --connect-timeout " + CURL_CONNECTION_TIMEOUT + " " + info_app_url
+ # Verify job state from active resource manager via rest api
+ info_app_url = params.scheme + "://" + active_rm_webapp_address + "/ws/v1/cluster/apps/" + application_name
+ get_app_info_cmd = "curl --negotiate -u : -ks --location-trusted --connect-timeout " + CURL_CONNECTION_TIMEOUT + " " + info_app_url
- return_code, stdout, _ = get_user_call_output(get_app_info_cmd,
- user=params.smokeuser,
- path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
- )
+ return_code, stdout, _ = get_user_call_output(get_app_info_cmd,
+ user=params.smokeuser,
+ path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
+ )
- # Handle HDP<2.2.8.1 where RM doesn't do automatic redirection from standby to active
- if stdout.startswith("This is standby RM. Redirecting to the current active RM:"):
- Logger.info(format("Skipped checking of {rm_webapp_address} since returned '{stdout}'"))
- continue
+ try:
+ json_response = json.loads(stdout)
+ except Exception as e:
+ raise Fail(format("Response from YARN API was not a valid JSON. Response: {stdout}"))
- try:
- json_response = json.loads(stdout)
- except Exception as e:
- raise Fail(format("Response from YARN API was not a valid JSON. Response: {stdout}"))
-
- if json_response is None or 'app' not in json_response or \
- 'state' not in json_response['app'] or 'finalStatus' not in json_response['app']:
- raise Fail("Application " + app_url + " returns invalid data.")
-
- if json_response['app']['state'] != "FINISHED" or json_response['app']['finalStatus'] != "SUCCEEDED":
- raise Fail("Application " + app_url + " state/status is not valid. Should be FINISHED/SUCCEEDED.")
+ if json_response is None or 'app' not in json_response or \
+ 'state' not in json_response['app'] or 'finalStatus' not in json_response['app']:
+ raise Fail("Application " + app_url + " returns invalid data.")
+ if json_response['app']['state'] != "FINISHED" or json_response['app']['finalStatus'] != "SUCCEEDED":
+ raise Fail("Application " + app_url + " state/status is not valid. Should be FINISHED/SUCCEEDED.")
+ def get_active_rm_webapp_address(self):
+ import params
+ active_rm_webapp_address = None
+ rm_webapp_addresses = params.rm_webapp_addresses_list
+ if rm_webapp_addresses is not None and len(rm_webapp_addresses) > 0:
+ for rm_webapp_address in rm_webapp_addresses:
+ rm_state_url = params.scheme + "://" + rm_webapp_address + "/ws/v1/cluster/info"
+ get_cluster_info_cmd = "curl --negotiate -u : -ks --location-trusted --connect-timeout " + CURL_CONNECTION_TIMEOUT + " " + rm_state_url
+ try:
+ return_code, stdout, _ = get_user_call_output(get_cluster_info_cmd,
+ user=params.smokeuser,
+ path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
+ )
+ json_response = json.loads(stdout)
+ if json_response is not None and 'clusterInfo' in json_response \
+ and json_response['clusterInfo']['haState'] == "ACTIVE":
+ active_rm_webapp_address = rm_webapp_address
+ break
+ except Exception as e:
+ Logger.warning(format("Cluster info is not available from calling {get_cluster_info_cmd}"))
+
+ if active_rm_webapp_address is None:
+ raise Fail('Resource Manager state is not available. Failed to determine the active Resource Manager web application address from {0}'.format(','.join(rm_webapp_addresses)));
+ return active_rm_webapp_address
if __name__ == "__main__":
ServiceCheck().execute()
http://git-wip-us.apache.org/repos/asf/ambari/blob/88e0c29e/ambari-server/src/test/python/stacks/2.0.6/YARN/test_yarn_service_check.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/YARN/test_yarn_service_check.py b/ambari-server/src/test/python/stacks/2.0.6/YARN/test_yarn_service_check.py
index bb671aa..fe7456d 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/YARN/test_yarn_service_check.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/YARN/test_yarn_service_check.py
@@ -22,11 +22,11 @@ import re
from mock.mock import MagicMock, call, patch
from stacks.utils.RMFTestCase import *
-curl_call = MagicMock(return_value=(0, "{ \"app\": {\"state\": \"FINISHED\",\"finalStatus\": \"SUCCEEDED\"}}",''))
+curl_returns = [(0, "{\"clusterInfo\":{\"id\": \"1471586271500\",\"haState\": \"ACTIVE\"}}",''),
+ (0, "{\"app\":{\"state\": \"FINISHED\",\"finalStatus\":\"SUCCEEDED\"}}",'')]
@patch("platform.linux_distribution", new = MagicMock(return_value="Linux"))
@patch("sys.executable", new = '/usr/bin/python2.6')
-@patch("resource_management.libraries.functions.get_user_call_output.get_user_call_output", new = curl_call)
class TestServiceCheck(RMFTestCase):
COMMON_SERVICES_PACKAGE_DIR = "YARN/2.1.0.2.0/package"
STACK_VERSION = "2.0.6"
@@ -38,32 +38,32 @@ class TestServiceCheck(RMFTestCase):
re_search_mock.return_value = m
m.group.return_value = "http://c6402.ambari.apache.org:8088/proxy/application_1429699682952_0010/"
- self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/service_check.py",
- classname="ServiceCheck",
- command="service_check",
- config_file="default.json",
- stack_version = self.STACK_VERSION,
- target = RMFTestCase.TARGET_COMMON_SERVICES,
- checked_call_mocks = [(0, "some test text, appTrackingUrl=http:"
- "//c6402.ambari.apache.org:8088/proxy/application_1429885383763_0001/, some test text")]
- )
- self.assertResourceCalled('HdfsResource', '/user/ambari-qa',
- immutable_paths = self.DEFAULT_IMMUTABLE_PATHS,
- security_enabled = False,
- hadoop_bin_dir = '/usr/bin',
- keytab = UnknownConfigurationMock(),
- kinit_path_local = '/usr/bin/kinit',
- user = 'hdfs',
- dfs_type = '',
- mode = 0770,
- owner = 'ambari-qa',
- action = ['create_on_execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore', hdfs_site=self.getConfig()['configurations']['hdfs-site'], principal_name=UnknownConfigurationMock(), default_fs='hdfs://c6401.ambari.apache.org:8020',
- hadoop_conf_dir = '/etc/hadoop/conf',
- type = 'directory',
- )
- self.assertCurlCallForwardsCredentialsOnRedirect()
- self.assertNoMoreResources()
-
+ with patch("resource_management.libraries.functions.get_user_call_output.get_user_call_output", side_effect = curl_returns) as mock_curl:
+ self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/service_check.py",
+ classname="ServiceCheck",
+ command="service_check",
+ config_file="default.json",
+ stack_version = self.STACK_VERSION,
+ target = RMFTestCase.TARGET_COMMON_SERVICES,
+ checked_call_mocks = [(0, "some test text, appTrackingUrl=http:"
+ "//c6402.ambari.apache.org:8088/proxy/application_1429885383763_0001/, some test text")]
+ )
+ self.assertResourceCalled('HdfsResource', '/user/ambari-qa',
+ immutable_paths = self.DEFAULT_IMMUTABLE_PATHS,
+ security_enabled = False,
+ hadoop_bin_dir = '/usr/bin',
+ keytab = UnknownConfigurationMock(),
+ kinit_path_local = '/usr/bin/kinit',
+ user = 'hdfs',
+ dfs_type = '',
+ mode = 0770,
+ owner = 'ambari-qa',
+ action = ['create_on_execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore', hdfs_site=self.getConfig()['configurations']['hdfs-site'], principal_name=UnknownConfigurationMock(), default_fs='hdfs://c6401.ambari.apache.org:8020',
+ hadoop_conf_dir = '/etc/hadoop/conf',
+ type = 'directory',
+ )
+ self.assertCurlCallForwardsCredentialsOnRedirect(mock_curl_call = mock_curl)
+ self.assertNoMoreResources()
@patch("re.search")
def test_service_check_secured(self, re_search_mock):
@@ -71,31 +71,32 @@ class TestServiceCheck(RMFTestCase):
re_search_mock.return_value = m
m.group.return_value = "http://c6402.ambari.apache.org:8088/proxy/application_1429699682952_0010/"
- self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/service_check.py",
- classname="ServiceCheck",
- command="service_check",
- config_file="secured.json",
- stack_version = self.STACK_VERSION,
- target = RMFTestCase.TARGET_COMMON_SERVICES,
- checked_call_mocks = [(0, "some test text, appTrackingUrl=http:"
- "//c6402.ambari.apache.org:8088/proxy/application_1429885383763_0001/, some test text")]
- )
- self.assertResourceCalled('HdfsResource', '/user/ambari-qa',
- immutable_paths = self.DEFAULT_IMMUTABLE_PATHS,
- security_enabled = True,
- hadoop_bin_dir = '/usr/bin',
- keytab = '/etc/security/keytabs/hdfs.headless.keytab',
- kinit_path_local = '/usr/bin/kinit',
- user = 'hdfs',
- dfs_type = '',
- mode = 0770,
- owner = 'ambari-qa',
- action = ['create_on_execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore', hdfs_site=self.getConfig()['configurations']['hdfs-site'], principal_name='hdfs', default_fs='hdfs://c6401.ambari.apache.org:8020',
- hadoop_conf_dir = '/etc/hadoop/conf',
- type = 'directory',
- )
- self.assertCurlCallForwardsCredentialsOnRedirect()
- self.assertNoMoreResources()
+ with patch("resource_management.libraries.functions.get_user_call_output.get_user_call_output", side_effect = curl_returns) as mock_curl:
+ self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/service_check.py",
+ classname="ServiceCheck",
+ command="service_check",
+ config_file="secured.json",
+ stack_version = self.STACK_VERSION,
+ target = RMFTestCase.TARGET_COMMON_SERVICES,
+ checked_call_mocks = [(0, "some test text, appTrackingUrl=http:"
+ "//c6402.ambari.apache.org:8088/proxy/application_1429885383763_0001/, some test text")]
+ )
+ self.assertResourceCalled('HdfsResource', '/user/ambari-qa',
+ immutable_paths = self.DEFAULT_IMMUTABLE_PATHS,
+ security_enabled = True,
+ hadoop_bin_dir = '/usr/bin',
+ keytab = '/etc/security/keytabs/hdfs.headless.keytab',
+ kinit_path_local = '/usr/bin/kinit',
+ user = 'hdfs',
+ dfs_type = '',
+ mode = 0770,
+ owner = 'ambari-qa',
+ action = ['create_on_execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore', hdfs_site=self.getConfig()['configurations']['hdfs-site'], principal_name='hdfs', default_fs='hdfs://c6401.ambari.apache.org:8020',
+ hadoop_conf_dir = '/etc/hadoop/conf',
+ type = 'directory',
+ )
+ self.assertCurlCallForwardsCredentialsOnRedirect(mock_curl_call = mock_curl)
+ self.assertNoMoreResources()
- def assertCurlCallForwardsCredentialsOnRedirect(self):
- self.assertIn('--location-trusted', curl_call.call_args[0][0])
\ No newline at end of file
+ def assertCurlCallForwardsCredentialsOnRedirect(self, mock_curl_call):
+ self.assertIn('--location-trusted', mock_curl_call.call_args[0][0])