You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by ol...@apache.org on 2016/04/25 12:04:53 UTC
ambari git commit: AMBARI-15991. DataNode and RegionServer during
upgrade are reported as "failed" incorrectly (part2) (Daniel Gergely via
oleewere)
Repository: ambari
Updated Branches:
refs/heads/trunk 8a20810b2 -> 0806468be
AMBARI-15991. DataNode and RegionServer during upgrade are reported as "failed" incorrectly (part2) (Daniel Gergely via oleewere)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/0806468b
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/0806468b
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/0806468b
Branch: refs/heads/trunk
Commit: 0806468bedeb764f2d97025cb086944e482ace84
Parents: 8a20810
Author: Daniel Gergely <dg...@hortonworks.com>
Authored: Mon Apr 25 12:01:46 2016 +0200
Committer: oleewere <ol...@gmail.com>
Committed: Mon Apr 25 12:01:46 2016 +0200
----------------------------------------------------------------------
.../HBASE/0.96.0.2.0/package/scripts/upgrade.py | 14 ++++++--------
.../2.1.0.2.0/package/scripts/datanode_upgrade.py | 16 +++++++---------
.../test/python/stacks/2.0.6/HDFS/test_datanode.py | 10 +++++-----
3 files changed, 18 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/0806468b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/upgrade.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/upgrade.py b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/upgrade.py
index bc68cc6..b1a19e6 100644
--- a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/upgrade.py
+++ b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/upgrade.py
@@ -41,14 +41,8 @@ def post_regionserver(env):
check_cmd = "echo 'status \"simple\"' | {0} shell".format(params.hbase_cmd)
exec_cmd = "{0} {1}".format(params.kinit_cmd, check_cmd)
- _wait_for_region_server_to_start(exec_cmd, params.hbase_user, params.hostname + ":", re.IGNORECASE)
+ call_and_match(exec_cmd, params.hbase_user, params.hostname + ":", re.IGNORECASE)
-@retry(times=3, sleep_time=300, err_class=Fail)
-def _wait_for_region_server_to_start(cmd, user, regex, regex_search_flags):
- if not is_region_server_process_running():
- Logger.info("RegionServer process is not running")
- raise Fail("RegionServer process is not running")
- call_and_match(cmd, user, regex, regex_search_flags)
def is_region_server_process_running():
try:
@@ -58,9 +52,13 @@ def is_region_server_process_running():
except ComponentIsNotRunning:
return False
-@retry(times=15, sleep_time=2, err_class=Fail)
+@retry(times=30, sleep_time=30, err_class=Fail) # keep trying for 15 mins
def call_and_match(cmd, user, regex, regex_search_flags):
+ if not is_region_server_process_running():
+ Logger.info("RegionServer process is not running")
+ raise Fail("RegionServer process is not running")
+
code, out = shell.call(cmd, user=user)
if not (out and re.search(regex, out, regex_search_flags)):
http://git-wip-us.apache.org/repos/asf/ambari/blob/0806468b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/datanode_upgrade.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/datanode_upgrade.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/datanode_upgrade.py
index c8e2eab..b55237d 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/datanode_upgrade.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/datanode_upgrade.py
@@ -73,13 +73,6 @@ def post_upgrade_check(hdfs_binary):
Execute(params.dn_kinit_cmd, user=params.hdfs_user)
# verify that the datanode has started and rejoined the HDFS cluster
- _wait_for_datanode_to_join(hdfs_binary)
-
-@retry(times=3, sleep_time=300, err_class=Fail)
-def _wait_for_datanode_to_join(hdfs_binary):
- if not is_datanode_process_running():
- Logger.info("DataNode process is not running")
- raise Fail("DataNode process is not running")
_check_datanode_startup(hdfs_binary)
@@ -125,16 +118,21 @@ def _check_datanode_shutdown(hdfs_binary):
raise Fail('DataNode has not shutdown.')
-@retry(times=12, sleep_time=10, err_class=Fail)
+@retry(times=30, sleep_time=30, err_class=Fail) # keep trying for 15 mins
def _check_datanode_startup(hdfs_binary):
"""
- Checks that a DataNode is reported as being alive via the
+ Checks that a DataNode process is running and DataNode is reported as being alive via the
"hdfs dfsadmin -fs {namenode_address} -report -live" command. Once the DataNode is found to be
alive this method will return, otherwise it will raise a Fail(...) and retry
automatically.
:param hdfs_binary: name/path of the HDFS binary to use
:return:
"""
+
+ if not is_datanode_process_running():
+ Logger.info("DataNode process is not running")
+ raise Fail("DataNode process is not running")
+
import params
import socket
http://git-wip-us.apache.org/repos/asf/ambari/blob/0806468b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_datanode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_datanode.py b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_datanode.py
index dbd76cf..90c12ca 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_datanode.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_datanode.py
@@ -515,7 +515,7 @@ class TestDatanode(RMFTestCase):
config_file = "default.json",
stack_version = self.STACK_VERSION,
target = RMFTestCase.TARGET_COMMON_SERVICES,
- call_mocks = [(0, shell_call_output)] * 3,
+ call_mocks = [(0, shell_call_output)],
mocks_dict = mocks_dict
)
@@ -535,13 +535,13 @@ class TestDatanode(RMFTestCase):
config_file = "default.json",
stack_version = self.STACK_VERSION,
target = RMFTestCase.TARGET_COMMON_SERVICES,
- call_mocks = [(0, 'There are no DataNodes here!')] * 36,
+ call_mocks = [(0, 'There are no DataNodes here!')] * 30,
mocks_dict = mocks_dict
)
self.fail('Missing DataNode should have caused a failure')
except Fail,fail:
self.assertTrue(mocks_dict['call'].called)
- self.assertEqual(mocks_dict['call'].call_count,36)
+ self.assertEqual(mocks_dict['call'].call_count,30)
@patch("socket.gethostbyname")
@@ -556,13 +556,13 @@ class TestDatanode(RMFTestCase):
config_file = "default.json",
stack_version = self.STACK_VERSION,
target = RMFTestCase.TARGET_COMMON_SERVICES,
- call_mocks = [(1, 'some')] * 36,
+ call_mocks = [(1, 'some')] * 30,
mocks_dict = mocks_dict
)
self.fail('Invalid return code should cause a failure')
except Fail,fail:
self.assertTrue(mocks_dict['call'].called)
- self.assertEqual(mocks_dict['call'].call_count,36)
+ self.assertEqual(mocks_dict['call'].call_count,30)
@patch("resource_management.core.shell.call")