You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by nc...@apache.org on 2015/10/19 15:54:19 UTC
[33/50] [abbrv] ambari git commit: AMBARI-13427: NAMENODE START failed with both NN's being passive (jluniya)
AMBARI-13427: NAMENODE START failed with both NN's being passive (jluniya)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/3318eb68
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/3318eb68
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/3318eb68
Branch: refs/heads/branch-dev-patch-upgrade
Commit: 3318eb682611fffbe817815cdffdf6aca2aacfa2
Parents: dec7c8e
Author: Jayush Luniya <jl...@hortonworks.com>
Authored: Fri Oct 16 11:30:38 2015 -0700
Committer: Jayush Luniya <jl...@hortonworks.com>
Committed: Fri Oct 16 11:30:38 2015 -0700
----------------------------------------------------------------------
.../libraries/functions/decorator.py | 5 ++--
.../libraries/functions/namenode_ha_utils.py | 27 ++++++++++++++++++--
.../python/stacks/2.0.6/HDFS/test_namenode.py | 17 +++++++++++-
3 files changed, 44 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/3318eb68/ambari-common/src/main/python/resource_management/libraries/functions/decorator.py
----------------------------------------------------------------------
diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/decorator.py b/ambari-common/src/main/python/resource_management/libraries/functions/decorator.py
index cd653e5..1b45981 100644
--- a/ambari-common/src/main/python/resource_management/libraries/functions/decorator.py
+++ b/ambari-common/src/main/python/resource_management/libraries/functions/decorator.py
@@ -26,7 +26,7 @@ __all__ = ['retry', ]
from resource_management.core.logger import Logger
-def retry(times=3, sleep_time=1, backoff_factor=1, err_class=Exception):
+def retry(times=3, sleep_time=1, max_sleep_time=8, backoff_factor=1, err_class=Exception):
"""
Retry decorator for improved robustness of functions.
:param times: Number of times to attempt to call the function.
@@ -44,12 +44,13 @@ def retry(times=3, sleep_time=1, backoff_factor=1, err_class=Exception):
while _times > 1:
_times -= 1
- _sleep_time *= _backoff_factor
try:
return function(*args, **kwargs)
except _err_class, err:
Logger.info("Will retry %d time(s), caught exception: %s. Sleeping for %d sec(s)" % (_times, str(err), _sleep_time))
time.sleep(_sleep_time)
+ if(_sleep_time * _backoff_factor <= max_sleep_time):
+ _sleep_time *= _backoff_factor
return function(*args, **kwargs)
return wrapper
http://git-wip-us.apache.org/repos/asf/ambari/blob/3318eb68/ambari-common/src/main/python/resource_management/libraries/functions/namenode_ha_utils.py
----------------------------------------------------------------------
diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/namenode_ha_utils.py b/ambari-common/src/main/python/resource_management/libraries/functions/namenode_ha_utils.py
index 99f90b8..0920e85 100644
--- a/ambari-common/src/main/python/resource_management/libraries/functions/namenode_ha_utils.py
+++ b/ambari-common/src/main/python/resource_management/libraries/functions/namenode_ha_utils.py
@@ -23,6 +23,8 @@ from resource_management.libraries.functions.format import format
from resource_management.libraries.functions.jmx import get_value_from_jmx
from resource_management.core.base import Fail
from resource_management.core import shell
+from resource_management.core.logger import Logger
+from resource_management.libraries.functions.decorator import retry
__all__ = ["get_namenode_states", "get_active_namenode", "get_property_for_active_namenode"]
@@ -32,8 +34,29 @@ HDFS_NN_STATE_STANDBY = 'standby'
NAMENODE_HTTP_FRAGMENT = 'dfs.namenode.http-address.{0}.{1}'
NAMENODE_HTTPS_FRAGMENT = 'dfs.namenode.https-address.{0}.{1}'
JMX_URI_FRAGMENT = "{0}://{1}/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem"
-
-def get_namenode_states(hdfs_site, security_enabled, run_user):
+
+def get_namenode_states(hdfs_site, security_enabled, run_user, times=10, sleep_time=1, backoff_factor=2):
+ """
+ return format [('nn1', 'hdfs://hostname1:port1'), ('nn2', 'hdfs://hostname2:port2')] , [....], [....]
+ """
+ @retry(times=times, sleep_time=sleep_time, backoff_factor=backoff_factor, err_class=Fail)
+ def doRetries(hdfs_site, security_enabled, run_user):
+ doRetries.attempt += 1
+ active_namenodes, standby_namenodes, unknown_namenodes = get_namenode_states_noretries(hdfs_site, security_enabled, run_user)
+ Logger.info(
+ "NameNode HA states: active_namenodes = {0}, standby_namenodes = {1}, unknown_namenodes = {2}".format(
+ active_namenodes, standby_namenodes, unknown_namenodes))
+ if active_namenodes:
+ return active_namenodes, standby_namenodes, unknown_namenodes
+ elif doRetries.attempt == times:
+ Logger.warning("No active NameNode was found after {0} retries. Will return current NameNode HA states".format(times))
+ return active_namenodes, standby_namenodes, unknown_namenodes
+ raise Fail('No active NameNode was found.')
+
+ doRetries.attempt = 0
+ return doRetries(hdfs_site, security_enabled, run_user)
+
+def get_namenode_states_noretries(hdfs_site, security_enabled, run_user):
"""
return format [('nn1', 'hdfs://hostname1:port1'), ('nn2', 'hdfs://hostname2:port2')] , [....], [....]
"""
http://git-wip-us.apache.org/repos/asf/ambari/blob/3318eb68/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
index 3378892..e954a84 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
@@ -1275,10 +1275,16 @@ class TestNamenode(RMFTestCase):
put_structured_out_mock.assert_called_with({"securityState": "UNSECURED"})
- def test_upgrade_restart(self):
+ @patch("utils.get_namenode_states")
+ def test_upgrade_restart(self, get_namenode_states_mock):
# Execution of nn_ru_lzo invokes a code path that invokes lzo installation, which
# was failing in RU case. See hdfs.py and the lzo_enabled check that is in it.
# Just executing the script is enough to test the fix
+ active_namenodes = [('nn1', 'c6401.ambari.apache.org:50070')]
+ standby_namenodes = [('nn2', 'c6402.ambari.apache.org:50070')]
+ unknown_namenodes = []
+
+ get_namenode_states_mock.return_value = active_namenodes, standby_namenodes, unknown_namenodes
self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/namenode.py",
classname = "NameNode",
command = "restart",
@@ -1286,6 +1292,15 @@ class TestNamenode(RMFTestCase):
hdp_stack_version = self.STACK_VERSION,
target = RMFTestCase.TARGET_COMMON_SERVICES)
+ unknown_namenodes = active_namenodes
+ active_namenodes = []
+ get_namenode_states_mock.return_value = active_namenodes, standby_namenodes, unknown_namenodes
+ self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/namenode.py",
+ classname = "NameNode",
+ command = "restart",
+ config_file = "nn_ru_lzo.json",
+ hdp_stack_version = self.STACK_VERSION,
+ target = RMFTestCase.TARGET_COMMON_SERVICES)
def test_pre_rolling_restart(self):
config_file = self.get_src_folder()+"/test/python/stacks/2.0.6/configs/default.json"