You are viewing a plain text version of this content. (The canonical link was stripped during text extraction.)
Posted to commits@ambari.apache.org by jl...@apache.org on 2015/03/13 23:28:34 UTC
ambari git commit: AMBARI-10067: ZKFailoverController failed on
restart (jluniya)
Repository: ambari
Updated Branches:
refs/heads/trunk 88aed0b8e -> a5c571e97
AMBARI-10067: ZKFailoverController failed on restart (jluniya)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/a5c571e9
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/a5c571e9
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/a5c571e9
Branch: refs/heads/trunk
Commit: a5c571e973badd90da3cb34e6c8345a3d80c9e4c
Parents: 88aed0b
Author: Jayush Luniya <jl...@hortonworks.com>
Authored: Fri Mar 13 15:28:27 2015 -0700
Committer: Jayush Luniya <jl...@hortonworks.com>
Committed: Fri Mar 13 15:28:27 2015 -0700
----------------------------------------------------------------------
.../2.1.0.2.0/package/scripts/hdfs_namenode.py | 45 ++++++++++----------
.../2.1.0.2.0/package/scripts/zkfc_slave.py | 26 ++++++++++-
.../test/python/stacks/2.0.6/HDFS/test_zkfc.py | 16 +++----
3 files changed, 52 insertions(+), 35 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/a5c571e9/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
index c89eeba..615dd54 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
@@ -49,9 +49,10 @@ def namenode(action=None, do_format=True, rolling_restart=False, env=None):
group=params.user_group
)
- if params.dfs_ha_enabled:
- # if the current host is the standby NameNode in an HA deployment
- if params.hostname == params.dfs_ha_namenode_standby:
+ if params.dfs_ha_enabled and \
+ params.dfs_ha_namenode_standby is not None and \
+ params.hostname == params.dfs_ha_namenode_standby:
+ # if the current host is the standby NameNode in an HA deployment
# run the bootstrap command, to start the NameNode in standby mode
# this requires that the active NameNode is already up and running,
# so this execute should be re-tried upon failure, up to a timeout
@@ -178,25 +179,25 @@ def format_namenode(force=None):
recursive = True
)
else:
- if params.dfs_ha_namenode_active is not None:
- if params.hostname == params.dfs_ha_namenode_active:
- # check and run the format command in the HA deployment scenario
- # only format the "active" namenode in an HA deployment
- if force:
- ExecuteHadoop('namenode -format',
- kinit_override=True,
- bin_dir=params.hadoop_bin_dir,
- conf_dir=hadoop_conf_dir)
- else:
- if not is_namenode_formatted(params):
- Execute(format("yes Y | hdfs --config {hadoop_conf_dir} namenode -format"),
- user = params.hdfs_user,
- path = [params.hadoop_bin_dir]
+ if params.dfs_ha_namenode_active is not None and \
+ params.hostname == params.dfs_ha_namenode_active:
+ # check and run the format command in the HA deployment scenario
+ # only format the "active" namenode in an HA deployment
+ if force:
+ ExecuteHadoop('namenode -format',
+ kinit_override=True,
+ bin_dir=params.hadoop_bin_dir,
+ conf_dir=hadoop_conf_dir)
+ else:
+ if not is_namenode_formatted(params):
+ Execute(format("yes Y | hdfs --config {hadoop_conf_dir} namenode -format"),
+ user = params.hdfs_user,
+ path = [params.hadoop_bin_dir]
+ )
+ for m_dir in mark_dir:
+ Directory(m_dir,
+ recursive = True
)
- for m_dir in mark_dir:
- Directory(m_dir,
- recursive = True
- )
def is_namenode_formatted(params):
old_mark_dirs = params.namenode_formatted_old_mark_dirs
@@ -289,7 +290,7 @@ def bootstrap_standby_namenode(params):
Logger.info("Bootstrapping standby namenode: %s" % (bootstrap_cmd))
for i in range(iterations):
Logger.info('Try %d out of %d' % (i+1, iterations))
- code, out = shell.call(bootstrap_cmd, logoutput=True, user=params.hdfs_user)
+ code, out = shell.call(bootstrap_cmd, logoutput=False, user=params.hdfs_user)
if code == 0:
Logger.info("Standby namenode bootstrapped successfully")
return True
http://git-wip-us.apache.org/repos/asf/ambari/blob/a5c571e9/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/zkfc_slave.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/zkfc_slave.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/zkfc_slave.py
index 42dc7d3..533ea7a 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/zkfc_slave.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/zkfc_slave.py
@@ -48,8 +48,11 @@ class ZkfcSlave(Script):
# only run this format command if the active namenode hostname is set
# The Ambari UI HA Wizard prompts the user to run this command
# manually, so this guarantees it is only run in the Blueprints case
- if params.dfs_ha_enabled and params.dfs_ha_namenode_active is not None:
- Execute("hdfs zkfc -formatZK -force -nonInteractive", user=params.hdfs_user)
+ if params.dfs_ha_enabled and \
+ params.dfs_ha_namenode_active is not None:
+ success = initialize_ha_zookeeper(params)
+ if not success:
+ raise Fail("Could not initialize HA state in zookeeper")
utils.service(
action="start", name="zkfc", user=params.hdfs_user, create_pid_dir=True,
@@ -121,6 +124,25 @@ class ZkfcSlave(Script):
else:
self.put_structured_out({"securityState": "UNSECURED"})
+def initialize_ha_zookeeper(params):
+ try:
+ iterations = 10
+ formatZK_cmd = "hdfs zkfc -formatZK -nonInteractive"
+ Logger.info("Initialize HA state in ZooKeeper: %s" % (formatZK_cmd))
+ for i in range(iterations):
+ Logger.info('Try %d out of %d' % (i+1, iterations))
+ code, out = shell.call(formatZK_cmd, logoutput=False, user=params.hdfs_user)
+ if code == 0:
+ Logger.info("HA state initialized in ZooKeeper successfully")
+ return True
+ elif code == 2:
+ Logger.info("HA state already initialized in ZooKeeper")
+ return True
+ else:
+ Logger.warning('HA state initialization in ZooKeeper failed with %d error code. Will retry' % (code))
+ except Exception as ex:
+ Logger.error('HA state initialization in ZooKeeper threw an exception. Reason %s' %(str(ex)))
+ return False
if __name__ == "__main__":
ZkfcSlave().execute()
http://git-wip-us.apache.org/repos/asf/ambari/blob/a5c571e9/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_zkfc.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_zkfc.py b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_zkfc.py
index 8aa4871..bc15f3f 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_zkfc.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_zkfc.py
@@ -20,6 +20,7 @@ limitations under the License.
from stacks.utils.RMFTestCase import *
from ambari_commons import OSCheck
from mock.mock import MagicMock, patch
+from resource_management.core import shell
class TestZkfc(RMFTestCase):
COMMON_SERVICES_PACKAGE_DIR = "HDFS/2.1.0.2.0/package"
@@ -206,7 +207,7 @@ class TestZkfc(RMFTestCase):
)
self.assertNoMoreResources()
-
+ @patch.object(shell, "call", new=MagicMock(return_value=(0,"")))
def test_start_with_ha_active_namenode_bootstrap(self):
self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/zkfc_slave.py",
classname = "ZkfcSlave",
@@ -250,11 +251,7 @@ class TestZkfc(RMFTestCase):
group = 'hadoop',
mode = 0755
)
-
- # verify that the znode initialization occurs prior to ZKFC startup
- self.assertResourceCalled('Execute', 'hdfs zkfc -formatZK -force -nonInteractive',
- user = 'hdfs')
-
+ # TODO: verify that the znode initialization occurs prior to ZKFC startup
self.assertResourceCalled('Directory', '/var/run/hadoop/hdfs',
owner = 'hdfs',
recursive = True,
@@ -273,6 +270,7 @@ class TestZkfc(RMFTestCase):
)
self.assertNoMoreResources()
+ @patch.object(shell, "call", new=MagicMock(return_value=(2,"")))
def test_start_with_ha_standby_namenode_bootstrap(self):
self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/zkfc_slave.py",
classname = "ZkfcSlave",
@@ -316,11 +314,7 @@ class TestZkfc(RMFTestCase):
group = 'hadoop',
mode = 0755
)
-
- # verify that the znode initialization occurs prior to ZKFC startup
- self.assertResourceCalled('Execute', 'hdfs zkfc -formatZK -force -nonInteractive',
- user = 'hdfs')
-
+ # TODO: verify that the znode initialization occurs prior to ZKFC startup
self.assertResourceCalled('Directory', '/var/run/hadoop/hdfs',
owner = 'hdfs',
recursive = True,