You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by mp...@apache.org on 2015/11/18 22:16:41 UTC
ambari git commit: AMBARI-13856. Sometimes when HA is enabled
NameNode does not wait to leave safe mode on start. (mpapirkovskyy)
Repository: ambari
Updated Branches:
refs/heads/branch-2.1 2c78fa9c7 -> 71a5c1097
AMBARI-13856. Sometimes when HA is enabled NameNode does not wait to leave safe mode on start. (mpapirkovskyy)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/71a5c109
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/71a5c109
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/71a5c109
Branch: refs/heads/branch-2.1
Commit: 71a5c1097e093b86e1c3996def2da0cd044d0707
Parents: 2c78fa9
Author: Myroslav Papirkovskyi <mp...@hortonworks.com>
Authored: Wed Nov 18 23:16:23 2015 +0200
Committer: Myroslav Papirkovskyi <mp...@hortonworks.com>
Committed: Wed Nov 18 23:16:35 2015 +0200
----------------------------------------------------------------------
.../2.1.0.2.0/package/scripts/hdfs_namenode.py | 37 ++++++-
.../python/stacks/2.0.6/HDFS/test_namenode.py | 108 ++++++++++++++++++-
2 files changed, 139 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/71a5c109/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
index f944b8d..44119ab 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
@@ -17,7 +17,7 @@ limitations under the License.
"""
import os.path
-
+import time
from resource_management.core import shell
from resource_management.core.source import Template
@@ -143,9 +143,8 @@ def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None, e
check_for_safemode_off = True
msg = "Must wait to leave safemode since High Availability is enabled during a Stack Upgrade"
else:
- # During normal operations, the NameNode is expected to be up.
- code, out = shell.call(is_active_namenode_cmd, logoutput=True) # If active NN, code will be 0
- if code == 0: # active
+ Logger.info("Wait for NameNode to become active.")
+ if is_active_namenode(hdfs_binary): # active
check_for_safemode_off = True
msg = "Must wait to leave safemode since High Availability is enabled and this is the Active NameNode."
else:
@@ -434,3 +433,33 @@ def bootstrap_standby_namenode(params, use_path=False):
except Exception as ex:
Logger.error('Bootstrap standby namenode threw an exception. Reason %s' %(str(ex)))
return False
+
+
+def is_active_namenode(hdfs_binary):
+ """
+  Checks if current NameNode is active. Waits up to ~24 seconds (5 attempts with a 6-second sleep between them, no sleep after the last). If the other NameNode is active returns False.
+ :return: True if current NameNode is active, False otherwise
+ """
+ import params
+
+ if params.dfs_ha_enabled:
+ is_active_this_namenode_cmd = as_user(format("{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir})
+ is_active_other_namenode_cmd = as_user(format("{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {other_namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir})
+
+ for i in range(0, 5):
+ code, out = shell.call(is_active_this_namenode_cmd) # If active NN, code will be 0
+ if code == 0: # active
+ return True
+
+ code, out = shell.call(is_active_other_namenode_cmd) # If other NN is active, code will be 0
+ if code == 0: # other NN is active
+ return False
+
+ if i < 4: # Do not sleep after last iteration
+ time.sleep(6)
+
+ Logger.info("Active NameNode is not found.")
+ return False
+
+ else:
+ return True
http://git-wip-us.apache.org/repos/asf/ambari/blob/71a5c109/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
index f18d501..ef3c5a5 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
@@ -21,6 +21,7 @@ from ambari_commons import OSCheck
import json
import os
import tempfile
+import time
from stacks.utils.RMFTestCase import *
from mock.mock import MagicMock, patch, call
import resource_management
@@ -469,6 +470,109 @@ class TestNamenode(RMFTestCase):
)
self.assertNoMoreResources()
+ @patch.object(shell, "call")
+ @patch.object(time, "sleep")
+ def test_start_ha_default_active_with_retry(self, sleep_mock, call_mocks):
+ call_mocks = MagicMock()
+ call_mocks.side_effect = [(1, None), (1, None), (1, None), (1, None), (0, None)]
+
+ self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/namenode.py",
+ classname = "NameNode",
+ command = "start",
+ config_file = "ha_default.json",
+ hdp_stack_version = self.STACK_VERSION,
+ target = RMFTestCase.TARGET_COMMON_SERVICES,
+ call_mocks = call_mocks
+ )
+ self.assert_configure_default()
+ self.assertResourceCalled('File', '/etc/hadoop/conf/dfs.exclude',
+ owner = 'hdfs',
+ content = Template('exclude_hosts_list.j2'),
+ group = 'hadoop',
+ )
+ self.assertResourceCalled('Directory', '/var/run/hadoop',
+ owner = 'hdfs',
+ group = 'hadoop',
+ mode = 0755
+ )
+ self.assertResourceCalled('Directory', '/var/run/hadoop/hdfs',
+ owner = 'hdfs',
+ recursive = True,
+ )
+ self.assertResourceCalled('Directory', '/var/log/hadoop/hdfs',
+ owner = 'hdfs',
+ recursive = True,
+ )
+ self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid',
+ action = ['delete'],
+ not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid",
+ )
+ self.assertResourceCalled('Execute', "ambari-sudo.sh su hdfs -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]ulimit -c unlimited ; /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf start namenode'",
+ environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
+ not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid",
+ )
+ self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://ns1 -safemode get | grep 'Safe mode is OFF'",
+ tries=180,
+ try_sleep=10,
+ user="hdfs",
+ logoutput=True
+ )
+ self.assertResourceCalled('HdfsResource', '/tmp',
+ security_enabled = False,
+ only_if = "ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn1 | grep active'",
+ keytab = UnknownConfigurationMock(),
+ hadoop_bin_dir = '/usr/bin',
+ default_fs = 'hdfs://ns1',
+ hdfs_site = self.getConfig()['configurations']['hdfs-site'],
+ kinit_path_local = '/usr/bin/kinit',
+ principal_name = None,
+ user = 'hdfs',
+ owner = 'hdfs',
+ hadoop_conf_dir = '/etc/hadoop/conf',
+ type = 'directory',
+ action = ['create_on_execute'],
+ mode = 0777,
+ )
+ self.assertResourceCalled('HdfsResource', '/user/ambari-qa',
+ security_enabled = False,
+ only_if = "ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn1 | grep active'",
+ keytab = UnknownConfigurationMock(),
+ hadoop_bin_dir = '/usr/bin',
+ default_fs = 'hdfs://ns1',
+ hdfs_site = self.getConfig()['configurations']['hdfs-site'],
+ kinit_path_local = '/usr/bin/kinit',
+ principal_name = None,
+ user = 'hdfs',
+ owner = 'ambari-qa',
+ hadoop_conf_dir = '/etc/hadoop/conf',
+ type = 'directory',
+ action = ['create_on_execute'],
+ mode = 0770,
+ )
+ self.assertResourceCalled('HdfsResource', None,
+ security_enabled = False,
+ only_if = "ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn1 | grep active'",
+ keytab = UnknownConfigurationMock(),
+ hadoop_bin_dir = '/usr/bin',
+ default_fs = 'hdfs://ns1',
+ hdfs_site = self.getConfig()['configurations']['hdfs-site'],
+ kinit_path_local = '/usr/bin/kinit',
+ principal_name = None,
+ user = 'hdfs',
+ action = ['execute'],
+ hadoop_conf_dir = '/etc/hadoop/conf',
+ )
+ self.assertNoMoreResources()
+ self.assertTrue(call_mocks.called)
+ self.assertEqual(5, call_mocks.call_count)
+ calls = [
+ call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn1 | grep active'"),
+ call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'"),
+ call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn1 | grep active'"),
+ call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'"),
+ call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn1 | grep active'")]
+ call_mocks.assert_has_calls(calls)
+
def test_start_ha_secured(self):
self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/namenode.py",
classname = "NameNode",
@@ -762,7 +866,7 @@ class TestNamenode(RMFTestCase):
self.assertEqual(2, call_mocks.call_count)
calls = [
call('hdfs namenode -bootstrapStandby -nonInteractive', logoutput=False, user=u'hdfs'),
- call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'", logoutput=True)]
+ call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'")]
call_mocks.assert_has_calls(calls, any_order=False)
# tests namenode start command when NameNode HA is enabled, and
@@ -868,7 +972,7 @@ class TestNamenode(RMFTestCase):
self.assertTrue(call_mocks.called)
self.assertEqual(3, call_mocks.call_count)
calls = [
- call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'", logoutput=True),
+ call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'"),
call('hdfs namenode -bootstrapStandby -nonInteractive -force', logoutput=False, user=u'hdfs'),
call('hdfs namenode -bootstrapStandby -nonInteractive -force', logoutput=False, user=u'hdfs')]
call_mocks.assert_has_calls(calls, any_order=True)