You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by mp...@apache.org on 2015/11/18 22:16:41 UTC

ambari git commit: AMBARI-13856. Sometimes when HA is enabled NameNode does not wait to leave safe mode on start. (mpapirkovskyy)

Repository: ambari
Updated Branches:
  refs/heads/branch-2.1 2c78fa9c7 -> 71a5c1097


AMBARI-13856. Sometimes when HA is enabled NameNode does not wait to leave safe mode on start. (mpapirkovskyy)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/71a5c109
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/71a5c109
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/71a5c109

Branch: refs/heads/branch-2.1
Commit: 71a5c1097e093b86e1c3996def2da0cd044d0707
Parents: 2c78fa9
Author: Myroslav Papirkovskyi <mp...@hortonworks.com>
Authored: Wed Nov 18 23:16:23 2015 +0200
Committer: Myroslav Papirkovskyi <mp...@hortonworks.com>
Committed: Wed Nov 18 23:16:35 2015 +0200

----------------------------------------------------------------------
 .../2.1.0.2.0/package/scripts/hdfs_namenode.py  |  37 ++++++-
 .../python/stacks/2.0.6/HDFS/test_namenode.py   | 108 ++++++++++++++++++-
 2 files changed, 139 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/71a5c109/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
index f944b8d..44119ab 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
@@ -17,7 +17,7 @@ limitations under the License.
 
 """
 import os.path
-
+import time
 
 from resource_management.core import shell
 from resource_management.core.source import Template
@@ -143,9 +143,8 @@ def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None, e
         check_for_safemode_off = True
         msg = "Must wait to leave safemode since High Availability is enabled during a Stack Upgrade"
       else:
-        # During normal operations, the NameNode is expected to be up.
-        code, out = shell.call(is_active_namenode_cmd, logoutput=True) # If active NN, code will be 0
-        if code == 0: # active
+        Logger.info("Wait for NameNode to become active.")
+        if is_active_namenode(hdfs_binary): # active
           check_for_safemode_off = True
           msg = "Must wait to leave safemode since High Availability is enabled and this is the Active NameNode."
         else:
@@ -434,3 +433,33 @@ def bootstrap_standby_namenode(params, use_path=False):
   except Exception as ex:
     Logger.error('Bootstrap standby namenode threw an exception. Reason %s' %(str(ex)))
   return False
+
+
+def is_active_namenode(hdfs_binary):
+  """
+  Checks if the current NameNode is active, polling up to 5 times with a 6-second pause between attempts. Returns False as soon as the other NameNode is found to be active.
+  :return: True if the current NameNode is active (or HA is not enabled), False otherwise
+  """
+  import params
+
+  if params.dfs_ha_enabled:
+    is_active_this_namenode_cmd = as_user(format("{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir})
+    is_active_other_namenode_cmd = as_user(format("{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {other_namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir})
+
+    for i in range(0, 5):
+      code, out = shell.call(is_active_this_namenode_cmd) # If active NN, code will be 0
+      if code == 0: # active
+        return True
+
+      code, out = shell.call(is_active_other_namenode_cmd) # If other NN is active, code will be 0
+      if code == 0: # other NN is active
+        return False
+
+      if i < 4: # Do not sleep after last iteration
+        time.sleep(6)
+
+    Logger.info("Active NameNode is not found.")
+    return False
+
+  else:
+    return True

http://git-wip-us.apache.org/repos/asf/ambari/blob/71a5c109/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
index f18d501..ef3c5a5 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
@@ -21,6 +21,7 @@ from ambari_commons import OSCheck
 import json
 import os
 import tempfile
+import time
 from stacks.utils.RMFTestCase import *
 from mock.mock import MagicMock, patch, call
 import resource_management
@@ -469,6 +470,109 @@ class TestNamenode(RMFTestCase):
     )
     self.assertNoMoreResources()
 
+  @patch.object(shell, "call")
+  @patch.object(time, "sleep")
+  def test_start_ha_default_active_with_retry(self, sleep_mock, call_mocks):
+    call_mocks = MagicMock()
+    call_mocks.side_effect = [(1, None), (1, None), (1, None), (1, None), (0, None)]
+
+    self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/namenode.py",
+                       classname = "NameNode",
+                       command = "start",
+                       config_file = "ha_default.json",
+                       hdp_stack_version = self.STACK_VERSION,
+                       target = RMFTestCase.TARGET_COMMON_SERVICES,
+                       call_mocks = call_mocks
+    )
+    self.assert_configure_default()
+    self.assertResourceCalled('File', '/etc/hadoop/conf/dfs.exclude',
+                              owner = 'hdfs',
+                              content = Template('exclude_hosts_list.j2'),
+                              group = 'hadoop',
+                              )
+    self.assertResourceCalled('Directory', '/var/run/hadoop',
+                              owner = 'hdfs',
+                              group = 'hadoop',
+                              mode = 0755
+                              )
+    self.assertResourceCalled('Directory', '/var/run/hadoop/hdfs',
+                              owner = 'hdfs',
+                              recursive = True,
+                              )
+    self.assertResourceCalled('Directory', '/var/log/hadoop/hdfs',
+                              owner = 'hdfs',
+                              recursive = True,
+                              )
+    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid',
+        action = ['delete'],
+        not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid",
+    )
+    self.assertResourceCalled('Execute', "ambari-sudo.sh su hdfs -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]ulimit -c unlimited ;  /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf start namenode'",
+        environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
+        not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid",
+    )
+    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://ns1 -safemode get | grep 'Safe mode is OFF'",
+        tries=180,
+        try_sleep=10,
+        user="hdfs",
+        logoutput=True
+    )
+    self.assertResourceCalled('HdfsResource', '/tmp',
+        security_enabled = False,
+        only_if = "ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn1 | grep active'",
+        keytab = UnknownConfigurationMock(),
+        hadoop_bin_dir = '/usr/bin',
+        default_fs = 'hdfs://ns1',
+        hdfs_site = self.getConfig()['configurations']['hdfs-site'],
+        kinit_path_local = '/usr/bin/kinit',
+        principal_name = None,
+        user = 'hdfs',
+        owner = 'hdfs',
+        hadoop_conf_dir = '/etc/hadoop/conf',
+        type = 'directory',
+        action = ['create_on_execute'],
+        mode = 0777,
+    )
+    self.assertResourceCalled('HdfsResource', '/user/ambari-qa',
+        security_enabled = False,
+        only_if = "ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn1 | grep active'",
+        keytab = UnknownConfigurationMock(),
+        hadoop_bin_dir = '/usr/bin',
+        default_fs = 'hdfs://ns1',
+        hdfs_site = self.getConfig()['configurations']['hdfs-site'],
+        kinit_path_local = '/usr/bin/kinit',
+        principal_name = None,
+        user = 'hdfs',
+        owner = 'ambari-qa',
+        hadoop_conf_dir = '/etc/hadoop/conf',
+        type = 'directory',
+        action = ['create_on_execute'],
+        mode = 0770,
+    )
+    self.assertResourceCalled('HdfsResource', None,
+        security_enabled = False,
+        only_if = "ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn1 | grep active'",
+        keytab = UnknownConfigurationMock(),
+        hadoop_bin_dir = '/usr/bin',
+        default_fs = 'hdfs://ns1',
+        hdfs_site = self.getConfig()['configurations']['hdfs-site'],
+        kinit_path_local = '/usr/bin/kinit',
+        principal_name = None,
+        user = 'hdfs',
+        action = ['execute'],
+        hadoop_conf_dir = '/etc/hadoop/conf',
+    )
+    self.assertNoMoreResources()
+    self.assertTrue(call_mocks.called)
+    self.assertEqual(5, call_mocks.call_count)
+    calls = [
+        call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn1 | grep active'"),
+        call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'"),
+        call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn1 | grep active'"),
+        call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'"),
+        call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn1 | grep active'")]
+    call_mocks.assert_has_calls(calls)
+
   def test_start_ha_secured(self):
     self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/namenode.py",
                        classname = "NameNode",
@@ -762,7 +866,7 @@ class TestNamenode(RMFTestCase):
     self.assertEqual(2, call_mocks.call_count)
     calls = [
       call('hdfs namenode -bootstrapStandby -nonInteractive', logoutput=False, user=u'hdfs'),
-      call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'", logoutput=True)]
+      call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'")]
     call_mocks.assert_has_calls(calls, any_order=False)
 
   # tests namenode start command when NameNode HA is enabled, and
@@ -868,7 +972,7 @@ class TestNamenode(RMFTestCase):
     self.assertTrue(call_mocks.called)
     self.assertEqual(3, call_mocks.call_count)
     calls = [
-      call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'", logoutput=True),
+      call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'"),
       call('hdfs namenode -bootstrapStandby -nonInteractive -force', logoutput=False, user=u'hdfs'),
       call('hdfs namenode -bootstrapStandby -nonInteractive -force', logoutput=False, user=u'hdfs')]
     call_mocks.assert_has_calls(calls, any_order=True)