You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by ao...@apache.org on 2015/11/13 12:40:19 UTC

ambari git commit: Revert "AMBARI-13856. Sometimes when HA is enabled NameNode does not wait to leave safe mode on start (aonishuk)"

Repository: ambari
Updated Branches:
  refs/heads/trunk 99d9c26dc -> 59ee076e6


Revert "AMBARI-13856. Sometimes when HA is enabled NameNode does not wait to leave safe mode on start (aonishuk)"

This reverts commit 696e58bd869574f6c9b013360d2f32d5a9b883a4.


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/59ee076e
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/59ee076e
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/59ee076e

Branch: refs/heads/trunk
Commit: 59ee076e6edba88a714343f6fccca8d46c13288b
Parents: 99d9c26
Author: Andrew Onishuk <ao...@hortonworks.com>
Authored: Fri Nov 13 13:40:09 2015 +0200
Committer: Andrew Onishuk <ao...@hortonworks.com>
Committed: Fri Nov 13 13:40:09 2015 +0200

----------------------------------------------------------------------
 .../2.1.0.2.0/package/scripts/hdfs_namenode.py  | 43 ++++++++++++--------
 .../python/stacks/2.0.6/HDFS/test_namenode.py   | 17 ++++----
 2 files changed, 35 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/59ee076e/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
index d6a0a41..f944b8d 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
@@ -115,11 +115,10 @@ def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None, e
       Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
               user = params.hdfs_user)
 
+    is_namenode_safe_mode_off = format("{hdfs_binary} dfsadmin -fs {namenode_address} -safemode get | grep 'Safe mode is OFF'")
     if params.dfs_ha_enabled:
-      is_namenode_safe_mode_off = format("{hdfs_binary} dfsadmin -fs hdfs://{namenode_rpc} -safemode get | grep 'Safe mode is OFF'")
       is_active_namenode_cmd = as_user(format("{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir})
     else:
-      is_namenode_safe_mode_off = format("{hdfs_binary} dfsadmin -fs {namenode_address} -safemode get | grep 'Safe mode is OFF'")
       is_active_namenode_cmd = True
     
     # During NonRolling Upgrade, both NameNodes are initially down,
@@ -130,21 +129,30 @@ def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None, e
     # ___Scenario___________|_Expected safemode state__|_Wait for safemode OFF____|
     # no-HA                 | ON -> OFF                | Yes                      |
     # HA and active         | ON -> OFF                | Yes                      |
-    # HA and standby        | ON -> OFF                | Yes                      |
+    # HA and standby        | no change                | no check                 |
     # RU with HA on active  | ON -> OFF                | Yes                      |
     # RU with HA on standby | ON -> OFF                | Yes                      |
     # EU with HA on active  | no change                | no check                 |
     # EU with HA on standby | no change                | no check                 |
     # EU non-HA             | no change                | no check                 |
 
+    check_for_safemode_off = False
     msg = ""
     if params.dfs_ha_enabled:
       if upgrade_type is not None:
+        check_for_safemode_off = True
         msg = "Must wait to leave safemode since High Availability is enabled during a Stack Upgrade"
       else:
-        msg = "Must wait to leave safemode since High Availability is enabled."
+        # During normal operations, the NameNode is expected to be up.
+        code, out = shell.call(is_active_namenode_cmd, logoutput=True) # If active NN, code will be 0
+        if code == 0: # active
+          check_for_safemode_off = True
+          msg = "Must wait to leave safemode since High Availability is enabled and this is the Active NameNode."
+        else:
+          msg = "Will remain in the current safemode state."
     else:
       msg = "Must wait to leave safemode since High Availability is not enabled."
+      check_for_safemode_off = True
 
     Logger.info(msg)
 
@@ -153,19 +161,20 @@ def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None, e
     if upgrade_type == "nonrolling":
       stay_in_safe_mode = True
 
-    Logger.info("Stay in safe mode: {0}".format(stay_in_safe_mode))
-    if not stay_in_safe_mode:
-      Logger.info("Wait to leafe safemode since must transition from ON to OFF.")
-      try:
-        # Wait up to 30 mins
-        Execute(is_namenode_safe_mode_off,
-                tries=180,
-                try_sleep=10,
-                user=params.hdfs_user,
-                logoutput=True
-        )
-      except Fail:
-        Logger.error("NameNode is still in safemode, please be careful with commands that need safemode OFF.")
+    if check_for_safemode_off:
+      Logger.info("Stay in safe mode: {0}".format(stay_in_safe_mode))
+      if not stay_in_safe_mode:
+        Logger.info("Wait to leafe safemode since must transition from ON to OFF.")
+        try:
+          # Wait up to 30 mins
+          Execute(is_namenode_safe_mode_off,
+                  tries=180,
+                  try_sleep=10,
+                  user=params.hdfs_user,
+                  logoutput=True
+          )
+        except Fail:
+          Logger.error("NameNode is still in safemode, please be careful with commands that need safemode OFF.")
 
     # Always run this on non-HA, or active NameNode during HA.
     create_hdfs_directories(is_active_namenode_cmd)

http://git-wip-us.apache.org/repos/asf/ambari/blob/59ee076e/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
index 1ec8c8b..353d91f 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
@@ -425,7 +425,7 @@ class TestNamenode(RMFTestCase):
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
         not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid",
     )
-    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://c6401.ambari.apache.org:8020 -safemode get | grep 'Safe mode is OFF'",
+    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://ns1 -safemode get | grep 'Safe mode is OFF'",
         tries=180,
         try_sleep=10,
         user="hdfs",
@@ -519,7 +519,7 @@ class TestNamenode(RMFTestCase):
     self.assertResourceCalled('Execute', '/usr/bin/kinit -kt /etc/security/keytabs/hdfs.headless.keytab hdfs',
         user = 'hdfs',
     )
-    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://c6401.ambari.apache.org:8020 -safemode get | grep 'Safe mode is OFF'",
+    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://ns1 -safemode get | grep 'Safe mode is OFF'",
         tries=180,
         try_sleep=10,
         user="hdfs",
@@ -622,7 +622,7 @@ class TestNamenode(RMFTestCase):
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
         not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid",
     )
-    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://c6401.ambari.apache.org:8020 -safemode get | grep 'Safe mode is OFF'",
+    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://ns1 -safemode get | grep 'Safe mode is OFF'",
         tries=180,
         try_sleep=10,
         user="hdfs",
@@ -724,7 +724,7 @@ class TestNamenode(RMFTestCase):
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
         not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid",
     )
-    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://c6402.ambari.apache.org:8020 -safemode get | grep 'Safe mode is OFF'",
+    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://ns1 -safemode get | grep 'Safe mode is OFF'",
         tries=180,
         try_sleep=10,
         user="hdfs",
@@ -780,10 +780,10 @@ class TestNamenode(RMFTestCase):
     )
     self.assertNoMoreResources()
     self.assertTrue(call_mocks.called)
-    self.assertEqual(1, call_mocks.call_count)
+    self.assertEqual(2, call_mocks.call_count)
     calls = [
       call('hdfs namenode -bootstrapStandby -nonInteractive', logoutput=False, user=u'hdfs'),
-    ]
+      call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'", logoutput=True)]
     call_mocks.assert_has_calls(calls, any_order=False)
 
   # tests namenode start command when NameNode HA is enabled, and
@@ -834,7 +834,7 @@ class TestNamenode(RMFTestCase):
                               environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
                               not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid",
                               )
-    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://c6402.ambari.apache.org:8020 -safemode get | grep 'Safe mode is OFF'",
+    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://ns1 -safemode get | grep 'Safe mode is OFF'",
                               tries=180,
                               try_sleep=10,
                               user="hdfs",
@@ -890,8 +890,9 @@ class TestNamenode(RMFTestCase):
                               )
     self.assertNoMoreResources()
     self.assertTrue(call_mocks.called)
-    self.assertEqual(2, call_mocks.call_count)
+    self.assertEqual(3, call_mocks.call_count)
     calls = [
+      call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'", logoutput=True),
       call('hdfs namenode -bootstrapStandby -nonInteractive -force', logoutput=False, user=u'hdfs'),
       call('hdfs namenode -bootstrapStandby -nonInteractive -force', logoutput=False, user=u'hdfs')]
     call_mocks.assert_has_calls(calls, any_order=True)