You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by al...@apache.org on 2015/08/06 00:35:16 UTC

ambari git commit: AMBARI-11743. Ambari forces NameNode to leave safemode when NameNode starts (alejandro)

Repository: ambari
Updated Branches:
  refs/heads/branch-2.0.maint fc7f513a3 -> 4342a0602


AMBARI-11743. Ambari forces NameNode to leave safemode when NameNode starts (alejandro)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/4342a060
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/4342a060
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/4342a060

Branch: refs/heads/branch-2.0.maint
Commit: 4342a060282c720007707fdc6956f0cf2d263463
Parents: fc7f513
Author: Alejandro Fernandez <af...@hortonworks.com>
Authored: Wed Aug 5 15:31:06 2015 -0700
Committer: Alejandro Fernandez <af...@hortonworks.com>
Committed: Wed Aug 5 15:31:06 2015 -0700

----------------------------------------------------------------------
 .../2.1.0.2.0/package/scripts/hdfs_namenode.py  | 86 +++++++++++++-------
 .../HDFS/2.1.0.2.0/package/scripts/params.py    | 11 +++
 2 files changed, 66 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/4342a060/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
index 615dd54..b2c5f9d 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
@@ -19,8 +19,15 @@ limitations under the License.
 import os.path
 
 from resource_management import *
+from resource_management.core import shell
+from resource_management.core.source import Template
+from resource_management.core.resources.system import File, Execute, Directory
+from resource_management.libraries.functions.format import format
+from resource_management.libraries.resources.execute_hadoop import ExecuteHadoop
+
+from resource_management.core.shell import as_user
+from resource_management.core.exceptions import Fail
 from resource_management.core.logger import Logger
-from resource_management.core.exceptions import ComponentIsNotRunning
 
 from utils import service, safe_zkfc_op
 
@@ -75,44 +82,61 @@ def namenode(action=None, do_format=True, rolling_restart=False, env=None):
       create_log_dir=True
     )
 
-
     if params.security_enabled:
       Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
               user = params.hdfs_user)
 
+    # Where does namenode_address come from?
+    is_namenode_safe_mode_off = format("hdfs dfsadmin -fs {namenode_address} -safemode get | grep 'Safe mode is OFF'")
+    if params.dfs_ha_enabled:
+      is_active_namenode_cmd = as_user(format("hdfs --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir})
+    else:
+      is_active_namenode_cmd = None
+
+    # During normal operations, if HA is enabled and it is in standby, then no need to check safemode status.
+    # During Rolling Upgrade, both namenodes must eventually leave safemode, and Ambari can wait for this.
+
+    # ___Scenario_________|_Expected safemode state___|_Wait for safemode OFF____|
+    # 1 (HA and active)   | ON -> OFF                 | Yes                      |
+    # 2 (HA and standby)  | no change (yes during RU) | no check (yes during RU) |
+    # 3 (no-HA)           | ON -> OFF                 | Yes                      |
+    check_for_safemode_off = False
+    msg = ""
     if params.dfs_ha_enabled:
-      dfs_check_nn_status_cmd = as_user(format("hdfs --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir})
+      code, out = shell.call(is_active_namenode_cmd, logoutput=True) # If active NN, code will be 0
+      if code == 0: # active
+        check_for_safemode_off = True
+        msg = "Must wait to leave safemode since High Availability is enabled and this is the Active NameNode."
+      elif rolling_restart:
+        check_for_safemode_off = True
+        msg = "Must wait to leave safemode since High Availability is enabled during a Rolling Upgrade"
     else:
-      dfs_check_nn_status_cmd = None
-
-    namenode_safe_mode_off = format("hadoop dfsadmin -safemode get | grep 'Safe mode is OFF'")
-
-    # If HA is enabled and it is in standby, then stay in safemode, otherwise, leave safemode.
-    leave_safe_mode = True
-    if dfs_check_nn_status_cmd is not None:
-      code, out = shell.call(dfs_check_nn_status_cmd) # If active NN, code will be 0
-      if code != 0:
-        leave_safe_mode = False
-
-    if leave_safe_mode:
-      # First check if Namenode is not in 'safemode OFF' (equivalent to safemode ON), if so, then leave it
-      code, out = shell.call(namenode_safe_mode_off)
-      if code != 0:
-        leave_safe_mode_cmd = format("hdfs --config {hadoop_conf_dir} dfsadmin -safemode leave")
-        Execute(leave_safe_mode_cmd,
+      msg = "Must wait to leave safemode since High Availability is not enabled."
+      check_for_safemode_off = True
+
+    if not msg:
+      msg = "Will remain in the current safemode state."
+    Logger.info(msg)
+
+    if check_for_safemode_off:
+      # First check if Namenode is not in 'safemode OFF' (equivalent to safemode ON). If safemode is OFF, no change.
+      # If safemode is ON, first wait for NameNode to leave safemode on its own (if that doesn't happen within 30 minutes, then
+      # force NameNode to leave safemode).
+      Logger.info("Checking the NameNode safemode status since may need to transition from ON to OFF.")
+
+      try:
+        # Wait up to 30 mins
+        Execute(is_namenode_safe_mode_off,
+                tries=180,
+                try_sleep=10,
                 user=params.hdfs_user,
-                path=[params.hadoop_bin_dir],
+                logoutput=True
         )
+      except Fail:
+        Logger.error("NameNode is still in safemode, please be careful with commands that need safemode OFF.")
 
-    # Verify if Namenode should be in safemode OFF
-    Execute(namenode_safe_mode_off,
-            tries=40,
-            try_sleep=10,
-            path=[params.hadoop_bin_dir],
-            user=params.hdfs_user,
-            only_if=dfs_check_nn_status_cmd #skip when HA not active
-    )
-    create_hdfs_directories(dfs_check_nn_status_cmd)
+    # Always run this on non-HA, or active NameNode during HA.
+    create_hdfs_directories(is_active_namenode_cmd)
 
   if action == "stop":
     service(
@@ -276,7 +300,7 @@ def decommission():
       # need to execute each command scoped to a particular namenode
       nn_refresh_cmd = format('dfsadmin -fs hdfs://{namenode_rpc} -refreshNodes')
     else:
-      nn_refresh_cmd = format('dfsadmin -refreshNodes')
+      nn_refresh_cmd = format('dfsadmin -fs {namenode_address} -refreshNodes')
     ExecuteHadoop(nn_refresh_cmd,
                   user=hdfs_user,
                   conf_dir=conf_dir,

http://git-wip-us.apache.org/repos/asf/ambari/blob/4342a060/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/params.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/params.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/params.py
index 6fb4fb2..1a7b765 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/params.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/params.py
@@ -19,6 +19,7 @@ limitations under the License.
 
 from resource_management.libraries.functions.version import format_hdp_stack_version, compare_versions
 from resource_management.libraries.functions.default import default
+from resource_management.libraries.resources import HdfsDirectory
 from resource_management import *
 import status_params
 import utils
@@ -186,6 +187,14 @@ for dn_dir in dfs_name_dirs:
  tmp_mark_dir = format("{dn_dir}{hdfs_namenode_formatted_mark_suffix}")
  namenode_formatted_mark_dirs.append(tmp_mark_dir)
 
+# Use the namenode RPC address if configured, otherwise, fallback to the default file system
+namenode_address = None
+if 'dfs.namenode.rpc-address' in config['configurations']['hdfs-site']:
+  namenode_rpcaddress = config['configurations']['hdfs-site']['dfs.namenode.rpc-address']
+  namenode_address = format("hdfs://{namenode_rpcaddress}")
+else:
+  namenode_address = config['configurations']['core-site']['fs.defaultFS']
+
 fs_checkpoint_dirs = config['configurations']['hdfs-site']['dfs.namenode.checkpoint.dir'].split(',')
 
 dfs_data_dir = config['configurations']['hdfs-site']['dfs.datanode.data.dir']
@@ -218,6 +227,8 @@ if dfs_ha_enabled:
     if hostname in nn_host:
       namenode_id = nn_id
       namenode_rpc = nn_host
+  # With HA enabled namenode_address is recomputed
+  namenode_address = format('hdfs://{dfs_ha_nameservices}')
 
 if dfs_http_policy is not None and dfs_http_policy.upper() == "HTTPS_ONLY":
   https_only = True