Posted to commits@ambari.apache.org by al...@apache.org on 2015/08/06 00:35:16 UTC
ambari git commit: AMBARI-11743. Ambari forces NameNode to leave safemode when NameNode starts (alejandro)
Repository: ambari
Updated Branches:
refs/heads/branch-2.0.maint fc7f513a3 -> 4342a0602
AMBARI-11743. Ambari forces NameNode to leave safemode when NameNode starts (alejandro)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/4342a060
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/4342a060
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/4342a060
Branch: refs/heads/branch-2.0.maint
Commit: 4342a060282c720007707fdc6956f0cf2d263463
Parents: fc7f513
Author: Alejandro Fernandez <af...@hortonworks.com>
Authored: Wed Aug 5 15:31:06 2015 -0700
Committer: Alejandro Fernandez <af...@hortonworks.com>
Committed: Wed Aug 5 15:31:06 2015 -0700
----------------------------------------------------------------------
.../2.1.0.2.0/package/scripts/hdfs_namenode.py | 86 +++++++++++++-------
.../HDFS/2.1.0.2.0/package/scripts/params.py | 11 +++
2 files changed, 66 insertions(+), 31 deletions(-)
----------------------------------------------------------------------
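For context, the gist of this patch: instead of forcing the NameNode out of safemode on start, the start logic now polls until the NameNode leaves safemode on its own. A minimal standalone sketch of that wait loop, assuming plain subprocess calls in place of Ambari's Execute resource (the tries/try_sleep values carry the same meaning as in the hunk below; wait_for_safemode_off is a hypothetical helper, not part of the patch):

import subprocess
import time

def wait_for_safemode_off(namenode_address, tries=180, try_sleep=10):
    # Hypothetical helper mirroring Execute(is_namenode_safe_mode_off, tries=180, try_sleep=10):
    # poll 'hdfs dfsadmin -safemode get' for up to ~30 minutes; never run 'dfsadmin -safemode leave'.
    cmd = "hdfs dfsadmin -fs %s -safemode get | grep 'Safe mode is OFF'" % namenode_address
    for _ in range(tries):
        if subprocess.call(cmd, shell=True) == 0:
            return True   # NameNode reports safemode OFF
        time.sleep(try_sleep)
    return False          # still in safemode; the patch logs an error at this point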
http://git-wip-us.apache.org/repos/asf/ambari/blob/4342a060/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
index 615dd54..b2c5f9d 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
@@ -19,8 +19,15 @@ limitations under the License.
import os.path
from resource_management import *
+from resource_management.core import shell
+from resource_management.core.source import Template
+from resource_management.core.resources.system import File, Execute, Directory
+from resource_management.libraries.functions.format import format
+from resource_management.libraries.resources.execute_hadoop import ExecuteHadoop
+
+from resource_management.core.shell import as_user
+from resource_management.core.exceptions import Fail
from resource_management.core.logger import Logger
-from resource_management.core.exceptions import ComponentIsNotRunning
from utils import service, safe_zkfc_op
@@ -75,44 +82,61 @@ def namenode(action=None, do_format=True, rolling_restart=False, env=None):
create_log_dir=True
)
-
if params.security_enabled:
Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
user = params.hdfs_user)
+ # namenode_address is computed in params.py (from dfs.namenode.rpc-address, fs.defaultFS, or the HA nameservice)
+ is_namenode_safe_mode_off = format("hdfs dfsadmin -fs {namenode_address} -safemode get | grep 'Safe mode is OFF'")
+ if params.dfs_ha_enabled:
+ is_active_namenode_cmd = as_user(format("hdfs --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir})
+ else:
+ is_active_namenode_cmd = None
+
+ # During normal operations, if HA is enabled and this NameNode is the standby, there is no need to check safemode status.
+ # During a Rolling Upgrade, both NameNodes must eventually leave safemode, and Ambari can wait for this.
+
+ # ___Scenario_________|_Expected safemode state___|_Wait for safemode OFF____|
+ # 1 (HA and active) | ON -> OFF | Yes |
+ # 2 (HA and standby) | no change (yes during RU) | no check (yes during RU) |
+ # 3 (no-HA) | ON -> OFF | Yes |
+ check_for_safemode_off = False
+ msg = ""
if params.dfs_ha_enabled:
- dfs_check_nn_status_cmd = as_user(format("hdfs --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir})
+ code, out = shell.call(is_active_namenode_cmd, logoutput=True) # If active NN, code will be 0
+ if code == 0: # active
+ check_for_safemode_off = True
+ msg = "Must wait to leave safemode since High Availability is enabled and this is the Active NameNode."
+ elif rolling_restart:
+ check_for_safemode_off = True
+ msg = "Must wait to leave safemode since High Availability is enabled during a Rolling Upgrade"
else:
- dfs_check_nn_status_cmd = None
-
- namenode_safe_mode_off = format("hadoop dfsadmin -safemode get | grep 'Safe mode is OFF'")
-
- # If HA is enabled and it is in standby, then stay in safemode, otherwise, leave safemode.
- leave_safe_mode = True
- if dfs_check_nn_status_cmd is not None:
- code, out = shell.call(dfs_check_nn_status_cmd) # If active NN, code will be 0
- if code != 0:
- leave_safe_mode = False
-
- if leave_safe_mode:
- # First check if Namenode is not in 'safemode OFF' (equivalent to safemode ON), if so, then leave it
- code, out = shell.call(namenode_safe_mode_off)
- if code != 0:
- leave_safe_mode_cmd = format("hdfs --config {hadoop_conf_dir} dfsadmin -safemode leave")
- Execute(leave_safe_mode_cmd,
+ msg = "Must wait to leave safemode since High Availability is not enabled."
+ check_for_safemode_off = True
+
+ if not msg:
+ msg = "Will remain in the current safemode state."
+ Logger.info(msg)
+
+ if check_for_safemode_off:
+ # First check whether the NameNode already reports 'Safe mode is OFF'; if so, there is nothing to do.
+ # If safemode is ON, wait for the NameNode to leave safemode on its own, for up to 30 minutes
+ # (180 tries, 10 seconds apart). If it is still in safemode after that, log an error; do not force it out.
+ Logger.info("Checking the NameNode safemode status since may need to transition from ON to OFF.")
+
+ try:
+ # Wait up to 30 minutes (tries=180, try_sleep=10)
+ Execute(is_namenode_safe_mode_off,
+ tries=180,
+ try_sleep=10,
user=params.hdfs_user,
- path=[params.hadoop_bin_dir],
+ logoutput=True
)
+ except Fail:
+ Logger.error("NameNode is still in safemode, please be careful with commands that need safemode OFF.")
- # Verify if Namenode should be in safemode OFF
- Execute(namenode_safe_mode_off,
- tries=40,
- try_sleep=10,
- path=[params.hadoop_bin_dir],
- user=params.hdfs_user,
- only_if=dfs_check_nn_status_cmd #skip when HA not active
- )
- create_hdfs_directories(dfs_check_nn_status_cmd)
+ # Always run this on a non-HA cluster, or on the active NameNode when HA is enabled.
+ create_hdfs_directories(is_active_namenode_cmd)
if action == "stop":
service(
@@ -276,7 +300,7 @@ def decommission():
# need to execute each command scoped to a particular namenode
nn_refresh_cmd = format('dfsadmin -fs hdfs://{namenode_rpc} -refreshNodes')
else:
- nn_refresh_cmd = format('dfsadmin -refreshNodes')
+ nn_refresh_cmd = format('dfsadmin -fs {namenode_address} -refreshNodes')
ExecuteHadoop(nn_refresh_cmd,
user=hdfs_user,
conf_dir=conf_dir,
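
The scenario table in the hunk above reduces to a small predicate. A sketch, with hypothetical booleans standing in for the shell checks the patch actually performs (the HA active/standby state comes from 'haadmin -getServiceState' in the code):

def should_wait_for_safemode_off(dfs_ha_enabled, is_active_namenode, rolling_restart):
    # Scenario 3 (no-HA): always wait for safemode OFF.
    if not dfs_ha_enabled:
        return True
    # Scenario 1 (HA and active): wait.
    # Scenario 2 (HA and standby): wait only during a Rolling Upgrade.
    return is_active_namenode or rolling_restart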
http://git-wip-us.apache.org/repos/asf/ambari/blob/4342a060/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/params.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/params.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/params.py
index 6fb4fb2..1a7b765 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/params.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/params.py
@@ -19,6 +19,7 @@ limitations under the License.
from resource_management.libraries.functions.version import format_hdp_stack_version, compare_versions
from resource_management.libraries.functions.default import default
+from resource_management.libraries.resources import HdfsDirectory
from resource_management import *
import status_params
import utils
@@ -186,6 +187,14 @@ for dn_dir in dfs_name_dirs:
tmp_mark_dir = format("{dn_dir}{hdfs_namenode_formatted_mark_suffix}")
namenode_formatted_mark_dirs.append(tmp_mark_dir)
+# Use the NameNode RPC address if configured; otherwise fall back to the default file system
+namenode_address = None
+if 'dfs.namenode.rpc-address' in config['configurations']['hdfs-site']:
+ namenode_rpcaddress = config['configurations']['hdfs-site']['dfs.namenode.rpc-address']
+ namenode_address = format("hdfs://{namenode_rpcaddress}")
+else:
+ namenode_address = config['configurations']['core-site']['fs.defaultFS']
+
fs_checkpoint_dirs = config['configurations']['hdfs-site']['dfs.namenode.checkpoint.dir'].split(',')
dfs_data_dir = config['configurations']['hdfs-site']['dfs.datanode.data.dir']
@@ -218,6 +227,8 @@ if dfs_ha_enabled:
if hostname in nn_host:
namenode_id = nn_id
namenode_rpc = nn_host
+ # With HA enabled, namenode_address is recomputed to use the HA nameservice
+ namenode_address = format('hdfs://{dfs_ha_nameservices}')
if dfs_http_policy is not None and dfs_http_policy.upper() == "HTTPS_ONLY":
https_only = True
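
Taken together, the two params.py hunks resolve the address roughly like this (a sketch; the dict arguments stand in for the config['configurations'] sections Ambari passes around, and resolve_namenode_address is a hypothetical name):

def resolve_namenode_address(hdfs_site, core_site, dfs_ha_nameservices=None):
    # With HA enabled, the nameservice wins, matching the recomputation in the second hunk.
    if dfs_ha_nameservices:
        return 'hdfs://%s' % dfs_ha_nameservices
    # Otherwise prefer the explicit RPC address, then fall back to the default file system.
    if 'dfs.namenode.rpc-address' in hdfs_site:
        return 'hdfs://%s' % hdfs_site['dfs.namenode.rpc-address']
    return core_site['fs.defaultFS']

For example, resolve_namenode_address({}, {'fs.defaultFS': 'hdfs://c6401.ambari.apache.org:8020'}) returns the fs.defaultFS value unchanged, while passing dfs_ha_nameservices='mycluster' yields 'hdfs://mycluster'.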