You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@ambari.apache.org by "Andrii Babiichuk (JIRA)" <ji...@apache.org> on 2018/05/18 17:55:00 UTC

[jira] [Updated] (AMBARI-23904) ZKFC fails to start while moving Namenode on a cluster with multiple namespaces

     [ https://issues.apache.org/jira/browse/AMBARI-23904?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Andrii Babiichuk updated AMBARI-23904:
--------------------------------------
    Description: 
STR (steps to reproduce):
- Deploy a cluster with 2 namespaces using a blueprint.
- Use the UI wizard to move the Active Namenode for namespace NS2.
- Perform the manual operations when prompted in the wizard (FormatZkfc on the other 3 hosts, bootstrapStandby on the new NN).
- In the final step (start all services), ZKFC fails to start on the host to which the NN was moved.

{code}
Traceback (most recent call last):
  File "/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/zkfc_slave.py", line 192, in <module>
    ZkfcSlave().execute()
  File "/usr/lib/ambari-agent/lib/resource_management/libraries/script/script.py", line 353, in execute
    method(env)
  File "/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/zkfc_slave.py", line 71, in start
    ZkfcSlaveDefault.start_static(env, upgrade_type)
  File "/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/zkfc_slave.py", line 96, in start_static
    create_log_dir=True
  File "/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/utils.py", line 258, in service
    Execute(daemon_cmd, not_if=process_id_exists_command, environment=hadoop_env_exports)
  File "/usr/lib/ambari-agent/lib/resource_management/core/base.py", line 166, in __init__
    self.env.run()
  File "/usr/lib/ambari-agent/lib/resource_management/core/environment.py", line 160, in run
    self.run_action(resource, action)
  File "/usr/lib/ambari-agent/lib/resource_management/core/environment.py", line 124, in run_action
    provider_action()
  File "/usr/lib/ambari-agent/lib/resource_management/core/providers/system.py", line 263, in action_run
    returns=self.resource.returns)
  File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line 72, in inner
    result = function(command, **kwargs)
  File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line 102, in checked_call
    tries=tries, try_sleep=try_sleep, timeout_kill_strategy=timeout_kill_strategy, returns=returns)
  File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line 150, in _call_wrapper
    result = _call(command, **kwargs_copy)
  File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line 308, in _call
    raise ExecutionFailed(err_msg, code, out, err)
resource_management.core.exceptions.ExecutionFailed: Execution of 'ambari-sudo.sh su cstm-hdfs -l -s /bin/bash -c 'ulimit -c unlimited ;  /usr/hdp/3.0.0.0-1316/hadoop/bin/hdfs --config /usr/hdp/3.0.0.0-1316/hadoop/conf --daemon start zkfc'' returned 1. ######## Hortonworks #############
This is MOTD message, added for testing in qe infra
WARNING: HADOOP_ZKFC_OPTS has been replaced by HDFS_ZKFC_OPTS. Using value of HADOOP_ZKFC_OPTS.
{code}

  was:
STR: 
-> Deploy a cluster with 2 namespaces using blueprint.
-> Use the UI wizard to move Active Namenode for namespace NS2
-> Perform manual operations when prompted in the wizard (FormatZkfc on other 3 hosts, perform bootstrapStandby on the new NN)
-> In the final step, start all services, ZKFC fails to start on the host where we moved the NN 

{code}
Traceback (most recent call last):
  File "/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/zkfc_slave.py", line 192, in <module>
    ZkfcSlave().execute()
  File "/usr/lib/ambari-agent/lib/resource_management/libraries/script/script.py", line 353, in execute
    method(env)
  File "/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/zkfc_slave.py", line 71, in start
    ZkfcSlaveDefault.start_static(env, upgrade_type)
  File "/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/zkfc_slave.py", line 96, in start_static
    create_log_dir=True
  File "/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/utils.py", line 258, in service
    Execute(daemon_cmd, not_if=process_id_exists_command, environment=hadoop_env_exports)
  File "/usr/lib/ambari-agent/lib/resource_management/core/base.py", line 166, in __init__
    self.env.run()
  File "/usr/lib/ambari-agent/lib/resource_management/core/environment.py", line 160, in run
    self.run_action(resource, action)
  File "/usr/lib/ambari-agent/lib/resource_management/core/environment.py", line 124, in run_action
    provider_action()
  File "/usr/lib/ambari-agent/lib/resource_management/core/providers/system.py", line 263, in action_run
    returns=self.resource.returns)
  File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line 72, in inner
    result = function(command, **kwargs)
  File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line 102, in checked_call
    tries=tries, try_sleep=try_sleep, timeout_kill_strategy=timeout_kill_strategy, returns=returns)
  File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line 150, in _call_wrapper
    result = _call(command, **kwargs_copy)
  File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line 308, in _call
    raise ExecutionFailed(err_msg, code, out, err)
resource_management.core.exceptions.ExecutionFailed: Execution of 'ambari-sudo.sh su cstm-hdfs -l -s /bin/bash -c 'ulimit -c unlimited ;  /usr/hdp/3.0.0.0-1316/hadoop/bin/hdfs --config /usr/hdp/3.0.0.0-1316/hadoop/conf --daemon start zkfc'' returned 1. ######## Hortonworks #############
This is MOTD message, added for testing in qe infra
WARNING: HADOOP_ZKFC_OPTS has been replaced by HDFS_ZKFC_OPTS. Using value of HADOOP_ZKFC_OPTS.
{code}


> ZKFC fails to start while moving Namenode on a cluster with multiple namespaces
> -------------------------------------------------------------------------------
>
>                 Key: AMBARI-23904
>                 URL: https://issues.apache.org/jira/browse/AMBARI-23904
>             Project: Ambari
>          Issue Type: Bug
>          Components: ambari-web
>    Affects Versions: 2.7.0
>            Reporter: Andrii Babiichuk
>            Assignee: Andrii Babiichuk
>            Priority: Blocker
>             Fix For: 2.7.0
>
>
> STR (steps to reproduce):
> - Deploy a cluster with 2 namespaces using a blueprint.
> - Use the UI wizard to move the Active Namenode for namespace NS2.
> - Perform the manual operations when prompted in the wizard (FormatZkfc on the other 3 hosts, bootstrapStandby on the new NN).
> - In the final step (start all services), ZKFC fails to start on the host to which the NN was moved.
> {code}
> Traceback (most recent call last):
>   File "/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/zkfc_slave.py", line 192, in <module>
>     ZkfcSlave().execute()
>   File "/usr/lib/ambari-agent/lib/resource_management/libraries/script/script.py", line 353, in execute
>     method(env)
>   File "/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/zkfc_slave.py", line 71, in start
>     ZkfcSlaveDefault.start_static(env, upgrade_type)
>   File "/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/zkfc_slave.py", line 96, in start_static
>     create_log_dir=True
>   File "/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/utils.py", line 258, in service
>     Execute(daemon_cmd, not_if=process_id_exists_command, environment=hadoop_env_exports)
>   File "/usr/lib/ambari-agent/lib/resource_management/core/base.py", line 166, in __init__
>     self.env.run()
>   File "/usr/lib/ambari-agent/lib/resource_management/core/environment.py", line 160, in run
>     self.run_action(resource, action)
>   File "/usr/lib/ambari-agent/lib/resource_management/core/environment.py", line 124, in run_action
>     provider_action()
>   File "/usr/lib/ambari-agent/lib/resource_management/core/providers/system.py", line 263, in action_run
>     returns=self.resource.returns)
>   File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line 72, in inner
>     result = function(command, **kwargs)
>   File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line 102, in checked_call
>     tries=tries, try_sleep=try_sleep, timeout_kill_strategy=timeout_kill_strategy, returns=returns)
>   File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line 150, in _call_wrapper
>     result = _call(command, **kwargs_copy)
>   File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line 308, in _call
>     raise ExecutionFailed(err_msg, code, out, err)
> resource_management.core.exceptions.ExecutionFailed: Execution of 'ambari-sudo.sh su cstm-hdfs -l -s /bin/bash -c 'ulimit -c unlimited ;  /usr/hdp/3.0.0.0-1316/hadoop/bin/hdfs --config /usr/hdp/3.0.0.0-1316/hadoop/conf --daemon start zkfc'' returned 1. ######## Hortonworks #############
> This is MOTD message, added for testing in qe infra
> WARNING: HADOOP_ZKFC_OPTS has been replaced by HDFS_ZKFC_OPTS. Using value of HADOOP_ZKFC_OPTS.
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)