You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by ao...@apache.org on 2016/04/19 14:26:13 UTC

ambari git commit: AMBARI-15972. Need more informative message in case of EU failures (aonishuk)

Repository: ambari
Updated Branches:
  refs/heads/trunk 8965be4cf -> c8a2da207


AMBARI-15972. Need more informative message in case of EU failures (aonishuk)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/c8a2da20
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/c8a2da20
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/c8a2da20

Branch: refs/heads/trunk
Commit: c8a2da207af504a70c19ab903a7cefbade667cbf
Parents: 8965be4
Author: Andrew Onishuk <ao...@hortonworks.com>
Authored: Tue Apr 19 15:26:04 2016 +0300
Committer: Andrew Onishuk <ao...@hortonworks.com>
Committed: Tue Apr 19 15:26:04 2016 +0300

----------------------------------------------------------------------
 .../package/scripts/nodemanager_upgrade.py      | 41 +++++++++-----------
 .../YARN/2.1.0.2.0/package/scripts/service.py   |  4 +-
 .../stacks/2.0.6/YARN/test_nodemanager.py       | 10 ++---
 3 files changed, 26 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/c8a2da20/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/nodemanager_upgrade.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/nodemanager_upgrade.py b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/nodemanager_upgrade.py
index 65709ac..1c886f9 100644
--- a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/nodemanager_upgrade.py
+++ b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/nodemanager_upgrade.py
@@ -24,6 +24,7 @@ from resource_management.core.exceptions import Fail
 from resource_management.core.resources.system import Execute
 from resource_management.core import shell
 from resource_management.libraries.functions.decorator import retry
+from resource_management.libraries.functions.show_logs import show_logs
 
 
 def post_upgrade_check():
@@ -38,8 +39,12 @@ def post_upgrade_check():
   if params.security_enabled and params.nodemanager_kinit_cmd:
     Execute(params.nodemanager_kinit_cmd, user=params.yarn_user)
 
-  _check_nodemanager_startup()
-
+  try:
+    _check_nodemanager_startup()
+  except Fail:
+    show_logs(params.yarn_log_dir, params.yarn_user)
+    raise
+    
 
 @retry(times=30, sleep_time=10, err_class=Fail)
 def _check_nodemanager_startup():
@@ -54,23 +59,15 @@ def _check_nodemanager_startup():
   import socket
 
   command = 'yarn node -list -states=RUNNING'
-
-  try:
-    # 'su - yarn -c "yarn node -status c6401.ambari.apache.org:45454"'
-    return_code, yarn_output = shell.call(command, user=params.yarn_user)
-  except:
-    raise Fail('Unable to determine if the NodeManager has started after upgrade.')
-
-  if return_code == 0:
-    hostname = params.hostname.lower()
-    hostname_ip = socket.gethostbyname(params.hostname.lower())
-    nodemanager_address = params.nm_address.lower()
-    yarn_output = yarn_output.lower()
-
-    if hostname in yarn_output or nodemanager_address in yarn_output or hostname_ip in yarn_output:
-      Logger.info('NodeManager with ID {0} has rejoined the cluster.'.format(nodemanager_address))
-      return
-    else:
-      raise Fail('NodeManager with ID {0} was not found in the list of running NodeManagers'.format(nodemanager_address))
-
-  raise Fail('Unable to determine if the NodeManager has started after upgrade (result code {0})'.format(str(return_code)))
+  return_code, yarn_output = shell.checked_call(command, user=params.yarn_user)
+  
+  hostname = params.hostname.lower()
+  hostname_ip = socket.gethostbyname(params.hostname.lower())
+  nodemanager_address = params.nm_address.lower()
+  yarn_output = yarn_output.lower()
+
+  if hostname in yarn_output or nodemanager_address in yarn_output or hostname_ip in yarn_output:
+    Logger.info('NodeManager with ID \'{0}\' has rejoined the cluster.'.format(nodemanager_address))
+    return
+  else:
+    raise Fail('NodeManager with ID \'{0}\' was not found in the list of running NodeManagers. \'{1}\' output was:\n{2}'.format(nodemanager_address, command, yarn_output))

http://git-wip-us.apache.org/repos/asf/ambari/blob/c8a2da20/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service.py b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service.py
index 0703c1f..b1179b9 100644
--- a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service.py
+++ b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service.py
@@ -45,7 +45,7 @@ def service(componentName, action='start', serviceName='yarn'):
     daemon = format("{mapred_bin}/mr-jobhistory-daemon.sh")
     pid_file = format("{mapred_pid_dir}/mapred-{mapred_user}-{componentName}.pid")
     usr = params.mapred_user
-    log_dir = params.yarn_log_dir
+    log_dir = params.mapred_log_dir
   else:
     # !!! yarn-daemon.sh deletes the PID for us; if we remove it the script
     # may not work correctly when stopping the service
@@ -53,7 +53,7 @@ def service(componentName, action='start', serviceName='yarn'):
     daemon = format("{yarn_bin}/yarn-daemon.sh")
     pid_file = format("{yarn_pid_dir}/yarn-{yarn_user}-{componentName}.pid")
     usr = params.yarn_user
-    log_dir = params.mapred_log_dir
+    log_dir = params.yarn_log_dir
 
   cmd = format("export HADOOP_LIBEXEC_DIR={hadoop_libexec_dir} && {daemon} --config {hadoop_conf_dir}")
 

http://git-wip-us.apache.org/repos/asf/ambari/blob/c8a2da20/ambari-server/src/test/python/stacks/2.0.6/YARN/test_nodemanager.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/YARN/test_nodemanager.py b/ambari-server/src/test/python/stacks/2.0.6/YARN/test_nodemanager.py
index 665ebfb..cb8178c 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/YARN/test_nodemanager.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/YARN/test_nodemanager.py
@@ -539,18 +539,18 @@ class TestNodeManager(RMFTestCase):
       config_file = "default.json",
       stack_version = self.STACK_VERSION,
       target = RMFTestCase.TARGET_COMMON_SERVICES,
-      call_mocks = [(0, process_output)],
+      checked_call_mocks = [(0, process_output)],
       mocks_dict = mocks_dict
     )
 
-    self.assertTrue(mocks_dict['call'].called)
-    self.assertEqual(mocks_dict['call'].call_count,1)
+    self.assertTrue(mocks_dict['checked_call'].called)
+    self.assertEqual(mocks_dict['checked_call'].call_count,1)
 
     self.assertEquals(
       "yarn node -list -states=RUNNING",
-       mocks_dict['call'].call_args_list[0][0][0])
+       mocks_dict['checked_call'].call_args_list[0][0][0])
 
-    self.assertEquals( {'user': u'yarn'}, mocks_dict['call'].call_args_list[0][1])
+    self.assertEquals( {'user': u'yarn'}, mocks_dict['checked_call'].call_args_list[0][1])
 
 
   @patch('time.sleep')