You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by ao...@apache.org on 2017/02/27 14:49:23 UTC
[2/2] ambari git commit: AMBARI-20210. [BE] "refresh yarn queue"
takes 20 minutes to fail,
instead it should fail after a fixed number of attempts within 2 minutes
(aonishuk)
AMBARI-20210. [BE] "refresh yarn queue" takes 20 minutes to fail, instead it should fail after a fixed number of attempts within 2 minutes (aonishuk)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/ac0fb82c
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/ac0fb82c
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/ac0fb82c
Branch: refs/heads/branch-2.5
Commit: ac0fb82c75aaaeb44c8e7790f4ade93dbe61cca5
Parents: 2c16e73
Author: Andrew Onishuk <ao...@hortonworks.com>
Authored: Mon Feb 27 16:49:13 2017 +0200
Committer: Andrew Onishuk <ao...@hortonworks.com>
Committed: Mon Feb 27 16:49:13 2017 +0200
----------------------------------------------------------------------
.../main/python/resource_management/core/signal_utils.py | 1 +
.../YARN/2.1.0.2.0/package/scripts/service.py | 10 +++++++++-
2 files changed, 10 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/ac0fb82c/ambari-common/src/main/python/resource_management/core/signal_utils.py
----------------------------------------------------------------------
diff --git a/ambari-common/src/main/python/resource_management/core/signal_utils.py b/ambari-common/src/main/python/resource_management/core/signal_utils.py
index 1f0dfe7..d166502 100644
--- a/ambari-common/src/main/python/resource_management/core/signal_utils.py
+++ b/ambari-common/src/main/python/resource_management/core/signal_utils.py
@@ -23,6 +23,7 @@ Ambari Agent
__all__ = ["TerminateStrategy", "terminate_process"]
import os
+import time
import signal
from resource_management.core.base import Fail
http://git-wip-us.apache.org/repos/asf/ambari/blob/ac0fb82c/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service.py b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service.py
index 03b66a5..6495209 100644
--- a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service.py
+++ b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service.py
@@ -24,6 +24,7 @@ from ambari_commons.os_family_impl import OsFamilyFuncImpl, OsFamilyImpl
from ambari_commons import OSConst
from resource_management.core.shell import as_user, as_sudo
from resource_management.libraries.functions.show_logs import show_logs
+from resource_management.core.signal_utils import TerminateStrategy
@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
def service(componentName, action='start', serviceName='yarn'):
@@ -102,4 +103,11 @@ def service(componentName, action='start', serviceName='yarn'):
elif action == 'refreshQueues':
rm_kinit_cmd = params.rm_kinit_cmd
refresh_cmd = format("{rm_kinit_cmd} export HADOOP_LIBEXEC_DIR={hadoop_libexec_dir} && {yarn_container_bin}/yarn rmadmin -refreshQueues")
- Execute(refresh_cmd, user=usr)
+
+ Execute(refresh_cmd,
+ user = usr,
+ timeout = 20, # when Yarn is not started command hangs forever and should be killed
+ tries = 5,
+ try_sleep = 5,
+ timeout_kill_strategy = TerminateStrategy.KILL_PROCESS_GROUP, # the process cannot be simply killed by 'kill -15', so kill the whole process group instead.
+ )