You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by ao...@apache.org on 2018/06/14 16:08:59 UTC

[ambari] branch trunk updated (2d925a4 -> 484cd10)

This is an automated email from the ASF dual-hosted git repository.

aonishuk pushed a change to branch trunk
in repository https://gitbox.apache.org/repos/asf/ambari.git.


    from 2d925a4  AMBARI-23945. Embed backup/migration/restore collection steps to README.md
     new 451cdac  AMBARI-24097. Canceling task during blueprint install results in agent not responding to any other tasks (aonishuk)
     new 484cd10  AMBARI-24097. Canceling task during blueprint install results in agent not responding to any other tasks (aonishuk)

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../src/main/python/ambari_agent/ActionQueue.py         | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

-- 
To stop receiving notification emails like this one, please contact
aonishuk@apache.org.

[ambari] 02/02: AMBARI-24097. Canceling task during blueprint install results in agent not responding to any other tasks (aonishuk)

Posted by ao...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aonishuk pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/ambari.git

commit 484cd10cf4066926cd8715821d9f5eb29e1c273b
Author: Andrew Onishuk <ao...@hortonworks.com>
AuthorDate: Thu Jun 14 12:37:12 2018 +0300

    AMBARI-24097. Canceling task during blueprint install results in agent not responding to any other tasks (aonishuk)
---
 ambari-agent/src/main/python/ambari_agent/ActionQueue.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
index 6ee3ec0..c30e404 100644
--- a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
+++ b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
@@ -329,12 +329,15 @@ class ActionQueue(threading.Thread):
 
         command['agentLevelParams']['commandBeingRetried'] = "true"
         self.cancelEvent.wait(delay) # wake up if something was canceled
+
         continue
       else:
         logger.info("Quit retrying for command with taskId = {cid}. Status: {status}, retryAble: {retryAble}, retryDuration (sec): {retryDuration}, last delay (sec): {delay}"
                     .format(cid=taskId, status=status, retryAble=retryAble, retryDuration=retryDuration, delay=delay))
         break
 
+    self.cancelEvent.clear()
+
     # do not fail task which was rescheduled from server
     if command_canceled:
       with self.lock:

-- 
To stop receiving notification emails like this one, please contact
aonishuk@apache.org.

[ambari] 01/02: AMBARI-24097. Canceling task during blueprint install results in agent not responding to any other tasks (aonishuk)

Posted by ao...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aonishuk pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/ambari.git

commit 451cdacb6bd6549de297ef4c995111a6e01832f6
Author: Andrew Onishuk <ao...@hortonworks.com>
AuthorDate: Thu Jun 14 12:33:52 2018 +0300

    AMBARI-24097. Canceling task during blueprint install results in agent not responding to any other tasks (aonishuk)
---
 ambari-agent/src/main/python/ambari_agent/ActionQueue.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
index f0c996b..6ee3ec0 100644
--- a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
+++ b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
@@ -83,6 +83,8 @@ class ActionQueue(threading.Thread):
     self.tmpdir = self.config.get('agent', 'prefix')
     self.customServiceOrchestrator = initializer_module.customServiceOrchestrator
     self.parallel_execution = self.config.get_parallel_exec_option()
+    self.taskIdsToCancel = set()
+    self.cancelEvent = threading.Event()
     self.component_status_executor = initializer_module.component_status_executor
     if self.parallel_execution == 1:
       logger.info("Parallel execution is enabled, will execute agent commands in parallel")
@@ -133,6 +135,8 @@ class ActionQueue(threading.Thread):
 
       # Kill if in progress
       self.customServiceOrchestrator.cancel_command(task_id, reason)
+      self.taskIdsToCancel.add(task_id)
+      self.cancelEvent.set()
 
   def run(self):
     while not self.stop_event.is_set():
@@ -275,6 +279,13 @@ class ActionQueue(threading.Thread):
                  format(taskId=taskId, retryAble=retryAble, retryDuration=retryDuration, log_command_output=log_command_output))
     command_canceled = False
     while retryDuration >= 0:
+      if taskId in self.taskIdsToCancel:
+        logger.info('Command with taskId = {0} canceled'.format(taskId))
+        command_canceled = True
+
+        self.taskIdsToCancel.discard(taskId)
+        break
+
       numAttempts += 1
       start = 0
       if retryAble:
@@ -303,6 +314,7 @@ class ActionQueue(threading.Thread):
           if (commandresult['exitcode'] == -signal.SIGTERM) or (commandresult['exitcode'] == -signal.SIGKILL):
             logger.info('Command with taskId = {cid} was canceled!'.format(cid=taskId))
             command_canceled = True
+            self.taskIdsToCancel.discard(taskId)
             break
 
       if status != self.COMPLETED_STATUS and retryAble and retryDuration > 0:
@@ -316,7 +328,7 @@ class ActionQueue(threading.Thread):
           command['agentLevelParams'] = {}
 
         command['agentLevelParams']['commandBeingRetried'] = "true"
-        time.sleep(delay)
+        self.cancelEvent.wait(delay) # wake up if something was canceled
         continue
       else:
         logger.info("Quit retrying for command with taskId = {cid}. Status: {status}, retryAble: {retryAble}, retryDuration (sec): {retryDuration}, last delay (sec): {delay}"

-- 
To stop receiving notification emails like this one, please contact
aonishuk@apache.org.