You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ariatosca.apache.org by mx...@apache.org on 2017/07/05 13:16:46 UTC
incubator-ariatosca git commit: ARIA-299 Resuming canceled execution with frozen task fails

Repository: incubator-ariatosca
Updated Branches:
  refs/heads/ARIA-299-Resuming-canceled-execution-with-frozen-task-fails [created] 6750554e5


ARIA-299 Resuming canceled execution with frozen task fails


Project: http://git-wip-us.apache.org/repos/asf/incubator-ariatosca/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ariatosca/commit/6750554e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ariatosca/tree/6750554e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ariatosca/diff/6750554e

Branch: refs/heads/ARIA-299-Resuming-canceled-execution-with-frozen-task-fails
Commit: 6750554e545521232b3fc6c7d239ee1e96e0c26e
Parents: 6c08424
Author: max-orlov <ma...@gigaspaces.com>
Authored: Wed Jul 5 16:16:39 2017 +0300
Committer: max-orlov <ma...@gigaspaces.com>
Committed: Wed Jul 5 16:16:39 2017 +0300

----------------------------------------------------------------------
 .../workflows/core/events_handler.py            |   4 +
 aria/orchestrator/workflows/executor/base.py    |   2 +-
 aria/orchestrator/workflows/executor/thread.py  |   2 +-
 tests/orchestrator/test_workflow_runner.py      | 146 ++++++++++++++++---
 4 files changed, 131 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-ariatosca/blob/6750554e/aria/orchestrator/workflows/core/events_handler.py
----------------------------------------------------------------------
diff --git a/aria/orchestrator/workflows/core/events_handler.py b/aria/orchestrator/workflows/core/events_handler.py
index 769c1a8..eb6f271 100644
--- a/aria/orchestrator/workflows/core/events_handler.py
+++ b/aria/orchestrator/workflows/core/events_handler.py
@@ -114,6 +114,10 @@ def _workflow_cancelled(workflow_context, *args, **kwargs):
         elif execution.status in (execution.SUCCEEDED, execution.FAILED):
             _log_tried_to_cancel_execution_but_it_already_ended(workflow_context, execution.status)
         else:
+            # Any task that has not ended yet is put back to the pending state
+            for task in execution.tasks:
+                if not task.has_ended():
+                    task.status = task.PENDING
             execution.status = execution.CANCELLED
             execution.ended_at = datetime.utcnow()
 

http://git-wip-us.apache.org/repos/asf/incubator-ariatosca/blob/6750554e/aria/orchestrator/workflows/executor/base.py
----------------------------------------------------------------------
diff --git a/aria/orchestrator/workflows/executor/base.py b/aria/orchestrator/workflows/executor/base.py
index ec1a0c7..e7d03ea 100644
--- a/aria/orchestrator/workflows/executor/base.py
+++ b/aria/orchestrator/workflows/executor/base.py
@@ -49,7 +49,7 @@ class BaseExecutor(logger.LoggerMixin):
         """
         pass
 
-    def terminate(self, ctx):
+    def terminate(self, task_id):
         """
         Terminate the executing task
         :return:

http://git-wip-us.apache.org/repos/asf/incubator-ariatosca/blob/6750554e/aria/orchestrator/workflows/executor/thread.py
----------------------------------------------------------------------
diff --git a/aria/orchestrator/workflows/executor/thread.py b/aria/orchestrator/workflows/executor/thread.py
index d9dcdf8..6feef0c 100644
--- a/aria/orchestrator/workflows/executor/thread.py
+++ b/aria/orchestrator/workflows/executor/thread.py
@@ -54,7 +54,7 @@ class ThreadExecutor(BaseExecutor):
     def close(self):
         self._stopped = True
         for thread in self._pool:
-            thread.join()
+            thread.join(5)
 
     def _processor(self):
         while not self._stopped:

http://git-wip-us.apache.org/repos/asf/incubator-ariatosca/blob/6750554e/tests/orchestrator/test_workflow_runner.py
----------------------------------------------------------------------
diff --git a/tests/orchestrator/test_workflow_runner.py b/tests/orchestrator/test_workflow_runner.py
index e640c7d..6e3e693 100644
--- a/tests/orchestrator/test_workflow_runner.py
+++ b/tests/orchestrator/test_workflow_runner.py
@@ -23,7 +23,7 @@ import pytest
 from aria.modeling import exceptions as modeling_exceptions
 from aria.modeling import models
 from aria.orchestrator import exceptions
-from aria.orchestrator.events import on_cancelled_workflow_signal
+from aria.orchestrator import events
 from aria.orchestrator.workflow_runner import WorkflowRunner
 from aria.orchestrator.workflows.executor.process import ProcessExecutor
 from aria.orchestrator.workflows import api
@@ -46,9 +46,10 @@ from ..fixtures import (  # pylint: disable=unused-import
     resource_storage as resource
 )
 
-events = {
+custom_events = {
     'is_resumed': Event(),
     'is_active': Event(),
+    'execution_cancelled': Event(),
     'execution_ended': Event()
 }
 
@@ -318,43 +319,54 @@ def _create_workflow_runner(request, workflow_name, inputs=None, executor=None,
 
 class TestResumableWorkflows(object):
 
-    def test_resume_workflow(self, workflow_context, executor):
-        node = workflow_context.model.node.get_by_name(tests_mock.models.DEPENDENCY_NODE_NAME)
-        node.attributes['invocations'] = models.Attribute.wrap('invocations', 0)
-        self._create_interface(workflow_context, node, mock_resuming_task)
+    def _create_initial_workflow_runner(
+            self, workflow_context, workflow, executor, inputs=None):
 
         service = workflow_context.service
         service.workflows['custom_workflow'] = tests_mock.models.create_operation(
             'custom_workflow',
-            operation_kwargs={'function': '{0}.{1}'.format(__name__, mock_workflow.__name__)}
+            operation_kwargs={
+                'function': '{0}.{1}'.format(__name__, workflow.__name__),
+                'inputs': dict((k, models.Input.wrap(k, v)) for k, v in (inputs or {}).items())
+            }
         )
         workflow_context.model.service.update(service)
 
         wf_runner = WorkflowRunner(
             service_id=workflow_context.service.id,
-            inputs={},
+            inputs=inputs or {},
             model_storage=workflow_context.model,
             resource_storage=workflow_context.resource,
             plugin_manager=None,
             workflow_name='custom_workflow',
             executor=executor)
+        return wf_runner
+
+    def test_resume_workflow(self, workflow_context, thread_executor):
+        node = workflow_context.model.node.get_by_name(tests_mock.models.DEPENDENCY_NODE_NAME)
+        node.attributes['invocations'] = models.Attribute.wrap('invocations', 0)
+        self._create_interface(workflow_context, node, mock_resuming_task)
+
+        wf_runner = self._create_initial_workflow_runner(
+            workflow_context, mock_parallel_workflow, thread_executor)
+
         wf_thread = Thread(target=wf_runner.execute)
         wf_thread.daemon = True
         wf_thread.start()
 
         # Wait for the execution to start
-        if events['is_active'].wait(5) is False:
+        if custom_events['is_active'].wait(5) is False:
             raise TimeoutError("is_active wasn't set to True")
         wf_runner.cancel()
 
-        if events['execution_ended'].wait(60) is False:
+        if custom_events['execution_cancelled'].wait(60) is False:
             raise TimeoutError("Execution did not end")
 
         tasks = workflow_context.model.task.list(filters={'_stub_type': None})
         assert any(task.status == task.SUCCESS for task in tasks)
-        assert any(task.status in (task.FAILED, task.RETRYING) for task in tasks)
-        events['is_resumed'].set()
-        assert any(task.status in (task.FAILED, task.RETRYING) for task in tasks)
+        assert any(task.status == task.PENDING for task in tasks)
+        custom_events['is_resumed'].set()
+        assert any(task.status == task.PENDING for task in tasks)
 
         # Create a new workflow runner, with an existing execution id. This would cause
         # the old execution to restart.
@@ -365,7 +377,7 @@ class TestResumableWorkflows(object):
             resource_storage=workflow_context.resource,
             plugin_manager=None,
             execution_id=wf_runner.execution.id,
-            executor=executor)
+            executor=thread_executor)
 
         new_wf_runner.execute()
 
@@ -374,9 +386,58 @@ class TestResumableWorkflows(object):
         assert node.attributes['invocations'].value == 3
         assert wf_runner.execution.status == wf_runner.execution.SUCCEEDED
 
+    def test_resume_failed_task(self, workflow_context, thread_executor):
+
+        node = workflow_context.model.node.get_by_name(tests_mock.models.DEPENDENCY_NODE_NAME)
+        node.attributes['invocations'] = models.Attribute.wrap('invocations', 0)
+        self._create_interface(workflow_context, node, mock_failed_first_task)
+
+        wf_runner = self._create_initial_workflow_runner(
+            workflow_context, mock_sequential_workflow, thread_executor)
+        wf_thread = Thread(target=wf_runner.execute)
+        wf_thread.setDaemon(True)
+        wf_thread.start()
+
+        if custom_events['is_active'].wait(60) is False:
+            raise TimeoutError("Execution did not end")
+        wf_runner.cancel()
+        if custom_events['execution_cancelled'].wait(60) is False:
+            raise TimeoutError("Execution did not end")
+
+        task = workflow_context.model.task.list(filters={'_stub_type': None})[0]
+        assert node.attributes['invocations'].value == 2
+        assert task.status == task.PENDING
+        assert wf_runner.execution.status in (wf_runner.execution.CANCELLED,
+                                              wf_runner.execution.CANCELLING)
+
+        custom_events['is_resumed'].set()
+        assert node.attributes['invocations'].value == 2
+
+        # Create a new workflow runner with the existing execution id. This causes
+        # the old (cancelled) execution to be resumed.
+        new_thread_executor = thread.ThreadExecutor()
+        try:
+            new_wf_runner = WorkflowRunner(
+                service_id=wf_runner.service.id,
+                inputs={},
+                model_storage=workflow_context.model,
+                resource_storage=workflow_context.resource,
+                plugin_manager=None,
+                execution_id=wf_runner.execution.id,
+                executor=new_thread_executor)
+
+            new_wf_runner.execute()
+        finally:
+            new_thread_executor.close()
+
+        # Wait for it to finish and assert changes.
+        assert node.attributes['invocations'].value == task.max_attempts - 1
+        assert task.status == task.SUCCESS
+        assert wf_runner.execution.status == wf_runner.execution.SUCCEEDED
+
     @staticmethod
     @pytest.fixture
-    def executor():
+    def thread_executor():
         result = thread.ThreadExecutor()
         try:
             yield result
@@ -417,16 +478,23 @@ class TestResumableWorkflows(object):
 
     @pytest.fixture(autouse=True)
     def register_to_events(self):
+        def execution_cancelled(*args, **kwargs):
+            custom_events['execution_cancelled'].set()
+
         def execution_ended(*args, **kwargs):
-            events['execution_ended'].set()
+            custom_events['execution_ended'].set()
 
-        on_cancelled_workflow_signal.connect(execution_ended)
+        events.on_cancelled_workflow_signal.connect(execution_cancelled)
+        events.on_failure_workflow_signal.connect(execution_ended)
         yield
-        on_cancelled_workflow_signal.disconnect(execution_ended)
+        events.on_cancelled_workflow_signal.disconnect(execution_cancelled)
+        events.on_failure_workflow_signal.disconnect(execution_ended)
+        for event in custom_events.values():
+            event.clear()
 
 
 @workflow
-def mock_workflow(ctx, graph):
+def mock_parallel_workflow(ctx, graph):
     node = ctx.model.node.get_by_name(tests_mock.models.DEPENDENCY_NODE_NAME)
     graph.add_tasks(
         api.task.OperationTask(
@@ -441,8 +509,44 @@ def mock_resuming_task(ctx):
     ctx.node.attributes['invocations'] += 1
 
     if ctx.node.attributes['invocations'] != 1:
-        events['is_active'].set()
-        if not events['is_resumed'].isSet():
+        custom_events['is_active'].set()
+        if not custom_events['is_resumed'].isSet():
             # if resume was called, increase by one. o/w fail the execution - second task should
             # fail as long it was not a part of resuming the workflow
             raise BaseException("wasn't resumed yet")
+
+
+@workflow
+def mock_sequential_workflow(ctx, graph):
+    node = ctx.model.node.get_by_name(tests_mock.models.DEPENDENCY_NODE_NAME)
+    graph.sequence(
+        api.task.OperationTask(node,
+                               interface_name='aria.interfaces.lifecycle',
+                               operation_name='create',
+                               retry_interval=1,
+                               max_attempts=10),
+    )
+
+
+@operation
+def mock_failed_first_task(ctx):
+    """
+    Hangs on run 2 (awaiting cancellation), succeeds on run ``max_attempts - 1``, fails otherwise.
+    :param ctx: the operation context
+    :return: None
+    """
+    ctx.node.attributes['invocations'] += 1
+
+    if ctx.node.attributes['invocations'] == 2:
+        custom_events['is_active'].set()
+        # block the thread forever to simulate a frozen task
+        while True:
+            pass
+
+    elif ctx.node.attributes['invocations'] == ctx.task.max_attempts - 1:
+        # pass only just before the end.
+        return
+    else:
+        # fail otherwise.
+        raise BaseException("stop this task")
+