You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@aurora.apache.org by sa...@apache.org on 2018/07/18 22:23:56 UTC

aurora git commit: Unhandled exception should not strand runner in STARTING state.

Repository: aurora
Updated Branches:
  refs/heads/master efe865651 -> f054e9b10


Unhandled exception should not strand runner in STARTING state.

If the ThermosTaskRunner encounters an Exception when trying to
fork the process, it bubbles this up to the Executor, which does
not handle exceptions other than TaskError. This leads to the
executor leaving the task in STARTING state, so tasks end up
stranded in this state.

Fix it so that any unknown exception that is thrown when starting
a runner causes the task to fail and be marked as FAILED.

Testing Done:
./gradlew test
./pants test src/test/python/apache::

Reviewed at https://reviews.apache.org/r/67967/


Project: http://git-wip-us.apache.org/repos/asf/aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/f054e9b1
Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/f054e9b1
Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/f054e9b1

Branch: refs/heads/master
Commit: f054e9b1095a7ecacbbc2fa72ce0a842a3297859
Parents: efe8656
Author: Santhosh Kumar Shanmugham <sa...@gmail.com>
Authored: Wed Jul 18 15:23:27 2018 -0700
Committer: Santhosh Kumar <ss...@twitter.com>
Committed: Wed Jul 18 15:23:27 2018 -0700

----------------------------------------------------------------------
 .../apache/aurora/executor/aurora_executor.py    |  3 +++
 .../aurora/executor/test_thermos_executor.py     | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/aurora/blob/f054e9b1/src/main/python/apache/aurora/executor/aurora_executor.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/executor/aurora_executor.py b/src/main/python/apache/aurora/executor/aurora_executor.py
index 8a9958f..94f58a1 100644
--- a/src/main/python/apache/aurora/executor/aurora_executor.py
+++ b/src/main/python/apache/aurora/executor/aurora_executor.py
@@ -155,6 +155,9 @@ class AuroraExecutor(ExecutorBase, Observable):
     except Timeout:
       self._die(driver, mesos_pb2.TASK_LOST, 'Timed out waiting for task to start!')
       return False
+    except Exception as e:
+      self._die(driver, mesos_pb2.TASK_FAILED, 'Unknown exception starting runner: %s' % e)
+      return False
 
     self.runner_started.set()
     log.debug('Task started.')

http://git-wip-us.apache.org/repos/asf/aurora/blob/f054e9b1/src/test/python/apache/aurora/executor/test_thermos_executor.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/executor/test_thermos_executor.py b/src/test/python/apache/aurora/executor/test_thermos_executor.py
index f6ae1be..09f286c 100644
--- a/src/test/python/apache/aurora/executor/test_thermos_executor.py
+++ b/src/test/python/apache/aurora/executor/test_thermos_executor.py
@@ -83,6 +83,11 @@ class FailingStartingTaskRunner(ThermosTaskRunner):
     raise TaskError('I am an idiot!')
 
 
+class ErroringStartingTaskRunner(ThermosTaskRunner):
+  def start(self):
+    raise Exception('I am an idiot!')
+
+
 class FailingSandbox(DirectorySandbox):
   def __init__(self, root, exception_type, **kwargs):
     self._exception_type = exception_type
@@ -513,6 +518,20 @@ class TestThermosExecutor(object):
       updates = proxy_driver.method_calls['sendStatusUpdate']
       assert updates[-1][0][0].state == mesos_pb2.TASK_FAILED
 
+  def test_unknown_exception_runner_start(self):
+    proxy_driver = ProxyDriver()
+
+    with temporary_dir() as td:
+      runner_provider = make_provider(td, ErroringStartingTaskRunner)
+      te = FastThermosExecutor(
+        runner_provider=runner_provider,
+        sandbox_provider=DefaultTestSandboxProvider())
+      te.launchTask(proxy_driver, make_task(HELLO_WORLD_MTI))
+      proxy_driver.wait_stopped()
+
+      updates = proxy_driver.method_calls['sendStatusUpdate']
+      assert updates[-1][0][0].state == mesos_pb2.TASK_FAILED
+
   def test_failing_runner_initialize(self):
     proxy_driver = ProxyDriver()