You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@aurora.apache.org by ke...@apache.org on 2014/10/08 20:57:51 UTC
git commit: Don't kill GC Executor after period of inactivity
Repository: incubator-aurora
Updated Branches:
refs/heads/master 94eeceabd -> f2f0ed861
Don't kill GC Executor after period of inactivity
The GC executor is configured to exit after 15 minutes of
inactivity. This leads to a race where the Mesos slave gets a
launchTask message for a GC executor just as the executor has exited,
causing TASK_LOST noise. This also increases the risk that a slave
will lose its GC executor due to the scheduler not being able to find
a slot for it (since GC executors will have a higher churn rate).
Cluster operators will still be able to deploy new versions of the
GC executor as the 24-hour max lifetime limit is still in place. This
patch only removes the inactivity limit.
Testing Done:
./pants src/test/python/apache/aurora/executor:gc_executor
Bugs closed: AURORA-788
Reviewed at https://reviews.apache.org/r/26300/
Project: http://git-wip-us.apache.org/repos/asf/incubator-aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-aurora/commit/f2f0ed86
Tree: http://git-wip-us.apache.org/repos/asf/incubator-aurora/tree/f2f0ed86
Diff: http://git-wip-us.apache.org/repos/asf/incubator-aurora/diff/f2f0ed86
Branch: refs/heads/master
Commit: f2f0ed8615def98e26fcfc3ce580a537204a2a9d
Parents: 94eecea
Author: Kevin Sweeney <ke...@apache.org>
Authored: Wed Oct 8 11:56:59 2014 -0700
Committer: Kevin Sweeney <ke...@apache.org>
Committed: Wed Oct 8 11:57:17 2014 -0700
----------------------------------------------------------------------
.../apache/aurora/executor/gc_executor.py | 5 ---
.../apache/aurora/executor/test_gc_executor.py | 33 +-------------------
2 files changed, 1 insertion(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/f2f0ed86/src/main/python/apache/aurora/executor/gc_executor.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/executor/gc_executor.py b/src/main/python/apache/aurora/executor/gc_executor.py
index 788671e..a11feb9 100644
--- a/src/main/python/apache/aurora/executor/gc_executor.py
+++ b/src/main/python/apache/aurora/executor/gc_executor.py
@@ -88,9 +88,6 @@ class ThermosGCExecutor(ExecutorBase, ExceptionalThread, Observable):
# wait time between checking for new GC events from the slave and/or cleaning orphaned tasks
POLL_WAIT = Amount(5, Time.MINUTES)
- # maximum amount of time the executor will wait with no tasks before it exits.
- MAXIMUM_EXECUTOR_WAIT = Amount(15, Time.MINUTES)
-
# maximum lifetime of this executor. this is to prevent older GC executor binaries from
# running forever
MAXIMUM_EXECUTOR_LIFETIME = Amount(1, Time.DAYS)
@@ -455,8 +452,6 @@ class ThermosGCExecutor(ExecutorBase, ExceptionalThread, Observable):
now = self._clock.time()
if now > run_start + self.MAXIMUM_EXECUTOR_LIFETIME.as_(Time.SECONDS):
return True
- if now > last_gc_run + self.MAXIMUM_EXECUTOR_WAIT.as_(Time.SECONDS):
- return True
return self._stop_event.is_set()
while not should_terminate():
http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/f2f0ed86/src/test/python/apache/aurora/executor/test_gc_executor.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/executor/test_gc_executor.py b/src/test/python/apache/aurora/executor/test_gc_executor.py
index 1905fe3..85afe50 100644
--- a/src/test/python/apache/aurora/executor/test_gc_executor.py
+++ b/src/test/python/apache/aurora/executor/test_gc_executor.py
@@ -500,11 +500,8 @@ def test_ignores_launch_task_when_shutting_down():
assert (mesos_pb2.TASK_FAILED, TASK_ID) == proxy_driver.updates[-1]
-def make_gc_executor_with_timeouts(
- maximum_executor_wait=Amount(15, Time.MINUTES),
- maximum_executor_lifetime=Amount(1, Time.DAYS)):
+def make_gc_executor_with_timeouts(maximum_executor_lifetime=Amount(1, Time.DAYS)):
class TimeoutGCExecutor(ThinTestThermosGCExecutor):
- MAXIMUM_EXECUTOR_WAIT = maximum_executor_wait
MAXIMUM_EXECUTOR_LIFETIME = maximum_executor_lifetime
return TimeoutGCExecutor
@@ -520,34 +517,6 @@ def run_gc_with_timeout(**kw):
yield (proxy_driver, executor)
-def test_gc_wait():
- # run w/ no tasks
- with run_gc_with_timeout(maximum_executor_wait=Amount(15, Time.SECONDS)) as (
- proxy_driver, executor):
- executor._clock.tick(10)
- proxy_driver.stopped.wait(timeout=0.1)
- assert not proxy_driver.stopped.is_set()
- executor._clock.tick(5.1)
- proxy_driver.stopped.wait(timeout=0.1)
- assert proxy_driver.stopped.is_set()
- assert not executor._stop_event.is_set()
-
- # ensure launchTask restarts executor wait
- with run_gc_with_timeout(maximum_executor_wait=Amount(15, Time.SECONDS)) as (
- proxy_driver, executor):
- executor._clock.tick(10)
- proxy_driver.stopped.wait(timeout=0.1)
- assert not proxy_driver.stopped.is_set()
- executor.launchTask(proxy_driver, serialize_art(AdjustRetainedTasks(retainedTasks={})))
- executor._clock.tick(5.1)
- proxy_driver.stopped.wait(timeout=0.1)
- assert not proxy_driver.stopped.is_set()
- executor._clock.tick(15.1)
- proxy_driver.stopped.wait(timeout=0.1)
- assert proxy_driver.stopped.is_set()
- assert not executor._stop_event.is_set()
-
-
def test_gc_lifetime():
with run_gc_with_timeout(maximum_executor_lifetime=Amount(500, Time.MILLISECONDS)) as (
proxy_driver, executor):