You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@aurora.apache.org by jc...@apache.org on 2016/09/30 20:22:11 UTC
aurora git commit: Add min_consecutive_health_checks in
HealthCheckConfig
Repository: aurora
Updated Branches:
refs/heads/master ca683cb9e -> ed72b1bf6
Add min_consecutive_health_checks in HealthCheckConfig
HealthCheckConfig should accept a new configuration value, min_consecutive_successes, that specifies
how many consecutive successful health checks an instance requires to move from STARTING to RUNNING.
Bugs closed: AURORA-1224
Reviewed at https://reviews.apache.org/r/52094/
Project: http://git-wip-us.apache.org/repos/asf/aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/ed72b1bf
Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/ed72b1bf
Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/ed72b1bf
Branch: refs/heads/master
Commit: ed72b1bf662d1e29d2bb483b317c787630c26a9e
Parents: ca683cb
Author: Kai Huang <te...@hotmail.com>
Authored: Fri Sep 30 14:56:28 2016 -0500
Committer: Joshua Cohen <jc...@apache.org>
Committed: Fri Sep 30 14:56:28 2016 -0500
----------------------------------------------------------------------
docs/reference/configuration.md | 3 +-
src/main/python/apache/aurora/client/config.py | 31 ++++++++++++++++----
.../python/apache/aurora/config/schema/base.py | 1 +
.../python/apache/aurora/client/test_config.py | 31 +++++++-------------
4 files changed, 39 insertions(+), 27 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/aurora/blob/ed72b1bf/docs/reference/configuration.md
----------------------------------------------------------------------
diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md
index f2a0b18..71d2ce5 100644
--- a/docs/reference/configuration.md
+++ b/docs/reference/configuration.md
@@ -379,9 +379,10 @@ Parameters for controlling a task's health checks via HTTP or a shell command.
| param | type | description
| ------- | :-------: | --------
| ```health_checker``` | HealthCheckerConfig | Configure what kind of health check to use.
-| ```initial_interval_secs``` | Integer | Initial delay for performing a health check. (Default: 15)
+| ```initial_interval_secs``` | Integer | Initial grace period for performing health checks. (Default: 15)
| ```interval_secs``` | Integer | Interval on which to check the task's health. (Default: 10)
| ```max_consecutive_failures``` | Integer | Maximum number of consecutive failures that will be tolerated before considering a task unhealthy (Default: 0)
+| ```min_consecutive_successes``` | Integer | Minimum number of consecutive successful health checks required before considering a task healthy (Default: 1)
| ```timeout_secs``` | Integer | Health check timeout. (Default: 1)
### HealthCheckerConfig Objects
http://git-wip-us.apache.org/repos/asf/aurora/blob/ed72b1bf/src/main/python/apache/aurora/client/config.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/config.py b/src/main/python/apache/aurora/client/config.py
index 0186af5..ce4bffe 100644
--- a/src/main/python/apache/aurora/client/config.py
+++ b/src/main/python/apache/aurora/client/config.py
@@ -92,15 +92,31 @@ health check interval (%d seconds) plus %d consecutive failures at a check inter
'''
+INITIAL_INTERVAL_SECS_INSUFFICIENT_ERROR_FORMAT = '''
+You have specified an insufficiently short initial interval period (%d seconds)
+in your health check configuration. Your health check will always fail. In order for
+the health check to pass, HealthCheckConfig.initial_interval_secs must be greater
+than the duration of %d consecutive successful health checks at a check interval
+of %d seconds. You can either increase initial_interval_secs, decrease interval_secs
+or decrease min_consecutive_successes.
+'''
+
+
+INVALID_MIN_CONSECUTIVE_SUCCESSES_ERROR = '''
+You have specified an invalid min_consecutive_successes value (%d) in your health check
+configuration. Your health check will always succeed. In order for the updater to detect
+health check failures, HealthCheckConfig.min_consecutive_successes must be a positive value.
+'''
+
+
def _validate_update_config(config):
job_size = config.instances()
update_config = config.update_config()
health_check_config = config.health_check_config()
max_failures = update_config.max_total_failures().get()
- watch_secs = update_config.watch_secs().get()
initial_interval_secs = health_check_config.initial_interval_secs().get()
- max_consecutive_failures = health_check_config.max_consecutive_failures().get()
+ min_consecutive_successes = health_check_config.min_consecutive_successes().get()
interval_secs = health_check_config.interval_secs().get()
if max_failures >= job_size:
@@ -111,10 +127,13 @@ def _validate_update_config(config):
if max_failures < min_failure_threshold:
die(UPDATE_CONFIG_DEDICATED_THRESHOLD_ERROR % (job_size, min_failure_threshold))
- target_watch = initial_interval_secs + (max_consecutive_failures * interval_secs)
- if watch_secs <= target_watch:
- die(WATCH_SECS_INSUFFICIENT_ERROR_FORMAT %
- (watch_secs, target_watch, initial_interval_secs, max_consecutive_failures, interval_secs))
+ if min_consecutive_successes <= 0:
+ die(INVALID_MIN_CONSECUTIVE_SUCCESSES_ERROR % min_consecutive_successes)
+
+ target_initial_interval_secs = interval_secs * min_consecutive_successes
+ if initial_interval_secs <= target_initial_interval_secs:
+ die(INITIAL_INTERVAL_SECS_INSUFFICIENT_ERROR_FORMAT %
+ (initial_interval_secs, min_consecutive_successes, interval_secs))
PRODUCTION_DEPRECATED_WARNING = (
http://git-wip-us.apache.org/repos/asf/aurora/blob/ed72b1bf/src/main/python/apache/aurora/config/schema/base.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/config/schema/base.py b/src/main/python/apache/aurora/config/schema/base.py
index 8451630..baea660 100644
--- a/src/main/python/apache/aurora/config/schema/base.py
+++ b/src/main/python/apache/aurora/config/schema/base.py
@@ -60,6 +60,7 @@ class HealthCheckConfig(Struct):
initial_interval_secs = Default(Float, 15.0)
interval_secs = Default(Float, 10.0)
max_consecutive_failures = Default(Integer, 0)
+ min_consecutive_successes = Default(Integer, 1)
timeout_secs = Default(Float, 1.0)
http://git-wip-us.apache.org/repos/asf/aurora/blob/ed72b1bf/src/test/python/apache/aurora/client/test_config.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/test_config.py b/src/test/python/apache/aurora/client/test_config.py
index 5cf68a5..ff46558 100644
--- a/src/test/python/apache/aurora/client/test_config.py
+++ b/src/test/python/apache/aurora/client/test_config.py
@@ -24,15 +24,7 @@ from apache.aurora.client.config import get_config as get_aurora_config
from apache.aurora.client.config import PRODUCTION_DEPRECATED_WARNING
from apache.aurora.config import AuroraConfig
from apache.aurora.config.loader import AuroraConfigLoader
-from apache.aurora.config.schema.base import (
- MB,
- Announcer,
- HealthCheckConfig,
- Job,
- Resources,
- Task,
- UpdateConfig
-)
+from apache.aurora.config.schema.base import MB, Announcer, HealthCheckConfig, Job, Resources, Task
from apache.thermos.config.schema_base import Process
MESOS_CONFIG_BASE = """
@@ -192,21 +184,21 @@ def test_update_config_passes_with_default_values():
config._validate_update_config(AuroraConfig(base_job))
-def test_update_config_passes_with_min_requirement_values():
+def test_health_check_config_fails_insufficient_initital_interval_secs_less_than_target():
base_job = Job(
name='hello_world', role='john_doe', cluster='test-cluster',
- update_config=UpdateConfig(watch_secs=26),
- health_check_config=HealthCheckConfig(max_consecutive_failures=1),
+ health_check_config=HealthCheckConfig(initial_interval_secs=5),
task=Task(name='main', processes=[],
resources=Resources(cpu=0.1, ram=64 * MB, disk=64 * MB)))
- config._validate_update_config(AuroraConfig(base_job))
+ with pytest.raises(SystemExit):
+ config._validate_update_config(AuroraConfig(base_job))
-def test_update_config_fails_insufficient_watch_secs_less_than_target():
+def test_health_check_config_fails_insufficient_initital_interval_secs_equal_to_target():
base_job = Job(
name='hello_world', role='john_doe', cluster='test-cluster',
- update_config=UpdateConfig(watch_secs=10),
+ health_check_config=HealthCheckConfig(initial_interval_secs=10),
task=Task(name='main', processes=[],
resources=Resources(cpu=0.1, ram=64 * MB, disk=64 * MB)))
@@ -214,16 +206,15 @@ def test_update_config_fails_insufficient_watch_secs_less_than_target():
config._validate_update_config(AuroraConfig(base_job))
-def test_update_config_fails_insufficient_watch_secs_equal_to_target():
+def test_health_check_config_passes_with_min_requirement_values():
base_job = Job(
name='hello_world', role='john_doe', cluster='test-cluster',
- update_config=UpdateConfig(watch_secs=25),
- health_check_config=HealthCheckConfig(max_consecutive_failures=1),
+ health_check_config=HealthCheckConfig(initial_interval_secs=21,
+ min_consecutive_successes=2),
task=Task(name='main', processes=[],
resources=Resources(cpu=0.1, ram=64 * MB, disk=64 * MB)))
- with pytest.raises(SystemExit):
- config._validate_update_config(AuroraConfig(base_job))
+ config._validate_update_config(AuroraConfig(base_job))
def test_validate_deprecated_config_adds_warning_for_production():