You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@aurora.apache.org by jc...@apache.org on 2016/09/30 20:22:11 UTC

aurora git commit: Add min_consecutive_health_checks in HealthCheckConfig

Repository: aurora
Updated Branches:
  refs/heads/master ca683cb9e -> ed72b1bf6


Add min_consecutive_health_checks in HealthCheckConfig

HealthCheckConfig now accepts a new configuration value, min_consecutive_successes, that specifies
how many consecutive successful health checks an instance requires to move from STARTING to RUNNING.

Bugs closed: AURORA-1224

Reviewed at https://reviews.apache.org/r/52094/


Project: http://git-wip-us.apache.org/repos/asf/aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/ed72b1bf
Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/ed72b1bf
Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/ed72b1bf

Branch: refs/heads/master
Commit: ed72b1bf662d1e29d2bb483b317c787630c26a9e
Parents: ca683cb
Author: Kai Huang <te...@hotmail.com>
Authored: Fri Sep 30 14:56:28 2016 -0500
Committer: Joshua Cohen <jc...@apache.org>
Committed: Fri Sep 30 14:56:28 2016 -0500

----------------------------------------------------------------------
 docs/reference/configuration.md                 |  3 +-
 src/main/python/apache/aurora/client/config.py  | 31 ++++++++++++++++----
 .../python/apache/aurora/config/schema/base.py  |  1 +
 .../python/apache/aurora/client/test_config.py  | 31 +++++++-------------
 4 files changed, 39 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/aurora/blob/ed72b1bf/docs/reference/configuration.md
----------------------------------------------------------------------
diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md
index f2a0b18..71d2ce5 100644
--- a/docs/reference/configuration.md
+++ b/docs/reference/configuration.md
@@ -379,9 +379,10 @@ Parameters for controlling a task's health checks via HTTP or a shell command.
 | param                          | type      | description
 | -------                        | :-------: | --------
 | ```health_checker```           | HealthCheckerConfig | Configure what kind of health check to use.
-| ```initial_interval_secs```    | Integer   | Initial delay for performing a health check. (Default: 15)
+| ```initial_interval_secs```    | Integer   | Initial grace period for performing health checks. (Default: 15)
 | ```interval_secs```            | Integer   | Interval on which to check the task's health. (Default: 10)
 | ```max_consecutive_failures``` | Integer   | Maximum number of consecutive failures that will be tolerated before considering a task unhealthy (Default: 0)
+| ```min_consecutive_successes``` | Integer   | Minimum number of consecutive successful health checks required before considering a task healthy (Default: 1)
 | ```timeout_secs```             | Integer   | Health check timeout. (Default: 1)
 
 ### HealthCheckerConfig Objects

http://git-wip-us.apache.org/repos/asf/aurora/blob/ed72b1bf/src/main/python/apache/aurora/client/config.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/config.py b/src/main/python/apache/aurora/client/config.py
index 0186af5..ce4bffe 100644
--- a/src/main/python/apache/aurora/client/config.py
+++ b/src/main/python/apache/aurora/client/config.py
@@ -92,15 +92,31 @@ health check interval (%d seconds) plus %d consecutive failures at a check inter
 '''
 
 
+INITIAL_INTERVAL_SECS_INSUFFICIENT_ERROR_FORMAT = '''
+You have specified an insufficiently short initial interval period (%d seconds)
+in your health check configuration. Your health check will always fail. In order for
+the health check to pass, HealthCheckConfig.initial_interval_secs must be greater
+than the duration of %d consecutive successful health checks at a check interval
+of %d seconds. You can either increase initial_interval_secs, decrease interval_secs
+or decrease min_consecutive_successes.
+'''
+
+
+INVALID_MIN_CONSECUTIVE_SUCCESSES_ERROR = '''
+You have specified an invalid min_consecutive_successes value (%d) in your health check
+configuration. Your health check will always succeed. In order for the updater to detect
+health check failures, HealthCheckConfig.min_consecutive_successes must be a positive value.
+'''
+
+
 def _validate_update_config(config):
   job_size = config.instances()
   update_config = config.update_config()
   health_check_config = config.health_check_config()
 
   max_failures = update_config.max_total_failures().get()
-  watch_secs = update_config.watch_secs().get()
   initial_interval_secs = health_check_config.initial_interval_secs().get()
-  max_consecutive_failures = health_check_config.max_consecutive_failures().get()
+  min_consecutive_successes = health_check_config.min_consecutive_successes().get()
   interval_secs = health_check_config.interval_secs().get()
 
   if max_failures >= job_size:
@@ -111,10 +127,13 @@ def _validate_update_config(config):
     if max_failures < min_failure_threshold:
       die(UPDATE_CONFIG_DEDICATED_THRESHOLD_ERROR % (job_size, min_failure_threshold))
 
-  target_watch = initial_interval_secs + (max_consecutive_failures * interval_secs)
-  if watch_secs <= target_watch:
-    die(WATCH_SECS_INSUFFICIENT_ERROR_FORMAT %
-        (watch_secs, target_watch, initial_interval_secs, max_consecutive_failures, interval_secs))
+  if min_consecutive_successes <= 0:
+    die(INVALID_MIN_CONSECUTIVE_SUCCESSES_ERROR % min_consecutive_successes)
+
+  target_initial_interval_secs = interval_secs * min_consecutive_successes
+  if initial_interval_secs <= target_initial_interval_secs:
+    die(INITIAL_INTERVAL_SECS_INSUFFICIENT_ERROR_FORMAT %
+        (initial_interval_secs, min_consecutive_successes, interval_secs))
 
 
 PRODUCTION_DEPRECATED_WARNING = (

http://git-wip-us.apache.org/repos/asf/aurora/blob/ed72b1bf/src/main/python/apache/aurora/config/schema/base.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/config/schema/base.py b/src/main/python/apache/aurora/config/schema/base.py
index 8451630..baea660 100644
--- a/src/main/python/apache/aurora/config/schema/base.py
+++ b/src/main/python/apache/aurora/config/schema/base.py
@@ -60,6 +60,7 @@ class HealthCheckConfig(Struct):
   initial_interval_secs    = Default(Float, 15.0)
   interval_secs            = Default(Float, 10.0)
   max_consecutive_failures = Default(Integer, 0)
+  min_consecutive_successes = Default(Integer, 1)
   timeout_secs             = Default(Float, 1.0)
 
 

http://git-wip-us.apache.org/repos/asf/aurora/blob/ed72b1bf/src/test/python/apache/aurora/client/test_config.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/test_config.py b/src/test/python/apache/aurora/client/test_config.py
index 5cf68a5..ff46558 100644
--- a/src/test/python/apache/aurora/client/test_config.py
+++ b/src/test/python/apache/aurora/client/test_config.py
@@ -24,15 +24,7 @@ from apache.aurora.client.config import get_config as get_aurora_config
 from apache.aurora.client.config import PRODUCTION_DEPRECATED_WARNING
 from apache.aurora.config import AuroraConfig
 from apache.aurora.config.loader import AuroraConfigLoader
-from apache.aurora.config.schema.base import (
-    MB,
-    Announcer,
-    HealthCheckConfig,
-    Job,
-    Resources,
-    Task,
-    UpdateConfig
-)
+from apache.aurora.config.schema.base import MB, Announcer, HealthCheckConfig, Job, Resources, Task
 from apache.thermos.config.schema_base import Process
 
 MESOS_CONFIG_BASE = """
@@ -192,21 +184,21 @@ def test_update_config_passes_with_default_values():
   config._validate_update_config(AuroraConfig(base_job))
 
 
-def test_update_config_passes_with_min_requirement_values():
+def test_health_check_config_fails_insufficient_initital_interval_secs_less_than_target():
   base_job = Job(
     name='hello_world', role='john_doe', cluster='test-cluster',
-    update_config=UpdateConfig(watch_secs=26),
-    health_check_config=HealthCheckConfig(max_consecutive_failures=1),
+    health_check_config=HealthCheckConfig(initial_interval_secs=5),
     task=Task(name='main', processes=[],
               resources=Resources(cpu=0.1, ram=64 * MB, disk=64 * MB)))
 
-  config._validate_update_config(AuroraConfig(base_job))
+  with pytest.raises(SystemExit):
+    config._validate_update_config(AuroraConfig(base_job))
 
 
-def test_update_config_fails_insufficient_watch_secs_less_than_target():
+def test_health_check_config_fails_insufficient_initital_interval_secs_equal_to_target():
   base_job = Job(
     name='hello_world', role='john_doe', cluster='test-cluster',
-    update_config=UpdateConfig(watch_secs=10),
+    health_check_config=HealthCheckConfig(initial_interval_secs=10),
     task=Task(name='main', processes=[],
               resources=Resources(cpu=0.1, ram=64 * MB, disk=64 * MB)))
 
@@ -214,16 +206,15 @@ def test_update_config_fails_insufficient_watch_secs_less_than_target():
     config._validate_update_config(AuroraConfig(base_job))
 
 
-def test_update_config_fails_insufficient_watch_secs_equal_to_target():
+def test_health_check_config_passes_with_min_requirement_values():
   base_job = Job(
     name='hello_world', role='john_doe', cluster='test-cluster',
-    update_config=UpdateConfig(watch_secs=25),
-    health_check_config=HealthCheckConfig(max_consecutive_failures=1),
+    health_check_config=HealthCheckConfig(initial_interval_secs=21,
+                                          min_consecutive_successes=2),
     task=Task(name='main', processes=[],
               resources=Resources(cpu=0.1, ram=64 * MB, disk=64 * MB)))
 
-  with pytest.raises(SystemExit):
-    config._validate_update_config(AuroraConfig(base_job))
+  config._validate_update_config(AuroraConfig(base_job))
 
 
 def test_validate_deprecated_config_adds_warning_for_production():