You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@aurora.apache.org by wi...@apache.org on 2015/04/04 01:12:36 UTC
aurora git commit: Make health check configurable
Repository: aurora
Updated Branches:
refs/heads/master 05d75e5dc -> 27a602d2c
Make health check configurable
This allows the endpoint, expected response
and expected response code to be configured by the user.
Testing Done:
Unittests added and manurally verified in vagrant enviroment.
Bugs closed: AURORA-316
Reviewed at https://reviews.apache.org/r/32295/
Project: http://git-wip-us.apache.org/repos/asf/aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/27a602d2
Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/27a602d2
Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/27a602d2
Branch: refs/heads/master
Commit: 27a602d2c9efdd1cd2591c9c754f086c04ad0eb9
Parents: 05d75e5
Author: Brian Brazil <br...@boxever.com>
Authored: Fri Apr 3 15:33:05 2015 -0700
Committer: Brian Wickman <wi...@apache.org>
Committed: Fri Apr 3 15:33:05 2015 -0700
----------------------------------------------------------------------
docs/configuration-reference.md | 5 +-
.../apache/aurora/common/http_signaler.py | 29 ++++++----
.../python/apache/aurora/config/schema/base.py | 3 +
.../aurora/executor/common/health_checker.py | 5 +-
.../apache/aurora/common/test_http_signaler.py | 59 ++++++++++++++++----
.../executor/common/test_health_checker.py | 22 ++++----
6 files changed, 86 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/aurora/blob/27a602d2/docs/configuration-reference.md
----------------------------------------------------------------------
diff --git a/docs/configuration-reference.md b/docs/configuration-reference.md
index af332f2..fb753ea 100644
--- a/docs/configuration-reference.md
+++ b/docs/configuration-reference.md
@@ -359,8 +359,11 @@ Parameters for controlling a task's health checks via HTTP.
| ------- | :-------: | --------
| ```initial_interval_secs``` | Integer | Initial delay for performing an HTTP health check. (Default: 15)
| ```interval_secs``` | Integer | Interval on which to check the task's health via HTTP. (Default: 10)
-| ```timeout_secs``` | Integer | HTTP request timeout. (Default: 1)
| ```max_consecutive_failures``` | Integer | Maximum number of consecutive failures that tolerated before considering a task unhealthy (Default: 0)
+| ```timeout_secs``` | Integer | HTTP request timeout. (Default: 1)
+| ```endpoint``` | String | HTTP endpoint to check (Default: /health)
+| ```expected_response``` | String | If not empty, fail the health check if the response differs. Case insensitive. (Default: ok)
+| ```expected_response_code``` | Integer | If not zero, fail the health check if the response code differs. (Default: 0)
### Announcer Objects
http://git-wip-us.apache.org/repos/asf/aurora/blob/27a602d2/src/main/python/apache/aurora/common/http_signaler.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/common/http_signaler.py b/src/main/python/apache/aurora/common/http_signaler.py
index e3e819d..531f1fe 100644
--- a/src/main/python/apache/aurora/common/http_signaler.py
+++ b/src/main/python/apache/aurora/common/http_signaler.py
@@ -39,7 +39,7 @@ class HttpSignaler(object):
def __init__(self, port, host='localhost', timeout_secs=None):
self._host = host
- self._url_base = 'http://%s:%d/' % (host, port)
+ self._url_base = 'http://%s:%d' % (host, port)
if timeout_secs is None:
env_timeout = os.getenv('AURORA_HTTP_SIGNALER_TIMEOUT_SECS')
if env_timeout is not None:
@@ -70,37 +70,42 @@ class HttpSignaler(object):
try:
with contextlib.closing(
self.opener(url, data, timeout=self._timeout_secs)) as fp:
- return fp.read()
+ return (fp.read(), fp.getcode())
except (HTTPException, SocketTimeout) as e:
# the type of an HTTPException is typically more useful than its contents (since for example
# BadStatusLines are often empty). likewise with socket.timeout.
raise_error('Error within %s' % e.__class__.__name__)
- except (URLError, HTTPError) as e:
+ except HTTPError as e:
+ return ('', e.code)
+ except URLError as e:
raise_error(e)
except Exception as e:
raise_error('Unexpected error: %s' % e)
- def __call__(self, endpoint, use_post_method=False, expected_response=None):
+ def __call__(self, endpoint, use_post_method=False, expected_response=None,
+ expected_response_code=None):
"""Returns a (boolean, string|None) tuple of (call success, failure reason)"""
try:
- response = self.query(endpoint, '' if use_post_method else None).strip().lower()
- if expected_response is not None and response != expected_response:
+ response, response_code = self.query(endpoint, '' if use_post_method else None)
+ response = response.strip().lower()
+ if expected_response and response != expected_response.lower():
+ reason = 'Response differs from expected response (expected "%s", got "%s")'
def shorten(string):
return (string if len(string) < self.FAILURE_REASON_LENGTH
else "%s..." % string[:self.FAILURE_REASON_LENGTH - 3])
- reason = 'Response differs from expected response (expected "%s", got "%s")'
log.warning(reason % (expected_response, response))
return (False, reason % (shorten(str(expected_response)), shorten(str(response))))
+ elif expected_response_code and response_code != expected_response_code:
+ reason = 'Response code differs from expected response (expected %i, got %i)'
+ log.warning(reason % (expected_response_code, response_code))
+ return (False, reason % (expected_response_code, response_code))
else:
return (True, None)
except self.QueryError as e:
return (False, str(e))
- def health(self):
- return self('health', use_post_method=False, expected_response='ok')
-
def quitquitquit(self):
- return self('quitquitquit', use_post_method=True)
+ return self('/quitquitquit', use_post_method=True)
def abortabortabort(self):
- return self('abortabortabort', use_post_method=True)
+ return self('/abortabortabort', use_post_method=True)
http://git-wip-us.apache.org/repos/asf/aurora/blob/27a602d2/src/main/python/apache/aurora/config/schema/base.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/config/schema/base.py b/src/main/python/apache/aurora/config/schema/base.py
index a87524a..ec9f983 100644
--- a/src/main/python/apache/aurora/config/schema/base.py
+++ b/src/main/python/apache/aurora/config/schema/base.py
@@ -40,6 +40,9 @@ class HealthCheckConfig(Struct):
interval_secs = Default(Float, 10.0)
timeout_secs = Default(Float, 1.0)
max_consecutive_failures = Default(Integer, 0)
+ endpoint = Default(String, '/health')
+ expected_response = Default(String, 'ok')
+ expected_response_code = Default(Integer, 0)
class Announcer(Struct):
http://git-wip-us.apache.org/repos/asf/aurora/blob/27a602d2/src/main/python/apache/aurora/executor/common/health_checker.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/executor/common/health_checker.py b/src/main/python/apache/aurora/executor/common/health_checker.py
index cfc29c3..03fdf0a 100644
--- a/src/main/python/apache/aurora/executor/common/health_checker.py
+++ b/src/main/python/apache/aurora/executor/common/health_checker.py
@@ -211,7 +211,10 @@ class HealthCheckerProvider(StatusCheckerProvider):
portmap['health'],
timeout_secs=health_check_config.get('timeout_secs'))
health_checker = HealthChecker(
- http_signaler.health,
+ lambda: http_signaler(
+ endpoint=health_check_config.get('endpoint'),
+ expected_response=health_check_config.get('expected_response'),
+ expected_response_code=health_check_config.get('expected_response_code')),
sandbox,
interval_secs=health_check_config.get('interval_secs'),
initial_interval_secs=health_check_config.get('initial_interval_secs'),
http://git-wip-us.apache.org/repos/asf/aurora/blob/27a602d2/src/test/python/apache/aurora/common/test_http_signaler.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/common/test_http_signaler.py b/src/test/python/apache/aurora/common/test_http_signaler.py
index f5f8419..c6a2170 100644
--- a/src/test/python/apache/aurora/common/test_http_signaler.py
+++ b/src/test/python/apache/aurora/common/test_http_signaler.py
@@ -25,7 +25,20 @@ if Compatibility.PY3:
else:
import urllib2 as urllib_request
-StringIO = Compatibility.StringIO
+
+class OpenedURL(object):
+ def __init__(self, content, code=200):
+ self.content = content
+ self.code = code
+
+ def read(self):
+ return self.content
+
+ def close(self):
+ pass
+
+ def getcode(self):
+ return self.code
class TestHttpSignaler(unittest.TestCase):
@@ -41,29 +54,51 @@ class TestHttpSignaler(unittest.TestCase):
def test_all_calls_ok(self):
self._mox.StubOutWithMock(urllib_request, 'urlopen')
urllib_request.urlopen(
- 'http://localhost:%s/health' % self.PORT, None, timeout=1.0).AndReturn(StringIO('ok'))
- urllib_request.urlopen(
- 'http://localhost:%s/quitquitquit' % self.PORT, '', timeout=1.0).AndReturn(StringIO(''))
+ 'http://localhost:%s/quitquitquit' % self.PORT, '', timeout=1.0).AndReturn(OpenedURL(''))
urllib_request.urlopen(
- 'http://localhost:%s/abortabortabort' % self.PORT, '', timeout=1.0).AndReturn(StringIO(''))
+ 'http://localhost:%s/abortabortabort' % self.PORT, '', timeout=1.0).AndReturn(OpenedURL(''))
self._mox.ReplayAll()
signaler = HttpSignaler(self.PORT)
- assert signaler.health() == (True, None)
assert signaler.quitquitquit() == (True, None)
assert signaler.abortabortabort() == (True, None)
- def test_health_not_ok(self):
+ def test_health_checks(self):
self._mox.StubOutWithMock(urllib_request, 'urlopen')
urllib_request.urlopen(
- 'http://localhost:%s/health' % self.PORT, None, timeout=1.0).AndReturn(StringIO('not ok'))
+ 'http://localhost:%s/health' % self.PORT, None, timeout=1.0).AndReturn(OpenedURL('ok'))
+ urllib_request.urlopen(
+ 'http://localhost:%s/health' % self.PORT, None, timeout=1.0).AndReturn(OpenedURL('not ok'))
+ urllib_request.urlopen(
+ 'http://localhost:%s/health' % self.PORT, None, timeout=1.0).AndReturn(
+ OpenedURL('not ok', code=200))
+ urllib_request.urlopen(
+ 'http://localhost:%s/health' % self.PORT, None, timeout=1.0).AndReturn(
+ OpenedURL('ok', code=400))
+ urllib_request.urlopen(
+ 'http://localhost:%s/health' % self.PORT, None, timeout=1.0).AndRaise(
+ urllib_request.HTTPError('', 501, '', None, None))
+ urllib_request.urlopen(
+ 'http://localhost:%s/health' % self.PORT, None, timeout=1.0).AndReturn(
+ OpenedURL('ok', code=200))
+ urllib_request.urlopen(
+ 'http://localhost:%s/random/endpoint' % self.PORT, None, timeout=1.0).AndReturn(
+ OpenedURL('ok'))
self._mox.ReplayAll()
- health, reason = HttpSignaler(self.PORT).health()
- assert not health
- assert reason.startswith('Response differs from expected response')
+ signaler = HttpSignaler(self.PORT)
+ assert signaler('/health', expected_response='ok') == (True, None)
+ assert signaler('/health', expected_response='ok') == (
+ False, 'Response differs from expected response (expected "ok", got "not ok")')
+ assert signaler('/health', expected_response_code=200) == (True, None)
+ assert signaler('/health', expected_response_code=200) == (
+ False, 'Response code differs from expected response (expected 200, got 400)')
+ assert signaler('/health', expected_response_code=200) == (
+ False, 'Response code differs from expected response (expected 200, got 501)')
+ assert signaler('/health', expected_response='ok', expected_response_code=200) == (True, None)
+ assert signaler('/random/endpoint', expected_response='ok') == (True, None)
def test_exception(self):
self._mox.StubOutWithMock(urllib_request, 'urlopen')
@@ -73,4 +108,4 @@ class TestHttpSignaler(unittest.TestCase):
self._mox.ReplayAll()
- assert not HttpSignaler(self.PORT).health()[0]
+ assert not HttpSignaler(self.PORT)('/health', expected_response='ok')[0]
http://git-wip-us.apache.org/repos/asf/aurora/blob/27a602d2/src/test/python/apache/aurora/executor/common/test_health_checker.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/executor/common/test_health_checker.py b/src/test/python/apache/aurora/executor/common/test_health_checker.py
index 1b4423a..27c7171 100644
--- a/src/test/python/apache/aurora/executor/common/test_health_checker.py
+++ b/src/test/python/apache/aurora/executor/common/test_health_checker.py
@@ -44,7 +44,7 @@ class TestHealthChecker(unittest.TestCase):
self.fake_health_checks = []
def mock_health_check():
return self.fake_health_checks.pop(0)
- self._checker.health = mock.Mock(spec=self._checker.health)
+ self._checker.health = mock.Mock(spec=self._checker.__call__)
self._checker.health.side_effect = mock_health_check
def append_health_checks(self, status, num_calls=1):
@@ -209,8 +209,8 @@ class TestHealthCheckerProvider(unittest.TestCase):
class TestThreadedHealthChecker(unittest.TestCase):
def setUp(self):
- self.signaler = mock.Mock(spec=HttpSignaler)
- self.signaler.health.return_value = (True, 'Fake')
+ self.health = mock.Mock()
+ self.health.return_value = (True, 'Fake')
self.sandbox = mock.Mock(spec_set=SandboxInterface)
self.sandbox.exists.return_value = True
@@ -222,14 +222,14 @@ class TestThreadedHealthChecker(unittest.TestCase):
self.clock = mock.Mock(spec=time)
self.clock.time.return_value = 1.0
self.health_checker = HealthChecker(
- self.signaler.health,
+ self.health,
None,
self.interval_secs,
self.initial_interval_secs,
self.max_consecutive_failures,
self.clock)
self.health_checker_sandbox_exists = HealthChecker(
- self.signaler.health,
+ self.health,
self.sandbox,
self.interval_secs,
self.initial_interval_secs,
@@ -238,29 +238,29 @@ class TestThreadedHealthChecker(unittest.TestCase):
def test_perform_check_if_not_disabled_snooze_file_is_none(self):
self.health_checker.threaded_health_checker.snooze_file = None
- assert self.signaler.health.call_count == 0
+ assert self.health.call_count == 0
assert self.health_checker_sandbox_exists.metrics.sample()['snoozed'] == 0
self.health_checker.threaded_health_checker._perform_check_if_not_disabled()
- assert self.signaler.health.call_count == 1
+ assert self.health.call_count == 1
assert self.health_checker_sandbox_exists.metrics.sample()['snoozed'] == 0
@mock.patch('os.path', spec_set=os.path)
def test_perform_check_if_not_disabled_no_snooze_file(self, mock_os_path):
mock_os_path.isfile.return_value = False
- assert self.signaler.health.call_count == 0
+ assert self.health.call_count == 0
assert self.health_checker_sandbox_exists.metrics.sample()['snoozed'] == 0
self.health_checker_sandbox_exists.threaded_health_checker._perform_check_if_not_disabled()
- assert self.signaler.health.call_count == 1
+ assert self.health.call_count == 1
assert self.health_checker_sandbox_exists.metrics.sample()['snoozed'] == 0
@mock.patch('os.path', spec_set=os.path)
def test_perform_check_if_not_disabled_snooze_file_exists(self, mock_os_path):
mock_os_path.isfile.return_value = True
- assert self.signaler.health.call_count == 0
+ assert self.health.call_count == 0
assert self.health_checker_sandbox_exists.metrics.sample()['snoozed'] == 0
result = (
self.health_checker_sandbox_exists.threaded_health_checker._perform_check_if_not_disabled())
- assert self.signaler.health.call_count == 0
+ assert self.health.call_count == 0
assert self.health_checker_sandbox_exists.metrics.sample()['snoozed'] == 1
assert result == (True, None)