You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@aurora.apache.org by wi...@apache.org on 2015/04/04 01:12:36 UTC

aurora git commit: Make health check configurable

Repository: aurora
Updated Branches:
  refs/heads/master 05d75e5dc -> 27a602d2c


Make health check configurable

This allows the endpoint, expected response
and expected response code to be configured by the user.

Testing Done:
Unittests added and manurally verified in vagrant enviroment.

Bugs closed: AURORA-316

Reviewed at https://reviews.apache.org/r/32295/


Project: http://git-wip-us.apache.org/repos/asf/aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/27a602d2
Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/27a602d2
Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/27a602d2

Branch: refs/heads/master
Commit: 27a602d2c9efdd1cd2591c9c754f086c04ad0eb9
Parents: 05d75e5
Author: Brian Brazil <br...@boxever.com>
Authored: Fri Apr 3 15:33:05 2015 -0700
Committer: Brian Wickman <wi...@apache.org>
Committed: Fri Apr 3 15:33:05 2015 -0700

----------------------------------------------------------------------
 docs/configuration-reference.md                 |  5 +-
 .../apache/aurora/common/http_signaler.py       | 29 ++++++----
 .../python/apache/aurora/config/schema/base.py  |  3 +
 .../aurora/executor/common/health_checker.py    |  5 +-
 .../apache/aurora/common/test_http_signaler.py  | 59 ++++++++++++++++----
 .../executor/common/test_health_checker.py      | 22 ++++----
 6 files changed, 86 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/aurora/blob/27a602d2/docs/configuration-reference.md
----------------------------------------------------------------------
diff --git a/docs/configuration-reference.md b/docs/configuration-reference.md
index af332f2..fb753ea 100644
--- a/docs/configuration-reference.md
+++ b/docs/configuration-reference.md
@@ -359,8 +359,11 @@ Parameters for controlling a task's health checks via HTTP.
 | -------                        | :-------: | --------
 | ```initial_interval_secs```    | Integer   | Initial delay for performing an HTTP health check. (Default: 15)
 | ```interval_secs```            | Integer   | Interval on which to check the task's health via HTTP. (Default: 10)
-| ```timeout_secs```             | Integer   | HTTP request timeout. (Default: 1)
 | ```max_consecutive_failures``` | Integer   | Maximum number of consecutive failures that tolerated before considering a task unhealthy (Default: 0)
+| ```timeout_secs```             | Integer   | HTTP request timeout. (Default: 1)
+| ```endpoint```                 | String    | HTTP endpoint to check (Default: /health)
+| ```expected_response```        | String    | If not empty, fail the health check if the response differs. Case insensitive. (Default: ok)
+| ```expected_response_code```   | Integer   | If not zero, fail the health check if the response code differs. (Default: 0)
 
 ### Announcer Objects
 

http://git-wip-us.apache.org/repos/asf/aurora/blob/27a602d2/src/main/python/apache/aurora/common/http_signaler.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/common/http_signaler.py b/src/main/python/apache/aurora/common/http_signaler.py
index e3e819d..531f1fe 100644
--- a/src/main/python/apache/aurora/common/http_signaler.py
+++ b/src/main/python/apache/aurora/common/http_signaler.py
@@ -39,7 +39,7 @@ class HttpSignaler(object):
 
   def __init__(self, port, host='localhost', timeout_secs=None):
     self._host = host
-    self._url_base = 'http://%s:%d/' % (host, port)
+    self._url_base = 'http://%s:%d' % (host, port)
     if timeout_secs is None:
       env_timeout = os.getenv('AURORA_HTTP_SIGNALER_TIMEOUT_SECS')
       if env_timeout is not None:
@@ -70,37 +70,42 @@ class HttpSignaler(object):
     try:
       with contextlib.closing(
           self.opener(url, data, timeout=self._timeout_secs)) as fp:
-        return fp.read()
+        return (fp.read(), fp.getcode())
     except (HTTPException, SocketTimeout) as e:
       # the type of an HTTPException is typically more useful than its contents (since for example
       # BadStatusLines are often empty). likewise with socket.timeout.
       raise_error('Error within %s' % e.__class__.__name__)
-    except (URLError, HTTPError) as e:
+    except HTTPError as e:
+      return ('', e.code)
+    except URLError as e:
       raise_error(e)
     except Exception as e:
       raise_error('Unexpected error: %s' % e)
 
-  def __call__(self, endpoint, use_post_method=False, expected_response=None):
+  def __call__(self, endpoint, use_post_method=False, expected_response=None,
+      expected_response_code=None):
     """Returns a (boolean, string|None) tuple of (call success, failure reason)"""
     try:
-      response = self.query(endpoint, '' if use_post_method else None).strip().lower()
-      if expected_response is not None and response != expected_response:
+      response, response_code = self.query(endpoint, '' if use_post_method else None)
+      response = response.strip().lower()
+      if expected_response and response != expected_response.lower():
+        reason = 'Response differs from expected response (expected "%s", got "%s")'
         def shorten(string):
           return (string if len(string) < self.FAILURE_REASON_LENGTH
                          else "%s..." % string[:self.FAILURE_REASON_LENGTH - 3])
-        reason = 'Response differs from expected response (expected "%s", got "%s")'
         log.warning(reason % (expected_response, response))
         return (False, reason % (shorten(str(expected_response)), shorten(str(response))))
+      elif expected_response_code and response_code != expected_response_code:
+        reason = 'Response code differs from expected response (expected %i, got %i)'
+        log.warning(reason % (expected_response_code, response_code))
+        return (False, reason % (expected_response_code, response_code))
       else:
         return (True, None)
     except self.QueryError as e:
       return (False, str(e))
 
-  def health(self):
-    return self('health', use_post_method=False, expected_response='ok')
-
   def quitquitquit(self):
-    return self('quitquitquit', use_post_method=True)
+    return self('/quitquitquit', use_post_method=True)
 
   def abortabortabort(self):
-    return self('abortabortabort', use_post_method=True)
+    return self('/abortabortabort', use_post_method=True)

http://git-wip-us.apache.org/repos/asf/aurora/blob/27a602d2/src/main/python/apache/aurora/config/schema/base.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/config/schema/base.py b/src/main/python/apache/aurora/config/schema/base.py
index a87524a..ec9f983 100644
--- a/src/main/python/apache/aurora/config/schema/base.py
+++ b/src/main/python/apache/aurora/config/schema/base.py
@@ -40,6 +40,9 @@ class HealthCheckConfig(Struct):
   interval_secs            = Default(Float, 10.0)
   timeout_secs             = Default(Float, 1.0)
   max_consecutive_failures = Default(Integer, 0)
+  endpoint                 = Default(String, '/health')
+  expected_response        = Default(String, 'ok')
+  expected_response_code   = Default(Integer, 0)
 
 
 class Announcer(Struct):

http://git-wip-us.apache.org/repos/asf/aurora/blob/27a602d2/src/main/python/apache/aurora/executor/common/health_checker.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/executor/common/health_checker.py b/src/main/python/apache/aurora/executor/common/health_checker.py
index cfc29c3..03fdf0a 100644
--- a/src/main/python/apache/aurora/executor/common/health_checker.py
+++ b/src/main/python/apache/aurora/executor/common/health_checker.py
@@ -211,7 +211,10 @@ class HealthCheckerProvider(StatusCheckerProvider):
         portmap['health'],
         timeout_secs=health_check_config.get('timeout_secs'))
     health_checker = HealthChecker(
-        http_signaler.health,
+        lambda: http_signaler(
+            endpoint=health_check_config.get('endpoint'),
+            expected_response=health_check_config.get('expected_response'),
+            expected_response_code=health_check_config.get('expected_response_code')),
         sandbox,
         interval_secs=health_check_config.get('interval_secs'),
         initial_interval_secs=health_check_config.get('initial_interval_secs'),

http://git-wip-us.apache.org/repos/asf/aurora/blob/27a602d2/src/test/python/apache/aurora/common/test_http_signaler.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/common/test_http_signaler.py b/src/test/python/apache/aurora/common/test_http_signaler.py
index f5f8419..c6a2170 100644
--- a/src/test/python/apache/aurora/common/test_http_signaler.py
+++ b/src/test/python/apache/aurora/common/test_http_signaler.py
@@ -25,7 +25,20 @@ if Compatibility.PY3:
 else:
   import urllib2 as urllib_request
 
-StringIO = Compatibility.StringIO
+
+class OpenedURL(object):
+  def __init__(self, content, code=200):
+    self.content = content
+    self.code = code
+
+  def read(self):
+    return self.content
+
+  def close(self):
+    pass
+
+  def getcode(self):
+    return self.code
 
 
 class TestHttpSignaler(unittest.TestCase):
@@ -41,29 +54,51 @@ class TestHttpSignaler(unittest.TestCase):
   def test_all_calls_ok(self):
     self._mox.StubOutWithMock(urllib_request, 'urlopen')
     urllib_request.urlopen(
-      'http://localhost:%s/health' % self.PORT, None, timeout=1.0).AndReturn(StringIO('ok'))
-    urllib_request.urlopen(
-      'http://localhost:%s/quitquitquit' % self.PORT, '', timeout=1.0).AndReturn(StringIO(''))
+      'http://localhost:%s/quitquitquit' % self.PORT, '', timeout=1.0).AndReturn(OpenedURL(''))
     urllib_request.urlopen(
-      'http://localhost:%s/abortabortabort' % self.PORT, '', timeout=1.0).AndReturn(StringIO(''))
+      'http://localhost:%s/abortabortabort' % self.PORT, '', timeout=1.0).AndReturn(OpenedURL(''))
 
     self._mox.ReplayAll()
 
     signaler = HttpSignaler(self.PORT)
-    assert signaler.health() == (True, None)
     assert signaler.quitquitquit() == (True, None)
     assert signaler.abortabortabort() == (True, None)
 
-  def test_health_not_ok(self):
+  def test_health_checks(self):
     self._mox.StubOutWithMock(urllib_request, 'urlopen')
     urllib_request.urlopen(
-        'http://localhost:%s/health' % self.PORT, None, timeout=1.0).AndReturn(StringIO('not ok'))
+      'http://localhost:%s/health' % self.PORT, None, timeout=1.0).AndReturn(OpenedURL('ok'))
+    urllib_request.urlopen(
+      'http://localhost:%s/health' % self.PORT, None, timeout=1.0).AndReturn(OpenedURL('not ok'))
+    urllib_request.urlopen(
+      'http://localhost:%s/health' % self.PORT, None, timeout=1.0).AndReturn(
+          OpenedURL('not ok', code=200))
+    urllib_request.urlopen(
+      'http://localhost:%s/health' % self.PORT, None, timeout=1.0).AndReturn(
+          OpenedURL('ok', code=400))
+    urllib_request.urlopen(
+      'http://localhost:%s/health' % self.PORT, None, timeout=1.0).AndRaise(
+          urllib_request.HTTPError('', 501, '', None, None))
+    urllib_request.urlopen(
+      'http://localhost:%s/health' % self.PORT, None, timeout=1.0).AndReturn(
+          OpenedURL('ok', code=200))
+    urllib_request.urlopen(
+      'http://localhost:%s/random/endpoint' % self.PORT, None, timeout=1.0).AndReturn(
+          OpenedURL('ok'))
 
     self._mox.ReplayAll()
 
-    health, reason = HttpSignaler(self.PORT).health()
-    assert not health
-    assert reason.startswith('Response differs from expected response')
+    signaler = HttpSignaler(self.PORT)
+    assert signaler('/health', expected_response='ok') == (True, None)
+    assert signaler('/health', expected_response='ok') == (
+        False, 'Response differs from expected response (expected "ok", got "not ok")')
+    assert signaler('/health', expected_response_code=200) == (True, None)
+    assert signaler('/health', expected_response_code=200) == (
+        False, 'Response code differs from expected response (expected 200, got 400)')
+    assert signaler('/health', expected_response_code=200) == (
+        False, 'Response code differs from expected response (expected 200, got 501)')
+    assert signaler('/health', expected_response='ok', expected_response_code=200) == (True, None)
+    assert signaler('/random/endpoint', expected_response='ok') == (True, None)
 
   def test_exception(self):
     self._mox.StubOutWithMock(urllib_request, 'urlopen')
@@ -73,4 +108,4 @@ class TestHttpSignaler(unittest.TestCase):
 
     self._mox.ReplayAll()
 
-    assert not HttpSignaler(self.PORT).health()[0]
+    assert not HttpSignaler(self.PORT)('/health', expected_response='ok')[0]

http://git-wip-us.apache.org/repos/asf/aurora/blob/27a602d2/src/test/python/apache/aurora/executor/common/test_health_checker.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/executor/common/test_health_checker.py b/src/test/python/apache/aurora/executor/common/test_health_checker.py
index 1b4423a..27c7171 100644
--- a/src/test/python/apache/aurora/executor/common/test_health_checker.py
+++ b/src/test/python/apache/aurora/executor/common/test_health_checker.py
@@ -44,7 +44,7 @@ class TestHealthChecker(unittest.TestCase):
     self.fake_health_checks = []
     def mock_health_check():
       return self.fake_health_checks.pop(0)
-    self._checker.health = mock.Mock(spec=self._checker.health)
+    self._checker.health = mock.Mock(spec=self._checker.__call__)
     self._checker.health.side_effect = mock_health_check
 
   def append_health_checks(self, status, num_calls=1):
@@ -209,8 +209,8 @@ class TestHealthCheckerProvider(unittest.TestCase):
 
 class TestThreadedHealthChecker(unittest.TestCase):
   def setUp(self):
-    self.signaler = mock.Mock(spec=HttpSignaler)
-    self.signaler.health.return_value = (True, 'Fake')
+    self.health = mock.Mock()
+    self.health.return_value = (True, 'Fake')
 
     self.sandbox = mock.Mock(spec_set=SandboxInterface)
     self.sandbox.exists.return_value = True
@@ -222,14 +222,14 @@ class TestThreadedHealthChecker(unittest.TestCase):
     self.clock = mock.Mock(spec=time)
     self.clock.time.return_value = 1.0
     self.health_checker = HealthChecker(
-        self.signaler.health,
+        self.health,
         None,
         self.interval_secs,
         self.initial_interval_secs,
         self.max_consecutive_failures,
         self.clock)
     self.health_checker_sandbox_exists = HealthChecker(
-        self.signaler.health,
+        self.health,
         self.sandbox,
         self.interval_secs,
         self.initial_interval_secs,
@@ -238,29 +238,29 @@ class TestThreadedHealthChecker(unittest.TestCase):
 
   def test_perform_check_if_not_disabled_snooze_file_is_none(self):
     self.health_checker.threaded_health_checker.snooze_file = None
-    assert self.signaler.health.call_count == 0
+    assert self.health.call_count == 0
     assert self.health_checker_sandbox_exists.metrics.sample()['snoozed'] == 0
     self.health_checker.threaded_health_checker._perform_check_if_not_disabled()
-    assert self.signaler.health.call_count == 1
+    assert self.health.call_count == 1
     assert self.health_checker_sandbox_exists.metrics.sample()['snoozed'] == 0
 
   @mock.patch('os.path', spec_set=os.path)
   def test_perform_check_if_not_disabled_no_snooze_file(self, mock_os_path):
     mock_os_path.isfile.return_value = False
-    assert self.signaler.health.call_count == 0
+    assert self.health.call_count == 0
     assert self.health_checker_sandbox_exists.metrics.sample()['snoozed'] == 0
     self.health_checker_sandbox_exists.threaded_health_checker._perform_check_if_not_disabled()
-    assert self.signaler.health.call_count == 1
+    assert self.health.call_count == 1
     assert self.health_checker_sandbox_exists.metrics.sample()['snoozed'] == 0
 
   @mock.patch('os.path', spec_set=os.path)
   def test_perform_check_if_not_disabled_snooze_file_exists(self, mock_os_path):
     mock_os_path.isfile.return_value = True
-    assert self.signaler.health.call_count == 0
+    assert self.health.call_count == 0
     assert self.health_checker_sandbox_exists.metrics.sample()['snoozed'] == 0
     result = (
         self.health_checker_sandbox_exists.threaded_health_checker._perform_check_if_not_disabled())
-    assert self.signaler.health.call_count == 0
+    assert self.health.call_count == 0
     assert self.health_checker_sandbox_exists.metrics.sample()['snoozed'] == 1
     assert result == (True, None)