You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@aurora.apache.org by ma...@apache.org on 2014/08/23 01:49:00 UTC
git commit: Refactoring maintenance commands to host_ prefix and
converting perform_maintenance_hosts into host_drain.
Repository: incubator-aurora
Updated Branches:
refs/heads/master 900a2b299 -> 3f8b5ea73
Refactoring maintenance commands to host_ prefix
and converting perform_maintenance_hosts into host_drain.
Bugs closed: AURORA-43
Reviewed at https://reviews.apache.org/r/24871/
Project: http://git-wip-us.apache.org/repos/asf/incubator-aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-aurora/commit/3f8b5ea7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-aurora/tree/3f8b5ea7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-aurora/diff/3f8b5ea7
Branch: refs/heads/master
Commit: 3f8b5ea73340a33b5d3c285ae58d648aeecf2222
Parents: 900a2b2
Author: Maxim Khutornenko <ma...@apache.org>
Authored: Fri Aug 22 16:48:37 2014 -0700
Committer: Maxim Khutornenko <ma...@apache.org>
Committed: Fri Aug 22 16:48:37 2014 -0700
----------------------------------------------------------------------
.../python/apache/aurora/admin/admin_util.py | 94 +++++++++++--
.../apache/aurora/admin/host_maintenance.py | 41 ++----
.../aurora/client/commands/maintenance.py | 135 +++++++++----------
src/test/python/apache/aurora/admin/BUILD | 1 +
.../aurora/admin/test_host_maintenance.py | 41 +-----
.../aurora/client/commands/test_maintenance.py | 107 ++++-----------
6 files changed, 198 insertions(+), 221 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/3f8b5ea7/src/main/python/apache/aurora/admin/admin_util.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/admin/admin_util.py b/src/main/python/apache/aurora/admin/admin_util.py
index 31ec2b1..394deb5 100644
--- a/src/main/python/apache/aurora/admin/admin_util.py
+++ b/src/main/python/apache/aurora/admin/admin_util.py
@@ -19,6 +19,9 @@ import os
import subprocess
from uuid import uuid1
+from twitter.common.quantity import Amount, Time
+from twitter.common.quantity.parse_simple import parse_time
+
from apache.aurora.client.base import die
"""Admin client utility functions shared between admin and maintenance modules."""
@@ -28,6 +31,10 @@ LOGGER_NAME = 'aurora_admin'
logger = logging.getLogger(LOGGER_NAME)
CLIENT_ID = uuid1()
+# Default SLA limits
+SLA_UPTIME_PERCENTAGE_LIMIT = 95
+SLA_UPTIME_DURATION_LIMIT = Amount(30, Time.MINUTES)
+
def log_admin_message(sev, msg, *args, **kwargs):
"""Logs message using the module-defined logger.
@@ -46,17 +53,55 @@ def log_admin_message(sev, msg, *args, **kwargs):
FILENAME_OPTION = optparse.Option(
- '--filename',
- dest='filename',
- default=None,
- help='Name of the file with hostnames')
+ '--filename',
+ dest='filename',
+ default=None,
+ help='Name of the file with hostnames')
HOSTS_OPTION = optparse.Option(
- '--hosts',
- dest='hosts',
- default=None,
- help='Comma separated list of hosts')
+ '--hosts',
+ dest='hosts',
+ default=None,
+ help='Comma separated list of hosts')
+
+
+POST_DRAIN_SCRIPT_OPTION = optparse.Option(
+ '--post_drain_script',
+ dest='post_drain_script',
+ default=None,
+ help='Path to a script to run for each host if needed.')
+
+OVERRIDE_SLA_PERCENTAGE_OPTION = optparse.Option(
+ '--override_percentage',
+ dest='percentage',
+ default=None,
+ help='Percentage of tasks required to be up all the time within the duration. '
+ 'Default value:%s. DO NOT override default value unless absolutely necessary! '
+ 'See sla_probe_hosts and sla_list_safe_domain commands '
+ 'for more details on SLA.' % SLA_UPTIME_PERCENTAGE_LIMIT)
+
+OVERRIDE_SLA_DURATION_OPTION = optparse.Option(
+ '--override_duration',
+ dest='duration',
+ default=None,
+ help='Time interval (now - value) for the percentage of up tasks. Format: XdYhZmWs. '
+ 'Default value:%s. DO NOT override default value unless absolutely necessary! '
+ 'See sla_probe_hosts and sla_list_safe_domain commands '
+ 'for more details on SLA.' % SLA_UPTIME_DURATION_LIMIT)
+
+OVERRIDE_SLA_REASON_OPTION = optparse.Option(
+ '--override_reason',
+ dest='reason',
+ default=None,
+ help='Reason for overriding default SLA values. Provide details including the '
+ 'maintenance ticket number.')
+
+UNSAFE_SLA_HOSTS_FILE_OPTION = optparse.Option(
+ '--unsafe_hosts_file',
+ dest='unsafe_hosts_filename',
+ default=None,
+ help='Output file to write host names that did not pass SLA check.')
def parse_sla_percentage(percentage):
@@ -149,6 +194,39 @@ def parse_script(filename):
return None
+def parse_and_validate_sla_overrides(options, hostnames):
+ """Parses and validates host SLA override 3-tuple (percentage, duration, reason).
+
+ In addition, logs an admin message about overriding default SLA values.
+
+ :param options: command line options
+ :type options: list of app.option
+ :param hostnames: host names override is issued to
+ :type hostnames: list of string
+ :rtype: a tuple of: override percentage (float) and override duration (Amount)
+ """
+ has_override = bool(options.percentage) or bool(options.duration) or bool(options.reason)
+ all_overrides = bool(options.percentage) and bool(options.duration) and bool(options.reason)
+ if has_override != all_overrides:
+ die('All --override_* options are required when attempting to override default SLA values.')
+
+ percentage = parse_sla_percentage(options.percentage) if options.percentage else None
+ duration = parse_time(options.duration) if options.duration else None
+ if options.reason:
+ log_admin_message(
+ logging.WARNING,
+ 'Default SLA values (percentage: %s, duration: %s) are overridden for the following '
+ 'hosts: %s. New percentage: %s, duration: %s, override reason: %s' % (
+ SLA_UPTIME_PERCENTAGE_LIMIT,
+ SLA_UPTIME_DURATION_LIMIT,
+ hostnames,
+ percentage,
+ duration,
+ options.reason))
+
+ return percentage or SLA_UPTIME_PERCENTAGE_LIMIT, duration or SLA_UPTIME_DURATION_LIMIT
+
+
def print_results(results):
"""Prints formatted SLA results.
http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/3f8b5ea7/src/main/python/apache/aurora/admin/host_maintenance.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/admin/host_maintenance.py b/src/main/python/apache/aurora/admin/host_maintenance.py
index 162f9af..29745a3 100644
--- a/src/main/python/apache/aurora/admin/host_maintenance.py
+++ b/src/main/python/apache/aurora/admin/host_maintenance.py
@@ -40,8 +40,6 @@ class HostMaintenance(object):
START_MAINTENANCE_DELAY = Amount(30, Time.SECONDS)
SLA_MIN_JOB_INSTANCE_COUNT = 20
- SLA_UPTIME_PERCENTAGE_LIMIT = 95
- SLA_UPTIME_DURATION_LIMIT = Amount(30, Time.MINUTES)
@classmethod
@@ -92,18 +90,7 @@ class HostMaintenance(object):
if host_status.mode != MaintenanceMode.NONE:
log.warning('%s is DRAINING or in DRAINED' % host_status.host)
- def _operate_on_hosts(self, drained_hosts, callback):
- """Perform a given operation on a list of hosts that are ready for maintenance.
-
- :param drained_hosts: Hosts that have been drained (via _drain_hosts)
- :type drained_hosts: gen.apache.aurora.ttypes.Hosts
- :param callback: Function to call one hostname at a time
- :type callback: function
- """
- for hostname in drained_hosts.hostNames:
- callback(hostname)
-
- def _check_sla(self, hostnames, grouping_function, percentage=None, duration=None):
+ def _check_sla(self, hostnames, grouping_function, percentage, duration):
"""Check if the provided list of hosts passes the job uptime SLA check.
This is an all-or-nothing check, meaning that all provided hosts must pass their job
@@ -119,13 +106,10 @@ class HostMaintenance(object):
:type duration: twitter.common.quantity.Amount
:rtype: set of unsafe hosts
"""
- sla_percentage = percentage or self.SLA_UPTIME_PERCENTAGE_LIMIT
- sla_duration = duration or self.SLA_UPTIME_DURATION_LIMIT
-
vector = self._client.sla_get_safe_domain_vector(self.SLA_MIN_JOB_INSTANCE_COUNT, hostnames)
host_groups = vector.probe_hosts(
- sla_percentage,
- sla_duration.as_(Time.SECONDS),
+ percentage,
+ duration.as_(Time.SECONDS),
grouping_function)
unsafe_hostnames = set()
@@ -170,25 +154,24 @@ class HostMaintenance(object):
return result
def perform_maintenance(self, hostnames, grouping_function=DEFAULT_GROUPING,
- callback=None, percentage=None, duration=None, output_file=None):
- """Wrap a callback in between sending hosts into maintenance mode and back.
+ percentage=None, duration=None, output_file=None):
+ """Put hosts into maintenance mode and drain them.
+
+ Walk through the process of putting hosts into maintenance and draining them of tasks. The hosts
+ will remain in maintenance mode upon completion.
- Walk through the process of putting hosts into maintenance, draining them of tasks,
- performing an action on them once drained, then removing them from maintenance mode
- so tasks can schedule.
:param hostnames: A list of hostnames to operate upon
:type hostnames: list of strings
:param grouping_function: How to split up the hostname into groups
:type grouping_function: function
- :param callback: Function to call once hosts are drained
- :type callback: function
:param percentage: SLA percentage to use
:type percentage: float
:param duration: SLA duration to use
:type duration: twitter.common.quantity.Time
:param output_file: file to write hosts that were not drained due to failed SLA check
:type output_file: string
+ :rtype: set of host names that were successfully drained
"""
hostnames = self.start_maintenance(hostnames)
not_drained_hostnames = set()
@@ -204,7 +187,6 @@ class HostMaintenance(object):
if unsafe_hostnames:
log.warning('Some hosts did not pass SLA check and will not be drained! '
'Skipping hosts: %s' % unsafe_hostnames)
- self._complete_maintenance(Hosts(unsafe_hostnames))
not_drained_hostnames |= unsafe_hostnames
drainable_hostnames = hosts.hostNames - unsafe_hostnames
if not drainable_hostnames:
@@ -214,9 +196,6 @@ class HostMaintenance(object):
log.info('All hosts passed SLA check.')
self._drain_hosts(hosts)
- if callback:
- self._operate_on_hosts(hosts, callback)
- self._complete_maintenance(hosts)
if not_drained_hostnames:
output = '\n'.join(list(not_drained_hostnames))
@@ -231,6 +210,8 @@ class HostMaintenance(object):
except IOError as e:
log.error('Failed to write into the output file: %s' % e)
+ return set(hostnames) - not_drained_hostnames
+
def check_status(self, hostnames):
"""Query the scheduler to determine the maintenance status for a list of hostnames
http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/3f8b5ea7/src/main/python/apache/aurora/client/commands/maintenance.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/commands/maintenance.py b/src/main/python/apache/aurora/client/commands/maintenance.py
index a54f0f6..c83b96a 100644
--- a/src/main/python/apache/aurora/client/commands/maintenance.py
+++ b/src/main/python/apache/aurora/client/commands/maintenance.py
@@ -15,15 +15,18 @@
import logging
from twitter.common import app, log
-from twitter.common.quantity.parse_simple import parse_time
from apache.aurora.admin.admin_util import (
FILENAME_OPTION,
HOSTS_OPTION,
- log_admin_message,
+ OVERRIDE_SLA_DURATION_OPTION,
+ OVERRIDE_SLA_PERCENTAGE_OPTION,
+ OVERRIDE_SLA_REASON_OPTION,
+ parse_and_validate_sla_overrides,
parse_hostnames,
parse_script,
- parse_sla_percentage
+ POST_DRAIN_SCRIPT_OPTION,
+ UNSAFE_SLA_HOSTS_FILE_OPTION
)
from apache.aurora.admin.host_maintenance import HostMaintenance
from apache.aurora.client.base import die, get_grouping_or_die, GROUPING_OPTION, requires
@@ -34,9 +37,18 @@ from apache.aurora.common.clusters import CLUSTERS
@app.command_option(FILENAME_OPTION)
@app.command_option(HOSTS_OPTION)
@requires.exactly('cluster')
-def start_maintenance_hosts(cluster):
- """usage: start_maintenance_hosts {--filename=filename | --hosts=hosts}
- cluster
+def host_deactivate(cluster):
+ """usage: host_deactivate {--filename=filename | --hosts=hosts}
+ cluster
+
+ Puts hosts into maintenance mode.
+
+ The list of hosts is marked for maintenance, and will be de-prioritized
+ from consideration for scheduling. Note, they are not removed from
+ consideration, and may still schedule tasks if resources are very scarce.
+ Usually you would mark a larger set of machines for drain, and then do
+ them in batches within the larger set, to help drained tasks not land on
+ future hosts that will be drained shortly in subsequent batches.
"""
options = app.get_options()
HostMaintenance(CLUSTERS[cluster], options.verbosity).start_maintenance(
@@ -47,9 +59,14 @@ def start_maintenance_hosts(cluster):
@app.command_option(FILENAME_OPTION)
@app.command_option(HOSTS_OPTION)
@requires.exactly('cluster')
-def end_maintenance_hosts(cluster):
- """usage: end_maintenance_hosts {--filename=filename | --hosts=hosts}
- cluster
+def host_activate(cluster):
+ """usage: host_activate {--filename=filename | --hosts=hosts}
+ cluster
+
+ Removes maintenance mode from hosts.
+
+ The list of hosts is marked as not in a drained state anymore. This will
+ allow normal scheduling to resume on the given list of hosts.
"""
options = app.get_options()
HostMaintenance(CLUSTERS[cluster], options.verbosity).end_maintenance(
@@ -57,84 +74,66 @@ def end_maintenance_hosts(cluster):
@app.command
-@app.command_option('--post_drain_script', dest='post_drain_script', default=None,
- help='Path to a script to run for each host.')
-@app.command_option('--override_percentage', dest='percentage', default=None,
- help='Percentage of tasks required to be up all the time within the duration. '
- 'Default value:%s. DO NOT override default value unless absolutely necessary! '
- 'See sla_probe_hosts and sla_list_safe_domain commands '
- 'for more details on SLA.' % HostMaintenance.SLA_UPTIME_PERCENTAGE_LIMIT)
-@app.command_option('--override_duration', dest='duration', default=None,
- help='Time interval (now - value) for the percentage of up tasks. Format: XdYhZmWs. '
- 'Default value:%s. DO NOT override default value unless absolutely necessary! '
- 'See sla_probe_hosts and sla_list_safe_domain commands '
- 'for more details on SLA.' % HostMaintenance.SLA_UPTIME_DURATION_LIMIT)
-@app.command_option('--override_reason', dest='reason', default=None,
- help='Reason for overriding default SLA values. Provide details including the '
- 'maintenance ticket number.')
-@app.command_option('--unsafe_hosts_file', dest='unsafe_hosts_filename', default=None,
- help='Output file to write host names that did not pass SLA check.')
@app.command_option(FILENAME_OPTION)
@app.command_option(HOSTS_OPTION)
+@app.command_option(POST_DRAIN_SCRIPT_OPTION)
@app.command_option(GROUPING_OPTION)
+@app.command_option(OVERRIDE_SLA_PERCENTAGE_OPTION)
+@app.command_option(OVERRIDE_SLA_DURATION_OPTION)
+@app.command_option(OVERRIDE_SLA_REASON_OPTION)
+@app.command_option(UNSAFE_SLA_HOSTS_FILE_OPTION)
@requires.exactly('cluster')
-def perform_maintenance_hosts(cluster):
- """usage: perform_maintenance_hosts {--filename=filename | --hosts=hosts}
- [--post_drain_script=path]
- [--grouping=function]
- [--override_percentage=percentage]
- [--override_duration=duration]
- [--override_reason=reason]
- [--unsafe_hosts_file=unsafe_hosts_filename]
- cluster
-
- Asks the scheduler to remove any running tasks from the machine and remove it
- from service temporarily, perform some action on them, then return the machines
- to service.
+def host_drain(cluster):
+ """usage: host_drain {--filename=filename | --hosts=hosts}
+ [--post_drain_script=path]
+ [--grouping=function]
+ [--override_percentage=percentage]
+ [--override_duration=duration]
+ [--override_reason=reason]
+ [--unsafe_hosts_file=unsafe_hosts_filename]
+ cluster
+
+ Asks the scheduler to start maintenance on the list of provided hosts (see host_deactivate
+ for more details) and drains any active tasks on them.
+
+ The list of hosts is drained and marked in a drained state. This will kill
+ off any tasks currently running on these hosts, as well as prevent future
+ tasks from scheduling on these hosts while they are drained.
+
+ The hosts are left in maintenance mode upon completion. Use host_activate to
+ return hosts back to service and allow scheduling tasks on them.
"""
options = app.get_options()
drainable_hosts = parse_hostnames(options.filename, options.hosts)
get_grouping_or_die(options.grouping)
- has_override = bool(options.percentage) or bool(options.duration) or bool(options.reason)
- all_overrides = bool(options.percentage) and bool(options.duration) and bool(options.reason)
- if has_override != all_overrides:
- die('All --override_* options are required when attempting to override default SLA values.')
-
- percentage = parse_sla_percentage(options.percentage) if options.percentage else None
- duration = parse_time(options.duration) if options.duration else None
- if options.reason:
- log_admin_message(
- logging.WARNING,
- 'Default SLA values (percentage: %s, duration: %s) are overridden for the following '
- 'hosts: %s. New percentage: %s, duration: %s, override reason: %s' % (
- HostMaintenance.SLA_UPTIME_PERCENTAGE_LIMIT,
- HostMaintenance.SLA_UPTIME_DURATION_LIMIT,
- drainable_hosts,
- percentage,
- duration,
- options.reason))
-
- drained_callback = parse_script(options.post_drain_script)
-
- HostMaintenance(CLUSTERS[cluster], options.verbosity).perform_maintenance(
+ override_percentage, override_duration = parse_and_validate_sla_overrides(
+ options,
+ drainable_hosts)
+
+ post_drain_callback = parse_script(options.post_drain_script)
+
+ drained_hostnames = HostMaintenance(CLUSTERS[cluster], options.verbosity).perform_maintenance(
drainable_hosts,
grouping_function=options.grouping,
- callback=drained_callback,
- percentage=percentage,
- duration=duration,
+ percentage=override_percentage,
+ duration=override_duration,
output_file=options.unsafe_hosts_filename)
+ if post_drain_callback:
+ for hostname in drained_hostnames:
+ post_drain_callback(hostname)
+
@app.command
@app.command_option(FILENAME_OPTION)
@app.command_option(HOSTS_OPTION)
@requires.exactly('cluster')
-def host_maintenance_status(cluster):
- """usage: host_maintenance_status {--filename=filename | --hosts=hosts}
- cluster
+def host_status(cluster):
+ """usage: host_status {--filename=filename | --hosts=hosts}
+ cluster
- Check on the schedulers maintenance status for a list of hosts in the cluster.
+ Print the drain status of each supplied host.
"""
options = app.get_options()
checkable_hosts = parse_hostnames(options.filename, options.hosts)
http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/3f8b5ea7/src/test/python/apache/aurora/admin/BUILD
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/admin/BUILD b/src/test/python/apache/aurora/admin/BUILD
index f423e63..5e170d6 100644
--- a/src/test/python/apache/aurora/admin/BUILD
+++ b/src/test/python/apache/aurora/admin/BUILD
@@ -36,6 +36,7 @@ python_tests(name = 'admin_util',
sources = ['test_admin_util.py'],
dependencies = [
pants('3rdparty/python:mock'),
+ pants('3rdparty/python:twitter.common.quantity'),
pants('3rdparty/python:twitter.common.contextutil'),
pants('src/main/python/apache/aurora/admin:util'),
],
http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/3f8b5ea7/src/test/python/apache/aurora/admin/test_host_maintenance.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/admin/test_host_maintenance.py b/src/test/python/apache/aurora/admin/test_host_maintenance.py
index d8aeffb..f2c5b7f 100644
--- a/src/test/python/apache/aurora/admin/test_host_maintenance.py
+++ b/src/test/python/apache/aurora/admin/test_host_maintenance.py
@@ -117,13 +117,6 @@ class TestHostMaintenance(unittest.TestCase):
mock_maintenance_status.assert_called_once_with(test_hosts)
mock_warning.assert_called_once_with('%s is DRAINING or in DRAINED' % TEST_HOSTNAMES[2])
- def test_operate_on_hosts(self):
- mock_callback = mock.Mock()
- test_hosts = Hosts(TEST_HOSTNAMES)
- maintenance = HostMaintenance(DEFAULT_CLUSTER, 'quiet')
- maintenance._operate_on_hosts(test_hosts, mock_callback)
- assert mock_callback.call_count == 3
-
@mock.patch("apache.aurora.admin.host_maintenance.HostMaintenance._complete_maintenance",
spec=HostMaintenance._complete_maintenance)
def test_end_maintenance(self, mock_complete_maintenance):
@@ -140,39 +133,23 @@ class TestHostMaintenance(unittest.TestCase):
maintenance.start_maintenance(TEST_HOSTNAMES)
mock_api.assert_called_once_with(Hosts(set(TEST_HOSTNAMES)))
- @mock.patch("apache.aurora.admin.host_maintenance.HostMaintenance._complete_maintenance",
- spec=HostMaintenance._complete_maintenance)
- @mock.patch("apache.aurora.admin.host_maintenance.HostMaintenance._operate_on_hosts",
- spec=HostMaintenance._operate_on_hosts)
@mock.patch("apache.aurora.admin.host_maintenance.HostMaintenance._drain_hosts",
spec=HostMaintenance._drain_hosts)
@mock.patch("apache.aurora.admin.host_maintenance.HostMaintenance.start_maintenance",
spec=HostMaintenance.start_maintenance)
@mock.patch("apache.aurora.admin.host_maintenance.HostMaintenance._check_sla",
spec=HostMaintenance._check_sla)
- def test_perform_maintenance(self, mock_check_sla, mock_start_maintenance,
- mock_drain_hosts, mock_operate_on_hosts, mock_complete_maintenance):
- mock_callback = mock.Mock()
+ def test_perform_maintenance(self, mock_check_sla, mock_start_maintenance, mock_drain_hosts):
mock_check_sla.return_value = set()
mock_start_maintenance.return_value = TEST_HOSTNAMES
maintenance = HostMaintenance(DEFAULT_CLUSTER, 'quiet')
- maintenance.perform_maintenance(TEST_HOSTNAMES, callback=mock_callback)
+ maintenance.perform_maintenance(TEST_HOSTNAMES)
mock_start_maintenance.assert_called_once_with(TEST_HOSTNAMES)
assert mock_check_sla.call_count == 3
assert mock_drain_hosts.call_count == 3
assert mock_drain_hosts.call_args_list == [
mock.call(Hosts(set([hostname]))) for hostname in TEST_HOSTNAMES]
- assert mock_operate_on_hosts.call_count == 3
- assert mock_operate_on_hosts.call_args_list == [
- mock.call(Hosts(set([hostname])), mock_callback) for hostname in TEST_HOSTNAMES]
- assert mock_complete_maintenance.call_count == 3
- assert mock_complete_maintenance.call_args_list == [
- mock.call(Hosts(set([hostname]))) for hostname in TEST_HOSTNAMES]
- @mock.patch("apache.aurora.admin.host_maintenance.HostMaintenance._complete_maintenance",
- spec=HostMaintenance._complete_maintenance)
- @mock.patch("apache.aurora.admin.host_maintenance.HostMaintenance._operate_on_hosts",
- spec=HostMaintenance._operate_on_hosts)
@mock.patch("apache.aurora.admin.host_maintenance.HostMaintenance._drain_hosts",
spec=HostMaintenance._drain_hosts)
@mock.patch("apache.aurora.admin.host_maintenance.HostMaintenance.start_maintenance",
@@ -180,8 +157,7 @@ class TestHostMaintenance(unittest.TestCase):
@mock.patch("apache.aurora.admin.host_maintenance.HostMaintenance._check_sla",
spec=HostMaintenance._check_sla)
def test_perform_maintenance_partial_sla_failure(self, mock_check_sla, mock_start_maintenance,
- mock_drain_hosts, mock_operate_on_hosts, mock_complete_maintenance):
- mock_callback = mock.Mock()
+ mock_drain_hosts):
failed_host = 'us-west-001.example.com'
mock_check_sla.return_value = set([failed_host])
mock_start_maintenance.return_value = TEST_HOSTNAMES
@@ -190,9 +166,8 @@ class TestHostMaintenance(unittest.TestCase):
with temporary_file() as fp:
with group_by_rack():
- maintenance.perform_maintenance(
+ drained = maintenance.perform_maintenance(
TEST_HOSTNAMES,
- callback=mock_callback,
grouping_function='by_rack',
output_file=fp.name)
@@ -201,15 +176,11 @@ class TestHostMaintenance(unittest.TestCase):
assert failed_host in content
mock_start_maintenance.assert_called_once_with(TEST_HOSTNAMES)
+ assert len(drained) == 2
+ assert failed_host not in drained
assert mock_check_sla.call_count == 1
assert mock_drain_hosts.call_count == 1
assert mock_drain_hosts.call_args_list == [mock.call(Hosts(drained_hosts))]
- assert mock_operate_on_hosts.call_count == 1
- assert mock_operate_on_hosts.call_args_list == [
- mock.call(Hosts(drained_hosts), mock_callback)]
- assert mock_complete_maintenance.call_count == 2
- assert mock_complete_maintenance.call_args_list == [
- mock.call(Hosts(set([failed_host]))), mock.call(Hosts(drained_hosts))]
@mock.patch("apache.aurora.client.api.AuroraClientAPI.maintenance_status",
spec=AuroraClientAPI.maintenance_status)
http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/3f8b5ea7/src/test/python/apache/aurora/client/commands/test_maintenance.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/commands/test_maintenance.py b/src/test/python/apache/aurora/client/commands/test_maintenance.py
index a748c01..2151f02 100644
--- a/src/test/python/apache/aurora/client/commands/test_maintenance.py
+++ b/src/test/python/apache/aurora/client/commands/test_maintenance.py
@@ -18,10 +18,10 @@ from mock import Mock, patch
from twitter.common.contextutil import temporary_file
from apache.aurora.client.commands.maintenance import (
- end_maintenance_hosts,
- host_maintenance_status,
- perform_maintenance_hosts,
- start_maintenance_hosts
+ host_activate,
+ host_deactivate,
+ host_drain,
+ host_status
)
from apache.aurora.client.commands.util import AuroraClientCommandTest
@@ -94,7 +94,7 @@ class TestMaintenanceCommands(AuroraClientCommandTest):
patch('apache.aurora.client.api.SchedulerProxy', return_value=mock_scheduler_proxy),
patch('apache.aurora.client.commands.maintenance.CLUSTERS', new=self.TEST_CLUSTERS),
patch('twitter.common.app.get_options', return_value=mock_options)):
- start_maintenance_hosts([self.TEST_CLUSTER])
+ host_deactivate([self.TEST_CLUSTER])
mock_scheduler_proxy.startMaintenance.assert_called_with(Hosts(set(self.HOSTNAMES)))
@@ -107,14 +107,14 @@ class TestMaintenanceCommands(AuroraClientCommandTest):
patch('apache.aurora.client.api.SchedulerProxy', return_value=mock_scheduler_proxy),
patch('apache.aurora.client.commands.maintenance.CLUSTERS', new=self.TEST_CLUSTERS),
patch('twitter.common.app.get_options', return_value=mock_options)):
- end_maintenance_hosts([self.TEST_CLUSTER])
+ host_activate([self.TEST_CLUSTER])
mock_scheduler_proxy.endMaintenance.assert_called_with(Hosts(set(self.HOSTNAMES)))
mock_scheduler_proxy.maintenanceStatus.assert_called_with(Hosts(set(self.HOSTNAMES)))
def test_perform_maintenance_hosts(self):
mock_options = self.make_mock_options()
- mock_options.post_drain_script = None
+ mock_options.post_drain_script = 'callback'
mock_options.grouping = 'by_host'
def host_status_results(hostnames):
@@ -123,7 +123,7 @@ class TestMaintenanceCommands(AuroraClientCommandTest):
return self.create_maintenance_status_result()
mock_api, mock_scheduler_proxy = self.create_mock_api()
- mock_scheduler_proxy.endMaintenance.return_value = self.create_end_maintenance_result()
+ mock_callback = Mock()
mock_scheduler_proxy.maintenanceStatus.side_effect = host_status_results
mock_scheduler_proxy.startMaintenance.return_value = self.create_start_maintenance_result()
mock_scheduler_proxy.drainHosts.return_value = self.create_start_maintenance_result()
@@ -139,20 +139,17 @@ class TestMaintenanceCommands(AuroraClientCommandTest):
patch('apache.aurora.client.api.sla.Sla.get_domain_uptime_vector',
return_value=mock_vector),
patch('apache.aurora.client.commands.maintenance.CLUSTERS', new=self.TEST_CLUSTERS),
+ patch('apache.aurora.client.commands.maintenance.parse_script', return_value=mock_callback),
patch('twitter.common.app.get_options', return_value=mock_options)) as (
- mock_sleep,
- mock_scheduler_proxy_class,
- mock_vector_class,
- mock_clusters_maintenancepatch,
- options):
- perform_maintenance_hosts([self.TEST_CLUSTER])
+ mock_sleep, _, _, _, _, _):
+ host_drain([self.TEST_CLUSTER])
mock_scheduler_proxy.startMaintenance.assert_called_with(Hosts(set(self.HOSTNAMES)))
#TODO(jsmith): Consider not mocking out sleep and instead refactoring
assert mock_sleep.call_count == 3
- assert mock_scheduler_proxy.maintenanceStatus.call_count == 6
+ assert mock_scheduler_proxy.maintenanceStatus.call_count == 3
assert mock_scheduler_proxy.drainHosts.call_count == 3
- assert mock_scheduler_proxy.endMaintenance.call_count == 3
+ assert mock_callback.call_count == 3
def test_perform_maintenance_hosts_unknown_hosts_skipped(self):
mock_options = self.make_mock_options()
@@ -165,7 +162,6 @@ class TestMaintenanceCommands(AuroraClientCommandTest):
return self.create_maintenance_status_result()
mock_api, mock_scheduler_proxy = self.create_mock_api()
- mock_scheduler_proxy.endMaintenance.return_value = self.create_end_maintenance_result()
mock_scheduler_proxy.maintenanceStatus.side_effect = host_status_results
mock_scheduler_proxy.startMaintenance.return_value = self.create_start_maintenance_result(
skip_hosts=['us-grf-20'])
@@ -183,18 +179,13 @@ class TestMaintenanceCommands(AuroraClientCommandTest):
return_value=mock_vector),
patch('apache.aurora.client.commands.maintenance.CLUSTERS', new=self.TEST_CLUSTERS),
patch('twitter.common.app.get_options', return_value=mock_options)) as (
- mock_sleep,
- mock_scheduler_proxy_class,
- mock_vector_class,
- mock_clusters_maintenancepatch,
- options):
- perform_maintenance_hosts([self.TEST_CLUSTER])
+ mock_sleep, _, _, _, _):
+ host_drain([self.TEST_CLUSTER])
mock_scheduler_proxy.startMaintenance.assert_called_with(Hosts(set(self.HOSTNAMES)))
assert mock_sleep.call_count == 2
- assert mock_scheduler_proxy.maintenanceStatus.call_count == 4
+ assert mock_scheduler_proxy.maintenanceStatus.call_count == 2
assert mock_scheduler_proxy.drainHosts.call_count == 2
- assert mock_scheduler_proxy.endMaintenance.call_count == 2
def test_perform_maintenance_hosts_failed_default_sla(self):
with temporary_file() as fp:
@@ -203,14 +194,7 @@ class TestMaintenanceCommands(AuroraClientCommandTest):
mock_options.grouping = 'by_host'
mock_options.unsafe_hosts_filename = fp.name
- def host_status_results(hostnames):
- if isinstance(hostnames, Hosts):
- return self.create_drained_status_result(hostnames)
- return self.create_maintenance_status_result()
-
mock_api, mock_scheduler_proxy = self.create_mock_api()
- mock_scheduler_proxy.endMaintenance.return_value = self.create_end_maintenance_result()
- mock_scheduler_proxy.maintenanceStatus.side_effect = host_status_results
mock_scheduler_proxy.startMaintenance.return_value = self.create_start_maintenance_result()
mock_scheduler_proxy.drainHosts.return_value = self.create_start_maintenance_result()
mock_vector = self.create_mock_probe_hosts_vector([
@@ -226,10 +210,9 @@ class TestMaintenanceCommands(AuroraClientCommandTest):
return_value=mock_vector),
patch('apache.aurora.client.commands.maintenance.CLUSTERS', new=self.TEST_CLUSTERS),
patch('twitter.common.app.get_options', return_value=mock_options)):
- perform_maintenance_hosts([self.TEST_CLUSTER])
+ host_drain([self.TEST_CLUSTER])
mock_scheduler_proxy.startMaintenance.assert_called_with(Hosts(set(self.HOSTNAMES)))
- assert mock_scheduler_proxy.endMaintenance.call_count == len(self.HOSTNAMES)
def test_perform_maintenance_hosts_failed_custom_sla(self):
with temporary_file() as fp:
@@ -241,14 +224,7 @@ class TestMaintenanceCommands(AuroraClientCommandTest):
mock_options.reason = 'Test overrides'
mock_options.unsafe_hosts_filename = fp.name
- def host_status_results(hostnames):
- if isinstance(hostnames, Hosts):
- return self.create_drained_status_result(hostnames)
- return self.create_maintenance_status_result()
-
mock_api, mock_scheduler_proxy = self.create_mock_api()
- mock_scheduler_proxy.endMaintenance.return_value = self.create_end_maintenance_result()
- mock_scheduler_proxy.maintenanceStatus.side_effect = host_status_results
mock_scheduler_proxy.startMaintenance.return_value = self.create_start_maintenance_result()
mock_scheduler_proxy.drainHosts.return_value = self.create_start_maintenance_result()
mock_vector = self.create_mock_probe_hosts_vector([
@@ -263,14 +239,13 @@ class TestMaintenanceCommands(AuroraClientCommandTest):
patch('apache.aurora.client.api.sla.Sla.get_domain_uptime_vector',
return_value=mock_vector),
patch('apache.aurora.client.commands.maintenance.CLUSTERS', new=self.TEST_CLUSTERS),
- patch('apache.aurora.client.commands.maintenance.log_admin_message'),
+ patch('apache.aurora.admin.admin_util.log_admin_message'),
patch('twitter.common.app.get_options', return_value=mock_options)) as (
_, _, _, _, log, _):
- perform_maintenance_hosts([self.TEST_CLUSTER])
+ host_drain([self.TEST_CLUSTER])
assert 'Test overrides' in log.call_args[0][1]
mock_scheduler_proxy.startMaintenance.assert_called_with(Hosts(set(self.HOSTNAMES)))
- assert mock_scheduler_proxy.endMaintenance.call_count == len(self.HOSTNAMES)
def test_perform_maintenance_hosts_no_prod_tasks(self):
mock_options = self.make_mock_options()
@@ -283,7 +258,6 @@ class TestMaintenanceCommands(AuroraClientCommandTest):
return self.create_maintenance_status_result()
mock_api, mock_scheduler_proxy = self.create_mock_api()
- mock_scheduler_proxy.endMaintenance.return_value = self.create_end_maintenance_result()
mock_scheduler_proxy.maintenanceStatus.side_effect = host_status_results
mock_scheduler_proxy.startMaintenance.return_value = self.create_start_maintenance_result()
mock_scheduler_proxy.drainHosts.return_value = self.create_start_maintenance_result()
@@ -300,19 +274,14 @@ class TestMaintenanceCommands(AuroraClientCommandTest):
return_value=create_empty_sla_results()),
patch('apache.aurora.client.commands.maintenance.CLUSTERS', new=self.TEST_CLUSTERS),
patch('twitter.common.app.get_options', return_value=mock_options)) as (
- mock_sleep,
- mock_scheduler_proxy_class,
- mock_vector_class,
- mock_clusters_maintenancepatch,
- options):
+ mock_sleep, _, _, _, _):
- perform_maintenance_hosts([self.TEST_CLUSTER])
+ host_drain([self.TEST_CLUSTER])
mock_scheduler_proxy.startMaintenance.assert_called_with(Hosts(set(self.HOSTNAMES)))
assert mock_sleep.call_count == 3
- assert mock_scheduler_proxy.maintenanceStatus.call_count == 6
+ assert mock_scheduler_proxy.maintenanceStatus.call_count == 3
assert mock_scheduler_proxy.drainHosts.call_count == 3
- assert mock_scheduler_proxy.endMaintenance.call_count == 3
def test_perform_maintenance_hosts_multiple_sla_groups_failure(self):
mock_options = self.make_mock_options()
@@ -320,16 +289,8 @@ class TestMaintenanceCommands(AuroraClientCommandTest):
mock_options.grouping = 'by_host'
mock_options.unsafe_hosts_filename = None
- def host_status_results(hostnames):
- if isinstance(hostnames, Hosts):
- return self.create_drained_status_result(hostnames)
- return self.create_maintenance_status_result()
-
mock_api, mock_scheduler_proxy = self.create_mock_api()
- mock_scheduler_proxy.endMaintenance.return_value = self.create_end_maintenance_result()
- mock_scheduler_proxy.maintenanceStatus.side_effect = host_status_results
mock_scheduler_proxy.startMaintenance.return_value = self.create_start_maintenance_result()
- mock_scheduler_proxy.drainHosts.return_value = self.create_start_maintenance_result()
def create_multiple_sla_results():
mock_vector = Mock()
@@ -337,25 +298,15 @@ class TestMaintenanceCommands(AuroraClientCommandTest):
return mock_vector
with contextlib.nested(
- patch('time.sleep'),
patch('apache.aurora.client.api.SchedulerProxy', return_value=mock_scheduler_proxy),
patch('apache.aurora.client.api.sla.Sla.get_domain_uptime_vector',
return_value=create_multiple_sla_results()),
patch('apache.aurora.client.commands.maintenance.CLUSTERS', new=self.TEST_CLUSTERS),
- patch('twitter.common.app.get_options', return_value=mock_options)) as (
- mock_sleep,
- mock_scheduler_proxy_class,
- mock_vector_class,
- mock_clusters_maintenancepatch,
- options):
+ patch('twitter.common.app.get_options', return_value=mock_options)):
- perform_maintenance_hosts([self.TEST_CLUSTER])
+ host_drain([self.TEST_CLUSTER])
mock_scheduler_proxy.startMaintenance.assert_called_with(Hosts(set(self.HOSTNAMES)))
- assert mock_sleep.call_count == 0
- assert mock_scheduler_proxy.maintenanceStatus.call_count == 3
- assert mock_scheduler_proxy.drainHosts.call_count == 0
- assert mock_scheduler_proxy.endMaintenance.call_count == 3
def test_perform_maintenance_hosts_reason_missing(self):
mock_options = self.make_mock_options()
@@ -365,12 +316,8 @@ class TestMaintenanceCommands(AuroraClientCommandTest):
with contextlib.nested(
patch('twitter.common.app.get_options', return_value=mock_options)):
- try:
- perform_maintenance_hosts([self.TEST_CLUSTER])
- except SystemExit:
- pass
- else:
- assert 'Expected error is not raised.'
+ self.assertRaises(SystemExit, host_drain, [self.TEST_CLUSTER])
+
def test_host_maintenance_status(self):
mock_options = self.make_mock_options()
@@ -380,6 +327,6 @@ class TestMaintenanceCommands(AuroraClientCommandTest):
patch('apache.aurora.client.api.SchedulerProxy', return_value=mock_scheduler_proxy),
patch('apache.aurora.client.commands.maintenance.CLUSTERS', new=self.TEST_CLUSTERS),
patch('twitter.common.app.get_options', return_value=mock_options)):
- host_maintenance_status([self.TEST_CLUSTER])
+ host_status([self.TEST_CLUSTER])
mock_scheduler_proxy.maintenanceStatus.assert_called_with(Hosts(set(self.HOSTNAMES)))