You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@aurora.apache.org by jc...@apache.org on 2016/09/20 19:27:01 UTC
aurora git commit: Fix host maintenance commands to properly
initialize the api client.
Repository: aurora
Updated Branches:
refs/heads/master 4745c8cc2 -> a9f4e26a2
Fix host maintenance commands to properly initialize the api client.
Bugs closed: AURORA-1777
Reviewed at https://reviews.apache.org/r/52087/
Project: http://git-wip-us.apache.org/repos/asf/aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/a9f4e26a
Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/a9f4e26a
Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/a9f4e26a
Branch: refs/heads/master
Commit: a9f4e26a24e63aeb6a215c0f5d5bb54b7c936abb
Parents: 4745c8c
Author: Joshua Cohen <jc...@apache.org>
Authored: Tue Sep 20 14:26:49 2016 -0500
Committer: Joshua Cohen <jc...@apache.org>
Committed: Tue Sep 20 14:26:49 2016 -0500
----------------------------------------------------------------------
src/main/python/apache/aurora/admin/admin.py | 49 +++++++++-----------
.../python/apache/aurora/admin/admin_util.py | 29 +++++++++++-
.../apache/aurora/admin/host_maintenance.py | 10 ++--
.../python/apache/aurora/admin/maintenance.py | 37 ++++++++++-----
.../python/apache/aurora/admin/test_admin.py | 2 +
.../apache/aurora/admin/test_admin_sla.py | 2 +
.../apache/aurora/admin/test_admin_util.py | 23 ++++++++-
src/test/python/apache/aurora/admin/util.py | 1 +
.../sh/org/apache/aurora/e2e/test_end_to_end.sh | 6 ++-
9 files changed, 114 insertions(+), 45 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/main/python/apache/aurora/admin/admin.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/admin/admin.py b/src/main/python/apache/aurora/admin/admin.py
index 9fc89a2..070c348 100644
--- a/src/main/python/apache/aurora/admin/admin.py
+++ b/src/main/python/apache/aurora/admin/admin.py
@@ -22,10 +22,9 @@ from twitter.common import app, log
from twitter.common.quantity import Data, Time
from twitter.common.quantity.parse_simple import parse_data, parse_time
-from apache.aurora.client.api import AuroraClientAPI
+from apache.aurora.admin.admin_util import make_admin_client
from apache.aurora.client.api.sla import JobUpTimeLimit
from apache.aurora.client.base import (
- AURORA_ADMIN_USER_AGENT_NAME,
GROUPING_OPTION,
check_and_log_response,
combine_messages,
@@ -63,17 +62,12 @@ MIN_SLA_INSTANCE_COUNT = optparse.Option(
)
-def make_admin_client(cluster):
- if cluster not in CLUSTERS:
- die('Unknown cluster: %s. Known clusters: %s' % (cluster, ", ".join(CLUSTERS.keys())))
-
+def make_admin_client_with_options(cluster):
options = app.get_options()
- verbose = getattr(options, 'verbosity', 'normal') == 'verbose'
- return AuroraClientAPI(
- CLUSTERS[cluster],
- AURORA_ADMIN_USER_AGENT_NAME,
- verbose=verbose,
+ return make_admin_client(
+ cluster=cluster,
+ verbose=getattr(options, 'verbosity', 'normal') == 'verbose',
bypass_leader_redirect=options.bypass_leader_redirect)
@@ -158,7 +152,7 @@ def query(args, options):
if not (states <= ACTIVE_STATES) and not options.force:
die('--force is required for expensive queries (states outside ACTIVE states')
- api = make_admin_client(cluster)
+ api = make_admin_client_with_options(cluster)
query_info = api.query(TaskQuery(role=role, jobName=job, instanceIds=instances, statuses=states))
if query_info.responseCode != ResponseCode.OK:
@@ -198,7 +192,7 @@ def set_quota(cluster, role, cpu_str, ram, disk):
except ValueError as e:
die(str(e))
- resp = make_admin_client(cluster).set_quota(role, cpu, ram_mb, disk_mb)
+ resp = make_admin_client_with_options(cluster).set_quota(role, cpu, ram_mb, disk_mb)
check_and_log_response(resp)
@@ -213,7 +207,7 @@ def increase_quota(cluster, role, cpu_str, ram_str, disk_str):
ram = parse_data(ram_str).as_(Data.MB)
disk = parse_data(disk_str).as_(Data.MB)
- client = make_admin_client(cluster)
+ client = make_admin_client_with_options(cluster)
resp = client.get_quota(role)
quota = resp.result.getQuotaResult.quota
resource_details = ResourceManager.resource_details_from_quota(quota)
@@ -245,7 +239,7 @@ def scheduler_backup_now(cluster):
Immediately initiates a full storage backup.
"""
- check_and_log_response(make_admin_client(cluster).perform_backup())
+ check_and_log_response(make_admin_client_with_options(cluster).perform_backup())
@app.command
@@ -255,7 +249,7 @@ def scheduler_list_backups(cluster):
Lists backups available for recovery.
"""
- resp = make_admin_client(cluster).list_backups()
+ resp = make_admin_client_with_options(cluster).list_backups()
check_and_log_response(resp)
backups = resp.result.listBackupsResult.backups
print('%s available backups:' % len(backups))
@@ -270,7 +264,7 @@ def scheduler_stage_recovery(cluster, backup_id):
Stages a backup for recovery.
"""
- check_and_log_response(make_admin_client(cluster).stage_recovery(backup_id))
+ check_and_log_response(make_admin_client_with_options(cluster).stage_recovery(backup_id))
@app.command
@@ -280,7 +274,7 @@ def scheduler_print_recovery_tasks(cluster):
Prints all active tasks in a staged recovery.
"""
- resp = make_admin_client(cluster).query_recovery(
+ resp = make_admin_client_with_options(cluster).query_recovery(
TaskQuery(statuses=ACTIVE_STATES))
check_and_log_response(resp)
log.info('Role\tJob\tShard\tStatus\tTask ID')
@@ -302,7 +296,8 @@ def scheduler_delete_recovery_tasks(cluster, task_ids):
Deletes a comma-separated list of task IDs from a staged recovery.
"""
ids = set(task_ids.split(','))
- check_and_log_response(make_admin_client(cluster).delete_recovery_tasks(TaskQuery(taskIds=ids)))
+ check_and_log_response(make_admin_client_with_options(cluster).delete_recovery_tasks(
+ TaskQuery(taskIds=ids)))
@app.command
@@ -312,7 +307,7 @@ def scheduler_commit_recovery(cluster):
Commits a staged recovery.
"""
- check_and_log_response(make_admin_client(cluster).commit_recovery())
+ check_and_log_response(make_admin_client_with_options(cluster).commit_recovery())
@app.command
@@ -322,7 +317,7 @@ def scheduler_unload_recovery(cluster):
Unloads a staged recovery.
"""
- check_and_log_response(make_admin_client(cluster).unload_recovery())
+ check_and_log_response(make_admin_client_with_options(cluster).unload_recovery())
@app.command
@@ -332,7 +327,7 @@ def scheduler_snapshot(cluster):
Request that the scheduler perform a storage snapshot and block until complete.
"""
- check_and_log_response(make_admin_client(cluster).snapshot())
+ check_and_log_response(make_admin_client_with_options(cluster).snapshot())
@app.command
@@ -352,7 +347,7 @@ def reconcile_tasks(cluster):
scheduler configuration option.
"""
options = app.get_options()
- client = make_admin_client(cluster)
+ client = make_admin_client_with_options(cluster)
if options.type == 'implicit':
resp = client.reconcile_implicit()
elif options.type == 'explicit':
@@ -445,7 +440,7 @@ def sla_list_safe_domain(cluster, percentage, duration):
override_jobs = parse_jobs_file(options.override_filename) if options.override_filename else {}
get_grouping_or_die(options.grouping)
- vector = make_admin_client(cluster).sla_get_safe_domain_vector(
+ vector = make_admin_client_with_options(cluster).sla_get_safe_domain_vector(
options.min_instance_count,
include_hosts)
groups = vector.get_safe_hosts(sla_percentage, sla_duration.as_(Time.SECONDS),
@@ -502,7 +497,9 @@ def sla_probe_hosts(cluster, percentage, duration):
hosts = parse_hostnames(options.filename, options.hosts)
get_grouping_or_die(options.grouping)
- vector = make_admin_client(cluster).sla_get_safe_domain_vector(options.min_instance_count, hosts)
+ vector = make_admin_client_with_options(cluster).sla_get_safe_domain_vector(
+ options.min_instance_count,
+ hosts)
groups = vector.probe_hosts(sla_percentage, sla_duration.as_(Time.SECONDS), options.grouping)
output, _ = format_sla_results(groups)
@@ -539,4 +536,4 @@ def get_scheduler(cluster):
Dumps the leading scheduler endpoint URL.
"""
print("Found leading scheduler at: %s" %
- make_admin_client(cluster).scheduler_proxy.scheduler_client().raw_url)
+ make_admin_client_with_options(cluster).scheduler_proxy.scheduler_client().raw_url)
http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/main/python/apache/aurora/admin/admin_util.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/admin/admin_util.py b/src/main/python/apache/aurora/admin/admin_util.py
index 394deb5..8240e80 100644
--- a/src/main/python/apache/aurora/admin/admin_util.py
+++ b/src/main/python/apache/aurora/admin/admin_util.py
@@ -22,7 +22,10 @@ from uuid import uuid1
from twitter.common.quantity import Amount, Time
from twitter.common.quantity.parse_simple import parse_time
-from apache.aurora.client.base import die
+from apache.aurora.client.api import AuroraClientAPI
+from apache.aurora.client.base import AURORA_ADMIN_USER_AGENT_NAME, die
+from apache.aurora.common.cluster import Cluster
+from apache.aurora.common.clusters import CLUSTERS
"""Admin client utility functions shared between admin and maintenance modules."""
@@ -264,3 +267,27 @@ def format_sla_results(host_groups, unsafe_only=False):
results.append(host_details)
hostnames.add(host)
return results, hostnames
+
+
+def make_admin_client(cluster, verbose=False, bypass_leader_redirect=False):
+ """Creates an API client with the specified options for use in admin commands.
+
+ :param cluster: The cluster to connect with.
+ :type cluster: Either a string cluster name or a Cluster object.
+ :param verbose: Should the client emit verbose output.
+ :type verbose: bool
+ :type bypass_leader_redirect: Should the client bypass the scheduler's leader redirect filter.
+ :type bypass_leader_redirect: bool
+ :rtype: an AuroraClientAPI instance.
+ """
+
+ is_cluster_object = isinstance(cluster, Cluster)
+
+ if not is_cluster_object and cluster not in CLUSTERS:
+ die('Unknown cluster: %s. Known clusters: %s' % (cluster, ", ".join(CLUSTERS.keys())))
+
+ return AuroraClientAPI(
+ cluster if is_cluster_object else CLUSTERS[cluster],
+ AURORA_ADMIN_USER_AGENT_NAME,
+ verbose=verbose,
+ bypass_leader_redirect=bypass_leader_redirect)
http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/main/python/apache/aurora/admin/host_maintenance.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/admin/host_maintenance.py b/src/main/python/apache/aurora/admin/host_maintenance.py
index 677f870..83fc2b6 100644
--- a/src/main/python/apache/aurora/admin/host_maintenance.py
+++ b/src/main/python/apache/aurora/admin/host_maintenance.py
@@ -16,8 +16,7 @@ from threading import Event
from twitter.common import log
from twitter.common.quantity import Amount, Time
-from apache.aurora.admin.admin_util import format_sla_results, print_results
-from apache.aurora.client.api import AuroraClientAPI
+from apache.aurora.admin.admin_util import format_sla_results, make_admin_client, print_results
from apache.aurora.client.base import DEFAULT_GROUPING, check_and_log_response, group_hosts
from gen.apache.aurora.api.ttypes import Hosts, MaintenanceMode
@@ -47,8 +46,11 @@ class HostMaintenance(object):
for group in groups:
yield Hosts(group[1])
- def __init__(self, cluster, verbosity, wait_event=None):
- self._client = AuroraClientAPI(cluster, verbosity == 'verbose')
+ def __init__(self, cluster, verbosity, wait_event=None, bypass_leader_redirect=False):
+ self._client = make_admin_client(
+ cluster=cluster,
+ verbose=verbosity == 'verbose',
+ bypass_leader_redirect=bypass_leader_redirect)
self._wait_event = wait_event or Event()
def _drain_hosts(self, drainable_hosts):
http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/main/python/apache/aurora/admin/maintenance.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/admin/maintenance.py b/src/main/python/apache/aurora/admin/maintenance.py
index bf44651..942a237 100644
--- a/src/main/python/apache/aurora/admin/maintenance.py
+++ b/src/main/python/apache/aurora/admin/maintenance.py
@@ -51,8 +51,11 @@ def host_deactivate(cluster):
future hosts that will be drained shortly in subsequent batches.
"""
options = app.get_options()
- HostMaintenance(CLUSTERS[cluster], options.verbosity).start_maintenance(
- parse_hostnames(options.filename, options.hosts))
+ HostMaintenance(
+ cluster=CLUSTERS[cluster],
+ verbosity=options.verbosity,
+ bypass_leader_redirect=options.bypass_leader_redirect).start_maintenance(
+ parse_hostnames(options.filename, options.hosts))
@app.command
@@ -69,8 +72,11 @@ def host_activate(cluster):
allow normal scheduling to resume on the given list of hosts.
"""
options = app.get_options()
- HostMaintenance(CLUSTERS[cluster], options.verbosity).end_maintenance(
- parse_hostnames(options.filename, options.hosts))
+ HostMaintenance(
+ cluster=CLUSTERS[cluster],
+ verbosity=options.verbosity,
+ bypass_leader_redirect=options.bypass_leader_redirect).end_maintenance(
+ parse_hostnames(options.filename, options.hosts))
@app.command
@@ -113,13 +119,16 @@ def host_drain(cluster):
post_drain_callback = parse_script(options.post_drain_script)
- HostMaintenance(CLUSTERS[cluster], options.verbosity).perform_maintenance(
- drainable_hosts,
- grouping_function=options.grouping,
- percentage=override_percentage,
- duration=override_duration,
- output_file=options.unsafe_hosts_filename,
- callback=post_drain_callback)
+ HostMaintenance(
+ cluster=CLUSTERS[cluster],
+ verbosity=options.verbosity,
+ bypass_leader_redirect=options.bypass_leader_redirect).perform_maintenance(
+ drainable_hosts,
+ grouping_function=options.grouping,
+ percentage=override_percentage,
+ duration=override_duration,
+ output_file=options.unsafe_hosts_filename,
+ callback=post_drain_callback)
@app.command
@@ -134,6 +143,10 @@ def host_status(cluster):
"""
options = app.get_options()
checkable_hosts = parse_hostnames(options.filename, options.hosts)
- statuses = HostMaintenance(CLUSTERS[cluster], options.verbosity).check_status(checkable_hosts)
+ statuses = HostMaintenance(
+ cluster=CLUSTERS[cluster],
+ verbosity=options.verbosity,
+ bypass_leader_redirect=options.bypass_leader_redirect).check_status(checkable_hosts)
+
for pair in statuses:
log.info("%s is in state: %s" % pair)
http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/test/python/apache/aurora/admin/test_admin.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/admin/test_admin.py b/src/test/python/apache/aurora/admin/test_admin.py
index f720742..66abade 100644
--- a/src/test/python/apache/aurora/admin/test_admin.py
+++ b/src/test/python/apache/aurora/admin/test_admin.py
@@ -58,6 +58,7 @@ class TestQueryCommand(AuroraClientCommandTest):
mock_options.states = states
mock_options.listformat = listformat or '%role%/%name%/%instanceId% %status%'
mock_options.verbosity = False
+ mock_options.bypass_leader_redirect = False
return mock_options
@classmethod
@@ -228,6 +229,7 @@ class TestReconcileTaskCommand(AuroraClientCommandTest):
mock_options = create_autospec(spec=['type', 'batch_size'], instance=True)
mock_options.type = reconcile_type
mock_options.batch_size = batch_size
+ mock_options.bypass_leader_redirect = False
return mock_options
def test_reconcile_implicit(self):
http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/test/python/apache/aurora/admin/test_admin_sla.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/admin/test_admin_sla.py b/src/test/python/apache/aurora/admin/test_admin_sla.py
index 54b5a82..6d7c74d 100644
--- a/src/test/python/apache/aurora/admin/test_admin_sla.py
+++ b/src/test/python/apache/aurora/admin/test_admin_sla.py
@@ -48,6 +48,7 @@ class TestAdminSlaListSafeDomainCommand(AuroraClientCommandTest):
mock_options.disable_all_hooks = False
mock_options.min_instance_count = MIN_INSTANCE_COUNT
mock_options.grouping = grouping or DEFAULT_GROUPING
+ mock_options.bypass_leader_redirect = False
return mock_options
@classmethod
@@ -308,6 +309,7 @@ class TestAdminSlaProbeHostsCommand(AuroraClientCommandTest):
mock_options.verbosity = False
mock_options.grouping = grouping or DEFAULT_GROUPING
mock_options.min_instance_count = 1
+ mock_options.bypass_leader_redirect = False
return mock_options
@classmethod
http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/test/python/apache/aurora/admin/test_admin_util.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/admin/test_admin_util.py b/src/test/python/apache/aurora/admin/test_admin_util.py
index f5c8c69..4714bfa 100644
--- a/src/test/python/apache/aurora/admin/test_admin_util.py
+++ b/src/test/python/apache/aurora/admin/test_admin_util.py
@@ -18,10 +18,19 @@ import unittest
import mock
from twitter.common.contextutil import temporary_file
-from apache.aurora.admin.admin_util import parse_script
+from apache.aurora.admin.admin_util import make_admin_client, parse_script
+from apache.aurora.common.cluster import Cluster
+from apache.aurora.common.clusters import Clusters
class TestAdminUtil(unittest.TestCase):
+ TEST_CLUSTER_NAME = 'west'
+ TEST_CLUSTER = Cluster(
+ name=TEST_CLUSTER_NAME,
+ zk='zookeeper.example.com',
+ scheduler_zk_path='/foo/bar',
+ auth_mechanism='UNAUTHENTICATED')
+ TEST_CLUSTERS = Clusters([TEST_CLUSTER])
@mock.patch("apache.aurora.admin.admin_util.subprocess", spec=subprocess)
def test_parse_script(self, mock_subprocess):
@@ -34,3 +43,15 @@ class TestAdminUtil(unittest.TestCase):
def test_parse_script_invalid_filename(self):
self.assertRaises(SystemExit, parse_script, "invalid filename")
+
+ def test_make_admin_client_cluster_string(self):
+ with mock.patch('apache.aurora.admin.admin_util.CLUSTERS', new=self.TEST_CLUSTERS):
+ self.assertIsNotNone(make_admin_client(self.TEST_CLUSTER_NAME))
+
+ def test_make_admin_client_cluster_object(self):
+ with mock.patch('apache.aurora.admin.admin_util.CLUSTERS', new=self.TEST_CLUSTERS):
+ self.assertIsNotNone(make_admin_client(self.TEST_CLUSTER))
+
+ def test_make_admin_client_cluster_unknown(self):
+ with mock.patch('apache.aurora.admin.admin_util.CLUSTERS', new=self.TEST_CLUSTERS):
+ self.assertRaises(SystemExit, make_admin_client, 'east')
http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/test/python/apache/aurora/admin/util.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/admin/util.py b/src/test/python/apache/aurora/admin/util.py
index d0a915c..355734d 100644
--- a/src/test/python/apache/aurora/admin/util.py
+++ b/src/test/python/apache/aurora/admin/util.py
@@ -81,4 +81,5 @@ class AuroraClientCommandTest(unittest.TestCase):
def setup_mock_options(cls):
mock_options = create_autospec(spec=['verbosity'], instance=True)
mock_options.verbosity = 'verbose'
+ mock_options.bypass_leader_redirect = False
return mock_options
http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh
----------------------------------------------------------------------
diff --git a/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh b/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh
index e36726e..c93be9b 100755
--- a/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh
+++ b/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh
@@ -26,7 +26,7 @@ fi
set -u -e -x
set -o pipefail
-readonly TEST_SCHEDULER_IP=192.168.33.7
+readonly TEST_SLAVE_IP=192.168.33.7
_curl() { curl --silent --fail --retry 4 --retry-delay 10 "$@" ; }
@@ -371,6 +371,10 @@ test_admin() {
echo '== Testing admin commands'
echo '== Getting leading scheduler'
aurora_admin get_scheduler $_cluster | grep ":8081"
+
+ # host maintenance commands currently have a separate entry point and use their own api client.
+ # Until we address that, at least verify that the command group still works.
+ aurora_admin host_status --hosts=$TEST_SLAVE_IP $_cluster
}
test_ephemeral_daemon_with_final() {