You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@aurora.apache.org by jc...@apache.org on 2016/09/20 19:27:01 UTC

aurora git commit: Fix host maintenance commands to properly initialize the api client.

Repository: aurora
Updated Branches:
  refs/heads/master 4745c8cc2 -> a9f4e26a2


Fix host maintenance commands to properly initialize the api client.

Bugs closed: AURORA-1777

Reviewed at https://reviews.apache.org/r/52087/


Project: http://git-wip-us.apache.org/repos/asf/aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/a9f4e26a
Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/a9f4e26a
Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/a9f4e26a

Branch: refs/heads/master
Commit: a9f4e26a24e63aeb6a215c0f5d5bb54b7c936abb
Parents: 4745c8c
Author: Joshua Cohen <jc...@apache.org>
Authored: Tue Sep 20 14:26:49 2016 -0500
Committer: Joshua Cohen <jc...@apache.org>
Committed: Tue Sep 20 14:26:49 2016 -0500

----------------------------------------------------------------------
 src/main/python/apache/aurora/admin/admin.py    | 49 +++++++++-----------
 .../python/apache/aurora/admin/admin_util.py    | 29 +++++++++++-
 .../apache/aurora/admin/host_maintenance.py     | 10 ++--
 .../python/apache/aurora/admin/maintenance.py   | 37 ++++++++++-----
 .../python/apache/aurora/admin/test_admin.py    |  2 +
 .../apache/aurora/admin/test_admin_sla.py       |  2 +
 .../apache/aurora/admin/test_admin_util.py      | 23 ++++++++-
 src/test/python/apache/aurora/admin/util.py     |  1 +
 .../sh/org/apache/aurora/e2e/test_end_to_end.sh |  6 ++-
 9 files changed, 114 insertions(+), 45 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/main/python/apache/aurora/admin/admin.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/admin/admin.py b/src/main/python/apache/aurora/admin/admin.py
index 9fc89a2..070c348 100644
--- a/src/main/python/apache/aurora/admin/admin.py
+++ b/src/main/python/apache/aurora/admin/admin.py
@@ -22,10 +22,9 @@ from twitter.common import app, log
 from twitter.common.quantity import Data, Time
 from twitter.common.quantity.parse_simple import parse_data, parse_time
 
-from apache.aurora.client.api import AuroraClientAPI
+from apache.aurora.admin.admin_util import make_admin_client
 from apache.aurora.client.api.sla import JobUpTimeLimit
 from apache.aurora.client.base import (
-    AURORA_ADMIN_USER_AGENT_NAME,
     GROUPING_OPTION,
     check_and_log_response,
     combine_messages,
@@ -63,17 +62,12 @@ MIN_SLA_INSTANCE_COUNT = optparse.Option(
 )
 
 
-def make_admin_client(cluster):
-  if cluster not in CLUSTERS:
-    die('Unknown cluster: %s. Known clusters: %s' % (cluster, ", ".join(CLUSTERS.keys())))
-
+def make_admin_client_with_options(cluster):
   options = app.get_options()
-  verbose = getattr(options, 'verbosity', 'normal') == 'verbose'
 
-  return AuroraClientAPI(
-      CLUSTERS[cluster],
-      AURORA_ADMIN_USER_AGENT_NAME,
-      verbose=verbose,
+  return make_admin_client(
+      cluster=cluster,
+      verbose=getattr(options, 'verbosity', 'normal') == 'verbose',
       bypass_leader_redirect=options.bypass_leader_redirect)
 
 
@@ -158,7 +152,7 @@ def query(args, options):
   if not (states <= ACTIVE_STATES) and not options.force:
     die('--force is required for expensive queries (states outside ACTIVE states')
 
-  api = make_admin_client(cluster)
+  api = make_admin_client_with_options(cluster)
 
   query_info = api.query(TaskQuery(role=role, jobName=job, instanceIds=instances, statuses=states))
   if query_info.responseCode != ResponseCode.OK:
@@ -198,7 +192,7 @@ def set_quota(cluster, role, cpu_str, ram, disk):
   except ValueError as e:
     die(str(e))
 
-  resp = make_admin_client(cluster).set_quota(role, cpu, ram_mb, disk_mb)
+  resp = make_admin_client_with_options(cluster).set_quota(role, cpu, ram_mb, disk_mb)
   check_and_log_response(resp)
 
 
@@ -213,7 +207,7 @@ def increase_quota(cluster, role, cpu_str, ram_str, disk_str):
   ram = parse_data(ram_str).as_(Data.MB)
   disk = parse_data(disk_str).as_(Data.MB)
 
-  client = make_admin_client(cluster)
+  client = make_admin_client_with_options(cluster)
   resp = client.get_quota(role)
   quota = resp.result.getQuotaResult.quota
   resource_details = ResourceManager.resource_details_from_quota(quota)
@@ -245,7 +239,7 @@ def scheduler_backup_now(cluster):
 
   Immediately initiates a full storage backup.
   """
-  check_and_log_response(make_admin_client(cluster).perform_backup())
+  check_and_log_response(make_admin_client_with_options(cluster).perform_backup())
 
 
 @app.command
@@ -255,7 +249,7 @@ def scheduler_list_backups(cluster):
 
   Lists backups available for recovery.
   """
-  resp = make_admin_client(cluster).list_backups()
+  resp = make_admin_client_with_options(cluster).list_backups()
   check_and_log_response(resp)
   backups = resp.result.listBackupsResult.backups
   print('%s available backups:' % len(backups))
@@ -270,7 +264,7 @@ def scheduler_stage_recovery(cluster, backup_id):
 
   Stages a backup for recovery.
   """
-  check_and_log_response(make_admin_client(cluster).stage_recovery(backup_id))
+  check_and_log_response(make_admin_client_with_options(cluster).stage_recovery(backup_id))
 
 
 @app.command
@@ -280,7 +274,7 @@ def scheduler_print_recovery_tasks(cluster):
 
   Prints all active tasks in a staged recovery.
   """
-  resp = make_admin_client(cluster).query_recovery(
+  resp = make_admin_client_with_options(cluster).query_recovery(
       TaskQuery(statuses=ACTIVE_STATES))
   check_and_log_response(resp)
   log.info('Role\tJob\tShard\tStatus\tTask ID')
@@ -302,7 +296,8 @@ def scheduler_delete_recovery_tasks(cluster, task_ids):
   Deletes a comma-separated list of task IDs from a staged recovery.
   """
   ids = set(task_ids.split(','))
-  check_and_log_response(make_admin_client(cluster).delete_recovery_tasks(TaskQuery(taskIds=ids)))
+  check_and_log_response(make_admin_client_with_options(cluster).delete_recovery_tasks(
+      TaskQuery(taskIds=ids)))
 
 
 @app.command
@@ -312,7 +307,7 @@ def scheduler_commit_recovery(cluster):
 
   Commits a staged recovery.
   """
-  check_and_log_response(make_admin_client(cluster).commit_recovery())
+  check_and_log_response(make_admin_client_with_options(cluster).commit_recovery())
 
 
 @app.command
@@ -322,7 +317,7 @@ def scheduler_unload_recovery(cluster):
 
   Unloads a staged recovery.
   """
-  check_and_log_response(make_admin_client(cluster).unload_recovery())
+  check_and_log_response(make_admin_client_with_options(cluster).unload_recovery())
 
 
 @app.command
@@ -332,7 +327,7 @@ def scheduler_snapshot(cluster):
 
   Request that the scheduler perform a storage snapshot and block until complete.
   """
-  check_and_log_response(make_admin_client(cluster).snapshot())
+  check_and_log_response(make_admin_client_with_options(cluster).snapshot())
 
 
 @app.command
@@ -352,7 +347,7 @@ def reconcile_tasks(cluster):
   scheduler configuration option.
   """
   options = app.get_options()
-  client = make_admin_client(cluster)
+  client = make_admin_client_with_options(cluster)
   if options.type == 'implicit':
     resp = client.reconcile_implicit()
   elif options.type == 'explicit':
@@ -445,7 +440,7 @@ def sla_list_safe_domain(cluster, percentage, duration):
   override_jobs = parse_jobs_file(options.override_filename) if options.override_filename else {}
   get_grouping_or_die(options.grouping)
 
-  vector = make_admin_client(cluster).sla_get_safe_domain_vector(
+  vector = make_admin_client_with_options(cluster).sla_get_safe_domain_vector(
       options.min_instance_count,
       include_hosts)
   groups = vector.get_safe_hosts(sla_percentage, sla_duration.as_(Time.SECONDS),
@@ -502,7 +497,9 @@ def sla_probe_hosts(cluster, percentage, duration):
   hosts = parse_hostnames(options.filename, options.hosts)
   get_grouping_or_die(options.grouping)
 
-  vector = make_admin_client(cluster).sla_get_safe_domain_vector(options.min_instance_count, hosts)
+  vector = make_admin_client_with_options(cluster).sla_get_safe_domain_vector(
+      options.min_instance_count,
+      hosts)
   groups = vector.probe_hosts(sla_percentage, sla_duration.as_(Time.SECONDS), options.grouping)
 
   output, _ = format_sla_results(groups)
@@ -539,4 +536,4 @@ def get_scheduler(cluster):
   Dumps the leading scheduler endpoint URL.
   """
   print("Found leading scheduler at: %s" %
-      make_admin_client(cluster).scheduler_proxy.scheduler_client().raw_url)
+      make_admin_client_with_options(cluster).scheduler_proxy.scheduler_client().raw_url)

http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/main/python/apache/aurora/admin/admin_util.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/admin/admin_util.py b/src/main/python/apache/aurora/admin/admin_util.py
index 394deb5..8240e80 100644
--- a/src/main/python/apache/aurora/admin/admin_util.py
+++ b/src/main/python/apache/aurora/admin/admin_util.py
@@ -22,7 +22,10 @@ from uuid import uuid1
 from twitter.common.quantity import Amount, Time
 from twitter.common.quantity.parse_simple import parse_time
 
-from apache.aurora.client.base import die
+from apache.aurora.client.api import AuroraClientAPI
+from apache.aurora.client.base import AURORA_ADMIN_USER_AGENT_NAME, die
+from apache.aurora.common.cluster import Cluster
+from apache.aurora.common.clusters import CLUSTERS
 
 """Admin client utility functions shared between admin and maintenance modules."""
 
@@ -264,3 +267,27 @@ def format_sla_results(host_groups, unsafe_only=False):
         results.append(host_details)
         hostnames.add(host)
   return results, hostnames
+
+
+def make_admin_client(cluster, verbose=False, bypass_leader_redirect=False):
+  """Creates an API client with the specified options for use in admin commands.
+
+  :param cluster: The cluster to connect with.
+  :type cluster: Either a string cluster name or a Cluster object.
+  :param verbose: Should the client emit verbose output.
+  :type verbose: bool
+  :type bypass_leader_redirect: Should the client bypass the scheduler's leader redirect filter.
+  :type bypass_leader_redirect: bool
+  :rtype: an AuroraClientAPI instance.
+  """
+
+  is_cluster_object = isinstance(cluster, Cluster)
+
+  if not is_cluster_object and cluster not in CLUSTERS:
+    die('Unknown cluster: %s. Known clusters: %s' % (cluster, ", ".join(CLUSTERS.keys())))
+
+  return AuroraClientAPI(
+      cluster if is_cluster_object else CLUSTERS[cluster],
+      AURORA_ADMIN_USER_AGENT_NAME,
+      verbose=verbose,
+      bypass_leader_redirect=bypass_leader_redirect)

http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/main/python/apache/aurora/admin/host_maintenance.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/admin/host_maintenance.py b/src/main/python/apache/aurora/admin/host_maintenance.py
index 677f870..83fc2b6 100644
--- a/src/main/python/apache/aurora/admin/host_maintenance.py
+++ b/src/main/python/apache/aurora/admin/host_maintenance.py
@@ -16,8 +16,7 @@ from threading import Event
 from twitter.common import log
 from twitter.common.quantity import Amount, Time
 
-from apache.aurora.admin.admin_util import format_sla_results, print_results
-from apache.aurora.client.api import AuroraClientAPI
+from apache.aurora.admin.admin_util import format_sla_results, make_admin_client, print_results
 from apache.aurora.client.base import DEFAULT_GROUPING, check_and_log_response, group_hosts
 
 from gen.apache.aurora.api.ttypes import Hosts, MaintenanceMode
@@ -47,8 +46,11 @@ class HostMaintenance(object):
     for group in groups:
       yield Hosts(group[1])
 
-  def __init__(self, cluster, verbosity, wait_event=None):
-    self._client = AuroraClientAPI(cluster, verbosity == 'verbose')
+  def __init__(self, cluster, verbosity, wait_event=None, bypass_leader_redirect=False):
+    self._client = make_admin_client(
+        cluster=cluster,
+        verbose=verbosity == 'verbose',
+        bypass_leader_redirect=bypass_leader_redirect)
     self._wait_event = wait_event or Event()
 
   def _drain_hosts(self, drainable_hosts):

http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/main/python/apache/aurora/admin/maintenance.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/admin/maintenance.py b/src/main/python/apache/aurora/admin/maintenance.py
index bf44651..942a237 100644
--- a/src/main/python/apache/aurora/admin/maintenance.py
+++ b/src/main/python/apache/aurora/admin/maintenance.py
@@ -51,8 +51,11 @@ def host_deactivate(cluster):
   future hosts that will be drained shortly in subsequent batches.
   """
   options = app.get_options()
-  HostMaintenance(CLUSTERS[cluster], options.verbosity).start_maintenance(
-      parse_hostnames(options.filename, options.hosts))
+  HostMaintenance(
+      cluster=CLUSTERS[cluster],
+      verbosity=options.verbosity,
+      bypass_leader_redirect=options.bypass_leader_redirect).start_maintenance(
+          parse_hostnames(options.filename, options.hosts))
 
 
 @app.command
@@ -69,8 +72,11 @@ def host_activate(cluster):
   allow normal scheduling to resume on the given list of hosts.
   """
   options = app.get_options()
-  HostMaintenance(CLUSTERS[cluster], options.verbosity).end_maintenance(
-      parse_hostnames(options.filename, options.hosts))
+  HostMaintenance(
+      cluster=CLUSTERS[cluster],
+      verbosity=options.verbosity,
+      bypass_leader_redirect=options.bypass_leader_redirect).end_maintenance(
+          parse_hostnames(options.filename, options.hosts))
 
 
 @app.command
@@ -113,13 +119,16 @@ def host_drain(cluster):
 
   post_drain_callback = parse_script(options.post_drain_script)
 
-  HostMaintenance(CLUSTERS[cluster], options.verbosity).perform_maintenance(
-      drainable_hosts,
-      grouping_function=options.grouping,
-      percentage=override_percentage,
-      duration=override_duration,
-      output_file=options.unsafe_hosts_filename,
-      callback=post_drain_callback)
+  HostMaintenance(
+      cluster=CLUSTERS[cluster],
+      verbosity=options.verbosity,
+      bypass_leader_redirect=options.bypass_leader_redirect).perform_maintenance(
+          drainable_hosts,
+          grouping_function=options.grouping,
+          percentage=override_percentage,
+          duration=override_duration,
+          output_file=options.unsafe_hosts_filename,
+          callback=post_drain_callback)
 
 
 @app.command
@@ -134,6 +143,10 @@ def host_status(cluster):
   """
   options = app.get_options()
   checkable_hosts = parse_hostnames(options.filename, options.hosts)
-  statuses = HostMaintenance(CLUSTERS[cluster], options.verbosity).check_status(checkable_hosts)
+  statuses = HostMaintenance(
+      cluster=CLUSTERS[cluster],
+      verbosity=options.verbosity,
+      bypass_leader_redirect=options.bypass_leader_redirect).check_status(checkable_hosts)
+
   for pair in statuses:
     log.info("%s is in state: %s" % pair)

http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/test/python/apache/aurora/admin/test_admin.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/admin/test_admin.py b/src/test/python/apache/aurora/admin/test_admin.py
index f720742..66abade 100644
--- a/src/test/python/apache/aurora/admin/test_admin.py
+++ b/src/test/python/apache/aurora/admin/test_admin.py
@@ -58,6 +58,7 @@ class TestQueryCommand(AuroraClientCommandTest):
     mock_options.states = states
     mock_options.listformat = listformat or '%role%/%name%/%instanceId% %status%'
     mock_options.verbosity = False
+    mock_options.bypass_leader_redirect = False
     return mock_options
 
   @classmethod
@@ -228,6 +229,7 @@ class TestReconcileTaskCommand(AuroraClientCommandTest):
     mock_options = create_autospec(spec=['type', 'batch_size'], instance=True)
     mock_options.type = reconcile_type
     mock_options.batch_size = batch_size
+    mock_options.bypass_leader_redirect = False
     return mock_options
 
   def test_reconcile_implicit(self):

http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/test/python/apache/aurora/admin/test_admin_sla.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/admin/test_admin_sla.py b/src/test/python/apache/aurora/admin/test_admin_sla.py
index 54b5a82..6d7c74d 100644
--- a/src/test/python/apache/aurora/admin/test_admin_sla.py
+++ b/src/test/python/apache/aurora/admin/test_admin_sla.py
@@ -48,6 +48,7 @@ class TestAdminSlaListSafeDomainCommand(AuroraClientCommandTest):
     mock_options.disable_all_hooks = False
     mock_options.min_instance_count = MIN_INSTANCE_COUNT
     mock_options.grouping = grouping or DEFAULT_GROUPING
+    mock_options.bypass_leader_redirect = False
     return mock_options
 
   @classmethod
@@ -308,6 +309,7 @@ class TestAdminSlaProbeHostsCommand(AuroraClientCommandTest):
     mock_options.verbosity = False
     mock_options.grouping = grouping or DEFAULT_GROUPING
     mock_options.min_instance_count = 1
+    mock_options.bypass_leader_redirect = False
     return mock_options
 
   @classmethod

http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/test/python/apache/aurora/admin/test_admin_util.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/admin/test_admin_util.py b/src/test/python/apache/aurora/admin/test_admin_util.py
index f5c8c69..4714bfa 100644
--- a/src/test/python/apache/aurora/admin/test_admin_util.py
+++ b/src/test/python/apache/aurora/admin/test_admin_util.py
@@ -18,10 +18,19 @@ import unittest
 import mock
 from twitter.common.contextutil import temporary_file
 
-from apache.aurora.admin.admin_util import parse_script
+from apache.aurora.admin.admin_util import make_admin_client, parse_script
+from apache.aurora.common.cluster import Cluster
+from apache.aurora.common.clusters import Clusters
 
 
 class TestAdminUtil(unittest.TestCase):
+  TEST_CLUSTER_NAME = 'west'
+  TEST_CLUSTER = Cluster(
+          name=TEST_CLUSTER_NAME,
+          zk='zookeeper.example.com',
+          scheduler_zk_path='/foo/bar',
+          auth_mechanism='UNAUTHENTICATED')
+  TEST_CLUSTERS = Clusters([TEST_CLUSTER])
 
   @mock.patch("apache.aurora.admin.admin_util.subprocess", spec=subprocess)
   def test_parse_script(self, mock_subprocess):
@@ -34,3 +43,15 @@ class TestAdminUtil(unittest.TestCase):
 
   def test_parse_script_invalid_filename(self):
     self.assertRaises(SystemExit, parse_script, "invalid filename")
+
+  def test_make_admin_client_cluster_string(self):
+    with mock.patch('apache.aurora.admin.admin_util.CLUSTERS', new=self.TEST_CLUSTERS):
+      self.assertIsNotNone(make_admin_client(self.TEST_CLUSTER_NAME))
+
+  def test_make_admin_client_cluster_object(self):
+    with mock.patch('apache.aurora.admin.admin_util.CLUSTERS', new=self.TEST_CLUSTERS):
+      self.assertIsNotNone(make_admin_client(self.TEST_CLUSTER))
+
+  def test_make_admin_client_cluster_unknown(self):
+    with mock.patch('apache.aurora.admin.admin_util.CLUSTERS', new=self.TEST_CLUSTERS):
+      self.assertRaises(SystemExit, make_admin_client, 'east')

http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/test/python/apache/aurora/admin/util.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/admin/util.py b/src/test/python/apache/aurora/admin/util.py
index d0a915c..355734d 100644
--- a/src/test/python/apache/aurora/admin/util.py
+++ b/src/test/python/apache/aurora/admin/util.py
@@ -81,4 +81,5 @@ class AuroraClientCommandTest(unittest.TestCase):
   def setup_mock_options(cls):
     mock_options = create_autospec(spec=['verbosity'], instance=True)
     mock_options.verbosity = 'verbose'
+    mock_options.bypass_leader_redirect = False
     return mock_options

http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh
----------------------------------------------------------------------
diff --git a/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh b/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh
index e36726e..c93be9b 100755
--- a/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh
+++ b/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh
@@ -26,7 +26,7 @@ fi
 set -u -e -x
 set -o pipefail
 
-readonly TEST_SCHEDULER_IP=192.168.33.7
+readonly TEST_SLAVE_IP=192.168.33.7
 
 _curl() { curl --silent --fail --retry 4 --retry-delay 10 "$@" ; }
 
@@ -371,6 +371,10 @@ test_admin() {
   echo '== Testing admin commands'
   echo '== Getting leading scheduler'
   aurora_admin get_scheduler $_cluster | grep ":8081"
+
+  # host maintenance commands currently have a separate entry point and use their own api client.
+  # Until we address that, at least verify that the command group still works.
+  aurora_admin host_status --hosts=$TEST_SLAVE_IP $_cluster
 }
 
 test_ephemeral_daemon_with_final() {