You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@aurora.apache.org by ma...@apache.org on 2014/07/22 23:29:40 UTC

git commit: Fixing return value in _check_sla. Also, adding extra logging in drainHosts RPC.

Repository: incubator-aurora
Updated Branches:
  refs/heads/master ef5829c61 -> d97066e5c


Fixing return value in _check_sla.
Also, adding extra logging in drainHosts RPC.

Bugs closed: AURORA-571

Reviewed at https://reviews.apache.org/r/23244/


Project: http://git-wip-us.apache.org/repos/asf/incubator-aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-aurora/commit/d97066e5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-aurora/tree/d97066e5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-aurora/diff/d97066e5

Branch: refs/heads/master
Commit: d97066e5ce547265a0ff31f4d4c693b0739bdc71
Parents: ef5829c
Author: Maxim Khutornenko <ma...@apache.org>
Authored: Tue Jul 22 14:29:15 2014 -0700
Committer: Maxim Khutornenko <ma...@apache.org>
Committed: Tue Jul 22 14:29:15 2014 -0700

----------------------------------------------------------------------
 .../scheduler/state/MaintenanceController.java  |  5 ++
 .../apache/aurora/admin/host_maintenance.py     |  3 +-
 .../aurora/client/commands/test_maintenance.py  | 85 ++++++++++++++++++++
 3 files changed, 92 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/d97066e5/src/main/java/org/apache/aurora/scheduler/state/MaintenanceController.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/aurora/scheduler/state/MaintenanceController.java b/src/main/java/org/apache/aurora/scheduler/state/MaintenanceController.java
index bf48db9..21cfebd 100644
--- a/src/main/java/org/apache/aurora/scheduler/state/MaintenanceController.java
+++ b/src/main/java/org/apache/aurora/scheduler/state/MaintenanceController.java
@@ -14,6 +14,7 @@
 package org.apache.aurora.scheduler.state;
 
 import java.util.Set;
+import java.util.logging.Logger;
 
 import javax.inject.Inject;
 
@@ -101,6 +102,7 @@ public interface MaintenanceController {
   Set<HostStatus> endMaintenance(Set<String> hosts);
 
   class MaintenanceControllerImpl implements MaintenanceController, EventSubscriber {
+    private static final Logger LOG = Logger.getLogger(MaintenanceControllerImpl.class.getName());
     private final Storage storage;
     private final StateManager stateManager;
     private final EventSink eventSink;
@@ -117,6 +119,7 @@ public interface MaintenanceController {
     }
 
     private Set<HostStatus> watchDrainingTasks(MutableStoreProvider store, Set<String> hosts) {
+      LOG.info("Hosts to drain: " + hosts);
       Set<String> emptyHosts = Sets.newHashSet();
       for (String host : hosts) {
         // If there are no tasks on the host, immediately transition to DRAINED.
@@ -125,8 +128,10 @@ public interface MaintenanceController {
             .transform(Tasks.SCHEDULED_TO_ID)
             .toSet();
         if (activeTasks.isEmpty()) {
+          LOG.info("No tasks to drain for host: " + host);
           emptyHosts.add(host);
         } else {
+          LOG.info(String.format("Draining tasks: %s on host: %s", activeTasks, host));
           for (String taskId : activeTasks) {
             stateManager.changeState(
                 taskId,

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/d97066e5/src/main/python/apache/aurora/admin/host_maintenance.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/admin/host_maintenance.py b/src/main/python/apache/aurora/admin/host_maintenance.py
index b5b1375..af26f7c 100644
--- a/src/main/python/apache/aurora/admin/host_maintenance.py
+++ b/src/main/python/apache/aurora/admin/host_maintenance.py
@@ -128,12 +128,13 @@ class HostMaintenance(object):
       sla_duration.as_(Time.SECONDS),
       grouping_function)
 
+    unsafe_hostnames = set()
     # Given that maintenance is performed 1 group at a time, any result longer than 1 group
     # should be considered a batch failure.
     if host_groups:
       if len(host_groups) > 1:
         log.error('Illegal multiple groups detected in SLA results. Skipping hosts: %s' % hostnames)
-        return False
+        return set(hostnames)
 
       results, unsafe_hostnames = format_sla_results(host_groups, unsafe_only=True)
       if results:

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/d97066e5/src/test/python/apache/aurora/client/commands/test_maintenance.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/commands/test_maintenance.py b/src/test/python/apache/aurora/client/commands/test_maintenance.py
index c4677fc..07dd333 100644
--- a/src/test/python/apache/aurora/client/commands/test_maintenance.py
+++ b/src/test/python/apache/aurora/client/commands/test_maintenance.py
@@ -229,6 +229,91 @@ class TestMaintenanceCommands(AuroraClientCommandTest):
         mock_scheduler_proxy.startMaintenance.assert_called_with(Hosts(set(self.HOSTNAMES)))
         assert mock_scheduler_proxy.endMaintenance.call_count == len(self.HOSTNAMES)
 
+  def test_perform_maintenance_hosts_no_prod_tasks(self):
+    mock_options = self.make_mock_options()
+    mock_options.post_drain_script = None
+    mock_options.grouping = 'by_host'
+
+    def host_status_results(hostnames):
+      if isinstance(hostnames, Hosts):
+        return self.create_drained_status_result(hostnames)
+      return self.create_maintenance_status_result()
+
+    mock_api, mock_scheduler_proxy = self.create_mock_api()
+    mock_scheduler_proxy.endMaintenance.return_value = self.create_end_maintenance_result()
+    mock_scheduler_proxy.maintenanceStatus.side_effect = host_status_results
+    mock_scheduler_proxy.startMaintenance.return_value = self.create_start_maintenance_result()
+    mock_scheduler_proxy.drainHosts.return_value = self.create_start_maintenance_result()
+
+    def create_empty_sla_results():
+      mock_vector = Mock()
+      mock_vector.probe_hosts.return_value = []
+      return mock_vector
+
+    with contextlib.nested(
+        patch('time.sleep'),
+        patch('apache.aurora.client.api.SchedulerProxy', return_value=mock_scheduler_proxy),
+        patch('apache.aurora.client.api.sla.Sla.get_domain_uptime_vector',
+              return_value=create_empty_sla_results()),
+        patch('apache.aurora.client.commands.maintenance.CLUSTERS', new=self.TEST_CLUSTERS),
+        patch('twitter.common.app.get_options', return_value=mock_options)) as (
+            mock_sleep,
+            mock_scheduler_proxy_class,
+            mock_vector_class,
+            mock_clusters_maintenancepatch,
+            options):
+
+      perform_maintenance_hosts([self.TEST_CLUSTER])
+
+      mock_scheduler_proxy.startMaintenance.assert_called_with(Hosts(set(self.HOSTNAMES)))
+      assert mock_sleep.call_count == 3
+      assert mock_scheduler_proxy.maintenanceStatus.call_count == 6
+      assert mock_scheduler_proxy.drainHosts.call_count == 3
+      assert mock_scheduler_proxy.endMaintenance.call_count == 3
+
+  def test_perform_maintenance_hosts_multiple_sla_groups_failure(self):
+    mock_options = self.make_mock_options()
+    mock_options.post_drain_script = None
+    mock_options.grouping = 'by_host'
+    mock_options.unsafe_hosts_filename = None
+
+    def host_status_results(hostnames):
+      if isinstance(hostnames, Hosts):
+        return self.create_drained_status_result(hostnames)
+      return self.create_maintenance_status_result()
+
+    mock_api, mock_scheduler_proxy = self.create_mock_api()
+    mock_scheduler_proxy.endMaintenance.return_value = self.create_end_maintenance_result()
+    mock_scheduler_proxy.maintenanceStatus.side_effect = host_status_results
+    mock_scheduler_proxy.startMaintenance.return_value = self.create_start_maintenance_result()
+    mock_scheduler_proxy.drainHosts.return_value = self.create_start_maintenance_result()
+
+    def create_multiple_sla_results():
+      mock_vector = Mock()
+      mock_vector.probe_hosts.return_value = self.HOSTNAMES
+      return mock_vector
+
+    with contextlib.nested(
+        patch('time.sleep'),
+        patch('apache.aurora.client.api.SchedulerProxy', return_value=mock_scheduler_proxy),
+        patch('apache.aurora.client.api.sla.Sla.get_domain_uptime_vector',
+              return_value=create_multiple_sla_results()),
+        patch('apache.aurora.client.commands.maintenance.CLUSTERS', new=self.TEST_CLUSTERS),
+        patch('twitter.common.app.get_options', return_value=mock_options)) as (
+            mock_sleep,
+            mock_scheduler_proxy_class,
+            mock_vector_class,
+            mock_clusters_maintenancepatch,
+            options):
+
+      perform_maintenance_hosts([self.TEST_CLUSTER])
+
+      mock_scheduler_proxy.startMaintenance.assert_called_with(Hosts(set(self.HOSTNAMES)))
+      assert mock_sleep.call_count == 0
+      assert mock_scheduler_proxy.maintenanceStatus.call_count == 3
+      assert mock_scheduler_proxy.drainHosts.call_count == 0
+      assert mock_scheduler_proxy.endMaintenance.call_count == 3
+
   def test_perform_maintenance_hosts_reason_missing(self):
     mock_options = self.make_mock_options()
     mock_options.grouping = 'by_host'