You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@aurora.apache.org by mc...@apache.org on 2014/05/10 17:08:32 UTC

git commit: Add a strict mode for commands that specify instances.

Repository: incubator-aurora
Updated Branches:
  refs/heads/master 1dfbd39df -> dfebd7ad3


Add a strict mode for commands that specify instances.

If strict mode is on, and users specify an invalid shard range (a range that's larger that the actual
number of shards), then generate an error.

Bugs closed: aurora-401

Reviewed at https://reviews.apache.org/r/21232/


Project: http://git-wip-us.apache.org/repos/asf/incubator-aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-aurora/commit/dfebd7ad
Tree: http://git-wip-us.apache.org/repos/asf/incubator-aurora/tree/dfebd7ad
Diff: http://git-wip-us.apache.org/repos/asf/incubator-aurora/diff/dfebd7ad

Branch: refs/heads/master
Commit: dfebd7ad3576bdca23d9e349e71a981e4db7e03e
Parents: 1dfbd39
Author: Mark Chu-Carroll <mc...@twopensource.com>
Authored: Sat May 10 11:05:38 2014 -0400
Committer: Mark Chu-Carroll <mc...@twitter.com>
Committed: Sat May 10 11:05:38 2014 -0400

----------------------------------------------------------------------
 .../python/apache/aurora/client/cli/context.py  | 13 ++++++
 .../python/apache/aurora/client/cli/jobs.py     | 12 ++++-
 .../python/apache/aurora/client/cli/options.py  |  9 ++--
 .../apache/aurora/client/cli/test_kill.py       | 49 ++++++++++++++++++++
 .../apache/aurora/client/cli/test_restart.py    | 24 ++++++++++
 5 files changed, 102 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/dfebd7ad/src/main/python/apache/aurora/client/cli/context.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/cli/context.py b/src/main/python/apache/aurora/client/cli/context.py
index 609285a..a537eaa 100644
--- a/src/main/python/apache/aurora/client/cli/context.py
+++ b/src/main/python/apache/aurora/client/cli/context.py
@@ -162,3 +162,16 @@ class AuroraCommandContext(Context):
     if resp.responseCode is not ResponseCode.OK:
       raise self.CommandError(EXIT_INVALID_PARAMETER, resp.message)
     return resp.result.scheduleStatusResult.tasks or None
+
+  def get_active_instances(self, key):
+    """Returns a list of the currently active instances of a job"""
+    return [task.assignedTask.instanceId for task in self.get_job_status(key)]
+
+  def verify_shards_option_validity(self, jobkey, instances):
+    """Given a jobkey, does a getTasksStatus, and then checks that the specified instances
+    are valid for the job.
+    """
+    active_instances = self.get_active_instances(jobkey)
+    if max(active_instances) < max(instances):
+      raise self.CommandError(EXIT_INVALID_PARAMETER,
+          "Invalid shards parameter: %s only has %s shards" % (jobkey, max(active_instances)))

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/dfebd7ad/src/main/python/apache/aurora/client/cli/jobs.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/cli/jobs.py b/src/main/python/apache/aurora/client/cli/jobs.py
index cf45640..b50e37b 100644
--- a/src/main/python/apache/aurora/client/cli/jobs.py
+++ b/src/main/python/apache/aurora/client/cli/jobs.py
@@ -52,6 +52,7 @@ from apache.aurora.client.cli.options import (
     JSON_WRITE_OPTION,
     MAX_TOTAL_FAILURES_OPTION,
     NO_BATCHING_OPTION,
+    STRICT_OPTION,
     WATCH_OPTION,
 )
 from apache.aurora.common.aurora_job_key import AuroraJobKey
@@ -335,7 +336,7 @@ class KillCommand(AbstractKillCommand):
     return "Kill instances in a scheduled job"
 
   def get_options(self):
-    return super(KillCommand, self).get_options() + [INSTANCES_SPEC_ARGUMENT]
+    return super(KillCommand, self).get_options() + [INSTANCES_SPEC_ARGUMENT, STRICT_OPTION]
 
   def execute(self, context):
     job = context.options.instance_spec.jobkey
@@ -344,6 +345,8 @@ class KillCommand(AbstractKillCommand):
       raise context.CommandError(EXIT_INVALID_PARAMETER,
           'The instances list cannot be omitted in a kill command!; '
           'use killall to kill all instances')
+    if context.options.strict:
+      context.verify_shards_option_validity(job, instances_arg)
     api = context.get_api(job.cluster)
     if context.options.no_batching:
       resp = api.kill_job(job, instances_arg)
@@ -446,6 +449,7 @@ class RestartCommand(Verb):
              help='Maximum number of seconds before a shard must move into the RUNNING state '
                  'before considered a failure.'),
         MAX_TOTAL_FAILURES_OPTION,
+        STRICT_OPTION,
         CommandOption('--rollback-on-failure', default=True, action='store_false',
             help='If false, prevent update from performing a rollback.'),
         INSTANCES_SPEC_ARGUMENT, CONFIG_ARGUMENT]
@@ -459,6 +463,8 @@ Restarts are fully controlled client-side, so aborting halts the restart."""
     job = context.options.instance_spec.jobkey
     instances = (None if context.options.instance_spec.instance == ALL_INSTANCES else
         context.options.instance_spec.instance)
+    if instances != None and context.options.strict:
+      context.verify_shards_option_validity(job, instances)
     api = context.get_api(job.cluster)
     config = (context.get_job_config(job, context.options.config_file)
         if context.options.config_file else None)
@@ -571,7 +577,7 @@ class UpdateCommand(Verb):
 
   def get_options(self):
     return [FORCE_OPTION, BIND_OPTION, JSON_READ_OPTION, HEALTHCHECK_OPTION,
-        INSTANCES_SPEC_ARGUMENT, CONFIG_ARGUMENT]
+        INSTANCES_SPEC_ARGUMENT, STRICT_OPTION, CONFIG_ARGUMENT]
 
   @property
   def help(self):
@@ -618,6 +624,8 @@ to preview what changes will take effect.
     job = context.options.instance_spec.jobkey
     instances = (None if context.options.instance_spec.instance == ALL_INSTANCES else
         context.options.instance_spec.instance)
+    if instances != None and context.options.strict:
+      context.verify_shards_option_validity(job, instances)
     config = context.get_job_config(job, context.options.config_file)
     api = context.get_api(config.cluster())
     if not context.options.force:

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/dfebd7ad/src/main/python/apache/aurora/client/cli/options.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/cli/options.py b/src/main/python/apache/aurora/client/cli/options.py
index 040c5c2..65ced8c 100644
--- a/src/main/python/apache/aurora/client/cli/options.py
+++ b/src/main/python/apache/aurora/client/cli/options.py
@@ -261,6 +261,12 @@ SSH_USER_OPTION = CommandOption('--ssh-user', '-l', default=None,
     help='ssh as this username instead of the job\'s role')
 
 
+STRICT_OPTION = CommandOption('--strict', default=False, action='store_true',
+    help=("Check instances and generate an error for instance ranges in parameters "
+    "that are larger than the actual set of instances in the job"))
+
+
+
 TASK_INSTANCE_ARGUMENT = CommandOption('task_instance', type=parse_task_instance_key,
     help='A task instance specifier, in the form CLUSTER/ROLE/ENV/NAME/INSTANCE')
 
@@ -268,6 +274,3 @@ TASK_INSTANCE_ARGUMENT = CommandOption('task_instance', type=parse_task_instance
 WATCH_OPTION = CommandOption('--watch-secs', type=int, default=30,
     help='Minimum number of seconds a instance must remain in RUNNING state before considered a '
          'success.')
-
-
-

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/dfebd7ad/src/test/python/apache/aurora/client/cli/test_kill.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/cli/test_kill.py b/src/test/python/apache/aurora/client/cli/test_kill.py
index e11fc81..4f62c46 100644
--- a/src/test/python/apache/aurora/client/cli/test_kill.py
+++ b/src/test/python/apache/aurora/client/cli/test_kill.py
@@ -178,6 +178,55 @@ class TestClientKillCommand(AuroraClientCommandTest):
       api.kill_job.assert_called_with(AuroraJobKey.from_path('west/bozo/test/hello'), instances)
       self.assert_scheduler_called(api, self.get_expected_task_query(instances), 2)
 
+  def test_kill_job_with_invalid_instances_strict(self):
+    """Test kill client-side API logic."""
+    mock_context = FakeAuroraCommandContext()
+    with contextlib.nested(
+        patch('time.sleep'),
+        patch('apache.aurora.client.cli.jobs.Job.create_context', return_value=mock_context),
+        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS)):
+      api = mock_context.get_api('west')
+      self.setup_get_tasks_status_calls(api.scheduler_proxy)
+      api.kill_job.return_value = self.get_kill_job_response()
+      mock_context.add_expected_status_query_result(self.create_status_call_result(
+          self.create_mock_task(ScheduleStatus.KILLED)))
+      with temporary_file() as fp:
+        fp.write(self.get_valid_config())
+        fp.flush()
+        cmd = AuroraCommandLine()
+        cmd.execute(['job', 'kill', '--config=%s' % fp.name, '--no-batching', '--strict',
+            'west/bozo/test/hello/0,2,4-6,11-20'])
+
+      # Now check that the right API calls got made.
+      assert api.kill_job.call_count == 0
+
+
+  def test_kill_job_with_invalid_instances_nonstrict(self):
+    """Test kill client-side API logic."""
+    mock_context = FakeAuroraCommandContext()
+    with contextlib.nested(
+        patch('time.sleep'),
+        patch('apache.aurora.client.cli.jobs.Job.create_context', return_value=mock_context),
+        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS)):
+      api = mock_context.get_api('west')
+      self.setup_get_tasks_status_calls(api.scheduler_proxy)
+      api.kill_job.return_value = self.get_kill_job_response()
+      mock_context.add_expected_status_query_result(self.create_status_call_result(
+          self.create_mock_task(ScheduleStatus.KILLED)))
+      with temporary_file() as fp:
+        fp.write(self.get_valid_config())
+        fp.flush()
+        cmd = AuroraCommandLine()
+        cmd.execute(['job', 'kill', '--config=%s' % fp.name, '--no-batching',
+            'west/bozo/test/hello/0,2,4-6,11-13'])
+
+      # Now check that the right API calls got made.
+      assert api.kill_job.call_count == 1
+      instances = [0, 2, 4, 5, 6, 11, 12, 13]
+      api.kill_job.assert_called_with(AuroraJobKey.from_path('west/bozo/test/hello'), instances)
+      self.assert_scheduler_called(api, self.get_expected_task_query(instances), 2)
+
+
   def test_kill_job_with_instances_batched(self):
     """Test kill client-side API logic."""
     mock_context = FakeAuroraCommandContext()

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/dfebd7ad/src/test/python/apache/aurora/client/cli/test_restart.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/cli/test_restart.py b/src/test/python/apache/aurora/client/cli/test_restart.py
index 9b7a2c6..03fdcd2 100644
--- a/src/test/python/apache/aurora/client/cli/test_restart.py
+++ b/src/test/python/apache/aurora/client/cli/test_restart.py
@@ -90,6 +90,30 @@ class TestRestartCommand(AuroraClientCommandTest):
         mock_scheduler_proxy.restartShards.assert_called_with(JobKey(environment=self.TEST_ENV,
             role=self.TEST_ROLE, name=self.TEST_JOB), [15, 16, 17, 18, 19], None)
 
+  def test_restart_invalid_shards(self):
+    # Test the client-side restart when a shard argument is too large, and it's
+    # using strict mode.
+    (mock_api, mock_scheduler_proxy) = self.create_mock_api()
+    mock_health_check = self.setup_health_checks(mock_api)
+    self.setup_mock_scheduler_for_simple_restart(mock_api)
+    with contextlib.nested(
+        patch('apache.aurora.client.api.SchedulerProxy', return_value=mock_scheduler_proxy),
+        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS),
+        patch('apache.aurora.client.api.instance_watcher.StatusHealthCheck',
+            return_value=mock_health_check),
+        patch('time.time', side_effect=functools.partial(self.fake_time, self)),
+        patch('time.sleep', return_value=None)
+    ):
+      with temporary_file() as fp:
+        fp.write(self.get_valid_config())
+        fp.flush()
+        cmd = AuroraCommandLine()
+        cmd.execute(['job', 'restart', '--batch-size=5', '--strict', 'west/bozo/test/hello/0-100',
+            fp.name])
+
+        assert mock_scheduler_proxy.getTasksStatus.call_count == 1
+        assert mock_scheduler_proxy.restartShards.call_count == 0
+
   def test_restart_failed_status(self):
     # Test the client-side updater logic in its simplest case: everything succeeds, and no rolling
     # updates.