You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@aurora.apache.org by ke...@apache.org on 2014/01/17 03:25:36 UTC

git commit: Deprecate CronCollisionPolicy.RUN_OVERLAP.

Updated Branches:
  refs/heads/master 5a4b9df49 -> ba25e6617


Deprecate CronCollisionPolicy.RUN_OVERLAP.

To prevent cluster starvation, the scheduler now ignores new cron jobs
with RUN_OVERLAP and behaves like CANCEL_NEW for existing ones. The
choice of CANCEL_NEW was arbitrary but IMO fits with the mental
preemption model of Aurora better (you're never guaranteed that a new
instance will schedule but Aurora doesn't tend to kill existing
instances outside of an established preemption routine).

Testing Done:
./gradlew build

Bugs closed: AURORA-38

Reviewed at https://reviews.apache.org/r/16874/


Project: http://git-wip-us.apache.org/repos/asf/incubator-aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-aurora/commit/ba25e661
Tree: http://git-wip-us.apache.org/repos/asf/incubator-aurora/tree/ba25e661
Diff: http://git-wip-us.apache.org/repos/asf/incubator-aurora/diff/ba25e661

Branch: refs/heads/master
Commit: ba25e6617d64de3ec1727d2aebcc37496d07cbac
Parents: 5a4b9df
Author: Kevin Sweeney <ke...@apache.org>
Authored: Thu Jan 16 18:25:23 2014 -0800
Committer: Kevin Sweeney <ke...@apache.org>
Committed: Thu Jan 16 18:25:23 2014 -0800

----------------------------------------------------------------------
 .../configuration/ConfigurationManager.java     |  6 ++++
 .../aurora/scheduler/state/CronJobManager.java  | 28 ++-------------
 .../thrift/org/apache/aurora/gen/api.thrift     |  3 +-
 .../configuration/ConfigurationManagerTest.java | 20 +++++++++--
 .../state/BaseSchedulerCoreImplTest.java        | 38 --------------------
 .../scheduler/state/CronJobManagerTest.java     | 27 --------------
 6 files changed, 28 insertions(+), 94 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/ba25e661/src/main/java/org/apache/aurora/scheduler/configuration/ConfigurationManager.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/aurora/scheduler/configuration/ConfigurationManager.java b/src/main/java/org/apache/aurora/scheduler/configuration/ConfigurationManager.java
index f789018..c75241e 100644
--- a/src/main/java/org/apache/aurora/scheduler/configuration/ConfigurationManager.java
+++ b/src/main/java/org/apache/aurora/scheduler/configuration/ConfigurationManager.java
@@ -32,6 +32,7 @@ import com.twitter.common.base.Closure;
 import com.twitter.common.base.MorePreconditions;
 
 import org.apache.aurora.gen.Constraint;
+import org.apache.aurora.gen.CronCollisionPolicy;
 import org.apache.aurora.gen.JobConfiguration;
 import org.apache.aurora.gen.LimitConstraint;
 import org.apache.aurora.gen.TaskConfig;
@@ -263,6 +264,11 @@ public final class ConfigurationManager {
           "A service task may not be run on a cron schedule: " + builder);
     }
 
+    if (CronCollisionPolicy.RUN_OVERLAP.equals(job.getCronCollisionPolicy())) {
+      throw new TaskDescriptionException(
+          "The RUN_OVERLAP collision policy has been removed (AURORA-38).");
+    }
+
     return IJobConfiguration.build(builder);
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/ba25e661/src/main/java/org/apache/aurora/scheduler/state/CronJobManager.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/aurora/scheduler/state/CronJobManager.java b/src/main/java/org/apache/aurora/scheduler/state/CronJobManager.java
index b772712..5a56a70 100644
--- a/src/main/java/org/apache/aurora/scheduler/state/CronJobManager.java
+++ b/src/main/java/org/apache/aurora/scheduler/state/CronJobManager.java
@@ -32,12 +32,9 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Function;
 import com.google.common.base.Optional;
 import com.google.common.base.Preconditions;
-import com.google.common.base.Predicates;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableSet;
-import com.google.common.collect.Iterables;
 import com.google.common.collect.Maps;
-import com.google.common.collect.Ordering;
 import com.google.common.eventbus.Subscribe;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import com.twitter.common.application.ShutdownRegistry;
@@ -51,11 +48,9 @@ import com.twitter.common.stats.Stats;
 import com.twitter.common.util.BackoffHelper;
 
 import org.apache.aurora.gen.CronCollisionPolicy;
-import org.apache.aurora.gen.ScheduleStatus;
 import org.apache.aurora.scheduler.base.JobKeys;
 import org.apache.aurora.scheduler.base.Query;
 import org.apache.aurora.scheduler.base.ScheduleException;
-import org.apache.aurora.scheduler.base.Tasks;
 import org.apache.aurora.scheduler.configuration.ConfigurationManager.TaskDescriptionException;
 import org.apache.aurora.scheduler.configuration.SanitizedConfiguration;
 import org.apache.aurora.scheduler.cron.CronException;
@@ -74,8 +69,6 @@ import org.apache.commons.lang.StringUtils;
 import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Preconditions.checkNotNull;
 
-import static org.apache.aurora.gen.ScheduleStatus.PENDING;
-
 /**
  * A job scheduler that receives jobs that should be run periodically on a cron schedule.
  */
@@ -304,24 +297,9 @@ public class CronJobManager extends JobManager implements EventSubscriber {
           break;
 
         case RUN_OVERLAP:
-          Map<Integer, IScheduledTask> byInstance =
-              Maps.uniqueIndex(activeTasks, Tasks.SCHEDULED_TO_INSTANCE_ID);
-          Map<Integer, ScheduleStatus> existingTasks =
-              Maps.transformValues(byInstance, Tasks.GET_STATUS);
-          if (existingTasks.isEmpty()) {
-            builder.putAll(config.getTaskConfigs());
-          } else if (Iterables.any(existingTasks.values(), Predicates.equalTo(PENDING))) {
-            LOG.info("Job " + JobKeys.toPath(job) + " has pending tasks, suppressing run.");
-          } else {
-            // To safely overlap this run, we need to adjust the instance IDs of the overlapping
-            // run (maintaining the role/job/instance UUID invariant).
-            int instanceOffset = Ordering.natural().max(existingTasks.keySet()) + 1;
-            LOG.info("Adjusting instance IDs of " + JobKeys.toPath(job) + " by " + instanceOffset
-                + " for overlapping cron run.");
-            for (Map.Entry<Integer, ITaskConfig> entry : config.getTaskConfigs().entrySet()) {
-              builder.put(entry.getKey() + instanceOffset, entry.getValue());
-            }
-          }
+          LOG.severe("Ignoring trigger for job "
+              + JobKeys.toPath(job)
+              + " with deprecated collision policy RUN_OVERLAP due to unterminated active tasks.");
           break;
 
         default:

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/ba25e661/src/main/thrift/org/apache/aurora/gen/api.thrift
----------------------------------------------------------------------
diff --git a/src/main/thrift/org/apache/aurora/gen/api.thrift b/src/main/thrift/org/apache/aurora/gen/api.thrift
index 927552a..724e39e 100644
--- a/src/main/thrift/org/apache/aurora/gen/api.thrift
+++ b/src/main/thrift/org/apache/aurora/gen/api.thrift
@@ -176,7 +176,8 @@ struct TaskConfig {
 enum CronCollisionPolicy {
   KILL_EXISTING = 0,  // Kills the existing job with the colliding name, and runs the new cron job.
   CANCEL_NEW    = 1,  // Cancels execution of the new job, leaving the running job in tact.
-  RUN_OVERLAP   = 2   // Runs both jobs, effectively adding more tasks to the existing job.
+  RUN_OVERLAP   = 2   // DEPRECATED. For existing jobs, treated the same as CANCEL_NEW. createJob
+                      // will reject jobs with this policy.
 }
 
 // Description of an aurora job.

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/ba25e661/src/test/java/org/apache/aurora/scheduler/configuration/ConfigurationManagerTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/aurora/scheduler/configuration/ConfigurationManagerTest.java b/src/test/java/org/apache/aurora/scheduler/configuration/ConfigurationManagerTest.java
index c9ffc70..24e1bc4 100644
--- a/src/test/java/org/apache/aurora/scheduler/configuration/ConfigurationManagerTest.java
+++ b/src/test/java/org/apache/aurora/scheduler/configuration/ConfigurationManagerTest.java
@@ -28,6 +28,8 @@ import org.apache.aurora.gen.LimitConstraint;
 import org.apache.aurora.gen.TaskConfig;
 import org.apache.aurora.gen.TaskConstraint;
 import org.apache.aurora.gen.ValueConstraint;
+import org.apache.aurora.scheduler.configuration.ConfigurationManager.TaskDescriptionException;
+import org.apache.aurora.scheduler.storage.entities.IJobConfiguration;
 import org.junit.Test;
 
 import static org.apache.aurora.gen.apiConstants.DEFAULT_ENVIRONMENT;
@@ -37,11 +39,10 @@ import static org.apache.aurora.scheduler.configuration.ConfigurationManager.isG
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
-// TODO(Sathya): Improve test coverage for this class.
+// TODO(kevints): Improve test coverage for this class.
 public class ConfigurationManagerTest {
-  // This job caused a crash when loaded in MESOS-3062
-  // TODO(ksweeney): Create a test fixtures resource file and move this to it.
   private static final JobConfiguration UNSANITIZED_JOB_CONFIGURATION = new JobConfiguration()
       .setKey(new JobKey("owner-role", DEFAULT_ENVIRONMENT, "email_stats"))
       .setCronSchedule("0 2 * * *")
@@ -99,4 +100,17 @@ public class ConfigurationManagerTest {
     assertTrue(copy.isSetKey());
     assertEquals(DEFAULT_ENVIRONMENT, copy.getKey().getEnvironment());
   }
+
+  @Test
+  public void testRunOverlapRejected() {
+    JobConfiguration copy = UNSANITIZED_JOB_CONFIGURATION.deepCopy();
+    ConfigurationManager.applyDefaultsIfUnset(copy);
+    copy.setCronCollisionPolicy(CronCollisionPolicy.RUN_OVERLAP);
+    try {
+      ConfigurationManager.validateAndPopulate(IJobConfiguration.build(copy));
+      fail("CronCollisionPolicy.RUN_OVERLAP was allowed.");
+    } catch (TaskDescriptionException e) {
+      // Expected.
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/ba25e661/src/test/java/org/apache/aurora/scheduler/state/BaseSchedulerCoreImplTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/aurora/scheduler/state/BaseSchedulerCoreImplTest.java b/src/test/java/org/apache/aurora/scheduler/state/BaseSchedulerCoreImplTest.java
index 9291fe6..79f09cd 100644
--- a/src/test/java/org/apache/aurora/scheduler/state/BaseSchedulerCoreImplTest.java
+++ b/src/test/java/org/apache/aurora/scheduler/state/BaseSchedulerCoreImplTest.java
@@ -554,44 +554,6 @@ public abstract class BaseSchedulerCoreImplTest extends EasyMockTest {
   }
 
   @Test
-  public void testStartRunningOverlapCronJob() throws Exception {
-    // Start a cron job that is already started by an earlier
-    // call and is PENDING. Make sure it follows the cron collision policy.
-    SanitizedConfiguration sanitizedConfiguration =
-        makeCronJob(KEY_A, 1, "1 1 1 1 1", CronCollisionPolicy.RUN_OVERLAP);
-    expect(cronScheduler.schedule(eq(sanitizedConfiguration.getJobConfig().getCronSchedule()),
-        EasyMock.<Runnable>anyObject()))
-        .andReturn("key");
-
-    control.replay();
-    buildScheduler();
-
-    scheduler.createJob(sanitizedConfiguration);
-    assertTaskCount(0);
-    assertTrue(cron.hasJob(KEY_A));
-
-    scheduler.startCronJob(KEY_A);
-    assertTaskCount(1);
-
-    String taskId = Tasks.id(getOnlyTask(Query.jobScoped(KEY_A)));
-
-    // Now start the same cron job immediately.
-    scheduler.startCronJob(KEY_A);
-
-    // Since the task never left PENDING, the second run should have been suppressed.
-    assertTaskCount(1);
-    assertEquals(PENDING, getTask(taskId).getStatus());
-
-    changeStatus(taskId, ASSIGNED);
-
-    scheduler.startCronJob(KEY_A);
-    assertTaskCount(2);
-    assertEquals(ASSIGNED, getTask(taskId).getStatus());
-
-    getOnlyTask(Query.unscoped().byStatus(ScheduleStatus.PENDING));
-  }
-
-  @Test
   public void testKillCreateCronJob() throws Exception {
     SanitizedConfiguration sanitizedConfiguration = makeCronJob(KEY_A, 1, "1 1 1 1 1");
     IJobConfiguration job = sanitizedConfiguration.getJobConfig();

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/ba25e661/src/test/java/org/apache/aurora/scheduler/state/CronJobManagerTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/aurora/scheduler/state/CronJobManagerTest.java b/src/test/java/org/apache/aurora/scheduler/state/CronJobManagerTest.java
index c679123..e9886cd 100644
--- a/src/test/java/org/apache/aurora/scheduler/state/CronJobManagerTest.java
+++ b/src/test/java/org/apache/aurora/scheduler/state/CronJobManagerTest.java
@@ -15,12 +15,10 @@
  */
 package org.apache.aurora.scheduler.state;
 
-import java.util.Map;
 import java.util.concurrent.Executor;
 
 import com.google.common.base.Optional;
 import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
 import com.google.inject.AbstractModule;
 import com.google.inject.Guice;
 import com.google.inject.Injector;
@@ -28,11 +26,9 @@ import com.twitter.common.application.ShutdownRegistry;
 import com.twitter.common.base.ExceptionalCommand;
 import com.twitter.common.testing.easymock.EasyMockTest;
 
-import org.apache.aurora.gen.AssignedTask;
 import org.apache.aurora.gen.ExecutorConfig;
 import org.apache.aurora.gen.Identity;
 import org.apache.aurora.gen.JobConfiguration;
-import org.apache.aurora.gen.ScheduleStatus;
 import org.apache.aurora.gen.ScheduledTask;
 import org.apache.aurora.gen.TaskConfig;
 import org.apache.aurora.scheduler.base.JobKeys;
@@ -45,7 +41,6 @@ import org.apache.aurora.scheduler.events.PubsubEvent.SchedulerActive;
 import org.apache.aurora.scheduler.storage.Storage;
 import org.apache.aurora.scheduler.storage.entities.IJobConfiguration;
 import org.apache.aurora.scheduler.storage.entities.IScheduledTask;
-import org.apache.aurora.scheduler.storage.entities.ITaskConfig;
 import org.apache.aurora.scheduler.storage.testing.StorageTestUtil;
 import org.easymock.Capture;
 import org.easymock.EasyMock;
@@ -53,7 +48,6 @@ import org.easymock.IExpectationSetters;
 import org.junit.Before;
 import org.junit.Test;
 
-import static org.apache.aurora.gen.CronCollisionPolicy.RUN_OVERLAP;
 import static org.apache.aurora.gen.apiConstants.DEFAULT_ENVIRONMENT;
 import static org.easymock.EasyMock.anyObject;
 import static org.easymock.EasyMock.capture;
@@ -287,27 +281,6 @@ public class CronJobManagerTest extends EasyMockTest {
   }
 
   @Test
-  public void testRunOverlapNoShardCollision() throws Exception {
-    IScheduledTask scheduledTask = IScheduledTask.build(new ScheduledTask()
-        .setStatus(ScheduleStatus.RUNNING)
-        .setAssignedTask(new AssignedTask().setTask(defaultTask())));
-    sanitizedConfiguration = SanitizedConfiguration.fromUnsanitized(
-        IJobConfiguration.build(job.newBuilder().setCronCollisionPolicy(RUN_OVERLAP)));
-    expectJobAccepted();
-    expectJobFetch();
-    expectActiveTaskFetch(scheduledTask);
-
-    Map<Integer, ITaskConfig> newConfig =
-        ImmutableMap.of(1, sanitizedConfiguration.getJobConfig().getTaskConfig());
-    stateManager.insertPendingTasks(newConfig);
-
-    control.replay();
-
-    cron.receiveJob(sanitizedConfiguration);
-    cron.startJobNow(job.getKey());
-  }
-
-  @Test
   public void testConsistentState() throws Exception {
     IJobConfiguration updated =
         IJobConfiguration.build(makeJob().newBuilder().setCronSchedule("1 2 3 4 5"));