You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@aurora.apache.org by ma...@apache.org on 2014/10/09 23:06:27 UTC

git commit: Implementing non-prod MTTA/R SLA metrics.

Repository: incubator-aurora
Updated Branches:
  refs/heads/master 7bc791cea -> 61f910ce6


Implementing non-prod MTTA/MTTR (median time to assigned / median time to running) SLA metrics.

Bugs closed: AURORA-774

Reviewed at https://reviews.apache.org/r/26376/


Project: http://git-wip-us.apache.org/repos/asf/incubator-aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-aurora/commit/61f910ce
Tree: http://git-wip-us.apache.org/repos/asf/incubator-aurora/tree/61f910ce
Diff: http://git-wip-us.apache.org/repos/asf/incubator-aurora/diff/61f910ce

Branch: refs/heads/master
Commit: 61f910ce621a78d1a99e52aa1b7165d4fc16dba2
Parents: 7bc791c
Author: Maxim Khutornenko <ma...@apache.org>
Authored: Thu Oct 9 14:06:11 2014 -0700
Committer: Maxim Khutornenko <ma...@apache.org>
Committed: Thu Oct 9 14:06:11 2014 -0700

----------------------------------------------------------------------
 .../aurora/scheduler/sla/MetricCalculator.java  | 55 ++++++++++++---
 .../aurora/scheduler/sla/SlaAlgorithm.java      | 14 ++--
 .../scheduler/sla/MetricCalculatorTest.java     | 74 +++++++++++++++-----
 .../aurora/scheduler/sla/SlaTestUtil.java       | 13 ++--
 4 files changed, 118 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/61f910ce/src/main/java/org/apache/aurora/scheduler/sla/MetricCalculator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/aurora/scheduler/sla/MetricCalculator.java b/src/main/java/org/apache/aurora/scheduler/sla/MetricCalculator.java
index dca6804..149bb33 100644
--- a/src/main/java/org/apache/aurora/scheduler/sla/MetricCalculator.java
+++ b/src/main/java/org/apache/aurora/scheduler/sla/MetricCalculator.java
@@ -20,6 +20,7 @@ import java.util.concurrent.atomic.AtomicReference;
 
 import javax.inject.Inject;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Predicate;
 import com.google.common.base.Predicates;
 import com.google.common.base.Supplier;
@@ -63,7 +64,8 @@ import static org.apache.aurora.scheduler.sla.SlaGroup.GroupType.RESOURCE_RAM;
  */
 class MetricCalculator implements Runnable {
 
-  private static final Multimap<AlgorithmType, GroupType> METRICS =
+  @VisibleForTesting
+  static final Multimap<AlgorithmType, GroupType> PROD_METRICS =
       ImmutableMultimap.<AlgorithmType, GroupType>builder()
           .put(JOB_UPTIME_50, JOB)
           .put(JOB_UPTIME_75, JOB)
@@ -75,6 +77,25 @@ class MetricCalculator implements Runnable {
           .putAll(MEDIAN_TIME_TO_RUNNING, JOB, CLUSTER, RESOURCE_CPU, RESOURCE_RAM, RESOURCE_DISK)
           .build();
 
+  @VisibleForTesting
+  static final Multimap<AlgorithmType, GroupType> NON_PROD_METRICS =
+      ImmutableMultimap.<AlgorithmType, GroupType>builder()
+          .putAll(
+              AlgorithmType.MEDIAN_TIME_TO_ASSIGNED_NON_PROD,
+              JOB,
+              CLUSTER,
+              RESOURCE_CPU,
+              RESOURCE_RAM,
+              RESOURCE_DISK)
+          .putAll(
+              AlgorithmType.MEDIAN_TIME_TO_RUNNING_NON_PROD,
+              JOB,
+              CLUSTER,
+              RESOURCE_CPU,
+              RESOURCE_RAM,
+              RESOURCE_DISK)
+          .build();
+
   private static final Predicate<ITaskConfig> IS_SERVICE =
       new Predicate<ITaskConfig>() {
         @Override
@@ -146,25 +167,37 @@ class MetricCalculator implements Runnable {
   @Timed("sla_stats_computation")
   @Override
   public void run() {
-    List<IScheduledTask> tasks =
-        FluentIterable.from(Storage.Util.weaklyConsistentFetchTasks(storage, Query.unscoped()))
-            .filter(Predicates.compose(
-                Predicates.and(Tasks.IS_PRODUCTION, IS_SERVICE),
-                Tasks.SCHEDULED_TO_INFO)).toList();
+    FluentIterable<IScheduledTask> tasks =
+        FluentIterable.from(Storage.Util.weaklyConsistentFetchTasks(storage, Query.unscoped()));
+
+    List<IScheduledTask> prodTasks = tasks.filter(Predicates.compose(
+        Predicates.and(Tasks.IS_PRODUCTION, IS_SERVICE),
+        Tasks.SCHEDULED_TO_INFO)).toList();
+
+    List<IScheduledTask> nonProdTasks = tasks.filter(Predicates.compose(
+        Predicates.and(Predicates.not(Tasks.IS_PRODUCTION), IS_SERVICE),
+        Tasks.SCHEDULED_TO_INFO)).toList();
 
     long nowMs = clock.nowMillis();
-    long intervalStartMs = nowMs - settings.getRefreshRateMs();
+    Range<Long> timeRange = Range.closedOpen(nowMs - settings.getRefreshRateMs(), nowMs);
+
+    runAlgorithms(prodTasks, PROD_METRICS, timeRange);
+    runAlgorithms(nonProdTasks, NON_PROD_METRICS, timeRange);
+  }
+
+  private void runAlgorithms(
+      List<IScheduledTask> tasks,
+      Multimap<AlgorithmType, GroupType> metrics,
+      Range<Long> timeRange) {
 
-    for (Entry<AlgorithmType, GroupType> slaMetric : METRICS.entries()) {
+    for (Entry<AlgorithmType, GroupType> slaMetric : metrics.entries()) {
       for (Entry<String, Collection<IScheduledTask>> namedGroup
           : slaMetric.getValue().getSlaGroup().createNamedGroups(tasks).asMap().entrySet()) {
 
         AlgorithmType algoType = slaMetric.getKey();
         String metricName = namedGroup.getKey() + algoType.getAlgorithmName();
         metricCache.getUnchecked(metricName)
-            .set(metricName, algoType.getAlgorithm().calculate(
-                namedGroup.getValue(),
-                Range.closedOpen(intervalStartMs, nowMs)));
+            .set(metricName, algoType.getAlgorithm().calculate(namedGroup.getValue(), timeRange));
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/61f910ce/src/main/java/org/apache/aurora/scheduler/sla/SlaAlgorithm.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/aurora/scheduler/sla/SlaAlgorithm.java b/src/main/java/org/apache/aurora/scheduler/sla/SlaAlgorithm.java
index 33b6bbe..0f67311 100644
--- a/src/main/java/org/apache/aurora/scheduler/sla/SlaAlgorithm.java
+++ b/src/main/java/org/apache/aurora/scheduler/sla/SlaAlgorithm.java
@@ -41,6 +41,10 @@ import org.apache.aurora.scheduler.storage.entities.ITaskEvent;
 
 import static java.util.Objects.requireNonNull;
 
+import static org.apache.aurora.gen.ScheduleStatus.ASSIGNED;
+import static org.apache.aurora.gen.ScheduleStatus.PENDING;
+import static org.apache.aurora.gen.ScheduleStatus.RUNNING;
+
 /**
  * Defines an SLA algorithm to be applied to a {@link IScheduledTask}
  * set for calculating a specific SLA metric.
@@ -68,8 +72,10 @@ interface SlaAlgorithm {
     JOB_UPTIME_75(new JobUptime(75f), String.format(JobUptime.NAME_FORMAT, 75f)),
     JOB_UPTIME_50(new JobUptime(50f), String.format(JobUptime.NAME_FORMAT, 50f)),
     AGGREGATE_PLATFORM_UPTIME(new AggregatePlatformUptime(), "platform_uptime_percent"),
-    MEDIAN_TIME_TO_ASSIGNED(new MedianAlgorithm(ScheduleStatus.ASSIGNED), "mtta_ms"),
-    MEDIAN_TIME_TO_RUNNING(new MedianAlgorithm(ScheduleStatus.RUNNING), "mttr_ms");
+    MEDIAN_TIME_TO_ASSIGNED(new MedianAlgorithm(ASSIGNED), "mtta_ms"),
+    MEDIAN_TIME_TO_RUNNING(new MedianAlgorithm(RUNNING), "mttr_ms"),
+    MEDIAN_TIME_TO_ASSIGNED_NON_PROD(new MedianAlgorithm(ASSIGNED), "mtta_nonprod_ms"),
+    MEDIAN_TIME_TO_RUNNING_NON_PROD(new MedianAlgorithm(RUNNING), "mttr_nonprod_ms");
 
     private final SlaAlgorithm algorithm;
     private final String name;
@@ -121,7 +127,7 @@ interface SlaAlgorithm {
       for (IScheduledTask task : activeTasks) {
         long pendingTs = 0;
         for (ITaskEvent event : task.getTaskEvents()) {
-          if (event.getStatus() == ScheduleStatus.PENDING) {
+          if (event.getStatus() == PENDING) {
             pendingTs = event.getTimestamp();
           } else if (event.getStatus() == status && timeFrame.contains(event.getTimestamp())) {
 
@@ -152,7 +158,7 @@ interface SlaAlgorithm {
 
     private static final Predicate<IScheduledTask> IS_RUNNING =
         Predicates.compose(
-            Predicates.in(ImmutableSet.of(ScheduleStatus.RUNNING)),
+            Predicates.in(ImmutableSet.of(RUNNING)),
             Tasks.GET_STATUS);
 
     private static final Function<IScheduledTask, ITaskEvent> TASK_TO_EVENT =

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/61f910ce/src/test/java/org/apache/aurora/scheduler/sla/MetricCalculatorTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/aurora/scheduler/sla/MetricCalculatorTest.java b/src/test/java/org/apache/aurora/scheduler/sla/MetricCalculatorTest.java
index aeb90bb..fa6d574 100644
--- a/src/test/java/org/apache/aurora/scheduler/sla/MetricCalculatorTest.java
+++ b/src/test/java/org/apache/aurora/scheduler/sla/MetricCalculatorTest.java
@@ -13,7 +13,13 @@
  */
 package org.apache.aurora.scheduler.sla;
 
+import java.util.Map;
+import java.util.Set;
+
 import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Multimap;
+
 import com.twitter.common.base.Supplier;
 import com.twitter.common.quantity.Amount;
 import com.twitter.common.quantity.Time;
@@ -24,44 +30,76 @@ import com.twitter.common.util.testing.FakeClock;
 
 import org.apache.aurora.scheduler.base.Query;
 import org.apache.aurora.scheduler.sla.MetricCalculator.MetricCalculatorSettings;
+import org.apache.aurora.scheduler.storage.entities.IScheduledTask;
 import org.apache.aurora.scheduler.storage.testing.StorageTestUtil;
+import org.easymock.Capture;
+import org.easymock.CaptureType;
 import org.easymock.EasyMock;
-import org.junit.Before;
 import org.junit.Test;
 
 import static org.apache.aurora.gen.ScheduleStatus.PENDING;
+import static org.apache.aurora.scheduler.sla.MetricCalculator.NON_PROD_METRICS;
+import static org.apache.aurora.scheduler.sla.MetricCalculator.PROD_METRICS;
+import static org.apache.aurora.scheduler.sla.SlaAlgorithm.AlgorithmType;
+import static org.apache.aurora.scheduler.sla.SlaTestUtil.makeTask;
 import static org.easymock.EasyMock.expect;
+import static org.junit.Assert.assertEquals;
 
 public class MetricCalculatorTest extends EasyMockTest {
 
-  private final FakeClock clock = new FakeClock();
-  private StorageTestUtil storageUtil;
-  private MetricCalculator calculator;
-
-  @Before
-  public void setUp() throws Exception {
+  @Test
+  public void runTest() {
+    FakeClock clock = new FakeClock();
     StatsProvider statsProvider = createMock(StatsProvider.class);
     StatsProvider untracked = createMock(StatsProvider.class);
     MetricCalculatorSettings settings = new MetricCalculatorSettings(10000);
-    storageUtil = new StorageTestUtil(this);
-    calculator = new MetricCalculator(storageUtil.storage, clock, settings, statsProvider);
+    StorageTestUtil storageUtil = new StorageTestUtil(this);
+    MetricCalculator calculator = new MetricCalculator(
+        storageUtil.storage,
+        clock,
+        settings,
+        statsProvider);
+
     expect(statsProvider.untracked()).andReturn(untracked).anyTimes();
-    expect(untracked.makeGauge(EasyMock.anyString(), EasyMock.<Supplier<Number>>anyObject()))
+
+    Capture<String> names = new Capture<>(CaptureType.ALL);
+    expect(untracked.makeGauge(EasyMock.capture(names), EasyMock.<Supplier<Number>>anyObject()))
         .andReturn(EasyMock.<Stat<Number>>anyObject())
         .anyTimes();
-  }
 
-  @Test
-  public void runTest() {
+    IScheduledTask task1 = makeTask(ImmutableMap.of(clock.nowMillis() - 1000, PENDING), 0);
+    IScheduledTask task2 = makeTask(ImmutableMap.of(clock.nowMillis() - 2000, PENDING), 1);
+    IScheduledTask task3 = makeTask(ImmutableMap.of(clock.nowMillis() - 3000, PENDING), 2);
+    IScheduledTask task4 = makeTask(ImmutableMap.of(clock.nowMillis() - 4000, PENDING), 3, false);
+
     clock.advance(Amount.of(10L, Time.SECONDS));
-    storageUtil.expectTaskFetch(
-        Query.unscoped(),
-        SlaTestUtil.makeTask(ImmutableMap.of(clock.nowMillis() - 1000, PENDING), 0),
-        SlaTestUtil.makeTask(ImmutableMap.of(clock.nowMillis() - 2000, PENDING), 1),
-        SlaTestUtil.makeTask(ImmutableMap.of(clock.nowMillis() - 3000, PENDING), 2));
+    storageUtil.expectTaskFetch(Query.unscoped(), task1, task2, task3, task4);
     storageUtil.expectOperations();
 
     control.replay();
     calculator.run();
+
+    Set<String> metricNames = generateMetricNames(
+        ImmutableSet.of(task1, task2, task3, task4),
+        ImmutableSet.of(PROD_METRICS, NON_PROD_METRICS));
+
+    assertEquals(PROD_METRICS.size() + NON_PROD_METRICS.size(), names.getValues().size());
+    assertEquals(metricNames, ImmutableSet.copyOf(names.getValues()));
+  }
+
+  private Set<String> generateMetricNames(
+      Set<IScheduledTask> tasks,
+      Set<Multimap<AlgorithmType, SlaGroup.GroupType>> definitions) {
+
+    ImmutableSet.Builder<String> names = ImmutableSet.builder();
+    for (Multimap<AlgorithmType, SlaGroup.GroupType> definition : definitions) {
+      for (Map.Entry<AlgorithmType, SlaGroup.GroupType> entry : definition.entries()) {
+        for (String metric : entry.getValue().getSlaGroup().createNamedGroups(tasks).keys()) {
+          names.add(metric + entry.getKey().getAlgorithmName());
+        }
+      }
+    }
+
+    return names.build();
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/61f910ce/src/test/java/org/apache/aurora/scheduler/sla/SlaTestUtil.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/aurora/scheduler/sla/SlaTestUtil.java b/src/test/java/org/apache/aurora/scheduler/sla/SlaTestUtil.java
index 96a0438..21640f7 100644
--- a/src/test/java/org/apache/aurora/scheduler/sla/SlaTestUtil.java
+++ b/src/test/java/org/apache/aurora/scheduler/sla/SlaTestUtil.java
@@ -28,14 +28,17 @@ import org.apache.aurora.gen.TaskEvent;
 import org.apache.aurora.scheduler.storage.entities.IScheduledTask;
 import org.apache.aurora.scheduler.storage.entities.ITaskEvent;
 
-public final class SlaTestUtil {
+final class SlaTestUtil {
 
   private SlaTestUtil() {
     // Utility class.
   }
 
-  public static IScheduledTask makeTask(
-      Map<Long, ScheduleStatus> events, int instanceId) {
+  static IScheduledTask makeTask(Map<Long, ScheduleStatus> events, int instanceId) {
+    return makeTask(events, instanceId, true);
+  }
+
+  static IScheduledTask makeTask(Map<Long, ScheduleStatus> events, int instanceId, boolean isProd) {
     List<ITaskEvent> taskEvents = makeEvents(events);
     return IScheduledTask.build(new ScheduledTask()
         .setStatus(Iterables.getLast(taskEvents).getStatus())
@@ -47,12 +50,12 @@ public final class SlaTestUtil {
             .setTask(new TaskConfig()
                 .setJobName("job")
                 .setIsService(true)
-                .setProduction(true)
+                .setProduction(isProd)
                 .setEnvironment("env")
                 .setOwner(new Identity("role", "role-user")))));
   }
 
-  public static List<ITaskEvent> makeEvents(Map<Long, ScheduleStatus> events) {
+  static List<ITaskEvent> makeEvents(Map<Long, ScheduleStatus> events) {
     ImmutableList.Builder<ITaskEvent> taskEvents = ImmutableList.builder();
     for (Map.Entry<Long, ScheduleStatus> entry : events.entrySet()) {
       taskEvents.add(ITaskEvent.build(new TaskEvent(entry.getKey(), entry.getValue())));