You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@aurora.apache.org by dm...@apache.org on 2016/12/15 21:16:35 UTC

aurora git commit: Avoid double writing job updates to the Scheduler Snapshot

Repository: aurora
Updated Branches:
  refs/heads/master 8e37d0f18 -> c42b1af31


Avoid double writing job updates to the Scheduler Snapshot

Motivation: Thanks to the mybatis query metrics we added, we found that double writing Snapshot fields for H2 stores adds considerable overhead to our snapshot creation time.

Snapshots are also written as backups, and many operators choose to process backups offline for analytics, rather than query the live scheduler (due to not being able to scale reads horizontally). So this allows operators to enable/disable the hydrated fields as needed.

Bugs closed: AURORA-1861

Reviewed at https://reviews.apache.org/r/54774/


Project: http://git-wip-us.apache.org/repos/asf/aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/c42b1af3
Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/c42b1af3
Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/c42b1af3

Branch: refs/heads/master
Commit: c42b1af311354762ee982d2e676d0f60ce5958a5
Parents: 8e37d0f
Author: David McLaughlin <da...@dmclaughlin.com>
Authored: Thu Dec 15 13:16:23 2016 -0800
Committer: David McLaughlin <dm...@twitter.com>
Committed: Thu Dec 15 13:16:23 2016 -0800

----------------------------------------------------------------------
 RELEASE-NOTES.md                                |   5 +
 .../storage/backup/TemporaryStorage.java        |   4 +
 .../scheduler/storage/log/LogStorageModule.java |  11 ++
 .../storage/log/SnapshotStoreImpl.java          | 101 +++++++++++++++++--
 .../scheduler/storage/backup/RecoveryTest.java  |   6 +-
 .../storage/log/SnapshotStoreImplIT.java        |  33 ++++--
 6 files changed, 137 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/aurora/blob/c42b1af3/RELEASE-NOTES.md
----------------------------------------------------------------------
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 90c4793..a792594 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -2,6 +2,11 @@
 =========================
 
 ### New/updated:
+- Added a new flag --snapshot_hydrate_stores that controls which H2-backed stores to write fully
+  hydrated into the Scheduler snapshot. Can lead to significantly lower snapshot times for large
+  clusters if you set this flag to an empty list. Old behavior is preserved by default, but see
+  org.apache.aurora.scheduler.storage.log.SnapshotStoreImpl for which stores we currently have
+  duplicate writes for.
 - A task's tier is now mapped to a label on the Mesos `TaskInfo` proto.
 - The Aurora client is now using the Thrift binary protocol to communicate with the scheduler.
 - Introduce a new `--ip` option to bind the Thermos observer to a specific rather than all

http://git-wip-us.apache.org/repos/asf/aurora/blob/c42b1af3/src/main/java/org/apache/aurora/scheduler/storage/backup/TemporaryStorage.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/aurora/scheduler/storage/backup/TemporaryStorage.java b/src/main/java/org/apache/aurora/scheduler/storage/backup/TemporaryStorage.java
index 3fa408e..36a1bd5 100644
--- a/src/main/java/org/apache/aurora/scheduler/storage/backup/TemporaryStorage.java
+++ b/src/main/java/org/apache/aurora/scheduler/storage/backup/TemporaryStorage.java
@@ -17,6 +17,7 @@ import java.util.Set;
 
 import com.google.common.base.Function;
 import com.google.common.collect.FluentIterable;
+import com.google.common.collect.ImmutableSet;
 import com.google.inject.Inject;
 
 import org.apache.aurora.common.util.BuildInfo;
@@ -89,6 +90,9 @@ interface TemporaryStorage {
           // Safe to pass false here to default to the non-experimental task store
           // during restore from backup procedure.
           false /** useDbSnapshotForTaskStore */,
+          // Safe to pass empty set here because during backup restore we are not deciding which
+          // fields to write to the snapshot.
+          ImmutableSet.of() /** hydrateFields */,
           // We can just pass an empty lambda for the MigrationManager as migration is a no-op
           // when restoring from backup.
           () -> { } /** migrationManager */,

http://git-wip-us.apache.org/repos/asf/aurora/blob/c42b1af3/src/main/java/org/apache/aurora/scheduler/storage/log/LogStorageModule.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/aurora/scheduler/storage/log/LogStorageModule.java b/src/main/java/org/apache/aurora/scheduler/storage/log/LogStorageModule.java
index 7dcd1bf..835f160 100644
--- a/src/main/java/org/apache/aurora/scheduler/storage/log/LogStorageModule.java
+++ b/src/main/java/org/apache/aurora/scheduler/storage/log/LogStorageModule.java
@@ -13,6 +13,7 @@
  */
 package org.apache.aurora.scheduler.storage.log;
 
+import java.util.Set;
 import javax.inject.Singleton;
 
 import com.google.common.annotations.VisibleForTesting;
@@ -35,6 +36,7 @@ import org.apache.aurora.scheduler.storage.db.DbModule;
 import org.apache.aurora.scheduler.storage.log.LogManager.MaxEntrySize;
 import org.apache.aurora.scheduler.storage.log.LogStorage.Settings;
 import org.apache.aurora.scheduler.storage.log.SnapshotStoreImpl.ExperimentalTaskStore;
+import org.apache.aurora.scheduler.storage.log.SnapshotStoreImpl.HydrateSnapshotFields;
 
 import static org.apache.aurora.scheduler.storage.log.EntrySerializer.EntrySerializerImpl;
 import static org.apache.aurora.scheduler.storage.log.LogManager.LogEntryHashFunction;
@@ -64,6 +66,11 @@ public class LogStorageModule extends PrivateModule {
   public static final Arg<Amount<Integer, Data>> MAX_LOG_ENTRY_SIZE =
       Arg.create(Amount.of(512, Data.KB));
 
+  @CmdLine(name = "snapshot_hydrate_stores",
+      help = "Which H2-backed stores to fully hydrate on the Snapshot.")
+  private static final Arg<Set<String>> HYDRATE_SNAPSHOT_FIELDS =
+      Arg.create(SnapshotStoreImpl.ALL_H2_STORE_FIELDS);
+
   @Override
   protected void configure() {
     bind(Settings.class)
@@ -77,12 +84,16 @@ public class LogStorageModule extends PrivateModule {
     bind(LogManager.class).in(Singleton.class);
     bind(LogStorage.class).in(Singleton.class);
 
+    bind(new TypeLiteral<Set<String>>() { }).annotatedWith(HydrateSnapshotFields.class)
+        .toInstance(HYDRATE_SNAPSHOT_FIELDS.get());
+
     install(CallOrderEnforcingStorage.wrappingModule(LogStorage.class));
     bind(DistributedSnapshotStore.class).to(LogStorage.class);
     expose(Storage.class);
     expose(NonVolatileStorage.class);
     expose(DistributedSnapshotStore.class);
     expose(new TypeLiteral<Boolean>() { }).annotatedWith(ExperimentalTaskStore.class);
+    expose(new TypeLiteral<Set<String>>() { }).annotatedWith(HydrateSnapshotFields.class);
 
     bind(EntrySerializer.class).to(EntrySerializerImpl.class);
     // TODO(ksweeney): We don't need a cryptographic checksum here - assess performance of MD5

http://git-wip-us.apache.org/repos/asf/aurora/blob/c42b1af3/src/main/java/org/apache/aurora/scheduler/storage/log/SnapshotStoreImpl.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/aurora/scheduler/storage/log/SnapshotStoreImpl.java b/src/main/java/org/apache/aurora/scheduler/storage/log/SnapshotStoreImpl.java
index 853780b..239f2eb 100644
--- a/src/main/java/org/apache/aurora/scheduler/storage/log/SnapshotStoreImpl.java
+++ b/src/main/java/org/apache/aurora/scheduler/storage/log/SnapshotStoreImpl.java
@@ -24,6 +24,7 @@ import java.sql.SQLException;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 import javax.inject.Inject;
 import javax.inject.Qualifier;
@@ -92,6 +93,25 @@ public class SnapshotStoreImpl implements SnapshotStore<Snapshot> {
         && snapshot.isExperimentalTaskStore();
   }
 
+  private static final String DB_SCRIPT_FIELD = "dbscript";
+  private static final String LOCK_FIELD = "locks";
+  private static final String HOST_ATTRIBUTES_FIELD = "hosts";
+  private static final String QUOTA_FIELD = "quota";
+  private static final String TASK_FIELD = "tasks";
+  private static final String CRON_FIELD = "crons";
+  private static final String JOB_UPDATE_FIELD = "job_updates";
+  private static final String SCHEDULER_METADATA_FIELD = "scheduler_metadata";
+
+  /**
+   * Used by LogStorageModule to maintain legacy behavior for a change to snapshot format
+   * (and thus also backup processing) behavior. See AURORA-1861 for context.
+   */
+  static final Set<String> ALL_H2_STORE_FIELDS = ImmutableSet.of(
+      LOCK_FIELD,
+      HOST_ATTRIBUTES_FIELD,
+      QUOTA_FIELD,
+      JOB_UPDATE_FIELD);
+
   private final Iterable<SnapshotField> snapshotFields = Arrays.asList(
       // Order is critical here. The DB snapshot should always be tried first to ensure
       // graceful migration to DBTaskStore. Otherwise, there is a direct risk of losing the cluster.
@@ -105,6 +125,11 @@ public class SnapshotStoreImpl implements SnapshotStore<Snapshot> {
       //   moment a new snapshot is created
       new SnapshotField() {
         @Override
+        public String getName() {
+          return DB_SCRIPT_FIELD;
+        }
+
+        @Override
         @Timed("snapshot_save_db")
         public void saveToSnapshot(MutableStoreProvider store, Snapshot snapshot) {
           LOG.info("Saving dbsnapshot");
@@ -158,12 +183,19 @@ public class SnapshotStoreImpl implements SnapshotStore<Snapshot> {
         }
       },
       new SnapshotField() {
+        @Override
+        public String getName() {
+          return LOCK_FIELD;
+        }
+
         // It's important for locks to be replayed first, since there are relations that expect
         // references to be valid on insertion.
         @Override
         @Timed("snapshot_save_lock_store")
         public void saveToSnapshot(MutableStoreProvider store, Snapshot snapshot) {
-          snapshot.setLocks(ILock.toBuildersSet(store.getLockStore().fetchLocks()));
+          if (hydrateSnapshotFields.contains(getName())) {
+            snapshot.setLocks(ILock.toBuildersSet(store.getLockStore().fetchLocks()));
+          }
         }
 
         @Override
@@ -185,10 +217,17 @@ public class SnapshotStoreImpl implements SnapshotStore<Snapshot> {
       },
       new SnapshotField() {
         @Override
+        public String getName() {
+          return HOST_ATTRIBUTES_FIELD;
+        }
+
+        @Override
         @Timed("snapshot_save_host_attributes")
         public void saveToSnapshot(MutableStoreProvider store, Snapshot snapshot) {
-          snapshot.setHostAttributes(
-              IHostAttributes.toBuildersSet(store.getAttributeStore().getHostAttributes()));
+          if (hydrateSnapshotFields.contains(getName())) {
+            snapshot.setHostAttributes(
+                IHostAttributes.toBuildersSet(store.getAttributeStore().getHostAttributes()));
+          }
         }
 
         @Override
@@ -210,6 +249,11 @@ public class SnapshotStoreImpl implements SnapshotStore<Snapshot> {
       },
       new SnapshotField() {
         @Override
+        public String getName() {
+          return TASK_FIELD;
+        }
+
+        @Override
         @Timed("snapshot_save_task_store")
         public void saveToSnapshot(MutableStoreProvider store, Snapshot snapshot) {
           snapshot.setTasks(
@@ -235,6 +279,11 @@ public class SnapshotStoreImpl implements SnapshotStore<Snapshot> {
       },
       new SnapshotField() {
         @Override
+        public String getName() {
+          return CRON_FIELD;
+        }
+
+        @Override
         @Timed("snapshot_save_cron_store")
         public void saveToSnapshot(MutableStoreProvider store, Snapshot snapshot) {
           ImmutableSet.Builder<StoredCronJob> jobs = ImmutableSet.builder();
@@ -266,6 +315,11 @@ public class SnapshotStoreImpl implements SnapshotStore<Snapshot> {
       },
       new SnapshotField() {
         @Override
+        public String getName() {
+          return SCHEDULER_METADATA_FIELD;
+        }
+
+        @Override
         public void saveToSnapshot(MutableStoreProvider store, Snapshot snapshot) {
           // SchedulerMetadata is updated outside of the static list of SnapshotFields
         }
@@ -288,16 +342,23 @@ public class SnapshotStoreImpl implements SnapshotStore<Snapshot> {
       },
       new SnapshotField() {
         @Override
+        public String getName() {
+          return QUOTA_FIELD;
+        }
+
+        @Override
         @Timed("snapshot_save_quota_store")
         public void saveToSnapshot(MutableStoreProvider store, Snapshot snapshot) {
-          ImmutableSet.Builder<QuotaConfiguration> quotas = ImmutableSet.builder();
-          for (Map.Entry<String, IResourceAggregate> entry
-              : store.getQuotaStore().fetchQuotas().entrySet()) {
+          if (hydrateSnapshotFields.contains(getName())) {
+            ImmutableSet.Builder<QuotaConfiguration> quotas = ImmutableSet.builder();
+            for (Map.Entry<String, IResourceAggregate> entry
+                : store.getQuotaStore().fetchQuotas().entrySet()) {
 
-            quotas.add(new QuotaConfiguration(entry.getKey(), entry.getValue().newBuilder()));
-          }
+              quotas.add(new QuotaConfiguration(entry.getKey(), entry.getValue().newBuilder()));
+            }
 
-          snapshot.setQuotaConfigurations(quotas.build());
+            snapshot.setQuotaConfigurations(quotas.build());
+          }
         }
 
         @Override
@@ -320,9 +381,16 @@ public class SnapshotStoreImpl implements SnapshotStore<Snapshot> {
       },
       new SnapshotField() {
         @Override
+        public String getName() {
+          return JOB_UPDATE_FIELD;
+        }
+
+        @Override
         @Timed("snapshot_save_update_store")
         public void saveToSnapshot(MutableStoreProvider store, Snapshot snapshot) {
-          snapshot.setJobUpdateDetails(store.getJobUpdateStore().fetchAllJobUpdateDetails());
+          if (hydrateSnapshotFields.contains(getName())) {
+            snapshot.setJobUpdateDetails(store.getJobUpdateStore().fetchAllJobUpdateDetails());
+          }
         }
 
         @Override
@@ -368,6 +436,7 @@ public class SnapshotStoreImpl implements SnapshotStore<Snapshot> {
   private final Clock clock;
   private final Storage storage;
   private final boolean useDbSnapshotForTaskStore;
+  private final Set<String> hydrateSnapshotFields;
   private final MigrationManager migrationManager;
   private final ThriftBackfill thriftBackfill;
 
@@ -379,12 +448,21 @@ public class SnapshotStoreImpl implements SnapshotStore<Snapshot> {
   @Qualifier
   public @interface ExperimentalTaskStore { }
 
+  /**
+   * Identifies a set of snapshot fields to be fully hydrated when creating the snapshot.
+   */
+  @Retention(RetentionPolicy.RUNTIME)
+  @Target({ ElementType.PARAMETER, ElementType.METHOD })
+  @Qualifier
+  public @interface HydrateSnapshotFields { }
+
   @Inject
   public SnapshotStoreImpl(
       BuildInfo buildInfo,
       Clock clock,
       @Volatile Storage storage,
       @ExperimentalTaskStore boolean useDbSnapshotForTaskStore,
+      @HydrateSnapshotFields Set<String> hydrateSnapshotFields,
       MigrationManager migrationManager,
       ThriftBackfill thriftBackfill) {
 
@@ -392,6 +470,7 @@ public class SnapshotStoreImpl implements SnapshotStore<Snapshot> {
     this.clock = requireNonNull(clock);
     this.storage = requireNonNull(storage);
     this.useDbSnapshotForTaskStore = useDbSnapshotForTaskStore;
+    this.hydrateSnapshotFields = requireNonNull(hydrateSnapshotFields);
     this.migrationManager = requireNonNull(migrationManager);
     this.thriftBackfill = requireNonNull(thriftBackfill);
   }
@@ -436,6 +515,8 @@ public class SnapshotStoreImpl implements SnapshotStore<Snapshot> {
   }
 
   private interface SnapshotField {
+    String getName();
+
     void saveToSnapshot(MutableStoreProvider storeProvider, Snapshot snapshot);
 
     void restoreFromSnapshot(MutableStoreProvider storeProvider, Snapshot snapshot);

http://git-wip-us.apache.org/repos/asf/aurora/blob/c42b1af3/src/test/java/org/apache/aurora/scheduler/storage/backup/RecoveryTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/aurora/scheduler/storage/backup/RecoveryTest.java b/src/test/java/org/apache/aurora/scheduler/storage/backup/RecoveryTest.java
index 7a11850..42615da 100644
--- a/src/test/java/org/apache/aurora/scheduler/storage/backup/RecoveryTest.java
+++ b/src/test/java/org/apache/aurora/scheduler/storage/backup/RecoveryTest.java
@@ -170,12 +170,8 @@ public class RecoveryTest extends EasyMockTest {
             FakeBuildInfo.GIT_TAG, FakeBuildInfo.GIT_TAG));
 
     return new Snapshot()
-        .setHostAttributes(ImmutableSet.of())
         .setCronJobs(ImmutableSet.of())
         .setSchedulerMetadata(metadata)
-        .setQuotaConfigurations(ImmutableSet.of())
-        .setTasks(IScheduledTask.toBuildersSet(ImmutableSet.copyOf(tasks)))
-        .setLocks(ImmutableSet.of())
-        .setJobUpdateDetails(ImmutableSet.of());
+        .setTasks(IScheduledTask.toBuildersSet(ImmutableSet.copyOf(tasks)));
   }
 }

http://git-wip-us.apache.org/repos/asf/aurora/blob/c42b1af3/src/test/java/org/apache/aurora/scheduler/storage/log/SnapshotStoreImplIT.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/aurora/scheduler/storage/log/SnapshotStoreImplIT.java b/src/test/java/org/apache/aurora/scheduler/storage/log/SnapshotStoreImplIT.java
index cf0a8f3..f56a162 100644
--- a/src/test/java/org/apache/aurora/scheduler/storage/log/SnapshotStoreImplIT.java
+++ b/src/test/java/org/apache/aurora/scheduler/storage/log/SnapshotStoreImplIT.java
@@ -14,6 +14,7 @@
 package org.apache.aurora.scheduler.storage.log;
 
 import java.util.Map;
+import java.util.Set;
 
 import com.google.common.base.Optional;
 import com.google.common.collect.ImmutableList;
@@ -74,8 +75,10 @@ import static org.apache.aurora.scheduler.storage.Storage.MutateWork.NoResult;
 import static org.apache.aurora.scheduler.storage.db.DbModule.testModuleWithWorkQueue;
 import static org.apache.aurora.scheduler.storage.db.DbUtil.createStorage;
 import static org.apache.aurora.scheduler.storage.db.DbUtil.createStorageInjector;
+import static org.apache.aurora.scheduler.storage.log.SnapshotStoreImpl.ALL_H2_STORE_FIELDS;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 
 public class SnapshotStoreImplIT {
@@ -86,7 +89,7 @@ public class SnapshotStoreImplIT {
   private Storage storage;
   private SnapshotStore<Snapshot> snapshotStore;
 
-  private void setUpStore(boolean dbTaskStore) {
+  private void setUpStore(boolean dbTaskStore, Set<String> hydrateFields) {
     storage = dbTaskStore
         ? createStorage()
         : createStorageInjector(
@@ -100,6 +103,7 @@ public class SnapshotStoreImplIT {
         clock,
         storage,
         dbTaskStore,
+        hydrateFields,
         createStorageInjector(testModuleWithWorkQueue()).getInstance(MigrationManager.class),
         TaskTestUtil.THRIFT_BACKFILL);
   }
@@ -114,7 +118,7 @@ public class SnapshotStoreImplIT {
 
   @Test
   public void testNoDBTaskStore() {
-    setUpStore(false);
+    setUpStore(false, ALL_H2_STORE_FIELDS);
     populateStore();
 
     Snapshot snapshot1 = snapshotStore.createSnapshot();
@@ -129,14 +133,14 @@ public class SnapshotStoreImplIT {
 
   @Test
   public void testMigrateToDBTaskStore() {
-    setUpStore(false);
+    setUpStore(false, ALL_H2_STORE_FIELDS);
     populateStore();
 
     Snapshot snapshot1 = snapshotStore.createSnapshot();
     assertEquals(expected(), makeComparable(snapshot1));
     assertFalse(snapshot1.isExperimentalTaskStore());
 
-    setUpStore(true);
+    setUpStore(true, ALL_H2_STORE_FIELDS);
     snapshotStore.applySnapshot(snapshot1);
     Snapshot snapshot2 = snapshotStore.createSnapshot();
     assertTrue(snapshot2.isExperimentalTaskStore());
@@ -146,14 +150,14 @@ public class SnapshotStoreImplIT {
 
   @Test
   public void testMigrateFromDBTaskStore() {
-    setUpStore(true);
+    setUpStore(true, ALL_H2_STORE_FIELDS);
     populateStore();
 
     Snapshot snapshot1 = snapshotStore.createSnapshot();
     assertEquals(expected(), makeComparable(snapshot1));
     assertTrue(snapshot1.isExperimentalTaskStore());
 
-    setUpStore(false);
+    setUpStore(false, ALL_H2_STORE_FIELDS);
     snapshotStore.applySnapshot(snapshot1);
     Snapshot snapshot2 = snapshotStore.createSnapshot();
     assertFalse(snapshot2.isExperimentalTaskStore());
@@ -162,8 +166,21 @@ public class SnapshotStoreImplIT {
   }
 
   @Test
+  public void testNonDefaultHydrateOptions() {
+    setUpStore(false, ImmutableSet.of());
+    populateStore();
+
+    Snapshot snapshot = snapshotStore.createSnapshot();
+
+    assertNull(snapshot.getHostAttributes());
+    assertNull(snapshot.getJobUpdateDetails());
+    assertNull(snapshot.getLocks());
+    assertNull(snapshot.getQuotaConfigurations());
+  }
+
+  @Test
   public void testDBTaskStore() {
-    setUpStore(true);
+    setUpStore(true, ALL_H2_STORE_FIELDS);
     populateStore();
 
     Snapshot snapshot1 = snapshotStore.createSnapshot();
@@ -178,7 +195,7 @@ public class SnapshotStoreImplIT {
 
   @Test
   public void testBackfill() {
-    setUpStore(false);
+    setUpStore(false, ALL_H2_STORE_FIELDS);
     snapshotStore.applySnapshot(makeNonBackfilled());
 
     Snapshot backfilled = snapshotStore.createSnapshot();