You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by vi...@apache.org on 2015/09/01 02:24:15 UTC

hadoop git commit: YARN-3094. Reset timer for liveness monitors after RM recovery. Contributed by Jun Gong (cherry picked from commit 0af6a99a3fcfa4b47d3bcba5e5cc5fe7b312a152)

Repository: hadoop
Updated Branches:
  refs/heads/branch-2.6.1 bb8a1551c -> a703952d3


YARN-3094. Reset timer for liveness monitors after RM recovery. Contributed by Jun Gong
(cherry picked from commit 0af6a99a3fcfa4b47d3bcba5e5cc5fe7b312a152)

(cherry picked from commit 61466809552f96a83aa19446d4d59cecd0d2cad5)
(cherry picked from commit ab654746fbad2da12b24b13425dc9bf17c46b50c)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/a703952d
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/a703952d
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/a703952d

Branch: refs/heads/branch-2.6.1
Commit: a703952d3916d33b8524236e6c6372ae4ec959c5
Parents: bb8a155
Author: Jian He <ji...@apache.org>
Authored: Mon Feb 9 13:47:08 2015 -0800
Committer: Vinod Kumar Vavilapalli <vi...@apache.org>
Committed: Mon Aug 31 17:17:47 2015 -0700

----------------------------------------------------------------------
 hadoop-yarn-project/CHANGES.txt                 |  3 +
 .../yarn/util/AbstractLivelinessMonitor.java    |  8 ++
 .../server/resourcemanager/ResourceManager.java |  2 +
 .../rmapp/attempt/AMLivelinessMonitor.java      |  6 ++
 .../rmapp/attempt/TestAMLivelinessMonitor.java  | 81 ++++++++++++++++++++
 5 files changed, 100 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/a703952d/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index d23fefa..915f476 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -72,6 +72,9 @@ Release 2.6.1 - UNRELEASED
     YARN-3103. AMRMClientImpl does not update AMRM token properly. (Jason Lowe
     via jianhe)
 
+    YARN-3094. Reset timer for liveness monitors after RM recovery. (Jun Gong
+    via jianhe)
+
 Release 2.6.0 - 2014-11-18
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a703952d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AbstractLivelinessMonitor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AbstractLivelinessMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AbstractLivelinessMonitor.java
index c182531..4f587b3 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AbstractLivelinessMonitor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AbstractLivelinessMonitor.java
@@ -59,6 +59,7 @@ public abstract class AbstractLivelinessMonitor<O> extends AbstractService {
   @Override
   protected void serviceStart() throws Exception {
     assert !stopped : "starting when already stopped";
+    resetTimer();
     checkerThread = new Thread(new PingChecker());
     checkerThread.setName("Ping Checker");
     checkerThread.start();
@@ -99,6 +100,13 @@ public abstract class AbstractLivelinessMonitor<O> extends AbstractService {
     running.remove(ob);
   }
 
+  public synchronized void resetTimer() {
+    long time = clock.getTime();
+    for (O ob : running.keySet()) {
+      running.put(ob, time);
+    }
+  }
+
   private class PingChecker implements Runnable {
 
     @Override

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a703952d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
index 3ce42a3..ea762c0 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
@@ -571,12 +571,14 @@ public class ResourceManager extends CompositeService implements Recoverable {
 
       if(recoveryEnabled) {
         try {
+          LOG.info("Recovery started");
           rmStore.checkVersion();
           if (rmContext.isWorkPreservingRecoveryEnabled()) {
             rmContext.setEpoch(rmStore.getAndIncrementEpoch());
           }
           RMState state = rmStore.loadState();
           recover(state);
+          LOG.info("Recovery ended");
         } catch (Exception e) {
           // the Exception from loadState() needs to be handled for
           // HA and we need to give up master status if we got fenced

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a703952d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AMLivelinessMonitor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AMLivelinessMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AMLivelinessMonitor.java
index 2c1f7f1..76331bf 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AMLivelinessMonitor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AMLivelinessMonitor.java
@@ -24,6 +24,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.util.AbstractLivelinessMonitor;
+import org.apache.hadoop.yarn.util.Clock;
 import org.apache.hadoop.yarn.util.SystemClock;
 
 public class AMLivelinessMonitor extends AbstractLivelinessMonitor<ApplicationAttemptId> {
@@ -35,6 +36,11 @@ public class AMLivelinessMonitor extends AbstractLivelinessMonitor<ApplicationAt
     this.dispatcher = d.getEventHandler();
   }
 
+  public AMLivelinessMonitor(Dispatcher d, Clock clock) {
+    super("AMLivelinessMonitor", clock);
+    this.dispatcher = d.getEventHandler();
+  }
+
   public void serviceInit(Configuration conf) throws Exception {
     super.serviceInit(conf);
     int expireIntvl = conf.getInt(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS,

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a703952d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestAMLivelinessMonitor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestAMLivelinessMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestAMLivelinessMonitor.java
new file mode 100644
index 0000000..e0e6aee
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestAMLivelinessMonitor.java
@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt;
+
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.service.Service;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore;
+import org.apache.hadoop.yarn.util.ControlledClock;
+import org.apache.hadoop.yarn.util.SystemClock;
+import org.junit.Assert;
+import org.junit.Test;
+
+import static org.mockito.Mockito.mock;
+
+public class TestAMLivelinessMonitor {
+
+  @Test(timeout = 10000)
+  public void testResetTimer() throws Exception {
+    YarnConfiguration conf = new YarnConfiguration();
+    UserGroupInformation.setConfiguration(conf);
+    conf.set(YarnConfiguration.RECOVERY_ENABLED, "true");
+    conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
+    conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, true);
+    conf.setInt(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, 6000);
+    final ControlledClock clock = new ControlledClock(new SystemClock());
+    clock.setTime(0);
+    MemoryRMStateStore memStore = new MemoryRMStateStore() {
+      @Override
+      public synchronized RMState loadState() throws Exception {
+        clock.setTime(8000);
+        return super.loadState();
+      }
+    };
+    memStore.init(conf);
+    final ApplicationAttemptId attemptId = mock(ApplicationAttemptId.class);
+    final Dispatcher dispatcher = mock(Dispatcher.class);
+    final boolean[] expired = new boolean[]{false};
+    final AMLivelinessMonitor monitor = new AMLivelinessMonitor(
+        dispatcher, clock) {
+      @Override
+      protected void expire(ApplicationAttemptId id) {
+        Assert.assertEquals(id, attemptId);
+        expired[0] = true;
+      }
+    };
+    monitor.register(attemptId);
+    MockRM rm = new MockRM(conf, memStore) {
+      @Override
+      protected AMLivelinessMonitor createAMLivelinessMonitor() {
+        return monitor;
+      }
+    };
+    rm.start();
+    // make sure that monitor has started
+    while (monitor.getServiceState() != Service.STATE.STARTED) {
+      Thread.sleep(100);
+    }
+    // expired[0] would be set to true without resetTimer
+    Assert.assertFalse(expired[0]);
+    rm.stop();
+  }
+}