You are viewing a plain text version of this content. The canonical link for it is here.
Posted to yarn-commits@hadoop.apache.org by vi...@apache.org on 2014/04/29 21:49:45 UTC

svn commit: r1591071 - in /hadoop/common/trunk/hadoop-yarn-project: ./ hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/

Author: vinodkv
Date: Tue Apr 29 19:49:44 2014
New Revision: 1591071

URL: http://svn.apache.org/r1591071
Log:
YARN-1929. Fixed a deadlock in ResourceManager that occurs when failover happens right at the time of shutdown. Contributed by Karthik Kambatla.

Added:
    hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMEmbeddedElector.java
Modified:
    hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt
    hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java
    hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/EmbeddedElectorService.java

Modified: hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt?rev=1591071&r1=1591070&r2=1591071&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt Tue Apr 29 19:49:44 2014
@@ -164,6 +164,9 @@ Release 2.4.1 - UNRELEASED
     YARN-1975. Used resources shows escaped html in CapacityScheduler and
     FairScheduler page (Mit Desai via jlowe)
 
+    YARN-1929. Fixed a deadlock in ResourceManager that occurs when failover
+    happens right at the time of shutdown. (Karthik Kambatla via vinodkv)
+
 Release 2.4.0 - 2014-04-07 
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java?rev=1591071&r1=1591070&r2=1591071&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java Tue Apr 29 19:49:44 2014
@@ -101,7 +101,7 @@ public class AdminService extends Compos
   }
 
   @Override
-  public synchronized void serviceInit(Configuration conf) throws Exception {
+  public void serviceInit(Configuration conf) throws Exception {
     if (rmContext.isHAEnabled()) {
       autoFailoverEnabled = HAUtil.isAutomaticFailoverEnabled(conf);
       if (autoFailoverEnabled) {
@@ -123,13 +123,13 @@ public class AdminService extends Compos
   }
 
   @Override
-  protected synchronized void serviceStart() throws Exception {
+  protected void serviceStart() throws Exception {
     startServer();
     super.serviceStart();
   }
 
   @Override
-  protected synchronized void serviceStop() throws Exception {
+  protected void serviceStop() throws Exception {
     stopServer();
     super.serviceStop();
   }

Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/EmbeddedElectorService.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/EmbeddedElectorService.java?rev=1591071&r1=1591070&r2=1591071&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/EmbeddedElectorService.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/EmbeddedElectorService.java Tue Apr 29 19:49:44 2014
@@ -61,7 +61,7 @@ public class EmbeddedElectorService exte
   }
 
   @Override
-  protected synchronized void serviceInit(Configuration conf)
+  protected void serviceInit(Configuration conf)
       throws Exception {
     conf = conf instanceof YarnConfiguration ? conf : new YarnConfiguration(conf);
 
@@ -102,20 +102,20 @@ public class EmbeddedElectorService exte
   }
 
   @Override
-  protected synchronized void serviceStart() throws Exception {
+  protected void serviceStart() throws Exception {
     elector.joinElection(localActiveNodeInfo);
     super.serviceStart();
   }
 
   @Override
-  protected synchronized void serviceStop() throws Exception {
+  protected void serviceStop() throws Exception {
     elector.quitElection(false);
     elector.terminateConnection();
     super.serviceStop();
   }
 
   @Override
-  public synchronized void becomeActive() throws ServiceFailedException {
+  public void becomeActive() throws ServiceFailedException {
     try {
       rmContext.getRMAdminService().transitionToActive(req);
     } catch (Exception e) {
@@ -124,7 +124,7 @@ public class EmbeddedElectorService exte
   }
 
   @Override
-  public synchronized void becomeStandby() {
+  public void becomeStandby() {
     try {
       rmContext.getRMAdminService().transitionToStandby(req);
     } catch (Exception e) {
@@ -143,13 +143,13 @@ public class EmbeddedElectorService exte
 
   @SuppressWarnings(value = "unchecked")
   @Override
-  public synchronized void notifyFatalError(String errorMessage) {
+  public void notifyFatalError(String errorMessage) {
     rmContext.getDispatcher().getEventHandler().handle(
         new RMFatalEvent(RMFatalEventType.EMBEDDED_ELECTOR_FAILED, errorMessage));
   }
 
   @Override
-  public synchronized void fenceOldActive(byte[] oldActiveData) {
+  public void fenceOldActive(byte[] oldActiveData) {
     if (LOG.isDebugEnabled()) {
       LOG.debug("Request to fence old active being ignored, " +
           "as embedded leader election doesn't support fencing");
@@ -166,7 +166,7 @@ public class EmbeddedElectorService exte
         .toByteArray();
   }
 
-  private synchronized boolean isParentZnodeSafe(String clusterId)
+  private boolean isParentZnodeSafe(String clusterId)
       throws InterruptedException, IOException, KeeperException {
     byte[] data;
     try {

Added: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMEmbeddedElector.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMEmbeddedElector.java?rev=1591071&view=auto
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMEmbeddedElector.java (added)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMEmbeddedElector.java Tue Apr 29 19:49:44 2014
@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.resourcemanager;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.ha.ClientBaseWithFixes;
+import org.apache.hadoop.ha.ServiceFailedException;
+import org.apache.hadoop.yarn.conf.HAUtil;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/** Tests for the ResourceManager's embedded leader elector (see YARN-1929). */
+public class TestRMEmbeddedElector extends ClientBaseWithFixes {
+  private static final Log LOG = LogFactory.getLog(TestRMEmbeddedElector.class);
+
+  private static final String RM1_NODE_ID = "rm1";
+  private static final int RM1_PORT_BASE = 10000;
+  private static final String RM2_NODE_ID = "rm2";
+  private static final int RM2_PORT_BASE = 20000;
+  private static final long POLL_INTERVAL_MS = 10; // callback poll period
+
+  private Configuration conf;
+  private AtomicBoolean callbackCalled; // set once becomeActive() runs
+
+  /** Sets {@code prefix}, suffixed for {@code rmId}, to {@code value}. */
+  private void setConfForRM(String rmId, String prefix, String value) {
+    conf.set(HAUtil.addSuffix(prefix, rmId), value);
+  }
+
+  /** Sets rmId's RM addresses to 0.0.0.0:(base + default port). */
+  private void setRpcAddressForRM(String rmId, int base) {
+    setConfForRM(rmId, YarnConfiguration.RM_ADDRESS, "0.0.0.0:" +
+        (base + YarnConfiguration.DEFAULT_RM_PORT));
+    setConfForRM(rmId, YarnConfiguration.RM_SCHEDULER_ADDRESS, "0.0.0.0:" +
+        (base + YarnConfiguration.DEFAULT_RM_SCHEDULER_PORT));
+    setConfForRM(rmId, YarnConfiguration.RM_ADMIN_ADDRESS, "0.0.0.0:" +
+        (base + YarnConfiguration.DEFAULT_RM_ADMIN_PORT));
+    setConfForRM(rmId, YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS, "0.0.0.0:" +
+        (base + YarnConfiguration.DEFAULT_RM_RESOURCE_TRACKER_PORT));
+    setConfForRM(rmId, YarnConfiguration.RM_WEBAPP_ADDRESS, "0.0.0.0:" +
+        (base + YarnConfiguration.DEFAULT_RM_WEBAPP_PORT));
+    setConfForRM(rmId, YarnConfiguration.RM_WEBAPP_HTTPS_ADDRESS, "0.0.0.0:" +
+        (base + YarnConfiguration.DEFAULT_RM_WEBAPP_HTTPS_PORT));
+  }
+
+  /** Builds an HA config with embedded auto-failover for rm1 and rm2. */
+  @Before
+  public void setup() throws IOException {
+    conf = new YarnConfiguration();
+    conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
+    conf.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, true);
+    conf.setBoolean(YarnConfiguration.AUTO_FAILOVER_EMBEDDED, true);
+    conf.set(YarnConfiguration.RM_CLUSTER_ID, "yarn-test-cluster");
+    conf.set(YarnConfiguration.RM_ZK_ADDRESS, hostPort);
+    conf.setInt(YarnConfiguration.RM_ZK_TIMEOUT_MS, 2000);
+
+    conf.set(YarnConfiguration.RM_HA_IDS, RM1_NODE_ID + "," + RM2_NODE_ID);
+    conf.set(YarnConfiguration.RM_HA_ID, RM1_NODE_ID);
+    setRpcAddressForRM(RM1_NODE_ID, RM1_PORT_BASE);
+    setRpcAddressForRM(RM2_NODE_ID, RM2_PORT_BASE);
+    conf.setLong(YarnConfiguration.CLIENT_FAILOVER_SLEEPTIME_BASE_MS, 100L);
+
+    callbackCalled = new AtomicBoolean(false);
+  }
+
+  /**
+   * Checks for a deadlock between (a) the thread stopping the RM and
+   * (b) the thread processing the ZK event asking the RM to become active.
+   * The test times out if there is a deadlock.
+   */
+  @Test (timeout = 10000)
+  public void testDeadlockShutdownBecomeActive() throws InterruptedException {
+    MockRM rm = new MockRMWithElector(conf, 1000);
+    rm.start();
+    LOG.info("Waiting for callback");
+    while (!callbackCalled.get()) {
+      Thread.sleep(POLL_INTERVAL_MS);
+    }
+    LOG.info("Stopping RM");
+    rm.stop();
+    LOG.info("Stopped RM");
+  }
+
+  /** MockRM whose elector sleeps for delayMs before becoming active. */
+  private class MockRMWithElector extends MockRM {
+    private final long delayMs;
+
+    MockRMWithElector(Configuration conf, long delayMs) {
+      super(conf);
+      this.delayMs = delayMs;
+    }
+
+    @Override
+    protected AdminService createAdminService() {
+      return new AdminService(MockRMWithElector.this, getRMContext()) {
+        @Override
+        protected EmbeddedElectorService createEmbeddedElectorService() {
+          return new EmbeddedElectorService(getRMContext()) {
+            @Override
+            public void becomeActive() throws ServiceFailedException {
+              try {
+                callbackCalled.set(true);
+                LOG.info("Callback called. Sleeping now");
+                Thread.sleep(delayMs);
+                LOG.info("Sleep done");
+              } catch (InterruptedException e) {
+                LOG.warn("Interrupted while delaying becomeActive", e);
+                Thread.currentThread().interrupt(); // restore interrupt flag
+              }
+              super.becomeActive();
+            }
+          };
+        }
+      };
+    }
+  }
+}