You are viewing a plain text version of this content. The canonical link for it is here.
Posted to yarn-commits@hadoop.apache.org by vi...@apache.org on 2014/05/13 22:00:44 UTC
svn commit: r1594356 - in /hadoop/common/trunk/hadoop-yarn-project: ./
hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/
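hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/
hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/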
Author: vinodkv
Date: Tue May 13 20:00:44 2014
New Revision: 1594356
URL: http://svn.apache.org/r1594356
Log:
YARN-1861. Fixed a bug in RM to reset leader-election on fencing that was causing both RMs to be stuck in standby mode when automatic failover is enabled. Contributed by Karthik Kambatla and Xuan Gong.
Modified:
hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestRMFailover.java
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/EmbeddedElectorService.java
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
Modified: hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt?rev=1594356&r1=1594355&r2=1594356&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt Tue May 13 20:00:44 2014
@@ -209,6 +209,10 @@ Release 2.4.1 - UNRELEASED
YARN-1201. TestAMAuthorization fails with local hostname cannot be resolved.
(Wangda Tan via junping_du)
+ YARN-1861. Fixed a bug in RM to reset leader-election on fencing that was
+ causing both RMs to be stuck in standby mode when automatic failover is
+ enabled. (Karthik Kambatla and Xuan Gong via vinodkv)
+
Release 2.4.0 - 2014-04-07
INCOMPATIBLE CHANGES
Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestRMFailover.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestRMFailover.java?rev=1594356&r1=1594355&r2=1594356&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestRMFailover.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestRMFailover.java Tue May 13 20:00:44 2014
@@ -34,6 +34,7 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ha.ClientBaseWithFixes;
import org.apache.hadoop.ha.HAServiceProtocol;
+import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.service.Service.STATE;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.client.api.YarnClient;
@@ -42,6 +43,9 @@ import org.apache.hadoop.yarn.conf.YarnC
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.MiniYARNCluster;
import org.apache.hadoop.yarn.server.resourcemanager.AdminService;
+import org.apache.hadoop.yarn.server.resourcemanager.RMFatalEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.RMFatalEventType;
+import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer;
import org.junit.After;
import org.junit.Assert;
@@ -169,6 +173,7 @@ public class TestRMFailover extends Clie
verifyConnections();
}
+ @SuppressWarnings("unchecked")
@Test
public void testAutomaticFailover()
throws YarnException, InterruptedException, IOException {
@@ -186,6 +191,25 @@ public class TestRMFailover extends Clie
failover();
verifyConnections();
+
+ // Make the current Active handle an RMFatalEvent,
+ // so it transitions to standby.
+ ResourceManager rm = cluster.getResourceManager(
+ cluster.getActiveRMIndex());
+ RMFatalEvent event =
+ new RMFatalEvent(RMFatalEventType.STATE_STORE_FENCED,
+ "Fake RMFatalEvent");
+ rm.getRMContext().getDispatcher().getEventHandler().handle(event);
+ int maxWaitingAttempts = 2000;
+ while (maxWaitingAttempts-- > 0 ) {
+ if (rm.getRMContext().getHAServiceState() == HAServiceState.STANDBY) {
+ break;
+ }
+ Thread.sleep(1);
+ }
+ Assert.assertFalse("RM didn't transition to Standby ",
+ maxWaitingAttempts == 0);
+ verifyConnections();
}
@Test
Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java?rev=1594356&r1=1594355&r2=1594356&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java Tue May 13 20:00:44 2014
@@ -26,6 +26,7 @@ import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.ha.HAServiceProtocol;
@@ -86,6 +87,7 @@ public class AdminService extends Compos
private String rmId;
private boolean autoFailoverEnabled;
+ private EmbeddedElectorService embeddedElector;
private Server server;
private InetSocketAddress masterServiceAddress;
@@ -106,7 +108,8 @@ public class AdminService extends Compos
autoFailoverEnabled = HAUtil.isAutomaticFailoverEnabled(conf);
if (autoFailoverEnabled) {
if (HAUtil.isAutomaticFailoverEmbedded(conf)) {
- addIfService(createEmbeddedElectorService());
+ embeddedElector = createEmbeddedElectorService();
+ addIfService(embeddedElector);
}
}
}
@@ -181,6 +184,13 @@ public class AdminService extends Compos
return new EmbeddedElectorService(rmContext);
}
+ @InterfaceAudience.Private
+ void resetLeaderElection() {
+ if (embeddedElector != null) {
+ embeddedElector.resetLeaderElection();
+ }
+ }
+
private UserGroupInformation checkAccess(String method) throws IOException {
return RMServerUtils.verifyAccess(adminAcl, method, LOG);
}
Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/EmbeddedElectorService.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/EmbeddedElectorService.java?rev=1594356&r1=1594355&r2=1594356&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/EmbeddedElectorService.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/EmbeddedElectorService.java Tue May 13 20:00:44 2014
@@ -194,4 +194,9 @@ public class EmbeddedElectorService exte
}
return true;
}
+
+ public void resetLeaderElection() {
+ elector.quitElection(false);
+ elector.joinElection(localActiveNodeInfo);
+ }
}
Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java?rev=1594356&r1=1594355&r2=1594356&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java Tue May 13 20:00:44 2014
@@ -664,6 +664,7 @@ public class ResourceManager extends Com
// Transition to standby and reinit active services
LOG.info("Transitioning RM to Standby mode");
rm.transitionToStandby(true);
+ rm.adminService.resetLeaderElection();
return;
} catch (Exception e) {
LOG.fatal("Failed to transition RM to Standby mode.");
Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java?rev=1594356&r1=1594355&r2=1594356&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java Tue May 13 20:00:44 2014
@@ -652,12 +652,14 @@ public class MiniYARNCluster extends Com
*/
public boolean waitForNodeManagersToConnect(long timeout)
throws YarnException, InterruptedException {
- ResourceManager rm = getResourceManager();
GetClusterMetricsRequest req = GetClusterMetricsRequest.newInstance();
-
for (int i = 0; i < timeout / 100; i++) {
- if (nodeManagers.length == rm.getClientRMService().getClusterMetrics(req)
- .getClusterMetrics().getNumNodeManagers()) {
+ ResourceManager rm = getResourceManager();
+ if (rm == null) {
+ throw new YarnException("Can not find the active RM.");
+ }
+ else if (nodeManagers.length == rm.getClientRMService()
+ .getClusterMetrics(req).getClusterMetrics().getNumNodeManagers()) {
return true;
}
Thread.sleep(100);