You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by wa...@apache.org on 2015/09/04 23:42:27 UTC
[22/50] [abbrv] hadoop git commit: YARN-3893. Both RM in active state
when Admin#transitionToActive failure from refreshAll() (Bibin A Chundatt via
rohithsharmaks)
YARN-3893. Both RM in active state when Admin#transitionToActive failure from refreshAll() (Bibin A Chundatt via rohithsharmaks)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/7d6687fe
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/7d6687fe
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/7d6687fe
Branch: refs/heads/YARN-1197
Commit: 7d6687fe76f6152a577ff2298c358dd30fce41fb
Parents: 095ab9a
Author: Rohith Sharma K S <ro...@apache.org>
Authored: Wed Sep 2 15:22:48 2015 +0530
Committer: Rohith Sharma K S <ro...@apache.org>
Committed: Wed Sep 2 15:22:48 2015 +0530
----------------------------------------------------------------------
hadoop-yarn-project/CHANGES.txt | 3 +
.../server/resourcemanager/AdminService.java | 20 ++++--
.../resourcemanager/RMFatalEventType.java | 5 +-
.../yarn/server/resourcemanager/TestRMHA.java | 71 ++++++++++++++++++++
4 files changed, 94 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7d6687fe/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index aade2d7..13fe9b0 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -868,6 +868,9 @@ Release 2.7.2 - UNRELEASED
YARN-3857: Memory leak in ResourceManager with SIMPLE mode.
(mujunchao via zxu)
+ YARN-3893. Both RM in active state when Admin#transitionToActive failure
+ from refeshAll() (Bibin A Chundatt via rohithsharmaks)
+
Release 2.7.1 - 2015-07-06
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7d6687fe/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java
index 20c9800..d96ed8c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java
@@ -297,6 +297,7 @@ public class AdminService extends CompositeService implements
}
}
+ @SuppressWarnings("unchecked")
@Override
public synchronized void transitionToActive(
HAServiceProtocol.StateChangeRequestInfo reqInfo) throws IOException {
@@ -312,10 +313,6 @@ public class AdminService extends CompositeService implements
checkHaStateChange(reqInfo);
try {
rm.transitionToActive();
- // call all refresh*s for active RM to get the updated configurations.
- refreshAll();
- RMAuditLogger.logSuccess(user.getShortUserName(),
- "transitionToActive", "RMHAProtocolService");
} catch (Exception e) {
RMAuditLogger.logFailure(user.getShortUserName(), "transitionToActive",
"", "RMHAProtocolService",
@@ -323,6 +320,21 @@ public class AdminService extends CompositeService implements
throw new ServiceFailedException(
"Error when transitioning to Active mode", e);
}
+ try {
+ // call all refresh*s for active RM to get the updated configurations.
+ refreshAll();
+ } catch (Exception e) {
+ LOG.error("RefreshAll failed so firing fatal event", e);
+ rmContext
+ .getDispatcher()
+ .getEventHandler()
+ .handle(
+ new RMFatalEvent(RMFatalEventType.TRANSITION_TO_ACTIVE_FAILED, e));
+ throw new ServiceFailedException(
+ "Error on refreshAll during transistion to Active", e);
+ }
+ RMAuditLogger.logSuccess(user.getShortUserName(), "transitionToActive",
+ "RMHAProtocolService");
}
@Override
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7d6687fe/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMFatalEventType.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMFatalEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMFatalEventType.java
index 789c018..87cc496 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMFatalEventType.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMFatalEventType.java
@@ -26,5 +26,8 @@ public enum RMFatalEventType {
STATE_STORE_OP_FAILED,
// Source <- Embedded Elector
- EMBEDDED_ELECTOR_FAILED
+ EMBEDDED_ELECTOR_FAILED,
+
+ // Source <- Admin Service
+ TRANSITION_TO_ACTIVE_FAILED
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7d6687fe/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHA.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHA.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHA.java
index 0200e85..62cfe84 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHA.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHA.java
@@ -43,6 +43,8 @@ import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.conf.HAUtil;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.event.DrainDispatcher;
+import org.apache.hadoop.yarn.event.Event;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData;
@@ -52,6 +54,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import org.junit.Assert;
@@ -577,6 +580,56 @@ public class TestRMHA {
assertEquals(0, rm.getRMContext().getRMApps().size());
}
+ @Test(timeout = 90000)
+ public void testTransitionedToActiveRefreshFail() throws Exception {
+ configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
+ YarnConfiguration conf = new YarnConfiguration(configuration);
+ configuration = new CapacitySchedulerConfiguration(conf);
+ rm = new MockRM(configuration) {
+ @Override
+ protected AdminService createAdminService() {
+ return new AdminService(this, getRMContext()) {
+ @Override
+ protected void setConfig(Configuration conf) {
+ super.setConfig(configuration);
+ }
+ };
+ }
+
+ @Override
+ protected Dispatcher createDispatcher() {
+ return new FailFastDispatcher();
+ }
+ };
+
+ rm.init(configuration);
+ rm.start();
+ final StateChangeRequestInfo requestInfo =
+ new StateChangeRequestInfo(
+ HAServiceProtocol.RequestSource.REQUEST_BY_USER);
+
+ configuration.set("yarn.scheduler.capacity.root.default.capacity", "100");
+ rm.adminService.transitionToStandby(requestInfo);
+ assertEquals(HAServiceState.STANDBY, rm.getRMContext().getHAServiceState());
+ configuration.set("yarn.scheduler.capacity.root.default.capacity", "200");
+ try {
+ rm.adminService.transitionToActive(requestInfo);
+ } catch (Exception e) {
+ assertTrue("Error on refreshAll during transistion to Active".contains(e
+ .getMessage()));
+ }
+ FailFastDispatcher dispatcher =
+ ((FailFastDispatcher) rm.rmContext.getDispatcher());
+ dispatcher.await();
+ assertEquals(1, dispatcher.getEventCount());
+ // Making correct conf and check the state
+ configuration.set("yarn.scheduler.capacity.root.default.capacity", "100");
+ rm.adminService.transitionToActive(requestInfo);
+ assertEquals(HAServiceState.ACTIVE, rm.getRMContext().getHAServiceState());
+ rm.adminService.transitionToStandby(requestInfo);
+ assertEquals(HAServiceState.STANDBY, rm.getRMContext().getHAServiceState());
+ }
+
public void innerTestHAWithRMHostName(boolean includeBindHost) {
//this is run two times, with and without a bind host configured
if (includeBindHost) {
@@ -713,4 +766,22 @@ public class TestRMHA {
return this.stopped;
}
}
+
+ class FailFastDispatcher extends DrainDispatcher {
+ int eventreceived = 0;
+
+ @SuppressWarnings("rawtypes")
+ @Override
+ protected void dispatch(Event event) {
+ if (event.getType() == RMFatalEventType.TRANSITION_TO_ACTIVE_FAILED) {
+ eventreceived++;
+ } else {
+ super.dispatch(event);
+ }
+ }
+
+ public int getEventCount() {
+ return eventreceived;
+ }
+ }
}