You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by vi...@apache.org on 2014/11/10 04:10:56 UTC
hadoop git commit: YARN-2834. Fixed ResourceManager to ignore
token-renewal failures on recovery consistent with the (somewhat incorrect)
behaviour in the non-recovery case. Contributed by Jian He. Fixed a minor
import issue in the test during cherry-pick
Repository: hadoop
Updated Branches:
refs/heads/branch-2.6 be31db82f -> 6a9534e9c
YARN-2834. Fixed ResourceManager to ignore token-renewal failures on recovery consistent with the (somewhat incorrect) behaviour in the non-recovery case. Contributed by Jian He.
Fixed a minor import issue in the test during cherry-pick from trunk.
(cherry picked from commit e76faebc9589654e83c8244ef9aff88391e56b80)
Conflicts:
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/6a9534e9
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/6a9534e9
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/6a9534e9
Branch: refs/heads/branch-2.6
Commit: 6a9534e9cf8ede9b8a2dd368ac1ea8ea2747def6
Parents: be31db8
Author: Vinod Kumar Vavilapalli <vi...@apache.org>
Authored: Sun Nov 9 18:56:06 2014 -0800
Committer: Vinod Kumar Vavilapalli <vi...@apache.org>
Committed: Sun Nov 9 19:09:48 2014 -0800
----------------------------------------------------------------------
hadoop-yarn-project/CHANGES.txt | 4 ++
.../server/resourcemanager/rmapp/RMAppImpl.java | 27 +++-------
.../rmapp/attempt/RMAppAttemptImpl.java | 4 +-
.../TestWorkPreservingRMRestart.java | 54 ++++++++++++++++++--
.../rmapp/TestRMAppTransitions.java | 28 ----------
5 files changed, 63 insertions(+), 54 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/6a9534e9/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 51e8b34..06aa52b 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -853,6 +853,10 @@ Release 2.6.0 - 2014-11-15
YARN-2830. Add backwords compatible ContainerId.newInstance constructor.
(jeagles via acmurthy)
+ YARN-2834. Fixed ResourceManager to ignore token-renewal failures on recovery
+ consistent with the (somewhat incorrect) behaviour in the non-recovery case.
+ (Jian He via vinodkv)
+
Release 2.5.2 - UNRELEASED
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/6a9534e9/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
index 9b10872..ad92cc4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
@@ -714,7 +714,7 @@ public class RMAppImpl implements RMApp, Recoverable {
}
@Override
- public void recover(RMState state) throws Exception{
+ public void recover(RMState state) {
ApplicationState appState = state.getApplicationState().get(getApplicationId());
this.recoveredFinalState = appState.getState();
LOG.info("Recovering app: " + getApplicationId() + " with " +
@@ -830,14 +830,7 @@ public class RMAppImpl implements RMApp, Recoverable {
public RMAppState transition(RMAppImpl app, RMAppEvent event) {
RMAppRecoverEvent recoverEvent = (RMAppRecoverEvent) event;
- try {
- app.recover(recoverEvent.getRMState());
- } catch (Exception e) {
- String msg = app.applicationId + " failed to recover. " + e.getMessage();
- failToRecoverApp(app, event, msg, e);
- return RMAppState.FINAL_SAVING;
- }
-
+ app.recover(recoverEvent.getRMState());
// The app has completed.
if (app.recoveredFinalState != null) {
app.recoverAppAttempts();
@@ -852,10 +845,10 @@ public class RMAppImpl implements RMApp, Recoverable {
app.getApplicationId(), app.parseCredentials(),
app.submissionContext.getCancelTokensWhenComplete(), app.getUser());
} catch (Exception e) {
- String msg = "Failed to renew delegation token on recovery for "
- + app.applicationId + e.getMessage();
- failToRecoverApp(app, event, msg, e);
- return RMAppState.FINAL_SAVING;
+ String msg = "Failed to renew token for " + app.applicationId
+ + " on recovery : " + e.getMessage();
+ app.diagnostics.append(msg);
+ LOG.error(msg, e);
}
}
@@ -892,14 +885,6 @@ public class RMAppImpl implements RMApp, Recoverable {
// Thus we return ACCECPTED state on recovery.
return RMAppState.ACCEPTED;
}
-
- private void failToRecoverApp(RMAppImpl app, RMAppEvent event, String msg,
- Exception e) {
- app.diagnostics.append(msg);
- LOG.error(msg, e);
- app.rememberTargetTransitionsAndStoreState(event, new FinalTransition(
- RMAppState.FAILED), RMAppState.FAILED, RMAppState.FAILED);
- }
}
private static final class AddApplicationToSchedulerTransition extends
http://git-wip-us.apache.org/repos/asf/hadoop/blob/6a9534e9/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
index d3fe151..0d7e334 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
@@ -789,7 +789,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
}
@Override
- public void recover(RMState state) throws Exception {
+ public void recover(RMState state) {
ApplicationState appState =
state.getApplicationState().get(getAppAttemptId().getApplicationId());
ApplicationAttemptState attemptState =
@@ -823,7 +823,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
}
private void recoverAppAttemptCredentials(Credentials appAttemptTokens,
- RMAppAttemptState state) throws IOException {
+ RMAppAttemptState state) {
if (appAttemptTokens == null || state == RMAppAttemptState.FAILED
|| state == RMAppAttemptState.FINISHED
|| state == RMAppAttemptState.KILLED) {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/6a9534e9/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java
index f0b1278..73bbd81 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java
@@ -18,13 +18,12 @@
package org.apache.hadoop.yarn.server.resourcemanager;
-import org.apache.hadoop.security.token.Token;
-import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
+import java.io.IOException;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Arrays;
@@ -33,9 +32,10 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.service.Service;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
@@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
+import org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
@@ -68,6 +69,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueu
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler;
+import org.apache.hadoop.yarn.server.resourcemanager.security.DelegationTokenRenewer;
import org.apache.hadoop.yarn.util.ControlledClock;
import org.apache.hadoop.yarn.util.SystemClock;
import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator;
@@ -935,4 +937,50 @@ public class TestWorkPreservingRMRestart {
am0.unregisterAppAttempt(false);
}
+ @Test (timeout = 30000)
+ public void testAppFailedToRenewTokenOnRecovery() throws Exception {
+ conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION,
+ "kerberos");
+ conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
+ UserGroupInformation.setConfiguration(conf);
+ MemoryRMStateStore memStore = new MemoryRMStateStore();
+ memStore.init(conf);
+ MockRM rm1 = new TestSecurityMockRM(conf, memStore);
+ rm1.start();
+ MockNM nm1 =
+ new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
+ nm1.registerNode();
+ RMApp app1 = rm1.submitApp(200);
+ MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+
+ MockRM rm2 = new TestSecurityMockRM(conf, memStore) {
+ protected DelegationTokenRenewer createDelegationTokenRenewer() {
+ return new DelegationTokenRenewer() {
+ @Override
+ public void addApplicationSync(ApplicationId applicationId,
+ Credentials ts, boolean shouldCancelAtEnd, String user)
+ throws IOException {
+ throw new IOException("Token renew failed !!");
+ }
+ };
+ }
+ };
+ nm1.setResourceTrackerService(rm2.getResourceTrackerService());
+ rm2.start();
+ NMContainerStatus containerStatus =
+ TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 1,
+ ContainerState.RUNNING);
+ nm1.registerNode(Arrays.asList(containerStatus), null);
+
+ // am re-register
+ rm2.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
+ am1.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
+ am1.registerAppAttempt(true);
+ rm2.waitForState(app1.getApplicationId(), RMAppState.RUNNING);
+
+ // Because the token expired, am could crash.
+ nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
+ rm2.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
+ rm2.waitForState(app1.getApplicationId(), RMAppState.FAILED);
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/6a9534e9/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java
index ecb6b5c..bbfb0ee 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java
@@ -540,34 +540,6 @@ public class TestRMAppTransitions {
}
@Test (timeout = 30000)
- public void testAppRecoverToFailed() throws IOException {
- LOG.info("--- START: testAppRecoverToFailed ---");
- ApplicationSubmissionContext sub =
- Records.newRecord(ApplicationSubmissionContext.class);
- ContainerLaunchContext clc =
- Records.newRecord(ContainerLaunchContext.class);
- Credentials credentials = new Credentials();
- DataOutputBuffer dob = new DataOutputBuffer();
- credentials.writeTokenStorageToStream(dob);
- ByteBuffer securityTokens =
- ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
- clc.setTokens(securityTokens);
- sub.setAMContainerSpec(clc);
-
- RMApp application = createNewTestApp(sub);
- // NEW => FINAL_SAVING, event RMAppEventType.RECOVER
- RMState state = new RMState();
- RMAppEvent event =
- new RMAppRecoverEvent(application.getApplicationId(), state);
- // NPE will throw on recovery.
- application.handle(event);
- assertAppState(RMAppState.FINAL_SAVING, application);
- sendAppUpdateSavedEvent(application);
- rmDispatcher.await();
- assertAppState(RMAppState.FAILED, application);
- }
-
- @Test (timeout = 30000)
public void testAppNewKill() throws IOException {
LOG.info("--- START: testAppNewKill ---");