You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ac...@apache.org on 2014/11/13 19:47:49 UTC
[1/5] hadoop git commit: YARN-2635. TestRM, TestRMRestart,
TestClientToAMTokens should run with both CS and FS. (Wei Yan and
kasha via kasha)
Repository: hadoop
Updated Branches:
refs/heads/branch-2.6.0 a672d7566 -> a97e9c023
YARN-2635. TestRM, TestRMRestart, TestClientToAMTokens should run with both CS and FS. (Wei Yan and kasha via kasha)
(cherry picked from commit 80d11eb68e60f88e16d7d41edecbddfc935a6b10)
Conflicts:
hadoop-yarn-project/CHANGES.txt
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestClientToAMTokens.java
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/33031c9c
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/33031c9c
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/33031c9c
Branch: refs/heads/branch-2.6.0
Commit: 33031c9cc709b61c86b1df7bea20b1ab6a23eebb
Parents: a672d75
Author: Karthik Kambatla <ka...@apache.org>
Authored: Thu Oct 2 23:20:29 2014 -0700
Committer: Arun C. Murthy <ac...@apache.org>
Committed: Thu Nov 13 10:41:33 2014 -0800
----------------------------------------------------------------------
hadoop-yarn-project/CHANGES.txt | 2 -
.../ParameterizedSchedulerTestBase.java | 92 +++++++
.../yarn/server/resourcemanager/TestRM.java | 40 ++-
.../server/resourcemanager/TestRMRestart.java | 253 ++++++++++---------
.../security/TestClientToAMTokens.java | 18 +-
5 files changed, 253 insertions(+), 152 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/33031c9c/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index c809fd0..e42c9b2 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -6,8 +6,6 @@ Release 2.6.0 - 2014-11-15
NEW FEATURES
- YARN-1964. Create Docker analog of the LinuxContainerExecutor in YARN. (Abin
- Shahab via raviprak)
YARN-2131. Add a way to format the RMStateStore. (Robert Kanter via kasha)
http://git-wip-us.apache.org/repos/asf/hadoop/blob/33031c9c/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/ParameterizedSchedulerTestBase.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/ParameterizedSchedulerTestBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/ParameterizedSchedulerTestBase.java
new file mode 100644
index 0000000..cfd1600
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/ParameterizedSchedulerTestBase.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager;
+
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairSchedulerConfiguration;
+
+
+import org.junit.Before;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.Arrays;
+import java.util.Collection;
+
+/**
+ * Base class for ResourceManager tests that should run against both the
+ * CapacityScheduler and the FairScheduler.
+ *
+ * The JUnit {@link Parameterized} runner instantiates each subclass once per
+ * {@link SchedulerType}. Before every test, {@link #configureScheduler()}
+ * builds a fresh {@link YarnConfiguration} that selects the matching
+ * scheduler; subclasses obtain it through {@link #getConf()}.
+ */
+@RunWith(Parameterized.class)
+public abstract class ParameterizedSchedulerTestBase {
+  /** Directory for test artifacts: "test.build.data", or /tmp if unset. */
+  protected final static String TEST_DIR =
+      new File(System.getProperty("test.build.data", "/tmp")).getAbsolutePath();
+  /** Allocation file generated for the FairScheduler runs. */
+  private final static String FS_ALLOC_FILE =
+      new File(TEST_DIR, "test-fs-queues.xml").getAbsolutePath();
+
+  /** Scheduler this test instance is parameterized with; never reassigned. */
+  private final SchedulerType schedulerType;
+  /** Rebuilt before each test by {@link #configureScheduler()}. */
+  private YarnConfiguration conf = null;
+
+  /** Schedulers every parameterized test is run against. */
+  public enum SchedulerType {
+    CAPACITY, FAIR
+  }
+
+  /**
+   * @param type the scheduler to configure for this test instance
+   */
+  public ParameterizedSchedulerTestBase(SchedulerType type) {
+    schedulerType = type;
+  }
+
+  /**
+   * @return the configuration created by the most recent call to
+   *         {@link #configureScheduler()}, or null if it has not run yet
+   */
+  public YarnConfiguration getConf() {
+    return conf;
+  }
+
+  /**
+   * @return one single-element parameter array per {@link SchedulerType}
+   */
+  @Parameterized.Parameters
+  public static Collection<SchedulerType[]> getParameters() {
+    return Arrays.asList(new SchedulerType[][]{
+        {SchedulerType.CAPACITY}, {SchedulerType.FAIR}});
+  }
+
+  /**
+   * Creates a new configuration and points it at the scheduler selected by
+   * the test parameter.
+   *
+   * @throws IOException if the FairScheduler allocation file cannot be
+   *         written
+   */
+  @Before
+  public void configureScheduler() throws IOException {
+    conf = new YarnConfiguration();
+    switch (schedulerType) {
+      case CAPACITY:
+        conf.set(YarnConfiguration.RM_SCHEDULER,
+            CapacityScheduler.class.getName());
+        break;
+      case FAIR:
+        configureFairScheduler(conf);
+        break;
+    }
+  }
+
+  /**
+   * Writes the FairScheduler allocation file and configures {@code conf} to
+   * use the FairScheduler with that file.
+   *
+   * @param conf the configuration to update
+   * @throws IOException if the allocation file cannot be opened or written
+   */
+  private void configureFairScheduler(YarnConfiguration conf) throws IOException {
+    // Disable queueMaxAMShare limitation for fair scheduler
+    PrintWriter out = new PrintWriter(new FileWriter(FS_ALLOC_FILE));
+    try {
+      out.println("<?xml version=\"1.0\"?>");
+      out.println("<allocations>");
+      out.println("<queueMaxAMShareDefault>-1.0</queueMaxAMShareDefault>");
+      out.println("</allocations>");
+    } finally {
+      // Always release the file handle, even if a write fails unexpectedly.
+      out.close();
+    }
+    // PrintWriter swallows IOExceptions (including those raised by close());
+    // surface them so a truncated allocation file fails the test here rather
+    // than confusing the scheduler later.
+    if (out.checkError()) {
+      throw new IOException(
+          "Failed to write fair scheduler allocation file " + FS_ALLOC_FILE);
+    }
+
+    conf.set(YarnConfiguration.RM_SCHEDULER, FairScheduler.class.getName());
+    conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, FS_ALLOC_FILE);
+  }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/33031c9c/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java
index 42a3a00..4865420 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.yarn.server.resourcemanager;
+import org.junit.Before;
import static org.mockito.Matchers.argThat;
import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.spy;
@@ -65,7 +66,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptE
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM;
import org.apache.log4j.Level;
@@ -75,13 +75,23 @@ import org.junit.Test;
import org.mockito.ArgumentMatcher;
@SuppressWarnings({"unchecked", "rawtypes"})
-public class TestRM {
-
+public class TestRM extends ParameterizedSchedulerTestBase {
private static final Log LOG = LogFactory.getLog(TestRM.class);
// Milliseconds to sleep for when waiting for something to happen
private final static int WAIT_SLEEP_MS = 100;
+ private YarnConfiguration conf;
+
+ public TestRM(SchedulerType type) {
+ super(type);
+ }
+
+ @Before
+ public void setup() {
+ conf = getConf();
+ }
+
@After
public void tearDown() {
ClusterMetrics.destroy();
@@ -93,7 +103,7 @@ public class TestRM {
public void testGetNewAppId() throws Exception {
Logger rootLogger = LogManager.getRootLogger();
rootLogger.setLevel(Level.DEBUG);
- MockRM rm = new MockRM();
+ MockRM rm = new MockRM(conf);
rm.start();
GetNewApplicationResponse resp = rm.getNewAppId();
@@ -106,7 +116,7 @@ public class TestRM {
public void testAppWithNoContainers() throws Exception {
Logger rootLogger = LogManager.getRootLogger();
rootLogger.setLevel(Level.DEBUG);
- MockRM rm = new MockRM();
+ MockRM rm = new MockRM(conf);
rm.start();
MockNM nm1 = rm.registerNode("h1:1234", 5120);
@@ -128,7 +138,6 @@ public class TestRM {
public void testAppOnMultiNode() throws Exception {
Logger rootLogger = LogManager.getRootLogger();
rootLogger.setLevel(Level.DEBUG);
- YarnConfiguration conf = new YarnConfiguration();
conf.set("yarn.scheduler.capacity.node-locality-delay", "-1");
MockRM rm = new MockRM(conf);
rm.start();
@@ -188,7 +197,6 @@ public class TestRM {
// corresponding NM Token.
@Test (timeout = 20000)
public void testNMTokenSentForNormalContainer() throws Exception {
- YarnConfiguration conf = new YarnConfiguration();
conf.set(YarnConfiguration.RM_SCHEDULER,
CapacityScheduler.class.getCanonicalName());
MockRM rm = new MockRM(conf);
@@ -241,7 +249,7 @@ public class TestRM {
@Test (timeout = 40000)
public void testNMToken() throws Exception {
- MockRM rm = new MockRM();
+ MockRM rm = new MockRM(conf);
try {
rm.start();
MockNM nm1 = rm.registerNode("h1:1234", 10000);
@@ -425,8 +433,6 @@ public class TestRM {
@Test (timeout = 300000)
public void testActivatingApplicationAfterAddingNM() throws Exception {
- YarnConfiguration conf = new YarnConfiguration();
-
MockRM rm1 = new MockRM(conf);
// start like normal because state is empty
@@ -472,7 +478,6 @@ public class TestRM {
// is killed or failed, so that client doesn't get the wrong information.
@Test (timeout = 80000)
public void testInvalidateAMHostPortWhenAMFailedOrKilled() throws Exception {
- YarnConfiguration conf = new YarnConfiguration();
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
MockRM rm1 = new MockRM(conf);
rm1.start();
@@ -525,7 +530,6 @@ public class TestRM {
@Test (timeout = 60000)
public void testInvalidatedAMHostPortOnAMRestart() throws Exception {
- YarnConfiguration conf = new YarnConfiguration();
MockRM rm1 = new MockRM(conf);
rm1.start();
MockNM nm1 =
@@ -558,7 +562,6 @@ public class TestRM {
@Test (timeout = 60000)
public void testApplicationKillAtAcceptedState() throws Exception {
- YarnConfiguration conf = new YarnConfiguration();
final Dispatcher dispatcher = new AsyncDispatcher() {
@Override
public EventHandler getEventHandler() {
@@ -635,15 +638,4 @@ public class TestRM {
Assert.assertEquals(appsSubmitted + 1, metrics.getAppsSubmitted());
}
- public static void main(String[] args) throws Exception {
- TestRM t = new TestRM();
- t.testGetNewAppId();
- t.testAppWithNoContainers();
- t.testAppOnMultiNode();
- t.testNMToken();
- t.testActivatingApplicationAfterAddingNM();
- t.testInvalidateAMHostPortWhenAMFailedOrKilled();
- t.testInvalidatedAMHostPortOnAMRestart();
- t.testApplicationKillAtAcceptedState();
- }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/33031c9c/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
index b93631f..a0f8627 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
@@ -29,7 +29,6 @@ import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.InetSocketAddress;
-import java.net.UnknownHostException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
@@ -108,7 +107,7 @@ import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
-public class TestRMRestart {
+public class TestRMRestart extends ParameterizedSchedulerTestBase {
private final static File TEMP_DIR = new File(System.getProperty(
"test.build.data", "/tmp"), "decommision");
private File hostFile = new File(TEMP_DIR + File.separator + "hostFile.txt");
@@ -116,12 +115,17 @@ public class TestRMRestart {
// Fake rmAddr for token-renewal
private static InetSocketAddress rmAddr;
+ private List<MockRM> rms = new ArrayList<MockRM>();
+
+ public TestRMRestart(SchedulerType type) {
+ super(type);
+ }
@Before
- public void setup() throws UnknownHostException {
+ public void setup() throws IOException {
+ conf = getConf();
Logger rootLogger = LogManager.getRootLogger();
rootLogger.setLevel(Level.DEBUG);
- conf = new YarnConfiguration();
UserGroupInformation.setConfiguration(conf);
conf.set(YarnConfiguration.RECOVERY_ENABLED, "true");
conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
@@ -131,9 +135,24 @@ public class TestRMRestart {
@After
public void tearDown() {
+ for (MockRM rm : rms) {
+ rm.stop();
+ }
+ rms.clear();
+
TEMP_DIR.delete();
}
+ /**
+ * Creates a MockRM from the given configuration and state store and adds it
+ * to {@code rms}, the list that {@link #tearDown()} iterates to stop every
+ * registered RM.
+ *
+ * @param conf configuration passed to the MockRM constructor
+ * @param store state store passed to the MockRM constructor
+ * @return a new MockRM that will be stopped at the end of the test.
+ */
+ private MockRM createMockRM(YarnConfiguration conf, RMStateStore store) {
+ MockRM rm = new MockRM(conf, store);
+ rms.add(rm);
+ return rm;
+ }
+
@SuppressWarnings("rawtypes")
@Test (timeout=180000)
public void testRMRestart() throws Exception {
@@ -150,7 +169,7 @@ public class TestRMRestart {
// PHASE 1: create state in an RM
// start RM
- MockRM rm1 = new MockRM(conf, memStore);
+ MockRM rm1 = createMockRM(conf, memStore);
// start like normal because state is empty
rm1.start();
@@ -246,7 +265,7 @@ public class TestRMRestart {
// PHASE 2: create new RM and start from old state
// create new RM to represent restart and recover state
- MockRM rm2 = new MockRM(conf, memStore);
+ MockRM rm2 = createMockRM(conf, memStore);
// start new RM
rm2.start();
@@ -317,7 +336,7 @@ public class TestRMRestart {
NMContainerStatus status =
TestRMRestart
.createNMContainerStatus(loadedApp1.getCurrentAppAttempt()
- .getAppAttemptId(), 1, ContainerState.COMPLETE);
+ .getAppAttemptId(), 1, ContainerState.COMPLETE);
nm1.registerNode(Arrays.asList(status), null);
nm2.registerNode();
@@ -414,7 +433,7 @@ public class TestRMRestart {
rmState.getApplicationState();
// start RM
- MockRM rm1 = new MockRM(conf, memStore);
+ MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 =
new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
@@ -443,13 +462,11 @@ public class TestRMRestart {
rm1.waitForState(app0.getApplicationId(), RMAppState.ACCEPTED);
// start new RM
- MockRM rm2 = new MockRM(conf, memStore);
+ MockRM rm2 = createMockRM(conf, memStore);
rm2.start();
// assert the previous AM state is loaded back on RM recovery.
rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED);
- rm1.stop();
- rm2.stop();
}
@Test (timeout = 60000)
@@ -473,7 +490,7 @@ public class TestRMRestart {
rmState.getApplicationState();
// start RM
- final MockRM rm1 = new MockRM(conf, memStore);
+ final MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 =
new MockNM("127.0.0.1:1234" , 16382, rm1.getResourceTrackerService());
@@ -497,8 +514,7 @@ public class TestRMRestart {
.getAppAttemptState(), RMAppAttemptState.RUNNING);
// start new RM.
- MockRM rm2 = null;
- rm2 = new MockRM(conf, memStore);
+ MockRM rm2 = createMockRM(conf, memStore);
rm2.start();
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
@@ -525,7 +541,7 @@ public class TestRMRestart {
NMContainerStatus status =
TestRMRestart.createNMContainerStatus(
- am2.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
+ am2.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
nm1.registerNode(Arrays.asList(status), null);
rm2.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.FAILED);
launchAM(rmApp, rm2, nm1);
@@ -535,8 +551,7 @@ public class TestRMRestart {
// Now restart RM ...
// Setting AMLivelinessMonitor interval to be 10 Secs.
conf.setInt(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, 10000);
- MockRM rm3 = null;
- rm3 = new MockRM(conf, memStore);
+ MockRM rm3 = createMockRM(conf, memStore);
rm3.start();
// Wait for RM to process all the events as a part of rm recovery.
@@ -583,8 +598,7 @@ public class TestRMRestart {
memStore.getState().getApplicationState().get(app2.getApplicationId())
.getAttemptCount());
- MockRM rm4 = null;
- rm4 = new MockRM(conf, memStore);
+ MockRM rm4 = createMockRM(conf, memStore);
rm4.start();
rmApp = rm4.getRMContext().getRMApps().get(app1.getApplicationId());
@@ -640,7 +654,7 @@ public class TestRMRestart {
rmState.getApplicationState();
// start RM
- MockRM rm1 = new MockRM(conf, memStore);
+ MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 = rm1.registerNode("127.0.0.1:1234", 15120);
RMApp app0 = rm1.submitApp(200);
@@ -657,7 +671,7 @@ public class TestRMRestart {
Assert.assertNull(rmAppState.get(app0.getApplicationId()).getState());
// start RM
- MockRM rm2 = new MockRM(conf, memStore);
+ MockRM rm2 = createMockRM(conf, memStore);
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
rm2.start();
@@ -666,7 +680,7 @@ public class TestRMRestart {
rm2.waitForState(app0.getApplicationId(), RMAppState.FINISHED);
// app final state is saved via the finish event from attempt.
Assert.assertEquals(RMAppState.FINISHED,
- rmAppState.get(app0.getApplicationId()).getState());
+ rmAppState.get(app0.getApplicationId()).getState());
}
@Test (timeout = 60000)
@@ -679,7 +693,7 @@ public class TestRMRestart {
rmState.getApplicationState();
// start RM
- MockRM rm1 = new MockRM(conf, memStore);
+ MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 =
new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
@@ -701,7 +715,7 @@ public class TestRMRestart {
appState.getAttempt(am0.getApplicationAttemptId()).getState());
// start new RM
- MockRM rm2 = new MockRM(conf, memStore);
+ MockRM rm2 = createMockRM(conf, memStore);
rm2.start();
RMApp loadedApp0 = rm2.getRMContext().getRMApps().get(app0.getApplicationId());
rm2.waitForState(app0.getApplicationId(), RMAppState.FAILED);
@@ -714,8 +728,6 @@ public class TestRMRestart {
.contains("Failing the application."));
// failed diagnostics from attempt is lost because the diagnostics from
// attempt is not yet available by the time app is saving the app state.
- rm1.stop();
- rm2.stop();
}
@Test (timeout = 60000)
@@ -729,7 +741,7 @@ public class TestRMRestart {
rmState.getApplicationState();
// start RM
- MockRM rm1 = new MockRM(conf, memStore);
+ MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 =
new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
@@ -751,7 +763,7 @@ public class TestRMRestart {
appState.getAttempt(am0.getApplicationAttemptId()).getState());
// restart rm
- MockRM rm2 = new MockRM(conf, memStore);
+ MockRM rm2 = createMockRM(conf, memStore);
rm2.start();
RMApp loadedApp0 = rm2.getRMContext().getRMApps().get(app0.getApplicationId());
rm2.waitForState(app0.getApplicationId(), RMAppState.KILLED);
@@ -761,9 +773,7 @@ public class TestRMRestart {
ApplicationReport appReport = verifyAppReportAfterRMRestart(app0, rm2);
Assert.assertEquals(app0.getDiagnostics().toString(),
- appReport.getDiagnostics());
- rm1.stop();
- rm2.stop();
+ appReport.getDiagnostics());
}
@Test (timeout = 60000)
@@ -786,7 +796,7 @@ public class TestRMRestart {
memStore.init(conf);
// start RM
- MockRM rm1 = new MockRM(conf, memStore);
+ MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
// create app
RMApp app0 =
@@ -798,7 +808,7 @@ public class TestRMRestart {
rm1.waitForState(app0.getApplicationId(), RMAppState.KILLED);
// restart rm
- MockRM rm2 = new MockRM(conf, memStore);
+ MockRM rm2 = createMockRM(conf, memStore);
rm2.start();
RMApp loadedApp0 =
rm2.getRMContext().getRMApps().get(app0.getApplicationId());
@@ -817,7 +827,7 @@ public class TestRMRestart {
rmState.getApplicationState();
// start RM
- MockRM rm1 = new MockRM(conf, memStore);
+ MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 =
new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
@@ -844,7 +854,7 @@ public class TestRMRestart {
Assert.assertEquals(app0.getFinishTime(), appState.getFinishTime());
// restart rm
- MockRM rm2 = new MockRM(conf, memStore);
+ MockRM rm2 = createMockRM(conf, memStore);
rm2.start();
// verify application report returns the same app info as the app info
@@ -853,9 +863,6 @@ public class TestRMRestart {
Assert.assertEquals(FinalApplicationStatus.SUCCEEDED,
appReport.getFinalApplicationStatus());
Assert.assertEquals("trackingUrl", appReport.getOriginalTrackingUrl());
-
- rm1.stop();
- rm2.stop();
}
@Test (timeout = 60000)
@@ -865,7 +872,7 @@ public class TestRMRestart {
memStore.init(conf);
// start RM
- MockRM rm1 = new MockRM(conf, memStore);
+ MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 =
new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
@@ -902,7 +909,7 @@ public class TestRMRestart {
return spy(super.createRMAppManager());
}
};
-
+ rms.add(rm2);
rm2.start();
GetApplicationsRequest request1 =
@@ -949,9 +956,6 @@ public class TestRMRestart {
// check application summary is logged for the completed apps after RM restart.
verify(rm2.getRMAppManager(), times(3)).logApplicationSummary(
isA(ApplicationId.class));
-
- rm1.stop();
- rm2.stop();
}
private MockAM launchAM(RMApp app, MockRM rm, MockNM nm)
@@ -1017,7 +1021,7 @@ public class TestRMRestart {
Map<ApplicationId, ApplicationState> rmAppState =
rmState.getApplicationState();
- MockRM rm1 = new MockRM(conf, memStore);
+ MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 =
new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
@@ -1055,7 +1059,7 @@ public class TestRMRestart {
// Setting AMLivelinessMonitor interval to be 3 Secs.
conf.setInt(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, 3000);
// start new RM
- MockRM rm2 = new MockRM(conf, memStore);
+ MockRM rm2 = createMockRM(conf, memStore);
rm2.start();
// verify that maxAppAttempts is set to global value
@@ -1074,10 +1078,6 @@ public class TestRMRestart {
Assert.assertEquals(RMAppState.FAILED,
rmAppState.get(app1.getApplicationId()).getState());
Assert.assertNull(rmAppState.get(app2.getApplicationId()).getState());
-
- // stop the RM
- rm1.stop();
- rm2.stop();
}
@Test (timeout = 60000)
@@ -1159,10 +1159,6 @@ public class TestRMRestart {
// verify tokens are properly populated back to rm2 DelegationTokenRenewer
Assert.assertEquals(tokenSet, rm2.getRMContext()
.getDelegationTokenRenewer().getDelegationTokens());
-
- // stop the RM
- rm1.stop();
- rm2.stop();
}
private void waitForTokensToBeRenewed(MockRM rm2) throws Exception {
@@ -1258,8 +1254,6 @@ public class TestRMRestart {
Assert.assertArrayEquals(amrmToken.getPassword(),
rm2.getRMContext().getAMRMTokenSecretManager().retrievePassword(
amrmToken.decodeIdentifier()));
- rm1.stop();
- rm2.stop();
}
@Test (timeout = 60000)
@@ -1407,10 +1401,6 @@ public class TestRMRestart {
.getAllTokens();
Assert.assertFalse(allTokensRM2.containsKey(dtId1));
Assert.assertFalse(rmDTState.containsKey(dtId1));
-
- // stop the RM
- rm1.stop();
- rm2.stop();
}
// This is to test submit an application to the new RM with the old delegation
@@ -1471,7 +1461,7 @@ public class TestRMRestart {
memStore.init(conf);
// start RM
- final MockRM rm1 = new MockRM(conf, memStore);
+ final MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
// create apps.
@@ -1517,7 +1507,7 @@ public class TestRMRestart {
RMState rmState = memStore.getState();
// start RM
- MockRM rm1 = new MockRM(conf, memStore);
+ MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 =
new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
@@ -1528,7 +1518,7 @@ public class TestRMRestart {
MockAM am0 = launchAM(app0, rm1, nm1);
finishApplicationMaster(app0, rm1, nm1, am0);
- MockRM rm2 = new MockRM(conf, memStore);
+ MockRM rm2 = createMockRM(conf, memStore);
rm2.start();
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
nm1 = rm2.registerNode("127.0.0.1:1234", 15120);
@@ -1550,9 +1540,6 @@ public class TestRMRestart {
Assert.assertNull(rm2.getRMContext().getRMApps()
.get(app0.getApplicationId()));
Assert.assertNull(rmAppState.get(app0.getApplicationId()));
-
- rm1.stop();
- rm2.stop();
}
// This is to test RM does not get hang on shutdown.
@@ -1569,7 +1556,7 @@ public class TestRMRestart {
memStore.init(conf);
MockRM rm1 = null;
try {
- rm1 = new MockRM(conf, memStore);
+ rm1 = createMockRM(conf, memStore);
rm1.start();
Assert.fail();
} catch (Exception e) {
@@ -1587,7 +1574,7 @@ public class TestRMRestart {
memStore.init(conf);
// start RM
- MockRM rm1 = new MockRM(conf, memStore);
+ MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 =
new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
@@ -1703,7 +1690,11 @@ public class TestRMRestart {
}
}
};
- rm1.start();
+ try {
+ rm1.start();
+ } finally {
+ rm1.stop();
+ }
}
@SuppressWarnings("resource")
@@ -1716,7 +1707,7 @@ public class TestRMRestart {
// PHASE 1: create state in an RM
// start RM
- MockRM rm1 = new MockRM(conf, memStore);
+ MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 =
new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
@@ -1754,7 +1745,7 @@ public class TestRMRestart {
// PHASE 2: create new RM and start from old state
// create new RM to represent restart and recover state
- MockRM rm2 = new MockRM(conf, memStore);
+ MockRM rm2 = createMockRM(conf, memStore);
QueueMetrics qm2 = rm2.getResourceScheduler().getRootQueueMetrics();
resetQueueMetrics(qm2);
assertQueueMetrics(qm2, 0, 0, 0, 0);
@@ -1770,7 +1761,7 @@ public class TestRMRestart {
NMContainerStatus status =
TestRMRestart
.createNMContainerStatus(loadedApp1.getCurrentAppAttempt()
- .getAppAttemptId(), 1, ContainerState.COMPLETE);
+ .getAppAttemptId(), 1, ContainerState.COMPLETE);
nm1.registerNode(Arrays.asList(status), null);
while (loadedApp1.getAppAttempts().size() != 2) {
@@ -1799,10 +1790,6 @@ public class TestRMRestart {
// finish the AMs
finishApplicationMaster(loadedApp1, rm2, nm1, am1);
assertQueueMetrics(qm2, 1, 0, 0, 1);
-
- // stop RM's
- rm2.stop();
- rm1.stop();
}
@@ -1840,43 +1827,58 @@ public class TestRMRestart {
hostFile.getAbsolutePath());
writeToHostsFile("");
final DrainDispatcher dispatcher = new DrainDispatcher();
- MockRM rm1 = new MockRM(conf) {
- @Override
- protected Dispatcher createDispatcher() {
- return dispatcher;
+ MockRM rm1 = null, rm2 = null;
+ try {
+ rm1 = new MockRM(conf) {
+ @Override
+ protected Dispatcher createDispatcher() {
+ return dispatcher;
+ }
+ };
+ rm1.start();
+ MockNM nm1 = rm1.registerNode("localhost:1234", 8000);
+ MockNM nm2 = rm1.registerNode("host2:1234", 8000);
+ Assert
+ .assertEquals(0,
+ ClusterMetrics.getMetrics().getNumDecommisionedNMs());
+ String ip = NetUtils.normalizeHostName("localhost");
+ // Add 2 hosts to exclude list.
+ writeToHostsFile("host2", ip);
+
+ // refresh nodes
+ rm1.getNodesListManager().refreshNodes(conf);
+ NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
+ Assert
+ .assertTrue(
+ NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction()));
+ nodeHeartbeat = nm2.nodeHeartbeat(true);
+ Assert.assertTrue("The decommisioned metrics are not updated",
+ NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction()));
+
+ dispatcher.await();
+ Assert
+ .assertEquals(2,
+ ClusterMetrics.getMetrics().getNumDecommisionedNMs());
+ rm1.stop();
+ rm1 = null;
+ Assert
+ .assertEquals(0,
+ ClusterMetrics.getMetrics().getNumDecommisionedNMs());
+
+ // restart RM.
+ rm2 = new MockRM(conf);
+ rm2.start();
+ Assert
+ .assertEquals(2,
+ ClusterMetrics.getMetrics().getNumDecommisionedNMs());
+ } finally {
+ if (rm1 != null) {
+ rm1.stop();
}
- };
- rm1.start();
- MockNM nm1 = rm1.registerNode("localhost:1234", 8000);
- MockNM nm2 = rm1.registerNode("host2:1234", 8000);
- Assert
- .assertEquals(0, ClusterMetrics.getMetrics().getNumDecommisionedNMs());
- String ip = NetUtils.normalizeHostName("localhost");
- // Add 2 hosts to exclude list.
- writeToHostsFile("host2", ip);
-
- // refresh nodes
- rm1.getNodesListManager().refreshNodes(conf);
- NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
- Assert
- .assertTrue(NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction()));
- nodeHeartbeat = nm2.nodeHeartbeat(true);
- Assert.assertTrue("The decommisioned metrics are not updated",
- NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction()));
-
- dispatcher.await();
- Assert
- .assertEquals(2, ClusterMetrics.getMetrics().getNumDecommisionedNMs());
- rm1.stop();
- Assert
- .assertEquals(0, ClusterMetrics.getMetrics().getNumDecommisionedNMs());
-
- // restart RM.
- MockRM rm2 = new MockRM(conf);
- rm2.start();
- Assert
- .assertEquals(2, ClusterMetrics.getMetrics().getNumDecommisionedNMs());
- rm2.stop();
+ if (rm2 != null) {
+ rm2.stop();
+ }
+ }
}
// Test Delegation token is renewed synchronously so that recover events
@@ -1891,7 +1893,7 @@ public class TestRMRestart {
memStore.init(conf);
// start RM
- MockRM rm1 = new MockRM(conf, memStore);
+ MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
final MockNM nm1 =
new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
@@ -1914,24 +1916,29 @@ public class TestRMRestart {
nm1.setResourceTrackerService(getResourceTrackerService());
NMContainerStatus status =
TestRMRestart.createNMContainerStatus(
- am0.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
+ am0.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
nm1.registerNode(Arrays.asList(status), null);
}
};
}
};
- // Re-start RM
- rm2.start();
- // wait for the 2nd attempt to be started.
- RMApp loadedApp0 =
- rm2.getRMContext().getRMApps().get(app0.getApplicationId());
- int timeoutSecs = 0;
- while (loadedApp0.getAppAttempts().size() != 2 && timeoutSecs++ < 40) {
- Thread.sleep(200);
+ try {
+ // Re-start RM
+ rm2.start();
+
+ // wait for the 2nd attempt to be started.
+ RMApp loadedApp0 =
+ rm2.getRMContext().getRMApps().get(app0.getApplicationId());
+ int timeoutSecs = 0;
+ while (loadedApp0.getAppAttempts().size() != 2 && timeoutSecs++ < 40) {
+ Thread.sleep(200);
+ }
+ MockAM am1 = MockRM.launchAndRegisterAM(loadedApp0, rm2, nm1);
+ MockRM.finishAMAndVerifyAppState(loadedApp0, rm2, nm1, am1);
+ } finally {
+ rm2.stop();
}
- MockAM am1 = MockRM.launchAndRegisterAM(loadedApp0, rm2, nm1);
- MockRM.finishAMAndVerifyAppState(loadedApp0, rm2, nm1, am1);
}
private void writeToHostsFile(String... hosts) throws IOException {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/33031c9c/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestClientToAMTokens.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestClientToAMTokens.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestClientToAMTokens.java
index b5636bd..78bc728 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestClientToAMTokens.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestClientToAMTokens.java
@@ -18,7 +18,11 @@
package org.apache.hadoop.yarn.server.resourcemanager.security;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.server.resourcemanager
+ .ParameterizedSchedulerTestBase;
import static org.junit.Assert.fail;
+import org.junit.Before;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@@ -79,7 +83,17 @@ import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;
import org.junit.Test;
-public class TestClientToAMTokens {
+public class TestClientToAMTokens extends ParameterizedSchedulerTestBase {
+ private YarnConfiguration conf;
+
+ public TestClientToAMTokens(SchedulerType type) {
+ super(type);
+ }
+
+ @Before
+ public void setup() {
+ conf = getConf();
+ }
private interface CustomProtocol {
@SuppressWarnings("unused")
@@ -166,8 +180,6 @@ public class TestClientToAMTokens {
@Test
public void testClientToAMTokens() throws Exception {
-
- final Configuration conf = new Configuration();
conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION,
"kerberos");
UserGroupInformation.setConfiguration(conf);
[2/5] hadoop git commit: YARN-2635. Merging to branch-2.6 for
hadoop-2.6.0-rc1.
Posted by ac...@apache.org.
YARN-2635. Merging to branch-2.6 for hadoop-2.6.0-rc1.
(cherry picked from commit 81dc0ac6dcf2f34ad607da815ea0144f178691a9)
Conflicts:
hadoop-yarn-project/CHANGES.txt
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/86722bd1
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/86722bd1
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/86722bd1
Branch: refs/heads/branch-2.6.0
Commit: 86722bd19e8b7d4ea281e18db357d40fafad6e1f
Parents: 33031c9
Author: Arun C. Murthy <ac...@apache.org>
Authored: Thu Nov 13 10:27:47 2014 -0800
Committer: Arun C. Murthy <ac...@apache.org>
Committed: Thu Nov 13 10:41:57 2014 -0800
----------------------------------------------------------------------
hadoop-yarn-project/CHANGES.txt | 3 +++
1 file changed, 3 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/86722bd1/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index e42c9b2..8410d9c 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -367,6 +367,9 @@ Release 2.6.0 - 2014-11-15
YARN-2818. Removed the now unnecessary user entity injection from Timeline
service given we now have domains. (Zhijie Shen via vinodkv)
+ YARN-2635. TestRM, TestRMRestart, TestClientToAMTokens should run
+ with both CS and FS. (Wei Yan and kasha via kasha)
+
OPTIMIZATIONS
BUG FIXES
[4/5] hadoop git commit: YARN-2846. Incorrect persist exit code for
running containers when reacquireContainer() is interrupted by NodeManager
restart. Contributed by Junping Du
Posted by ac...@apache.org.
YARN-2846. Incorrect persist exit code for running containers when reacquireContainer() is interrupted by NodeManager restart. Contributed by Junping Du
(cherry picked from commit 33ea5ae92b9dd3abace104903d9a94d17dd75af5)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/9820c475
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/9820c475
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/9820c475
Branch: refs/heads/branch-2.6.0
Commit: 9820c475749243eae6e4ec44206296e220f03165
Parents: bf4d080
Author: Jason Lowe <jl...@apache.org>
Authored: Thu Nov 13 16:11:04 2014 +0000
Committer: Arun C. Murthy <ac...@apache.org>
Committed: Thu Nov 13 10:47:16 2014 -0800
----------------------------------------------------------------------
hadoop-yarn-project/CHANGES.txt | 4 ++++
.../server/nodemanager/ContainerExecutor.java | 21 +++++++-------------
.../nodemanager/LinuxContainerExecutor.java | 2 +-
.../launcher/RecoveredContainerLaunch.java | 20 ++++++++++++-------
4 files changed, 25 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9820c475/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 8410d9c..b8bb9a9 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -869,6 +869,10 @@ Release 2.6.0 - 2014-11-15
YARN-2794. Fixed log messages about distributing system-credentials. (Jian He via
zjshen)
+ YARN-2846. Incorrect persist exit code for running containers in
+ reacquireContainer() that interrupted by NodeManager restart. (Junping Du
+ via jlowe)
+
Release 2.5.2 - 2014-11-10
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9820c475/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
index 8133413..327f882 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
@@ -159,9 +159,10 @@ public abstract class ContainerExecutor implements Configurable {
* @param containerId The ID of the container to reacquire
* @return The exit code of the pre-existing container
* @throws IOException
+ * @throws InterruptedException
*/
public int reacquireContainer(String user, ContainerId containerId)
- throws IOException {
+ throws IOException, InterruptedException {
Path pidPath = getPidFilePath(containerId);
if (pidPath == null) {
LOG.warn(containerId + " is not active, returning terminated error");
@@ -175,13 +176,8 @@ public abstract class ContainerExecutor implements Configurable {
}
LOG.info("Reacquiring " + containerId + " with pid " + pid);
- try {
- while(isContainerProcessAlive(user, pid)) {
- Thread.sleep(1000);
- }
- } catch (InterruptedException e) {
- throw new IOException("Interrupted while waiting for process " + pid
- + " to exit", e);
+ while(isContainerProcessAlive(user, pid)) {
+ Thread.sleep(1000);
}
// wait for exit code file to appear
@@ -194,12 +190,9 @@ public abstract class ContainerExecutor implements Configurable {
LOG.info(containerId + " was deactivated");
return ExitCode.TERMINATED.getExitCode();
}
- try {
- Thread.sleep(sleepMsec);
- } catch (InterruptedException e) {
- throw new IOException(
- "Interrupted while waiting for exit code from " + containerId, e);
- }
+
+ Thread.sleep(sleepMsec);
+
msecLeft -= sleepMsec;
}
if (msecLeft < 0) {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9820c475/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
index 884a16a..60e4713 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
@@ -345,7 +345,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
@Override
public int reacquireContainer(String user, ContainerId containerId)
- throws IOException {
+ throws IOException, InterruptedException {
try {
return super.reacquireContainer(user, containerId);
} finally {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9820c475/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java
index 446695a..03a39aa 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java
@@ -73,6 +73,7 @@ public class RecoveredContainerLaunch extends ContainerLaunch {
dispatcher.getEventHandler().handle(new ContainerEvent(containerId,
ContainerEventType.CONTAINER_LAUNCHED));
+ boolean notInterrupted = true;
try {
File pidFile = locatePidFile(appIdStr, containerIdStr);
if (pidFile != null) {
@@ -85,14 +86,19 @@ public class RecoveredContainerLaunch extends ContainerLaunch {
}
} catch (IOException e) {
LOG.error("Unable to recover container " + containerIdStr, e);
+ } catch (InterruptedException e) {
+ LOG.warn("Interrupted while waiting for exit code from " + containerId);
+ notInterrupted = false;
} finally {
- this.completed.set(true);
- exec.deactivateContainer(containerId);
- try {
- getContext().getNMStateStore().storeContainerCompleted(containerId,
- retCode);
- } catch (IOException e) {
- LOG.error("Unable to set exit code for container " + containerId);
+ if (notInterrupted) {
+ this.completed.set(true);
+ exec.deactivateContainer(containerId);
+ try {
+ getContext().getNMStateStore().storeContainerCompleted(containerId,
+ retCode);
+ } catch (IOException e) {
+ LOG.error("Unable to set exit code for container " + containerId);
+ }
}
}
[3/5] hadoop git commit: YARN-2853. Fixed a bug in ResourceManager
causing apps to hang when the user kill request races with ApplicationMaster
finish. Contributed by Jian He.
Posted by ac...@apache.org.
YARN-2853. Fixed a bug in ResourceManager causing apps to hang when the user kill request races with ApplicationMaster finish. Contributed by Jian He.
(cherry picked from commit 3651fe1b089851b38be351c00a9899817166bf3e)
Conflicts:
hadoop-yarn-project/CHANGES.txt
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/bf4d0801
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/bf4d0801
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/bf4d0801
Branch: refs/heads/branch-2.6.0
Commit: bf4d08010050ed36761a7f255b7c838ede6feba4
Parents: 86722bd
Author: Vinod Kumar Vavilapalli <vi...@apache.org>
Authored: Thu Nov 13 08:12:41 2014 -0800
Committer: Arun C. Murthy <ac...@apache.org>
Committed: Thu Nov 13 10:42:29 2014 -0800
----------------------------------------------------------------------
.../ApplicationMasterService.java | 1 +
.../server/resourcemanager/rmapp/RMAppImpl.java | 24 +++-
.../yarn/server/resourcemanager/TestRM.java | 111 +++++++++++++++++++
.../rmapp/TestRMAppTransitions.java | 6 -
4 files changed, 133 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/bf4d0801/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
index 64988f5..d0b199f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
@@ -348,6 +348,7 @@ public class ApplicationMasterService extends AbstractService implements
// ApplicationDoesNotExistInCacheException before and after
// RM work-preserving restart.
if (rmApp.isAppFinalStateStored()) {
+ LOG.info(rmApp.getApplicationId() + " unregistered successfully. ");
return FinishApplicationMasterResponse.newInstance(true);
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/bf4d0801/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
index ad92cc4..aeb2cda 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
@@ -293,13 +293,23 @@ public class RMAppImpl implements RMApp, Recoverable {
RMAppEventType.ATTEMPT_KILLED,
new FinalSavingTransition(
new AppKilledTransition(), RMAppState.KILLED))
+ .addTransition(RMAppState.KILLING, RMAppState.FINAL_SAVING,
+ RMAppEventType.ATTEMPT_UNREGISTERED,
+ new FinalSavingTransition(
+ new AttemptUnregisteredTransition(),
+ RMAppState.FINISHING, RMAppState.FINISHED))
+ .addTransition(RMAppState.KILLING, RMAppState.FINISHED,
+ // UnManagedAM directly jumps to finished
+ RMAppEventType.ATTEMPT_FINISHED, FINISHED_TRANSITION)
+ .addTransition(RMAppState.KILLING,
+ EnumSet.of(RMAppState.FINAL_SAVING),
+ RMAppEventType.ATTEMPT_FAILED,
+ new AttemptFailedTransition(RMAppState.KILLING))
+
.addTransition(RMAppState.KILLING, RMAppState.KILLING,
EnumSet.of(
RMAppEventType.NODE_UPDATE,
RMAppEventType.ATTEMPT_REGISTERED,
- RMAppEventType.ATTEMPT_UNREGISTERED,
- RMAppEventType.ATTEMPT_FINISHED,
- RMAppEventType.ATTEMPT_FAILED,
RMAppEventType.APP_UPDATE_SAVED,
RMAppEventType.KILL, RMAppEventType.MOVE))
@@ -1199,6 +1209,14 @@ public class RMAppImpl implements RMApp, Recoverable {
+ app.maxAppAttempts);
if (!app.submissionContext.getUnmanagedAM()
&& numberOfFailure < app.maxAppAttempts) {
+ if (initialState.equals(RMAppState.KILLING)) {
+ // If this is not last attempt, app should be killed instead of
+ // launching a new attempt
+ app.rememberTargetTransitionsAndStoreState(event,
+ new AppKilledTransition(), RMAppState.KILLED, RMAppState.KILLED);
+ return RMAppState.FINAL_SAVING;
+ }
+
boolean transferStateFromPreviousAttempt;
RMAppFailedAttemptEvent failedEvent = (RMAppFailedAttemptEvent) event;
transferStateFromPreviousAttempt =
http://git-wip-us.apache.org/repos/asf/hadoop/blob/bf4d0801/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java
index 4865420..77d8cdf 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.server.resourcemanager;
import org.junit.Before;
import static org.mockito.Matchers.argThat;
import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.spy;
import java.util.ArrayList;
@@ -37,16 +38,19 @@ import org.junit.Assert;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.NMToken;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
@@ -57,6 +61,8 @@ import org.apache.hadoop.yarn.event.AbstractEvent;
import org.apache.hadoop.yarn.event.AsyncDispatcher;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
@@ -73,6 +79,8 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.junit.Test;
import org.mockito.ArgumentMatcher;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
@SuppressWarnings({"unchecked", "rawtypes"})
public class TestRM extends ParameterizedSchedulerTestBase {
@@ -638,4 +646,107 @@ public class TestRM extends ParameterizedSchedulerTestBase {
Assert.assertEquals(appsSubmitted + 1, metrics.getAppsSubmitted());
}
+ // Test Kill an app while the app is finishing in the meanwhile.
+ @Test (timeout = 30000)
+ public void testKillFinishingApp() throws Exception{
+
+ // this dispatcher ignores RMAppAttemptEventType.KILL event
+ final Dispatcher dispatcher = new AsyncDispatcher() {
+ @Override
+ public EventHandler getEventHandler() {
+
+ class EventArgMatcher extends ArgumentMatcher<AbstractEvent> {
+ @Override
+ public boolean matches(Object argument) {
+ if (argument instanceof RMAppAttemptEvent) {
+ if (((RMAppAttemptEvent) argument).getType().equals(
+ RMAppAttemptEventType.KILL)) {
+ return true;
+ }
+ }
+ return false;
+ }
+ }
+
+ EventHandler handler = spy(super.getEventHandler());
+ doNothing().when(handler).handle(argThat(new EventArgMatcher()));
+ return handler;
+ }
+ };
+
+ MockRM rm1 = new MockRM(conf){
+ @Override
+ protected Dispatcher createDispatcher() {
+ return dispatcher;
+ }
+ };
+ rm1.start();
+ MockNM nm1 =
+ new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
+ nm1.registerNode();
+ RMApp app1 = rm1.submitApp(200);
+ MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+
+ rm1.killApp(app1.getApplicationId());
+
+ FinishApplicationMasterRequest req =
+ FinishApplicationMasterRequest.newInstance(
+ FinalApplicationStatus.SUCCEEDED, "", "");
+ am1.unregisterAppAttempt(req,true);
+
+ rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FINISHING);
+ nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
+ rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FINISHED);
+ rm1.waitForState(app1.getApplicationId(), RMAppState.FINISHED);
+ }
+
+ // Test Kill an app while the app is failing
+ @Test (timeout = 30000)
+ public void testKillFailingApp() throws Exception{
+
+ // this dispatcher ignores RMAppAttemptEventType.KILL event
+ final Dispatcher dispatcher = new AsyncDispatcher() {
+ @Override
+ public EventHandler getEventHandler() {
+
+ class EventArgMatcher extends ArgumentMatcher<AbstractEvent> {
+ @Override
+ public boolean matches(Object argument) {
+ if (argument instanceof RMAppAttemptEvent) {
+ if (((RMAppAttemptEvent) argument).getType().equals(
+ RMAppAttemptEventType.KILL)) {
+ return true;
+ }
+ }
+ return false;
+ }
+ }
+
+ EventHandler handler = spy(super.getEventHandler());
+ doNothing().when(handler).handle(argThat(new EventArgMatcher()));
+ return handler;
+ }
+ };
+
+ MockRM rm1 = new MockRM(conf){
+ @Override
+ protected Dispatcher createDispatcher() {
+ return dispatcher;
+ }
+ };
+ rm1.start();
+ MockNM nm1 =
+ new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
+ nm1.registerNode();
+ RMApp app1 = rm1.submitApp(200);
+ MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+
+ rm1.killApp(app1.getApplicationId());
+
+ // fail the app by sending container_finished event.
+ nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
+ rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
+ // app is killed, not launching a new attempt
+ rm1.waitForState(app1.getApplicationId(), RMAppState.KILLED);
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/bf4d0801/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java
index bbfb0ee..e68d074 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java
@@ -726,12 +726,6 @@ public class TestRMAppTransitions {
application.handle(event);
rmDispatcher.await();
- // Ignore Attempt_Finished if we were supposed to go to Finished.
- assertAppState(RMAppState.KILLING, application);
- RMAppEvent finishEvent =
- new RMAppFinishedAttemptEvent(application.getApplicationId(), null);
- application.handle(finishEvent);
- assertAppState(RMAppState.KILLING, application);
sendAttemptUpdateSavedEvent(application);
sendAppUpdateSavedEvent(application);
assertKilled(application);
[5/5] hadoop git commit: YARN-2853. Merging to branch-2.6 for
hadoop-2.6.0-rc1.
Posted by ac...@apache.org.
YARN-2853. Merging to branch-2.6 for hadoop-2.6.0-rc1.
(cherry picked from commit d648e60ebab7f1942dba92e9cd2cb62b8d70419b)
Conflicts:
hadoop-yarn-project/CHANGES.txt
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/a97e9c02
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/a97e9c02
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/a97e9c02
Branch: refs/heads/branch-2.6.0
Commit: a97e9c02358abf6dc0f01bd0a5f91a5b856c7420
Parents: 9820c47
Author: Arun C. Murthy <ac...@apache.org>
Authored: Thu Nov 13 10:04:45 2014 -0800
Committer: Arun C. Murthy <ac...@apache.org>
Committed: Thu Nov 13 10:47:38 2014 -0800
----------------------------------------------------------------------
hadoop-yarn-project/CHANGES.txt | 3 +++
1 file changed, 3 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/a97e9c02/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index b8bb9a9..69e21ee 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -873,6 +873,9 @@ Release 2.6.0 - 2014-11-15
reacquireContainer() that interrupted by NodeManager restart. (Junping Du
via jlowe)
+ YARN-2853. Fixed a bug in ResourceManager causing apps to hang when the user
+ kill request races with ApplicationMaster finish. (Jian He via vinodkv)
+
Release 2.5.2 - 2014-11-10
INCOMPATIBLE CHANGES