You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ka...@apache.org on 2014/07/26 03:29:25 UTC
svn commit: r1613547 - in /hadoop/common/trunk/hadoop-tools/hadoop-sls/src:
main/java/org/apache/hadoop/yarn/sls/appmaster/
main/java/org/apache/hadoop/yarn/sls/nodemanager/
main/java/org/apache/hadoop/yarn/sls/scheduler/
test/java/org/apache/hadoop/ya...
Author: kasha
Date: Sat Jul 26 01:29:25 2014
New Revision: 1613547
URL: http://svn.apache.org/r1613547
Log:
YARN-1726. ResourceSchedulerWrapper broken due to AbstractYarnScheduler. (Wei Yan via kasha)
Modified:
hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java
hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/MRAMSimulator.java
hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NMSimulator.java
hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java
hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java
hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/TaskRunner.java
hadoop/common/trunk/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSLSRunner.java
Modified: hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java?rev=1613547&r1=1613546&r2=1613547&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java (original)
+++ hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java Sat Jul 26 01:29:25 2014
@@ -63,6 +63,8 @@ import org.apache.hadoop.yarn.security.A
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.util.Records;
import org.apache.log4j.Logger;
@@ -133,8 +135,7 @@ public abstract class AMSimulator extend
* register with RM
*/
@Override
- public void firstStep()
- throws YarnException, IOException, InterruptedException {
+ public void firstStep() throws Exception {
simulateStartTimeMS = System.currentTimeMillis() -
SLSRunner.getRunner().getStartTimeMS();
@@ -149,8 +150,7 @@ public abstract class AMSimulator extend
}
@Override
- public void middleStep()
- throws InterruptedException, YarnException, IOException {
+ public void middleStep() throws Exception {
// process responses in the queue
processResponseQueue();
@@ -162,7 +162,7 @@ public abstract class AMSimulator extend
}
@Override
- public void lastStep() {
+ public void lastStep() throws Exception {
LOG.info(MessageFormat.format("Application {0} is shutting down.", appId));
// unregister tracking
if (isTracked) {
@@ -173,26 +173,19 @@ public abstract class AMSimulator extend
.newRecordInstance(FinishApplicationMasterRequest.class);
finishAMRequest.setFinalApplicationStatus(FinalApplicationStatus.SUCCEEDED);
- try {
- UserGroupInformation ugi =
- UserGroupInformation.createRemoteUser(appAttemptId.toString());
- Token<AMRMTokenIdentifier> token =
- rm.getRMContext().getRMApps().get(appAttemptId.getApplicationId())
- .getRMAppAttempt(appAttemptId).getAMRMToken();
- ugi.addTokenIdentifier(token.decodeIdentifier());
- ugi.doAs(new PrivilegedExceptionAction<Object>() {
- @Override
- public Object run() throws Exception {
- rm.getApplicationMasterService()
- .finishApplicationMaster(finishAMRequest);
- return null;
- }
- });
- } catch (IOException e) {
- e.printStackTrace();
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
+ UserGroupInformation ugi =
+ UserGroupInformation.createRemoteUser(appAttemptId.toString());
+ Token<AMRMTokenIdentifier> token = rm.getRMContext().getRMApps().get(appId)
+ .getRMAppAttempt(appAttemptId).getAMRMToken();
+ ugi.addTokenIdentifier(token.decodeIdentifier());
+ ugi.doAs(new PrivilegedExceptionAction<Object>() {
+ @Override
+ public Object run() throws Exception {
+ rm.getApplicationMasterService()
+ .finishApplicationMaster(finishAMRequest);
+ return null;
+ }
+ });
simulateFinishTimeMS = System.currentTimeMillis() -
SLSRunner.getRunner().getStartTimeMS();
@@ -230,11 +223,9 @@ public abstract class AMSimulator extend
return createAllocateRequest(ask, new ArrayList<ContainerId>());
}
- protected abstract void processResponseQueue()
- throws InterruptedException, YarnException, IOException;
+ protected abstract void processResponseQueue() throws Exception;
- protected abstract void sendContainerRequest()
- throws YarnException, IOException, InterruptedException;
+ protected abstract void sendContainerRequest() throws Exception;
protected abstract void checkStop();
@@ -280,11 +271,18 @@ public abstract class AMSimulator extend
// waiting until application ACCEPTED
RMApp app = rm.getRMContext().getRMApps().get(appId);
while(app.getState() != RMAppState.ACCEPTED) {
- Thread.sleep(50);
+ Thread.sleep(10);
}
- appAttemptId = rm.getRMContext().getRMApps().get(appId)
- .getCurrentAppAttempt().getAppAttemptId();
+ // Waiting until application attempt reaches LAUNCHED
+ // "Unmanaged AM must register after AM attempt reaches LAUNCHED state"
+ this.appAttemptId = rm.getRMContext().getRMApps().get(appId)
+ .getCurrentAppAttempt().getAppAttemptId();
+ RMAppAttempt rmAppAttempt = rm.getRMContext().getRMApps().get(appId)
+ .getCurrentAppAttempt();
+ while (rmAppAttempt.getAppAttemptState() != RMAppAttemptState.LAUNCHED) {
+ Thread.sleep(10);
+ }
}
private void registerAM()
@@ -297,10 +295,9 @@ public abstract class AMSimulator extend
amRegisterRequest.setTrackingUrl("localhost:1000");
UserGroupInformation ugi =
- UserGroupInformation.createRemoteUser(appAttemptId.toString());
- Token<AMRMTokenIdentifier> token =
- rm.getRMContext().getRMApps().get(appAttemptId.getApplicationId())
- .getRMAppAttempt(appAttemptId).getAMRMToken();
+ UserGroupInformation.createRemoteUser(appAttemptId.toString());
+ Token<AMRMTokenIdentifier> token = rm.getRMContext().getRMApps().get(appId)
+ .getRMAppAttempt(appAttemptId).getAMRMToken();
ugi.addTokenIdentifier(token.decodeIdentifier());
ugi.doAs(
Modified: hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/MRAMSimulator.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/MRAMSimulator.java?rev=1613547&r1=1613546&r2=1613547&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/MRAMSimulator.java (original)
+++ hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/MRAMSimulator.java Sat Jul 26 01:29:25 2014
@@ -145,8 +145,7 @@ public class MRAMSimulator extends AMSim
}
@Override
- public void firstStep()
- throws YarnException, IOException, InterruptedException {
+ public void firstStep() throws Exception {
super.firstStep();
requestAMContainer();
@@ -390,7 +389,7 @@ public class MRAMSimulator extends AMSim
}
@Override
- public void lastStep() {
+ public void lastStep() throws Exception {
super.lastStep();
// clear data structures
Modified: hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NMSimulator.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NMSimulator.java?rev=1613547&r1=1613546&r2=1613547&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NMSimulator.java (original)
+++ hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NMSimulator.java Sat Jul 26 01:29:25 2014
@@ -27,6 +27,7 @@ import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.DelayQueue;
+import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.yarn.api.records.ApplicationId;
@@ -107,12 +108,12 @@ public class NMSimulator extends TaskRun
}
@Override
- public void firstStep() throws YarnException, IOException {
+ public void firstStep() {
// do nothing
}
@Override
- public void middleStep() {
+ public void middleStep() throws Exception {
// we check the lifetime for each running containers
ContainerSimulator cs = null;
synchronized(completedContainerList) {
@@ -136,37 +137,31 @@ public class NMSimulator extends TaskRun
ns.setResponseId(RESPONSE_ID ++);
ns.setNodeHealthStatus(NodeHealthStatus.newInstance(true, "", 0));
beatRequest.setNodeStatus(ns);
- try {
- NodeHeartbeatResponse beatResponse =
- rm.getResourceTrackerService().nodeHeartbeat(beatRequest);
- if (! beatResponse.getContainersToCleanup().isEmpty()) {
- // remove from queue
- synchronized(releasedContainerList) {
- for (ContainerId containerId : beatResponse.getContainersToCleanup()){
- if (amContainerList.contains(containerId)) {
- // AM container (not killed?, only release)
- synchronized(amContainerList) {
- amContainerList.remove(containerId);
- }
- LOG.debug(MessageFormat.format("NodeManager {0} releases " +
- "an AM ({1}).", node.getNodeID(), containerId));
- } else {
- cs = runningContainers.remove(containerId);
- containerQueue.remove(cs);
- releasedContainerList.add(containerId);
- LOG.debug(MessageFormat.format("NodeManager {0} releases a " +
- "container ({1}).", node.getNodeID(), containerId));
+ NodeHeartbeatResponse beatResponse =
+ rm.getResourceTrackerService().nodeHeartbeat(beatRequest);
+ if (! beatResponse.getContainersToCleanup().isEmpty()) {
+ // remove from queue
+ synchronized(releasedContainerList) {
+ for (ContainerId containerId : beatResponse.getContainersToCleanup()){
+ if (amContainerList.contains(containerId)) {
+ // AM container (not killed?, only release)
+ synchronized(amContainerList) {
+ amContainerList.remove(containerId);
}
+ LOG.debug(MessageFormat.format("NodeManager {0} releases " +
+ "an AM ({1}).", node.getNodeID(), containerId));
+ } else {
+ cs = runningContainers.remove(containerId);
+ containerQueue.remove(cs);
+ releasedContainerList.add(containerId);
+ LOG.debug(MessageFormat.format("NodeManager {0} releases a " +
+ "container ({1}).", node.getNodeID(), containerId));
}
}
}
- if (beatResponse.getNodeAction() == NodeAction.SHUTDOWN) {
- lastStep();
- }
- } catch (YarnException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
+ }
+ if (beatResponse.getNodeAction() == NodeAction.SHUTDOWN) {
+ lastStep();
}
}
@@ -262,4 +257,19 @@ public class NMSimulator extends TaskRun
completedContainerList.add(containerId);
}
}
+
+ @VisibleForTesting
+ Map<ContainerId, ContainerSimulator> getRunningContainers() {
+ return runningContainers;
+ }
+
+ @VisibleForTesting
+ List<ContainerId> getAMContainers() {
+ return amContainerList;
+ }
+
+ @VisibleForTesting
+ List<ContainerId> getCompletedContainers() {
+ return completedContainerList;
+ }
}
Modified: hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java?rev=1613547&r1=1613546&r2=1613547&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java (original)
+++ hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java Sat Jul 26 01:29:25 2014
@@ -67,6 +67,7 @@ import org.apache.hadoop.yarn.server.res
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
@@ -101,7 +102,6 @@ final public class ResourceSchedulerWrap
private static final String EOL = System.getProperty("line.separator");
private static final int SAMPLING_SIZE = 60;
private ScheduledExecutorService pool;
- private RMContext rmContext;
// counters for scheduler allocate/handle operations
private Counter schedulerAllocateCounter;
private Counter schedulerHandleCounter;
@@ -576,7 +576,7 @@ final public class ResourceSchedulerWrap
new Gauge<Integer>() {
@Override
public Integer getValue() {
- if(scheduler == null || scheduler.getRootQueueMetrics() == null) {
+ if (scheduler == null || scheduler.getRootQueueMetrics() == null) {
return 0;
} else {
return scheduler.getRootQueueMetrics().getAppsRunning();
@@ -723,17 +723,18 @@ final public class ResourceSchedulerWrap
public void addAMRuntime(ApplicationId appId,
long traceStartTimeMS, long traceEndTimeMS,
long simulateStartTimeMS, long simulateEndTimeMS) {
-
- try {
- // write job runtime information
- StringBuilder sb = new StringBuilder();
- sb.append(appId).append(",").append(traceStartTimeMS).append(",")
- .append(traceEndTimeMS).append(",").append(simulateStartTimeMS)
- .append(",").append(simulateEndTimeMS);
- jobRuntimeLogBW.write(sb.toString() + EOL);
- jobRuntimeLogBW.flush();
- } catch (IOException e) {
- e.printStackTrace();
+ if (metricsON) {
+ try {
+ // write job runtime information
+ StringBuilder sb = new StringBuilder();
+ sb.append(appId).append(",").append(traceStartTimeMS).append(",")
+ .append(traceEndTimeMS).append(",").append(simulateStartTimeMS)
+ .append(",").append(simulateEndTimeMS);
+ jobRuntimeLogBW.write(sb.toString() + EOL);
+ jobRuntimeLogBW.flush();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
}
}
@@ -919,4 +920,17 @@ final public class ResourceSchedulerWrap
public Resource getClusterResource() {
return null;
}
+
+ @Override
+ public synchronized List<Container> getTransferredContainers(
+ ApplicationAttemptId currentAttempt) {
+ return new ArrayList<Container>();
+ }
+
+ @Override
+ public Map<ApplicationId, SchedulerApplication<SchedulerApplicationAttempt>>
+ getSchedulerApplications() {
+ return new HashMap<ApplicationId,
+ SchedulerApplication<SchedulerApplicationAttempt>>();
+ }
}
Modified: hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java?rev=1613547&r1=1613546&r2=1613547&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java (original)
+++ hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java Sat Jul 26 01:29:25 2014
@@ -729,16 +729,18 @@ public class SLSCapacityScheduler extend
long traceStartTimeMS, long traceEndTimeMS,
long simulateStartTimeMS, long simulateEndTimeMS) {
- try {
- // write job runtime information
- StringBuilder sb = new StringBuilder();
- sb.append(appId).append(",").append(traceStartTimeMS).append(",")
- .append(traceEndTimeMS).append(",").append(simulateStartTimeMS)
- .append(",").append(simulateEndTimeMS);
- jobRuntimeLogBW.write(sb.toString() + EOL);
- jobRuntimeLogBW.flush();
- } catch (IOException e) {
- e.printStackTrace();
+ if (metricsON) {
+ try {
+ // write job runtime information
+ StringBuilder sb = new StringBuilder();
+ sb.append(appId).append(",").append(traceStartTimeMS).append(",")
+ .append(traceEndTimeMS).append(",").append(simulateStartTimeMS)
+ .append(",").append(simulateEndTimeMS);
+ jobRuntimeLogBW.write(sb.toString() + EOL);
+ jobRuntimeLogBW.flush();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
}
}
Modified: hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/TaskRunner.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/TaskRunner.java?rev=1613547&r1=1613546&r2=1613547&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/TaskRunner.java (original)
+++ hadoop/common/trunk/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/TaskRunner.java Sat Jul 26 01:29:25 2014
@@ -99,12 +99,10 @@ public class TaskRunner {
} else {
lastStep();
}
- } catch (YarnException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- } catch (InterruptedException e) {
+ } catch (Exception e) {
e.printStackTrace();
+ Thread.getDefaultUncaughtExceptionHandler()
+ .uncaughtException(Thread.currentThread(), e);
}
}
@@ -124,13 +122,11 @@ public class TaskRunner {
}
- public abstract void firstStep()
- throws YarnException, IOException, InterruptedException;
+ public abstract void firstStep() throws Exception;
- public abstract void middleStep()
- throws YarnException, InterruptedException, IOException;
+ public abstract void middleStep() throws Exception;
- public abstract void lastStep() throws YarnException;
+ public abstract void lastStep() throws Exception;
public void setEndTime(long et) {
endTime = et;
Modified: hadoop/common/trunk/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSLSRunner.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSLSRunner.java?rev=1613547&r1=1613546&r2=1613547&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSLSRunner.java (original)
+++ hadoop/common/trunk/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSLSRunner.java Sat Jul 26 01:29:25 2014
@@ -18,10 +18,13 @@
package org.apache.hadoop.yarn.sls;
-import org.apache.commons.io.FileUtils;
+import org.junit.Assert;
import org.junit.Test;
import java.io.File;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
import java.util.UUID;
public class TestSLSRunner {
@@ -30,6 +33,15 @@ public class TestSLSRunner {
@SuppressWarnings("all")
public void testSimulatorRunning() throws Exception {
File tempDir = new File("target", UUID.randomUUID().toString());
+ final List<Throwable> exceptionList =
+ Collections.synchronizedList(new ArrayList<Throwable>());
+
+ Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
+ @Override
+ public void uncaughtException(Thread t, Throwable e) {
+ exceptionList.add(e);
+ }
+ });
// start the simulator
File slsOutputDir = new File(tempDir.getAbsolutePath() + "/slsoutput/");
@@ -38,8 +50,20 @@ public class TestSLSRunner {
"-output", slsOutputDir.getAbsolutePath()};
SLSRunner.main(args);
- // wait for 45 seconds before stop
- Thread.sleep(45 * 1000);
+ // wait for 20 seconds before stop
+ int count = 20;
+ while (count >= 0) {
+ Thread.sleep(1000);
+
+ if (! exceptionList.isEmpty()) {
+ SLSRunner.getRunner().stop();
+ Assert.fail("TestSLSRunner catched exception from child thread " +
+ "(TaskRunner.Task): " + exceptionList.get(0).getMessage());
+ break;
+ }
+ count--;
+ }
+
SLSRunner.getRunner().stop();
}