You are viewing a plain text version of this content. The canonical link for it is here.
Posted to yarn-commits@hadoop.apache.org by su...@apache.org on 2012/11/01 16:11:31 UTC
svn commit: r1404629 - in
/hadoop/common/branches/HDFS-2802/hadoop-yarn-project: CHANGES.txt
hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
Author: suresh
Date: Thu Nov 1 15:11:27 2012
New Revision: 1404629
URL: http://svn.apache.org/viewvc?rev=1404629&view=rev
Log:
Merging trunk to HDFS-2802 branch.
Modified:
hadoop/common/branches/HDFS-2802/hadoop-yarn-project/CHANGES.txt
hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
Modified: hadoop/common/branches/HDFS-2802/hadoop-yarn-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-yarn-project/CHANGES.txt?rev=1404629&r1=1404628&r2=1404629&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-yarn-project/CHANGES.txt Thu Nov 1 15:11:27 2012
@@ -196,6 +196,9 @@ Release 0.23.5 - UNRELEASED
YARN-166. capacity scheduler doesn't allow capacity < 1.0 (tgraves via
bobby)
+ YARN-189. Fixed a deadlock between RM's ApplicationMasterService and the
+ dispatcher. (Thomas Graves via vinodkv)
+
Release 0.23.4 - UNRELEASED
INCOMPATIBLE CHANGES
Modified: hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java?rev=1404629&r1=1404628&r2=1404629&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java Thu Nov 1 15:11:27 2012
@@ -265,10 +265,10 @@ public class ApplicationMasterService ex
// Oh damn! Sending reboot isn't enough. RM state is corrupted. TODO:
allocateResponse.setAMResponse(reboot);
return allocateResponse;
- }
-
+ }
+
// Allow only one thread in AM to do heartbeat at a time.
- synchronized (lastResponse) { // BUG TODO: Locking order is screwed.
+ synchronized (lastResponse) {
// Send the status update to the appAttempt.
this.rmContext.getDispatcher().getEventHandler().handle(
@@ -282,7 +282,8 @@ public class ApplicationMasterService ex
Allocation allocation =
this.rScheduler.allocate(appAttemptId, ask, release);
- RMApp app = this.rmContext.getRMApps().get(appAttemptId.getApplicationId());
+ RMApp app = this.rmContext.getRMApps().get(
+ appAttemptId.getApplicationId());
RMAppAttempt appAttempt = app.getRMAppAttempt(appAttemptId);
AMResponse response = recordFactory.newRecordInstance(AMResponse.class);
@@ -316,7 +317,18 @@ public class ApplicationMasterService ex
.pullJustFinishedContainers());
response.setResponseId(lastResponse.getResponseId() + 1);
response.setAvailableResources(allocation.getResourceLimit());
- responseMap.put(appAttemptId, response);
+
+ AMResponse oldResponse = responseMap.put(appAttemptId, response);
+ if (oldResponse == null) {
+ // appAttempt got unregistered, remove it back out
+ responseMap.remove(appAttemptId);
+ String message = "App Attempt removed from the cache during allocate"
+ + appAttemptId;
+ LOG.error(message);
+ allocateResponse.setAMResponse(reboot);
+ return allocateResponse;
+ }
+
allocateResponse.setAMResponse(response);
allocateResponse.setNumClusterNodes(this.rScheduler.getNumClusterNodes());
return allocateResponse;
@@ -331,12 +343,7 @@ public class ApplicationMasterService ex
}
public void unregisterAttempt(ApplicationAttemptId attemptId) {
- AMResponse lastResponse = responseMap.get(attemptId);
- if (lastResponse != null) {
- synchronized (lastResponse) {
- responseMap.remove(attemptId);
- }
- }
+ responseMap.remove(attemptId);
}
public void refreshServiceAcls(Configuration configuration,