You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by vi...@apache.org on 2011/04/05 10:40:07 UTC
svn commit: r1088923 [1/2] - in /hadoop/mapreduce/branches/MR-279:
mr-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/
mr-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/
src/contrib/fairsc...
Author: vinodkv
Date: Tue Apr 5 08:40:05 2011
New Revision: 1088923
URL: http://svn.apache.org/viewvc?rev=1088923&view=rev
Log:
Implement health-checks for the node - server side (ResourceManager) changes. Contributed by Vinod Kumar Vavilapalli.
Added:
hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/NodeHealthStatus.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/Clock.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthStatus.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthStatusPBImpl.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/NodeManagerImpl.java
Removed:
hadoop/mapreduce/branches/MR-279/mr-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Clock.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/NodeHealthStatus.java
Modified:
hadoop/mapreduce/branches/MR-279/mr-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java
hadoop/mapreduce/branches/MR-279/src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/FairScheduler.java
hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/JobTracker.java
hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/TaskTrackerStatus.java
hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/UserLogCleaner.java
hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/FakeObjectUtilities.java
hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/MiniMRCluster.java
hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestJobTrackerStart.java
hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestSetupTaskScheduling.java
hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestTaskTrackerBlacklisting.java
hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestTrackerReservation.java
hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/UtilsForTests.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationIdPBImpl.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/avro/ResourceTracker.genavro
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/NodeHealthCheckerService.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/proto/yarn_server_common_protos.proto
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/resources/yarn-default.xml
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/test/java/org/apache/hadoop/TestNodeHealthService.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeInfo.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeInfoTracker.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeStatus.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/RMResourceTrackerImpl.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/NodeManager.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceListener.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestApplicationCleanup.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMExpiry.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestRMNMRPCResponseId.java
Modified: hadoop/mapreduce/branches/MR-279/mr-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/mr-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/mr-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java (original)
+++ hadoop/mapreduce/branches/MR-279/mr-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java Tue Apr 5 08:40:05 2011
@@ -62,6 +62,8 @@ import org.apache.hadoop.yarn.ipc.RPCUti
import org.apache.hadoop.yarn.server.api.records.NodeId;
import org.apache.hadoop.yarn.server.resourcemanager.resourcetracker.NodeInfo;
import org.apache.hadoop.yarn.server.resourcemanager.resourcetracker.RMResourceTrackerImpl;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeManager;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeManagerImpl;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler;
import org.apache.hadoop.yarn.server.security.ContainerTokenSecretManager;
@@ -206,8 +208,10 @@ public class TestRMContainerAllocator {
nodeId.setId(0);
Resource resource = recordFactory.newRecordInstance(Resource.class);
resource.setMemory(memory);
- NodeInfo nodeManager = scheduler.addNode(nodeId, nodeName,
- RMResourceTrackerImpl.resolve(nodeName), resource); // Node registration
+ NodeManager nodeManager = new NodeManagerImpl(nodeId, nodeName,
+ RMResourceTrackerImpl.resolve(nodeName), resource
+ );
+ scheduler.addNode(nodeManager); // Node registration
return nodeManager;
}
Modified: hadoop/mapreduce/branches/MR-279/src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/FairScheduler.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/FairScheduler.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/FairScheduler.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/FairScheduler.java Tue Apr 5 08:40:05 2011
@@ -31,6 +31,7 @@ import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.Clock;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.http.HttpServer;
import org.apache.hadoop.mapreduce.TaskType;
Modified: hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/JobTracker.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/JobTracker.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/JobTracker.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/JobTracker.java Tue Apr 5 08:40:05 2011
@@ -55,6 +55,7 @@ import javax.security.auth.login.LoginEx
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.Clock;
import org.apache.hadoop.NodeHealthStatus;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
Added: hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/NodeHealthStatus.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/NodeHealthStatus.java?rev=1088923&view=auto
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/NodeHealthStatus.java (added)
+++ hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/NodeHealthStatus.java Tue Apr 5 08:40:05 2011
@@ -0,0 +1,109 @@
+package org.apache.hadoop.mapred;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * Mutable {@link Writable} that encapsulates the node health related
+ * fields: the healthy flag, the health report and the last report time.
+ * A null health report is tolerated; it is serialized as an empty string.
+ */
+public class NodeHealthStatus implements
+ org.apache.hadoop.yarn.api.records.NodeHealthStatus, Writable {
+ private boolean isNodeHealthy;
+ private String healthReport;
+ private long lastHealthReportTime;
+
+ /** Creates a status from the given values; healthReport may be null. */
+ public NodeHealthStatus(boolean isNodeHealthy, String healthReport,
+ long lastReported) {
+ this.isNodeHealthy = isNodeHealthy;
+ this.healthReport = healthReport;
+ this.lastHealthReportTime = lastReported;
+ }
+
+ /** Creates a healthy status with an empty report and the current time. */
+ public NodeHealthStatus() {
+ this.isNodeHealthy = true;
+ this.healthReport = "";
+ this.lastHealthReportTime = System.currentTimeMillis();
+ }
+
+ /**
+ * Sets whether the node is healthy, based on the output from the node
+ * health script.
+ *
+ * @param isNodeHealthy true if the node is healthy
+ */
+ public void setNodeHealthy(boolean isNodeHealthy) {
+ this.isNodeHealthy = isNodeHealthy;
+ }
+
+ /**
+ * Returns whether the node is healthy, based on the result from the node
+ * health script.
+ *
+ * @return true if the node is healthy.
+ */
+ public boolean isNodeHealthy() {
+ return isNodeHealthy;
+ }
+
+ /**
+ * Sets the health report based on the output from the health script.
+ *
+ * @param healthReport string listing the cause of failure; may be null
+ */
+ public void setHealthReport(String healthReport) {
+ this.healthReport = healthReport;
+ }
+
+ /**
+ * Returns the health report of the node, if any. The health report is
+ * only populated when the node is not healthy.
+ *
+ * @return the health report as last set or read; null if null was set
+ */
+ public String getHealthReport() {
+ return healthReport;
+ }
+
+ /**
+ * Sets when the node got its health information last from the node
+ * health monitoring service.
+ *
+ * @param lastReported last reported time by the node health script
+ */
+ public void setLastReported(long lastReported) {
+ this.lastHealthReportTime = lastReported;
+ }
+
+ /**
+ * Gets time of most recent node health update.
+ *
+ * @return time stamp of most recent health update.
+ */
+ public long getLastReported() {
+ return lastHealthReportTime;
+ }
+
+ /** Reads the healthy flag, the report and the report time, in that order. */
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ isNodeHealthy = in.readBoolean();
+ healthReport = Text.readString(in);
+ lastHealthReportTime = in.readLong();
+ }
+
+ /** Writes the fields in the order readFields expects; null report as "". */
+ @Override
+ public void write(DataOutput out) throws IOException {
+ out.writeBoolean(isNodeHealthy);
+ Text.writeString(out, healthReport == null ? "" : healthReport);
+ out.writeLong(lastHealthReportTime);
+ }
+}
\ No newline at end of file
Modified: hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/TaskTrackerStatus.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/TaskTrackerStatus.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/TaskTrackerStatus.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/TaskTrackerStatus.java Tue Apr 5 08:40:05 2011
@@ -19,7 +19,7 @@ package org.apache.hadoop.mapred;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.NodeHealthStatus;
+import org.apache.hadoop.mapred.NodeHealthStatus;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.io.*;
Modified: hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/UserLogCleaner.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/UserLogCleaner.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/UserLogCleaner.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/UserLogCleaner.java Tue Apr 5 08:40:05 2011
@@ -29,6 +29,7 @@ import java.util.Map.Entry;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.Clock;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
Modified: hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/FakeObjectUtilities.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/FakeObjectUtilities.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/FakeObjectUtilities.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/FakeObjectUtilities.java Tue Apr 5 08:40:05 2011
@@ -26,6 +26,7 @@ import javax.security.auth.login.LoginEx
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.Clock;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.TaskStatus.Phase;
Modified: hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/MiniMRCluster.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/MiniMRCluster.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/MiniMRCluster.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/MiniMRCluster.java Tue Apr 5 08:40:05 2011
@@ -28,6 +28,7 @@ import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.Clock;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.mapred.JobTracker.State;
import org.apache.hadoop.mapreduce.MRConfig;
Modified: hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestJobTrackerStart.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestJobTrackerStart.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestJobTrackerStart.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestJobTrackerStart.java Tue Apr 5 08:40:05 2011
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.mapred;
+import org.apache.hadoop.Clock;
import org.apache.hadoop.mapred.UtilsForTests.FakeClock;
import junit.framework.TestCase;
Modified: hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestSetupTaskScheduling.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestSetupTaskScheduling.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestSetupTaskScheduling.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestSetupTaskScheduling.java Tue Apr 5 08:40:05 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import org.apache.hadoop.Clock;
import org.apache.hadoop.mapred.FakeObjectUtilities.FakeJobInProgress;
import org.apache.hadoop.mapred.FakeObjectUtilities.FakeTaskInProgress;
import org.apache.hadoop.mapred.FakeObjectUtilities.FakeJobTracker;
Modified: hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestTaskTrackerBlacklisting.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestTaskTrackerBlacklisting.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestTaskTrackerBlacklisting.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestTaskTrackerBlacklisting.java Tue Apr 5 08:40:05 2011
@@ -33,7 +33,8 @@ import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
-import org.apache.hadoop.NodeHealthStatus;
+import org.apache.hadoop.Clock;
+import org.apache.hadoop.mapred.NodeHealthStatus;
import org.apache.hadoop.mapred.ClusterStatus.BlackListInfo;
import org.apache.hadoop.mapred.JobTracker.ReasonForBlackListing;
import org.apache.hadoop.mapreduce.TaskType;
Modified: hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestTrackerReservation.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestTrackerReservation.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestTrackerReservation.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestTrackerReservation.java Tue Apr 5 08:40:05 2011
@@ -22,6 +22,7 @@ import java.util.ArrayList;
import javax.security.auth.login.LoginException;
+import org.apache.hadoop.Clock;
import org.apache.hadoop.mapred.FakeObjectUtilities.FakeJobInProgress;
import org.apache.hadoop.mapreduce.ClusterMetrics;
import org.apache.hadoop.mapreduce.TaskType;
Modified: hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/UtilsForTests.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/UtilsForTests.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/UtilsForTests.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/UtilsForTests.java Tue Apr 5 08:40:05 2011
@@ -33,6 +33,7 @@ import java.util.List;
import java.util.Properties;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.Clock;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationIdPBImpl.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationIdPBImpl.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationIdPBImpl.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationIdPBImpl.java Tue Apr 5 08:40:05 2011
@@ -23,7 +23,6 @@ public class ApplicationIdPBImpl extends
}
public ApplicationIdProto getProto() {
- maybeInitBuilder();
proto = viaProto ? proto : builder.build();
viaProto = true;
return proto;
Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/avro/ResourceTracker.genavro
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/avro/ResourceTracker.genavro?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/avro/ResourceTracker.genavro (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/avro/ResourceTracker.genavro Tue Apr 5 08:40:05 2011
@@ -7,15 +7,19 @@ protocol ResourceTracker {
record NodeID {
int id;
}
-
+
+ record NodeHealthStatus {
+ boolean isNodeHealthy;
+ union {string, null} healthReport;
+ long lastHealthReportTime;
+ }
+
record NodeStatus {
NodeID nodeId;
int responseId;
long lastSeen;
map<array<org.apache.hadoop.yarn.Container>> containers;
- boolean isNodeHealthy;
- union {string, null} healthReport;
- long lastHealthReport;
+ NodeHealthStatus nodeHealthStatus;
}
record RegistrationResponse {
Added: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/Clock.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/Clock.java?rev=1088923&view=auto
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/Clock.java (added)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/Clock.java Tue Apr 5 08:40:05 2011
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop;
+
+// Public: classes in other packages (e.g. org.apache.hadoop.mapred) import it.
+/** A clock class - can be mocked out for testing. */
+public class Clock {
+ /** Returns the current time in milliseconds. */
+ public long getTime() {
+ return System.currentTimeMillis();
+ }
+}
\ No newline at end of file
Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/NodeHealthCheckerService.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/NodeHealthCheckerService.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/NodeHealthCheckerService.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/NodeHealthCheckerService.java Tue Apr 5 08:40:05 2011
@@ -31,6 +31,7 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.util.Shell.ExitCodeException;
import org.apache.hadoop.util.Shell.ShellCommandExecutor;
import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.service.AbstractService;
/**
@@ -213,7 +214,6 @@ public class NodeHealthCheckerService ex
public NodeHealthCheckerService(Configuration conf) {
this();
- this.conf = conf;
init(conf);
}
@@ -222,6 +222,7 @@ public class NodeHealthCheckerService ex
*/
@Override
public void init(Configuration conf) {
+ this.conf = conf;
this.nodeHealthScript =
conf.get(HEALTH_CHECK_SCRIPT_PROPERTY);
this.intervalTime = conf.getLong(HEALTH_CHECK_INTERVAL_PROPERTY,
@@ -364,9 +365,9 @@ public class NodeHealthCheckerService ex
* @param healthStatus
*/
public synchronized void setHealthStatus(NodeHealthStatus healthStatus) {
- healthStatus.setNodeHealthy(this.isHealthy());
+ healthStatus.setIsNodeHealthy(this.isHealthy());
healthStatus.setHealthReport(this.getHealthReport());
- healthStatus.setLastReported(this.getLastReportedTime());
+ healthStatus.setLastHealthReportTime(this.getLastReportedTime());
}
/**
Added: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthStatus.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthStatus.java?rev=1088923&view=auto
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthStatus.java (added)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthStatus.java Tue Apr 5 08:40:05 2011
@@ -0,0 +1,16 @@
+package org.apache.hadoop.yarn.server.api.records;
+
+/** Record of a node's health: healthy flag, report text and report time. */
+public interface NodeHealthStatus {
+ // Whether the node is healthy.
+ boolean getIsNodeHealthy();
+ void setIsNodeHealthy(boolean isNodeHealthy);
+
+ // Health report text.
+ String getHealthReport();
+ void setHealthReport(String healthReport);
+
+ // Time of the last health report.
+ long getLastHealthReportTime();
+ void setLastHealthReportTime(long lastHealthReport);
+}
\ No newline at end of file
Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java Tue Apr 5 08:40:05 2011
@@ -14,12 +14,10 @@ public interface NodeStatus {
public abstract Map<String, List<Container>> getAllContainers();
public abstract List<Container> getContainers(String key);
-
- public abstract boolean getIsNodeHealthy();
- public abstract String getHealthReport();
- public abstract long getLastHealthReport();
-
-
+
+ NodeHealthStatus getNodeHealthStatus();
+ void setNodeHealthStatus(NodeHealthStatus healthStatus);
+
public abstract void setNodeId(NodeId nodeId);
public abstract void setResponseId(int responseId);
public abstract void setLastSeen(long lastSeen);
@@ -28,8 +26,4 @@ public interface NodeStatus {
public abstract void setContainers(String key, List<Container> containers);
public abstract void removeContainers(String key);
public abstract void clearContainers();
-
- public abstract void setIsNodeHealthy(boolean isNodeHealthy);
- public abstract void setHealthReport(String healthReport);
- public abstract void setLastHealthReport(long lastHealthReport);
}
Added: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthStatusPBImpl.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthStatusPBImpl.java?rev=1088923&view=auto
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthStatusPBImpl.java (added)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthStatusPBImpl.java Tue Apr 5 08:40:05 2011
@@ -0,0 +1,115 @@
+package org.apache.hadoop.yarn.server.api.records.impl.pb;
+
+import org.apache.hadoop.yarn.api.records.ProtoBase;
+import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeHealthStatusProto;
+import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeHealthStatusProtoOrBuilder;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
+
+/**
+ * Protocol-buffer backed implementation of {@link NodeHealthStatus}.
+ *
+ * <p>The state lives either in the built {@code proto} message (when
+ * {@code viaProto} is true) or in the mutable {@code builder}; setters switch
+ * to the builder and {@link #getProto()} switches back to a built message.
+ */
+public class NodeHealthStatusPBImpl extends ProtoBase<NodeHealthStatusProto>
+    implements NodeHealthStatus {
+
+  private NodeHealthStatusProto.Builder builder;
+  private boolean viaProto = false;
+  private NodeHealthStatusProto proto = NodeHealthStatusProto
+      .getDefaultInstance();
+
+  /** Creates an empty record backed by a fresh builder. */
+  public NodeHealthStatusPBImpl() {
+    this.builder = NodeHealthStatusProto.newBuilder();
+  }
+
+  /** Wraps a built message; a builder is created on first mutation. */
+  public NodeHealthStatusPBImpl(NodeHealthStatusProto proto) {
+    this.proto = proto;
+    this.viaProto = true;
+  }
+
+  /**
+   * Returns the current state as a built message.
+   *
+   * @return the {@link NodeHealthStatusProto} holding this record's fields
+   */
+  public NodeHealthStatusProto getProto() {
+    mergeLocalToProto();
+    return this.proto;
+  }
+
+  /**
+   * Builds pending builder state into {@code proto}. Nothing is rebuilt when
+   * the record is already in message form, so repeated {@link #getProto()}
+   * calls do not allocate a new builder and message each time.
+   */
+  private void mergeLocalToProto() {
+    if (!this.viaProto) {
+      this.proto = this.builder.build();
+      this.viaProto = true;
+    }
+  }
+
+  /**
+   * Switches to builder form before a mutation, seeding the builder from the
+   * current message when one is not already active.
+   */
+  private void maybeInitBuilder() {
+    if (this.viaProto || this.builder == null) {
+      this.builder = NodeHealthStatusProto.newBuilder(this.proto);
+    }
+    this.viaProto = false;
+  }
+
+  @Override
+  public boolean getIsNodeHealthy() {
+    NodeHealthStatusProtoOrBuilder p =
+        this.viaProto ? this.proto : this.builder;
+    return p.getIsNodeHealthy();
+  }
+
+  @Override
+  public void setIsNodeHealthy(boolean isNodeHealthy) {
+    maybeInitBuilder();
+    this.builder.setIsNodeHealthy(isNodeHealthy);
+  }
+
+  /** @return the health report, or {@code null} when none has been set */
+  @Override
+  public String getHealthReport() {
+    NodeHealthStatusProtoOrBuilder p =
+        this.viaProto ? this.proto : this.builder;
+    if (!p.hasHealthReport()) {
+      return null;
+    }
+    return p.getHealthReport();
+  }
+
+  /** Sets the health report; passing {@code null} clears the field. */
+  @Override
+  public void setHealthReport(String healthReport) {
+    maybeInitBuilder();
+    if (healthReport == null) {
+      this.builder.clearHealthReport();
+      return;
+    }
+    this.builder.setHealthReport(healthReport);
+  }
+
+  @Override
+  public long getLastHealthReportTime() {
+    NodeHealthStatusProtoOrBuilder p =
+        this.viaProto ? this.proto : this.builder;
+    return p.getLastHealthReportTime();
+  }
+
+  @Override
+  public void setLastHealthReportTime(long lastHealthReport) {
+    maybeInitBuilder();
+    this.builder.setLastHealthReportTime(lastHealthReport);
+  }
+
+}
Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java Tue Apr 5 08:40:05 2011
@@ -12,10 +12,12 @@ import org.apache.hadoop.yarn.api.record
import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;
import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.ContainerListProto;
+import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeHealthStatusProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeIdProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeStatusProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeStatusProtoOrBuilder;
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.StringContainerListMapProto;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.server.api.records.NodeId;
import org.apache.hadoop.yarn.server.api.records.NodeStatus;
@@ -26,8 +28,7 @@ public class NodeStatusPBImpl extends Pr
private NodeId nodeId = null;
private Map<String, List<Container>> containers = null;
-
-
+ private NodeHealthStatus nodeHealthStatus = null;
public NodeStatusPBImpl() {
builder = NodeStatusProto.newBuilder();
@@ -53,6 +54,9 @@ public class NodeStatusPBImpl extends Pr
if (this.containers != null) {
addContainersToProto();
}
+ if (this.nodeHealthStatus != null) {
+ builder.setNodeHealthStatus(convertToProtoFormat(this.nodeHealthStatus));
+ }
}
private void mergeLocalToProto() {
@@ -184,18 +188,19 @@ public class NodeStatusPBImpl extends Pr
};
builder.addAllContainers(iterable);
}
-
+
@Override
- public boolean getIsNodeHealthy() {
+ public NodeHealthStatus getNodeHealthStatus() {
NodeStatusProtoOrBuilder p = viaProto ? proto : builder;
- return p.getIsNodeHealthy();
+ return new NodeHealthStatusPBImpl(p.getNodeHealthStatus());
}
-
+
@Override
- public void setIsNodeHealthy(boolean isNodeHealthy) {
+ public void setNodeHealthStatus(NodeHealthStatus healthStatus) {
maybeInitBuilder();
- builder.setIsNodeHealthy(isNodeHealthy);
+ builder.setNodeHealthStatus(convertToProtoFormat(healthStatus));
}
+
/*
*
* @Override
@@ -218,37 +223,6 @@ public class NodeStatusPBImpl extends Pr
}
*/
- @Override
- public String getHealthReport() {
- NodeStatusProtoOrBuilder p = viaProto ? proto : builder;
- if (!p.hasHealthReport()) {
- return null;
- }
- return (p.getHealthReport());
- }
-
- @Override
- public void setHealthReport(String healthReport) {
- maybeInitBuilder();
- if (healthReport == null) {
- builder.clearHealthReport();
- return;
- }
- builder.setHealthReport((healthReport));
- }
-
- @Override
- public long getLastHealthReport() {
- NodeStatusProtoOrBuilder p = viaProto ? proto : builder;
- return (p.getLastHealthReport());
- }
-
- @Override
- public void setLastHealthReport(long lastHealthReport) {
- maybeInitBuilder();
- builder.setLastHealthReport((lastHealthReport));
- }
-
private ContainerListProto convertToProtoFormat(List<Container> src) {
ContainerListProto.Builder ret = ContainerListProto.newBuilder();
for (Container c : src) {
@@ -294,5 +268,13 @@ public class NodeStatusPBImpl extends Pr
private NodeId convertFromProtoFormat(NodeIdProto proto) {
return new NodeIdPBImpl(proto);
}
-
+
+ private NodeHealthStatusProto convertToProtoFormat(
+ NodeHealthStatus healthStatus) {
+ return ((NodeHealthStatusPBImpl) healthStatus).getProto();
+ }
+
+ private NodeHealthStatus convertFromProtoFormat(NodeHealthStatusProto proto) {
+ return new NodeHealthStatusPBImpl(proto);
+ }
}
Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/proto/yarn_server_common_protos.proto
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/proto/yarn_server_common_protos.proto?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/proto/yarn_server_common_protos.proto (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/proto/yarn_server_common_protos.proto Tue Apr 5 08:40:05 2011
@@ -9,14 +9,20 @@ message NodeIdProto {
optional int32 id = 1;
}
+// Health of a node, carried inside NodeStatusProto.
+message NodeHealthStatusProto {
+ optional bool is_node_healthy = 1;
+ optional string health_report = 2;
+ optional int64 last_health_report_time = 3;
+}
+
message NodeStatusProto {
optional NodeIdProto node_id = 1;
optional int32 response_id = 2;
optional int64 last_seen = 3;
repeated StringContainerListMapProto containers = 4;
- optional bool is_node_healthy = 5;
- optional string health_report = 6;
- optional int64 last_health_report = 7;
+ // Replaces the three flat health fields formerly numbered 5-7.
+ optional NodeHealthStatusProto node_health_status = 5;
}
message RegistrationResponseProto {
Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/resources/yarn-default.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/resources/yarn-default.xml?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/resources/yarn-default.xml (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/resources/yarn-default.xml Tue Apr 5 08:40:05 2011
@@ -73,6 +73,36 @@
<property><name>yarn.server.nodemanager.connect.rm</name><value>true</value></property>
+ <!-- Node health-check settings; each property is declared once. -->
+ <property>
+ <name>yarn.server.nodemanager.healthchecker.script.path</name>
+ <value></value>
+ <description>Location of the node's health-check script on the local
+ file-system.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.server.nodemanager.healthchecker.interval</name>
+ <value>600000</value>
+ <description>Frequency of the health-check run by the NodeManager
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.server.nodemanager.healthchecker.script.timeout</name>
+ <value>1200000</value>
+ <description>Timeout for the health-check run by the NodeManager
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.server.nodemanager.healthchecker.script.args</name>
+ <value></value>
+ <description>Arguments to be passed to the health-check script run
+ by the NodeManager</description>
+ </property>
+
<!-- All MRAppMaster related configuration properties -->
<property>
Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/test/java/org/apache/hadoop/TestNodeHealthService.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/test/java/org/apache/hadoop/TestNodeHealthService.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/test/java/org/apache/hadoop/TestNodeHealthService.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/test/java/org/apache/hadoop/TestNodeHealthService.java Tue Apr 5 08:40:05 2011
@@ -29,6 +29,9 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.factories.RecordFactory;
+import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
@@ -106,7 +109,9 @@ public class TestNodeHealthService {
@Test
public void testNodeHealthScript() throws Exception {
- NodeHealthStatus healthStatus = new NodeHealthStatus();
+ RecordFactory factory = RecordFactoryProvider.getRecordFactory(null);
+ NodeHealthStatus healthStatus =
+ factory.newRecordInstance(NodeHealthStatus.class);
String errorScript = "echo ERROR\n echo \"Tracker not healthy\"";
String normalScript = "echo \"I am all fine\"";
String timeOutScript = "sleep 4\n echo\"I am fine\"";
@@ -124,7 +129,7 @@ public class TestNodeHealthService {
LOG.info("Checking initial healthy condition");
// Check proper report conditions.
Assert.assertTrue("Node health status reported unhealthy", healthStatus
- .isNodeHealthy());
+ .getIsNodeHealthy());
Assert.assertTrue("Node health status reported unhealthy", healthStatus
.getHealthReport().isEmpty());
@@ -137,7 +142,7 @@ public class TestNodeHealthService {
nodeHealthChecker.setHealthStatus(healthStatus);
LOG.info("Checking Healthy--->Unhealthy");
Assert.assertFalse("Node health status reported healthy", healthStatus
- .isNodeHealthy());
+ .getIsNodeHealthy());
Assert.assertFalse("Node health status reported healthy", healthStatus
.getHealthReport().isEmpty());
@@ -148,7 +153,7 @@ public class TestNodeHealthService {
LOG.info("Checking UnHealthy--->healthy");
// Check proper report conditions.
Assert.assertTrue("Node health status reported unhealthy", healthStatus
- .isNodeHealthy());
+ .getIsNodeHealthy());
Assert.assertTrue("Node health status reported unhealthy", healthStatus
.getHealthReport().isEmpty());
@@ -158,7 +163,7 @@ public class TestNodeHealthService {
nodeHealthChecker.setHealthStatus(healthStatus);
LOG.info("Checking Healthy--->timeout");
Assert.assertFalse("Node health status reported healthy even after timeout",
- healthStatus.isNodeHealthy());
+ healthStatus.getIsNodeHealthy());
Assert.assertEquals("Node time out message not propogated", healthStatus
.getHealthReport(),
NodeHealthCheckerService.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG);
Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java Tue Apr 5 08:40:05 2011
@@ -22,7 +22,7 @@ import java.util.concurrent.ConcurrentMa
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.NodeHealthStatus;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java Tue Apr 5 08:40:05 2011
@@ -28,7 +28,6 @@ import java.util.concurrent.ConcurrentMa
import java.util.concurrent.ConcurrentSkipListMap;
import org.apache.hadoop.NodeHealthCheckerService;
-import org.apache.hadoop.NodeHealthStatus;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.util.ReflectionUtils;
@@ -38,7 +37,10 @@ import org.apache.hadoop.yarn.api.record
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.AsyncDispatcher;
import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.factories.RecordFactory;
+import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.server.YarnServerConfig;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
@@ -154,9 +156,13 @@ public class NodeManager extends Composi
}
});
- private final NodeHealthStatus nodeHealthStatus = new NodeHealthStatus();
+ private final NodeHealthStatus nodeHealthStatus = RecordFactoryProvider
+ .getRecordFactory(null).newRecordInstance(NodeHealthStatus.class);
public NMContext() {
+ this.nodeHealthStatus.setIsNodeHealthy(true);
+ this.nodeHealthStatus.setHealthReport("Healthy");
+ this.nodeHealthStatus.setLastHealthReportTime(System.currentTimeMillis());
}
@Override
Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java Tue Apr 5 08:40:05 2011
@@ -29,7 +29,6 @@ import org.apache.avro.AvroRuntimeExcept
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.NodeHealthCheckerService;
-import org.apache.hadoop.NodeHealthStatus;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.net.NetUtils;
@@ -50,6 +49,7 @@ import org.apache.hadoop.yarn.server.api
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.server.api.records.NodeId;
import org.apache.hadoop.yarn.server.api.records.NodeStatus;
import org.apache.hadoop.yarn.server.api.records.RegistrationResponse;
@@ -209,13 +209,13 @@ public class NodeStatusUpdaterImpl exten
LOG.debug(this.nodeName + " sending out status for " + numActiveContainers
+ " containers");
+ NodeHealthStatus nodeHealthStatus = this.context.getNodeHealthStatus();
if (this.healthChecker != null) {
- NodeHealthStatus nodeHealthStatus = this.context.getNodeHealthStatus();
this.healthChecker.setHealthStatus(nodeHealthStatus);
- status.setIsNodeHealthy(nodeHealthStatus.isNodeHealthy());
- status.setHealthReport(nodeHealthStatus.getHealthReport());
- status.setLastHealthReport(nodeHealthStatus.getLastReported());
}
+ LOG.debug("Node's health-status : " + nodeHealthStatus.getIsNodeHealthy()
+ + ", " + nodeHealthStatus.getHealthReport());
+ status.setNodeHealthStatus(nodeHealthStatus);
return status;
}
Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeInfo.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeInfo.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeInfo.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeInfo.java Tue Apr 5 08:40:05 2011
@@ -19,6 +19,7 @@
package org.apache.hadoop.yarn.server.resourcemanager.resourcetracker;
import org.apache.hadoop.net.Node;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.server.api.records.NodeId;
/**
@@ -38,6 +39,11 @@ public interface NodeInfo {
*/
public String getHostName();
/**
+ * the health-status for this node
+ * @return the health-status for this node.
+ */
+ public NodeHealthStatus getNodeHealthStatus();
+ /**
* the total available resource.
* @return the total available resource.
*/
Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeInfoTracker.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeInfoTracker.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeInfoTracker.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeInfoTracker.java Tue Apr 5 08:40:05 2011
@@ -18,9 +18,8 @@
package org.apache.hadoop.yarn.server.resourcemanager.resourcetracker;
-import org.apache.hadoop.yarn.factories.RecordFactory;
-import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeManager;
/**
* Track the node info and heart beat responses for this node.
@@ -28,35 +27,33 @@ import org.apache.hadoop.yarn.server.api
*
*/
class NodeInfoTracker {
- private final NodeInfo node;
- private final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
+ private final NodeManager node;
HeartbeatResponse lastHeartBeatResponse;
- private org.apache.hadoop.yarn.server.api.records.NodeStatus nodeStatus = recordFactory.newRecordInstance(org.apache.hadoop.yarn.server.api.records.NodeStatus.class);
+ private long lastSeen;
- public NodeInfoTracker(NodeInfo node, HeartbeatResponse lastHeartBeatResponse) {
+ public NodeInfoTracker(NodeManager node, HeartbeatResponse lastHeartBeatResponse) {
this.node = node;
this.lastHeartBeatResponse = lastHeartBeatResponse;
- this.nodeStatus.setNodeId(node.getNodeID());
- this.nodeStatus.setLastSeen(System.currentTimeMillis());
+ this.lastSeen = System.currentTimeMillis();
}
- public synchronized NodeInfo getNodeInfo() {
+ public synchronized NodeManager getNodeManager() {
return this.node;
}
- public synchronized HeartbeatResponse getLastHeartBeatResponse() {
- return this.lastHeartBeatResponse;
+ public synchronized void updateLastSeen(long lastSeen) {
+ this.lastSeen = lastSeen;
}
- public synchronized void refreshHeartBeatResponse(HeartbeatResponse heartBeatResponse) {
- this.lastHeartBeatResponse = heartBeatResponse;
+ public synchronized long getNodeLastSeen() {
+ return this.lastSeen;
}
- public synchronized void updateNodeStatus(org.apache.hadoop.yarn.server.api.records.NodeStatus nodeStatus) {
- this.nodeStatus = nodeStatus;
+ public synchronized HeartbeatResponse getLastHeartBeatResponse() {
+ return this.lastHeartBeatResponse;
}
- public synchronized org.apache.hadoop.yarn.server.api.records.NodeStatus getNodeStatus() {
- return this.nodeStatus;
+ public synchronized void refreshHeartBeatResponse(HeartbeatResponse heartBeatResponse) {
+ this.lastHeartBeatResponse = heartBeatResponse;
}
}
\ No newline at end of file
Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeStatus.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeStatus.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeStatus.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeStatus.java Tue Apr 5 08:40:05 2011
@@ -22,16 +22,31 @@ import java.util.List;
import java.util.Map;
import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.server.api.records.NodeId;
public class NodeStatus {
+ /**
+ * Builds a status record for the given node and containers, marked healthy
+ * and last seen at the current time.
+ */
public static org.apache.hadoop.yarn.server.api.records.NodeStatus createNodeStatus(
NodeId nodeId, Map<String, List<Container>> containers) {
- org.apache.hadoop.yarn.server.api.records.NodeStatus nodeStatus = RecordFactoryProvider.getRecordFactory(null).newRecordInstance(org.apache.hadoop.yarn.server.api.records.NodeStatus.class);
+ RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
+ org.apache.hadoop.yarn.server.api.records.NodeStatus nodeStatus = recordFactory.newRecordInstance(org.apache.hadoop.yarn.server.api.records.NodeStatus.class);
nodeStatus.setNodeId(nodeId);
nodeStatus.addAllContainers(containers);
+ // Populate the health record before attaching it: in the PB-backed record,
+ // setNodeHealthStatus() copies the proto and getNodeHealthStatus() returns
+ // a new wrapper, so mutating the getter's result would be discarded.
+ NodeHealthStatus healthStatus =
+ recordFactory.newRecordInstance(NodeHealthStatus.class);
+ healthStatus.setIsNodeHealthy(true);
+ nodeStatus.setNodeHealthStatus(healthStatus);
+ nodeStatus.setLastSeen(System.currentTimeMillis());
return nodeStatus;
}
}
Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/RMResourceTrackerImpl.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/RMResourceTrackerImpl.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/RMResourceTrackerImpl.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/RMResourceTrackerImpl.java Tue Apr 5 08:40:05 2011
@@ -56,8 +56,11 @@ import org.apache.hadoop.yarn.server.api
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse;
import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.server.api.records.NodeId;
import org.apache.hadoop.yarn.server.api.records.RegistrationResponse;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeManager;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeManagerImpl;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeResponse;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceListener;
import org.apache.hadoop.yarn.server.security.ContainerTokenSecretManager;
@@ -82,17 +85,21 @@ ResourceTracker, ResourceContext {
private static final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
- private final TreeSet<org.apache.hadoop.yarn.server.api.records.NodeStatus> nmExpiryQueue =
- new TreeSet<org.apache.hadoop.yarn.server.api.records.NodeStatus>(
- new Comparator<org.apache.hadoop.yarn.server.api.records.NodeStatus>() {
- public int compare(org.apache.hadoop.yarn.server.api.records.NodeStatus p1, org.apache.hadoop.yarn.server.api.records.NodeStatus p2) {
- if (p1.getLastSeen() < p2.getLastSeen()) {
+ private final TreeSet<NodeId> nmExpiryQueue =
+ new TreeSet<NodeId>(
+ new Comparator<NodeId>() {
+ public int compare(NodeId n1, NodeId n2) {
+ NodeInfoTracker nit1 = nodeManagers.get(n1);
+ NodeInfoTracker nit2 = nodeManagers.get(n2);
+ long p1LastSeen = nit1.getNodeLastSeen();
+ long p2LastSeen = nit2.getNodeLastSeen();
+ if (p1LastSeen < p2LastSeen) {
return -1;
- } else if (p1.getLastSeen() > p2.getLastSeen()) {
+ } else if (p1LastSeen > p2LastSeen) {
return 1;
} else {
- return (p1.getNodeId().getId() -
- p2.getNodeId().getId());
+ return (nit1.getNodeManager().getNodeID().getId() -
+ nit2.getNodeManager().getNodeID().getId());
}
}
}
@@ -164,21 +171,22 @@ ResourceTracker, ResourceContext {
synchronized(nodeManagers) {
if (!nodeManagers.containsKey(nodeId)) {
/* we do the resolving here, so that scheduler does not have to do it */
- NodeInfo nodeManager = resourceListener.addNode(nodeId, node.toString(),
- resolve(node.toString()),
- capability);
+ NodeManager nodeManager =
+ new NodeManagerImpl(nodeId, node.toString(),
+ resolve(node.toString()),
+ capability);
+ // Inform the scheduler
+ resourceListener.addNode(nodeManager);
HeartbeatResponse response = recordFactory.newRecordInstance(HeartbeatResponse.class);
response.setResponseId(0);
nTracker = new NodeInfoTracker(nodeManager, response);
nodeManagers.put(nodeId, nTracker);
} else {
nTracker = nodeManagers.get(nodeId);
- org.apache.hadoop.yarn.server.api.records.NodeStatus status = nTracker.getNodeStatus();
- status.setLastSeen(System.currentTimeMillis());
- nTracker.updateNodeStatus(status);
+ nTracker.updateLastSeen(System.currentTimeMillis());
}
}
- addForTracking(nTracker.getNodeStatus());
+ addForTracking(nodeId);
LOG.info("NodeManager from node " + node + " registered with capability: " +
capability.getMemory() + ", assigned nodeId " + nodeId.getId());
@@ -194,35 +202,38 @@ ResourceTracker, ResourceContext {
@Override
public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) throws YarnRemoteException {
- org.apache.hadoop.yarn.server.api.records.NodeStatus nodeStatus = request.getNodeStatus();
- nodeStatus.setLastSeen(System.currentTimeMillis());
+ org.apache.hadoop.yarn.server.api.records.NodeStatus remoteNodeStatus = request.getNodeStatus();
+ remoteNodeStatus.setLastSeen(System.currentTimeMillis());
NodeInfoTracker nTracker = null;
NodeHeartbeatResponse nodeHbResponse = recordFactory.newRecordInstance(NodeHeartbeatResponse.class);
synchronized(nodeManagers) {
- nTracker = nodeManagers.get(nodeStatus.getNodeId());
+ nTracker = nodeManagers.get(remoteNodeStatus.getNodeId());
}
if (nTracker == null) {
/* node does not exist */
- LOG.info("Node not found rebooting " + nodeStatus.getNodeId());
+ LOG.info("Node not found rebooting " + remoteNodeStatus.getNodeId());
nodeHbResponse.setHeartbeatResponse(reboot);
return nodeHbResponse;
}
- NodeInfo nodeInfo = nTracker.getNodeInfo();
+ NodeManager nodeManager = nTracker.getNodeManager();
/* check to see if its an old heartbeat */
- if (nodeStatus.getResponseId() + 1 == nTracker.getLastHeartBeatResponse().getResponseId()) {
+ if (remoteNodeStatus.getResponseId() + 1 == nTracker
+ .getLastHeartBeatResponse().getResponseId()) {
nodeHbResponse.setHeartbeatResponse(nTracker.getLastHeartBeatResponse());
return nodeHbResponse;
- } else if (nodeStatus.getResponseId() + 1 < nTracker.getLastHeartBeatResponse().getResponseId()) {
- LOG.info("Too far behind rm response id:" +
- nTracker.lastHeartBeatResponse.getResponseId() + " nm response id:" + nodeStatus.getResponseId());
+ } else if (remoteNodeStatus.getResponseId() + 1 < nTracker
+ .getLastHeartBeatResponse().getResponseId()) {
+ LOG.info("Too far behind rm response id:" +
+ nTracker.lastHeartBeatResponse.getResponseId() + " nm response id:"
+ + remoteNodeStatus.getResponseId());
nodeHbResponse.setHeartbeatResponse(reboot);
return nodeHbResponse;
}
/* inform any listeners of node heartbeats */
NodeResponse nodeResponse = resourceListener.nodeUpdate(
- nodeInfo, nodeStatus.getAllContainers());
+ nodeManager, remoteNodeStatus.getAllContainers());
HeartbeatResponse response = recordFactory.newRecordInstance(HeartbeatResponse.class);
@@ -232,15 +243,43 @@ ResourceTracker, ResourceContext {
response.setResponseId(nTracker.getLastHeartBeatResponse().getResponseId() + 1);
nTracker.refreshHeartBeatResponse(response);
- nTracker.updateNodeStatus(nodeStatus);
+ nTracker.updateLastSeen(remoteNodeStatus.getLastSeen());
+ boolean prevHealthStatus =
+ nTracker.getNodeManager().getNodeHealthStatus().getIsNodeHealthy();
+ NodeHealthStatus remoteNodeHealthStatus =
+ remoteNodeStatus.getNodeHealthStatus();
+ nTracker.getNodeManager().updateHealthStatus(
+ remoteNodeHealthStatus);
+// prevHealthStatus (above) is read before updateHealthStatus() so that the comparison below still sees the node's previous state.
nodeHbResponse.setHeartbeatResponse(response);
+
+ // Take care of node-health: compare the previous state with the one just reported.
+ // NOTE(review): remoteNodeHealthStatus is dereferenced without a null check - confirm every heartbeat carries it.
+ if (prevHealthStatus != remoteNodeHealthStatus
+ .getIsNodeHealthy()) {
+ // Node's health-status changed.
+ if (!remoteNodeHealthStatus.getIsNodeHealthy()) {
+ // TODO: Node has become unhealthy, remove?
+// LOG.info("Node " + nodeManager.getNodeID()
+// + " has become unhealthy. Health-check report: "
+// + remoteNodeStatus.nodeHealthStatus.healthReport
+// + "Removing it from the scheduler.");
+// resourceListener.removeNode(nodeManager);
+ } else {
+ // TODO: Node has become healthy back again, add?
+// LOG.info("Node " + nodeManager.getNodeID()
+// + " has become healthy back again. Health-check report: "
+// + remoteNodeStatus.nodeHealthStatus.healthReport
+// + " Adding it to the scheduler.");
+// this.resourceListener.addNode(nodeManager);
+ }
+ }
return nodeHbResponse;
}
@Private
public synchronized NodeInfo getNodeManager(NodeId nodeId) {
NodeInfoTracker ntracker = nodeManagers.get(nodeId);
- return (ntracker == null ? null: ntracker.getNodeInfo());
+ return (ntracker == null ? null: ntracker.getNodeManager());
}
private synchronized NodeId getNodeId(String node) {
@@ -274,22 +313,22 @@ ResourceTracker, ResourceContext {
List<NodeInfo> infoList = new ArrayList<NodeInfo>();
synchronized (nodeManagers) {
for (NodeInfoTracker t : nodeManagers.values()) {
- infoList.add(t.getNodeInfo());
+ infoList.add(t.getNodeManager());
}
}
return infoList;
}
- protected void addForTracking(org.apache.hadoop.yarn.server.api.records.NodeStatus status) {
+ protected void addForTracking(NodeId nodeID) {
synchronized(nmExpiryQueue) {
- nmExpiryQueue.add(status);
+ nmExpiryQueue.add(nodeID);
}
}
protected void expireNMs(List<NodeId> nodes) {
for (NodeId id: nodes) {
synchronized (nodeManagers) {
- NodeInfo nInfo = nodeManagers.get(id).getNodeInfo();
+ NodeInfo nInfo = nodeManagers.get(id).getNodeManager();
nodeManagers.remove(id);
resourceListener.removeNode(nInfo);
}
@@ -319,27 +358,31 @@ ResourceTracker, ResourceContext {
LOG.info("Starting expiring thread with interval " + nmExpiryInterval);
while (!stop) {
- org.apache.hadoop.yarn.server.api.records.NodeStatus leastRecent;
long now = System.currentTimeMillis();
expired.clear();
synchronized(nmExpiryQueue) {
+ NodeId leastRecent;
while ((nmExpiryQueue.size() > 0) &&
- (leastRecent = nmExpiryQueue.first()) != null &&
- ((now - leastRecent.getLastSeen()) >
- nmExpiryInterval)) {
+ (leastRecent = nmExpiryQueue.first()) != null) {
nmExpiryQueue.remove(leastRecent);
NodeInfoTracker info;
synchronized(nodeManagers) {
- info = nodeManagers.get(leastRecent.getNodeId());
+ info = nodeManagers.get(leastRecent);
}
if (info == null) {
continue;
}
- org.apache.hadoop.yarn.server.api.records.NodeStatus status = info.getNodeStatus();
- if ((now - status.getLastSeen()) > nmExpiryInterval) {
- expired.add(status.getNodeId());
+ NodeId nodeID = info.getNodeManager().getNodeID();
+ if ((now - info.getNodeLastSeen()) > nmExpiryInterval) { // elapsed time in milliseconds: both operands come from System.currentTimeMillis()
+ LOG.info("Going to expire the node-manager " + nodeID
+ + " because of no updates for "
+ + (now - info.getNodeLastSeen())
+ + " ms ( > expiry interval of " + nmExpiryInterval
+ + " ms).");
+ expired.add(nodeID);
} else {
- nmExpiryQueue.add(status);
+ nmExpiryQueue.add(nodeID);
+ break; // the queue's comparator orders by last-seen time, so once the oldest entry is still fresh there is nothing more to expire this round
}
}
}
Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/NodeManager.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/NodeManager.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/NodeManager.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/NodeManager.java Tue Apr 5 08:40:05 2011
@@ -1,321 +1,26 @@
-/**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
-import java.util.ArrayList;
-import java.util.Arrays;
import java.util.List;
import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-import java.util.TreeSet;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.classification.InterfaceAudience.Private;
-import org.apache.hadoop.classification.InterfaceStability.Unstable;
-import org.apache.hadoop.net.Node;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
-import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.api.records.ContainerState;
-import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.factories.RecordFactory;
-import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
-import org.apache.hadoop.yarn.server.api.records.NodeId;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.server.resourcemanager.resourcetracker.NodeInfo;
-/**
- * This class is used by ClusterInfo to keep track of all the applications/containers
- * running on a node.
- *
- */
-@Private
-@Unstable
-public class NodeManager implements NodeInfo {
- private static final Log LOG = LogFactory.getLog(NodeManager.class);
- private static final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
- private final NodeId nodeId;
- private final String hostName;
- private Resource totalCapability;
- private Resource availableResource = recordFactory.newRecordInstance(Resource.class);
- private Resource usedResource = recordFactory.newRecordInstance(Resource.class);
- private final Node node;
-
-
- private static final Container[] EMPTY_CONTAINER_ARRAY = new Container[] {};
- private static final List<Container> EMPTY_CONTAINER_LIST = Arrays.asList(EMPTY_CONTAINER_ARRAY);
- private static final ApplicationId[] EMPTY_APPLICATION_ARRAY = new ApplicationId[]{};
- private static final List<ApplicationId> EMPTY_APPLICATION_LIST = Arrays.asList(EMPTY_APPLICATION_ARRAY);
-
- public static final String ANY = "*";
- /* set of containers that are allocated containers */
- private final Map<ContainerId, Container> allocatedContainers =
- new TreeMap<ContainerId, Container>();
-
- /* set of containers that are currently active on a node manager */
- private final Map<ContainerId, Container> activeContainers =
- new TreeMap<ContainerId, Container>();
-
- /* set of containers that need to be cleaned */
- private final Set<Container> containersToClean =
- new TreeSet<Container>(new org.apache.hadoop.yarn.server.resourcemanager.resource.Container.Comparator());
-
-
- /* the list of applications that have finished and need to be purged */
- private final List<ApplicationId> finishedApplications = new ArrayList<ApplicationId>();
-
- private volatile int numContainers;
-
- public NodeManager(NodeId nodeId, String hostname,
- Node node, Resource capability) {
- this.nodeId = nodeId;
- this.totalCapability = capability;
- this.hostName = hostname;
- org.apache.hadoop.yarn.server.resourcemanager.resource.Resource.addResource(
- availableResource, capability);
- this.node = node;
- }
-
- /**
- * NodeInfo for this node.
- * @return the {@link NodeInfo} for this node.
- */
- public NodeInfo getNodeInfo() {
- return this;
- }
-
- /**
- * The Scheduler has allocated containers on this node to the
- * given application.
- *
- * @param applicationId application
- * @param containers allocated containers
- */
- public synchronized void allocateContainer(ApplicationId applicationId,
- List<Container> containers) {
- if (containers == null) {
- LOG.error("Adding null containers for application " + applicationId);
- return;
- }
- for (Container container : containers) {
- allocateContainer(container);
- }
-
- LOG.info("addContainers:" +
- " node=" + getHostName() +
- " #containers=" + containers.size() +
- " available=" + getAvailableResource().getMemory() +
- " used=" + getUsedResource().getMemory());
- }
-
- /**
- * Status update from the NodeManager
- * @param nodeStatus node status
- * @return the set of containers no longer should be used by the
- * node manager.
- */
- public synchronized NodeResponse
- statusUpdate(Map<String,List<Container>> allContainers) {
-
- if (allContainers == null) {
- return new NodeResponse(EMPTY_APPLICATION_LIST, EMPTY_CONTAINER_LIST,
- EMPTY_CONTAINER_LIST);
- }
-
- List<Container> listContainers = new ArrayList<Container>();
- // Iterate through the running containers and update their status
- for (Map.Entry<String, List<Container>> e :
- allContainers.entrySet()) {
- listContainers.addAll(e.getValue());
- }
- NodeResponse statusCheck = update(listContainers);
- return statusCheck;
- }
-
- /**
- * Status update for an application running on a given node
- * @param node node
- * @param containers containers update.
- * @return containers that are completed or need to be preempted.
- */
- private synchronized NodeResponse update(List<Container> containers) {
- List<Container> completedContainers = new ArrayList<Container>();
- List<Container> containersToCleanUp = new ArrayList<Container>();
- List<ApplicationId> lastfinishedApplications = new ArrayList<ApplicationId>();
-
- for (Container container : containers) {
- if (allocatedContainers.remove(container.getId()) != null) {
- activeContainers.put(container.getId(), container);
- LOG.info("Activated container " + container.getId() + " on node " +
- getHostName());
- }
-
- if (container.getState() == ContainerState.COMPLETE) {
- if (activeContainers.remove(container.getId()) != null) {
- updateResource(container);
- LOG.info("Completed container " + container);
- }
- completedContainers.add(container);
- LOG.info("Removed completed container " + container.getId() + " on node " +
- getHostName());
- }
- else if (container.getState() != ContainerState.COMPLETE &&
- (!allocatedContainers.containsKey(container.getId())) &&
- !activeContainers.containsKey(container.getId())) {
- containersToCleanUp.add(container);
- }
- }
- containersToCleanUp.addAll(containersToClean);
- /* clear out containers to clean */
- containersToClean.clear();
- lastfinishedApplications.addAll(finishedApplications);
- return new NodeResponse(lastfinishedApplications, completedContainers,
- containersToCleanUp);
- }
-
- private synchronized void allocateContainer(Container container) {
- deductAvailableResource(container.getResource());
- ++numContainers;
-
- allocatedContainers.put(container.getId(), container);
- LOG.info("Allocated container " + container.getId() +
- " to node " + getHostName());
-
- LOG.info("Assigned container " + container.getId() +
- " of capacity " + container.getResource() + " on host " + getHostName() +
- ", which currently has " + numContainers + " containers, " +
- getUsedResource() + " used and " +
- getAvailableResource() + " available");
- }
-
- private synchronized boolean isValidContainer(Container c) {
- if (activeContainers.containsKey(c.getId()) || allocatedContainers.containsKey(c.getId()))
- return true;
- return false;
- }
-
- private synchronized void updateResource(Container container) {
- addAvailableResource(container.getResource());
- --numContainers;
- }
-
- /**
- * Release an allocated container on this node.
- * @param container container to be released
- * @return <code>true</code> iff the container was unused,
- * <code>false</code> otherwise
- */
- public synchronized boolean releaseContainer(Container container) {
- if (!isValidContainer(container)) {
- LOG.error("Invalid container released " + container);
- return false;
- }
-
- /* remove the containers from the nodemanger */
-
- // Was this container launched?
- activeContainers.remove(container.getId());
- allocatedContainers.remove(container.getId());
- containersToClean.add(container);
- updateResource(container);
-
- LOG.info("Released container " + container.getId() +
- " of capacity " + container.getResource() + " on host " + getHostName() +
- ", which currently has " + numContainers + " containers, " +
- getUsedResource() + " used and " + getAvailableResource()
- + " available" + ", release resources=" + true);
- return true;
- }
-
- @Override
- public NodeId getNodeID() {
- return this.nodeId;
- }
-
- @Override
- public String getHostName() {
- return this.hostName;
- }
-
- @Override
- public Resource getTotalCapability() {
- return this.totalCapability;
- }
-
- @Override
- public String getRackName() {
- return node.getNetworkLocation();
- }
+/** Node operations used by the resource tracker and the scheduler, layered on {@link NodeInfo}; this change replaces the former concrete class of the same name with this interface. */ public interface NodeManager extends NodeInfo {
- @Override
- public Node getNode() {
- return this.node;
- }
+ /** Wildcard string; presumably stands for "any location" in resource requests - TODO confirm against callers. */ public static final String ANY = "*";
- @Override
- public synchronized Resource getAvailableResource() {
- return this.availableResource;
- }
+ /** Records that the scheduler has allocated the given containers on this node to the given application. */ void allocateContainer(ApplicationId applicationId,
+ List<Container> containers);
- @Override
- public synchronized Resource getUsedResource() {
- return this.usedResource;
- }
+ /** Releases a container on this node; the removed implementation returned false when the container was neither allocated nor active here, true once it was released. */ boolean releaseContainer(Container container);
- public synchronized void addAvailableResource(Resource resource) {
- if (resource == null) {
- LOG.error("Invalid resource addition of null resource for " + this.hostName);
- return;
- }
- org.apache.hadoop.yarn.server.resourcemanager.resource.Resource.addResource(
- availableResource, resource);
- org.apache.hadoop.yarn.server.resourcemanager.resource.Resource.subtractResource(
- usedResource, resource);
- }
+ /** Updates this node's health status; the heartbeat handler passes the status taken from the node's reported NodeStatus. */ void updateHealthStatus(NodeHealthStatus healthStatus);
- public synchronized void deductAvailableResource(Resource resource) {
- if (resource == null) {
- LOG.error("Invalid deduction of null resource for "+ this.hostName);
- }
- org.apache.hadoop.yarn.server.resourcemanager.resource.Resource.subtractResource(
- availableResource, resource);
- org.apache.hadoop.yarn.server.resourcemanager.resource.Resource.addResource(
- usedResource, resource);
- }
+ /** Processes a status update from the node; in the removed implementation the map values were the node's container lists and the result carried finished applications, completed containers and containers to clean up. */ NodeResponse statusUpdate(Map<String, List<Container>> containers);
- public synchronized void notifyFinishedApplication(ApplicationId applicationId) {
- finishedApplications.add(applicationId);
- /* make sure to iterate through the list and remove all the containers that
- * belong to this application.
- */
- }
+ /** Notes that the application has finished; the removed implementation queued the id and reported it in subsequent NodeResponses. */ void notifyFinishedApplication(ApplicationId applicationId);
- @Override
- public int getNumContainers() {
- return numContainers;
- }
-
- @Override
- public String toString() {
- return "host: " + getHostName() + " #containers=" + getNumContainers() +
- " available=" + getAvailableResource().getMemory() +
- " used=" + getUsedResource().getMemory();
- }
- }
+}