You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by vi...@apache.org on 2011/04/05 10:40:07 UTC

svn commit: r1088923 [1/2] - in /hadoop/mapreduce/branches/MR-279: mr-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/ mr-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/ src/contrib/fairsc...

Author: vinodkv
Date: Tue Apr  5 08:40:05 2011
New Revision: 1088923

URL: http://svn.apache.org/viewvc?rev=1088923&view=rev
Log:
Implement health-checks for the node - server side(ResourceManager) changes. Contributed by Vinod Kumar Vavilapalli.

Added:
    hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/NodeHealthStatus.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/Clock.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthStatus.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthStatusPBImpl.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/NodeManagerImpl.java
Removed:
    hadoop/mapreduce/branches/MR-279/mr-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Clock.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/NodeHealthStatus.java
Modified:
    hadoop/mapreduce/branches/MR-279/mr-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java
    hadoop/mapreduce/branches/MR-279/src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/FairScheduler.java
    hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/JobTracker.java
    hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/TaskTrackerStatus.java
    hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/UserLogCleaner.java
    hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/FakeObjectUtilities.java
    hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/MiniMRCluster.java
    hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestJobTrackerStart.java
    hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestSetupTaskScheduling.java
    hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestTaskTrackerBlacklisting.java
    hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestTrackerReservation.java
    hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/UtilsForTests.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationIdPBImpl.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/avro/ResourceTracker.genavro
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/NodeHealthCheckerService.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/proto/yarn_server_common_protos.proto
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/resources/yarn-default.xml
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/test/java/org/apache/hadoop/TestNodeHealthService.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeInfo.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeInfoTracker.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeStatus.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/RMResourceTrackerImpl.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/NodeManager.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceListener.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestApplicationCleanup.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMExpiry.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestRMNMRPCResponseId.java

Modified: hadoop/mapreduce/branches/MR-279/mr-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/mr-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/mr-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java (original)
+++ hadoop/mapreduce/branches/MR-279/mr-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java Tue Apr  5 08:40:05 2011
@@ -62,6 +62,8 @@ import org.apache.hadoop.yarn.ipc.RPCUti
 import org.apache.hadoop.yarn.server.api.records.NodeId;
 import org.apache.hadoop.yarn.server.resourcemanager.resourcetracker.NodeInfo;
 import org.apache.hadoop.yarn.server.resourcemanager.resourcetracker.RMResourceTrackerImpl;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeManager;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeManagerImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler;
 import org.apache.hadoop.yarn.server.security.ContainerTokenSecretManager;
@@ -206,8 +208,10 @@ public class TestRMContainerAllocator {
     nodeId.setId(0);
     Resource resource = recordFactory.newRecordInstance(Resource.class);
     resource.setMemory(memory);
-    NodeInfo nodeManager = scheduler.addNode(nodeId, nodeName,
-        RMResourceTrackerImpl.resolve(nodeName), resource); // Node registration
+    NodeManager nodeManager = new NodeManagerImpl(nodeId, nodeName,
+        RMResourceTrackerImpl.resolve(nodeName), resource
+        );
+    scheduler.addNode(nodeManager); // Node registration
     return nodeManager;
   }
 

Modified: hadoop/mapreduce/branches/MR-279/src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/FairScheduler.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/FairScheduler.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/FairScheduler.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/FairScheduler.java Tue Apr  5 08:40:05 2011
@@ -31,6 +31,7 @@ import java.util.Set;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.Clock;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.http.HttpServer;
 import org.apache.hadoop.mapreduce.TaskType;

Modified: hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/JobTracker.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/JobTracker.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/JobTracker.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/JobTracker.java Tue Apr  5 08:40:05 2011
@@ -55,6 +55,7 @@ import javax.security.auth.login.LoginEx
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.Clock;
 import org.apache.hadoop.NodeHealthStatus;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;

Added: hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/NodeHealthStatus.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/NodeHealthStatus.java?rev=1088923&view=auto
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/NodeHealthStatus.java (added)
+++ hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/NodeHealthStatus.java Tue Apr  5 08:40:05 2011
@@ -0,0 +1,109 @@
+package org.apache.hadoop.mapred;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * Writable holder for a node's health flag, report text and report time.
+ * NOTE(review): the implements clause names yarn.api.records, while this
+ * commit adds NodeHealthStatus under yarn.server.api.records - confirm.
+ */
+public class NodeHealthStatus implements
+    org.apache.hadoop.yarn.api.records.NodeHealthStatus, Writable {
+
+  private boolean isNodeHealthy;
+  private String healthReport;
+  private long lastHealthReportTime;
+  /** Creates a status from explicit values; healthReport may be null. */
+  public NodeHealthStatus(boolean isNodeHealthy, String healthReport,
+      long lastReported) {
+    this.isNodeHealthy = isNodeHealthy;
+    this.healthReport = healthReport;
+    this.lastHealthReportTime = lastReported;
+  }
+  /** Creates a healthy status with an empty report and the current time. */
+  public NodeHealthStatus() {
+    this.isNodeHealthy = true;
+    this.healthReport = "";
+    this.lastHealthReportTime = System.currentTimeMillis();
+  }
+
+  /**
+   * Sets whether the node is healthy, based on the output of the node
+   * health script.
+   *
+   * @param isNodeHealthy true if the node is healthy
+   */
+  public void setNodeHealthy(boolean isNodeHealthy) {
+    this.isNodeHealthy = isNodeHealthy;
+  }
+
+  /**
+   * Returns whether the node is healthy, based on the result of the node
+   * health script.
+   *
+   * @return true if the node is healthy.
+   */
+  public boolean isNodeHealthy() {
+    return isNodeHealthy;
+  }
+
+  /**
+   * Sets the health report based on the output from the health script.
+   *
+   * @param healthReport
+   *          String listing cause of failure; may be null.
+   */
+  public void setHealthReport(String healthReport) {
+    this.healthReport = healthReport;
+  }
+
+  /**
+   * Returns the health report of the node, if any. The health report is
+   * only populated when the node is not healthy.
+   *
+   * @return health report of the node, or null if a null report was set
+   */
+  public String getHealthReport() {
+    return healthReport;
+  }
+
+  /**
+   * Sets the time at which the node last received health information
+   * from the node health monitoring service.
+   *
+   * @param lastReported time last reported by the node
+   *          health script
+   */
+  public void setLastReported(long lastReported) {
+    this.lastHealthReportTime = lastReported;
+  }
+
+  /**
+   * Gets time of most recent node health update.
+   *
+   * @return time stamp of most recent health update.
+   */
+  public long getLastReported() {
+    return lastHealthReportTime;
+  }
+  /** Reads the healthy flag, the report and the report time, in that order. */
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    isNodeHealthy = in.readBoolean();
+    healthReport = Text.readString(in);
+    lastHealthReportTime = in.readLong();
+  }
+  /** Writes the fields in readFields order; a null report is written as "". */
+  @Override
+  public void write(DataOutput out) throws IOException {
+    out.writeBoolean(isNodeHealthy);
+    Text.writeString(out, healthReport == null ? "" : healthReport);
+    out.writeLong(lastHealthReportTime);
+  }
+  
+}
\ No newline at end of file

Modified: hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/TaskTrackerStatus.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/TaskTrackerStatus.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/TaskTrackerStatus.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/TaskTrackerStatus.java Tue Apr  5 08:40:05 2011
@@ -19,7 +19,7 @@ package org.apache.hadoop.mapred;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.NodeHealthStatus;
+import org.apache.hadoop.mapred.NodeHealthStatus;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.io.*;

Modified: hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/UserLogCleaner.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/UserLogCleaner.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/UserLogCleaner.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/java/org/apache/hadoop/mapred/UserLogCleaner.java Tue Apr  5 08:40:05 2011
@@ -29,6 +29,7 @@ import java.util.Map.Entry;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 
+import org.apache.hadoop.Clock;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;

Modified: hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/FakeObjectUtilities.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/FakeObjectUtilities.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/FakeObjectUtilities.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/FakeObjectUtilities.java Tue Apr  5 08:40:05 2011
@@ -26,6 +26,7 @@ import javax.security.auth.login.LoginEx
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.Clock;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.TaskStatus.Phase;

Modified: hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/MiniMRCluster.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/MiniMRCluster.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/MiniMRCluster.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/MiniMRCluster.java Tue Apr  5 08:40:05 2011
@@ -28,6 +28,7 @@ import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.Clock;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.mapred.JobTracker.State;
 import org.apache.hadoop.mapreduce.MRConfig;

Modified: hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestJobTrackerStart.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestJobTrackerStart.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestJobTrackerStart.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestJobTrackerStart.java Tue Apr  5 08:40:05 2011
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.mapred;
 
+import org.apache.hadoop.Clock;
 import org.apache.hadoop.mapred.UtilsForTests.FakeClock;
 
 import junit.framework.TestCase;

Modified: hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestSetupTaskScheduling.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestSetupTaskScheduling.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestSetupTaskScheduling.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestSetupTaskScheduling.java Tue Apr  5 08:40:05 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.hadoop.Clock;
 import org.apache.hadoop.mapred.FakeObjectUtilities.FakeJobInProgress;
 import org.apache.hadoop.mapred.FakeObjectUtilities.FakeTaskInProgress;
 import org.apache.hadoop.mapred.FakeObjectUtilities.FakeJobTracker;

Modified: hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestTaskTrackerBlacklisting.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestTaskTrackerBlacklisting.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestTaskTrackerBlacklisting.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestTaskTrackerBlacklisting.java Tue Apr  5 08:40:05 2011
@@ -33,7 +33,8 @@ import junit.framework.Test;
 import junit.framework.TestCase;
 import junit.framework.TestSuite;
 
-import org.apache.hadoop.NodeHealthStatus;
+import org.apache.hadoop.Clock;
+import org.apache.hadoop.mapred.NodeHealthStatus;
 import org.apache.hadoop.mapred.ClusterStatus.BlackListInfo;
 import org.apache.hadoop.mapred.JobTracker.ReasonForBlackListing;
 import org.apache.hadoop.mapreduce.TaskType;

Modified: hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestTrackerReservation.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestTrackerReservation.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestTrackerReservation.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/TestTrackerReservation.java Tue Apr  5 08:40:05 2011
@@ -22,6 +22,7 @@ import java.util.ArrayList;
 
 import javax.security.auth.login.LoginException;
 
+import org.apache.hadoop.Clock;
 import org.apache.hadoop.mapred.FakeObjectUtilities.FakeJobInProgress;
 import org.apache.hadoop.mapreduce.ClusterMetrics;
 import org.apache.hadoop.mapreduce.TaskType;

Modified: hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/UtilsForTests.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/UtilsForTests.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/UtilsForTests.java (original)
+++ hadoop/mapreduce/branches/MR-279/src/test/mapred/org/apache/hadoop/mapred/UtilsForTests.java Tue Apr  5 08:40:05 2011
@@ -33,6 +33,7 @@ import java.util.List;
 import java.util.Properties;
 
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.Clock;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationIdPBImpl.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationIdPBImpl.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationIdPBImpl.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationIdPBImpl.java Tue Apr  5 08:40:05 2011
@@ -23,7 +23,6 @@ public class ApplicationIdPBImpl extends
   }
 
   public ApplicationIdProto getProto() {
-    maybeInitBuilder();
     proto = viaProto ? proto : builder.build();
     viaProto = true;
     return proto;

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/avro/ResourceTracker.genavro
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/avro/ResourceTracker.genavro?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/avro/ResourceTracker.genavro (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/avro/ResourceTracker.genavro Tue Apr  5 08:40:05 2011
@@ -7,15 +7,19 @@ protocol ResourceTracker {
   record NodeID {
     int id;
   }
- 
+
+  record NodeHealthStatus {
+    boolean isNodeHealthy;
+    union {string, null} healthReport;
+    long lastHealthReportTime;
+  }
+
   record NodeStatus {
     NodeID nodeId;
 	int responseId;
 	long lastSeen;
     map<array<org.apache.hadoop.yarn.Container>> containers;
-    boolean isNodeHealthy;
-    union {string, null} healthReport;
-    long lastHealthReport;
+	NodeHealthStatus nodeHealthStatus;
   }
 
   record RegistrationResponse {

Added: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/Clock.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/Clock.java?rev=1088923&view=auto
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/Clock.java (added)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/Clock.java Tue Apr  5 08:40:05 2011
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop;
+
+/**
+ * A clock returning System.currentTimeMillis(); override getTime() to mock it.
+ */
+public class Clock {
+  public long getTime() {
+    return System.currentTimeMillis();
+  }
+}
\ No newline at end of file

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/NodeHealthCheckerService.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/NodeHealthCheckerService.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/NodeHealthCheckerService.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/NodeHealthCheckerService.java Tue Apr  5 08:40:05 2011
@@ -31,6 +31,7 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.util.Shell.ExitCodeException;
 import org.apache.hadoop.util.Shell.ShellCommandExecutor;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
 import org.apache.hadoop.yarn.service.AbstractService;
 
 /**
@@ -213,7 +214,6 @@ public class NodeHealthCheckerService ex
 
   public NodeHealthCheckerService(Configuration conf) {
     this();
-    this.conf = conf;
     init(conf);
   }
 
@@ -222,6 +222,7 @@ public class NodeHealthCheckerService ex
    */
   @Override
   public void init(Configuration conf) {
+    this.conf = conf;
     this.nodeHealthScript = 
         conf.get(HEALTH_CHECK_SCRIPT_PROPERTY);
     this.intervalTime = conf.getLong(HEALTH_CHECK_INTERVAL_PROPERTY,
@@ -364,9 +365,9 @@ public class NodeHealthCheckerService ex
    * @param healthStatus
    */
   public synchronized void setHealthStatus(NodeHealthStatus healthStatus) {
-    healthStatus.setNodeHealthy(this.isHealthy());
+    healthStatus.setIsNodeHealthy(this.isHealthy());
     healthStatus.setHealthReport(this.getHealthReport());
-    healthStatus.setLastReported(this.getLastReportedTime());
+    healthStatus.setLastHealthReportTime(this.getLastReportedTime());
   }
   
   /**

Added: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthStatus.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthStatus.java?rev=1088923&view=auto
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthStatus.java (added)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthStatus.java Tue Apr  5 08:40:05 2011
@@ -0,0 +1,16 @@
+package org.apache.hadoop.yarn.server.api.records;
+
+public interface NodeHealthStatus { // health flag, report and report time
+  /** Returns the node-healthy flag. */
+  boolean getIsNodeHealthy();
+  /** Returns the health report text; implementations may return null. */
+  String getHealthReport();
+  /** Returns the time of the last health report (units: TODO confirm). */
+  long getLastHealthReportTime();
+  /** Sets the node-healthy flag. */
+  void setIsNodeHealthy(boolean isNodeHealthy);
+  /** Sets the health report text. */
+  void setHealthReport(String healthReport);
+  /** Sets the time of the last health report. */
+  void setLastHealthReportTime(long lastHealthReport);
+}
\ No newline at end of file

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java Tue Apr  5 08:40:05 2011
@@ -14,12 +14,10 @@ public interface NodeStatus {
   
   public abstract Map<String, List<Container>> getAllContainers();
   public abstract List<Container> getContainers(String key);
-  
-  public abstract boolean getIsNodeHealthy();
-  public abstract String getHealthReport();
-  public abstract long getLastHealthReport();
-  
-  
+
+  NodeHealthStatus getNodeHealthStatus();
+  void setNodeHealthStatus(NodeHealthStatus healthStatus);
+
   public abstract void setNodeId(NodeId nodeId);
   public abstract void setResponseId(int responseId);
   public abstract void setLastSeen(long lastSeen);
@@ -28,8 +26,4 @@ public interface NodeStatus {
   public abstract void setContainers(String key, List<Container> containers);
   public abstract void removeContainers(String key);
   public abstract void clearContainers();
-  
-  public abstract void setIsNodeHealthy(boolean isNodeHealthy);
-  public abstract void setHealthReport(String healthReport);
-  public abstract void setLastHealthReport(long lastHealthReport);
 }

Added: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthStatusPBImpl.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthStatusPBImpl.java?rev=1088923&view=auto
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthStatusPBImpl.java (added)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthStatusPBImpl.java Tue Apr  5 08:40:05 2011
@@ -0,0 +1,112 @@
+package org.apache.hadoop.yarn.server.api.records.impl.pb;
+
+import org.apache.hadoop.yarn.api.records.ProtoBase;
+import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeHealthStatusProto;
+import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeHealthStatusProtoOrBuilder;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
+
+/**
+ * Protocol-buffer backed implementation of {@link NodeHealthStatus}.
+ *
+ * The record is held either as a built {@link NodeHealthStatusProto} or as a
+ * {@link NodeHealthStatusProto.Builder}; {@code viaProto} tells which of the
+ * two currently carries the authoritative state.
+ */
+public class NodeHealthStatusPBImpl extends ProtoBase<NodeHealthStatusProto>
+    implements NodeHealthStatus {
+
+  private NodeHealthStatusProto.Builder builder;
+  private boolean viaProto = false;
+  private NodeHealthStatusProto proto = NodeHealthStatusProto
+      .getDefaultInstance();
+
+  /** Creates an empty record backed by a fresh builder. */
+  public NodeHealthStatusPBImpl() {
+    this.builder = NodeHealthStatusProto.newBuilder();
+  }
+
+  /**
+   * Wraps an already built message.
+   *
+   * @param proto the message to read from until the first setter is called
+   */
+  public NodeHealthStatusPBImpl(NodeHealthStatusProto proto) {
+    this.proto = proto;
+    this.viaProto = true;
+  }
+
+  /**
+   * Returns the record as a built message.
+   *
+   * The builder is only built when it holds the current state; a record that
+   * is already backed by a message hands that message back as is.
+   *
+   * @return the message holding the current state of this record
+   */
+  public NodeHealthStatusProto getProto() {
+    if (!this.viaProto) {
+      // viaProto is only ever false after a constructor or maybeInitBuilder()
+      // has assigned the builder, so it cannot be null here.
+      this.proto = this.builder.build();
+      this.viaProto = true;
+    }
+    return this.proto;
+  }
+
+  /**
+   * Makes the builder the authoritative state, seeding it from the current
+   * message when the record was message-backed or has no builder yet.
+   */
+  private void maybeInitBuilder() {
+    if (this.viaProto || this.builder == null) {
+      this.builder = NodeHealthStatusProto.newBuilder(this.proto);
+    }
+    this.viaProto = false;
+  }
+
+  /** Picks whichever of message/builder currently holds the state. */
+  private NodeHealthStatusProtoOrBuilder current() {
+    return this.viaProto ? this.proto : this.builder;
+  }
+
+  @Override
+  public boolean getIsNodeHealthy() {
+    return current().getIsNodeHealthy();
+  }
+
+  @Override
+  public void setIsNodeHealthy(boolean isNodeHealthy) {
+    maybeInitBuilder();
+    this.builder.setIsNodeHealthy(isNodeHealthy);
+  }
+
+  /** @return the health report, or {@code null} when none has been set */
+  @Override
+  public String getHealthReport() {
+    NodeHealthStatusProtoOrBuilder p = current();
+    return p.hasHealthReport() ? p.getHealthReport() : null;
+  }
+
+  /** @param healthReport the report text; {@code null} clears the field */
+  @Override
+  public void setHealthReport(String healthReport) {
+    maybeInitBuilder();
+    if (healthReport == null) {
+      this.builder.clearHealthReport();
+      return;
+    }
+    this.builder.setHealthReport(healthReport);
+  }
+
+  @Override
+  public long getLastHealthReportTime() {
+    return current().getLastHealthReportTime();
+  }
+
+  @Override
+  public void setLastHealthReportTime(long lastHealthReport) {
+    maybeInitBuilder();
+    this.builder.setLastHealthReportTime(lastHealthReport);
+  }
+
+}

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java Tue Apr  5 08:40:05 2011
@@ -12,10 +12,12 @@ import org.apache.hadoop.yarn.api.record
 import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;
 import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto;
 import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.ContainerListProto;
+import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeHealthStatusProto;
 import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeIdProto;
 import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeStatusProto;
 import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeStatusProtoOrBuilder;
 import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.StringContainerListMapProto;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
 import org.apache.hadoop.yarn.server.api.records.NodeId;
 import org.apache.hadoop.yarn.server.api.records.NodeStatus;
     
@@ -26,8 +28,7 @@ public class NodeStatusPBImpl extends Pr
   
   private NodeId nodeId = null;
   private Map<String, List<Container>> containers = null;
-  
-
+  private NodeHealthStatus nodeHealthStatus = null;
   
   public NodeStatusPBImpl() {
     builder = NodeStatusProto.newBuilder();
@@ -53,6 +54,9 @@ public class NodeStatusPBImpl extends Pr
     if (this.containers != null) {
       addContainersToProto();
     }
+    if (this.nodeHealthStatus != null) {
+      builder.setNodeHealthStatus(convertToProtoFormat(this.nodeHealthStatus));
+    }
   }
 
   private void mergeLocalToProto() {
@@ -184,18 +188,19 @@ public class NodeStatusPBImpl extends Pr
     };
     builder.addAllContainers(iterable);
   }
-  
+
   @Override
-  public boolean getIsNodeHealthy() {
+  public NodeHealthStatus getNodeHealthStatus() {
     NodeStatusProtoOrBuilder p = viaProto ? proto : builder;
-    return p.getIsNodeHealthy();
+    return new NodeHealthStatusPBImpl(p.getNodeHealthStatus());
   }
-  
+
   @Override
-  public void setIsNodeHealthy(boolean isNodeHealthy) {
+  public void setNodeHealthStatus(NodeHealthStatus healthStatus) {
     maybeInitBuilder();
-    builder.setIsNodeHealthy(isNodeHealthy);
+    builder.setNodeHealthStatus(convertToProtoFormat(healthStatus));
   }
+
   /*
    * 
    * @Override
@@ -218,37 +223,6 @@ public class NodeStatusPBImpl extends Pr
   }
   */
   
-  @Override
-  public String getHealthReport() {
-    NodeStatusProtoOrBuilder p = viaProto ? proto : builder;
-    if (!p.hasHealthReport()) {
-      return null;
-    }
-    return (p.getHealthReport()); 
-  }
-  
-  @Override
-  public void setHealthReport(String healthReport) {
-    maybeInitBuilder();
-    if (healthReport == null) {
-      builder.clearHealthReport();
-      return;
-    }
-    builder.setHealthReport((healthReport));
-  }
-  
-  @Override
-  public long getLastHealthReport() {
-    NodeStatusProtoOrBuilder p = viaProto ? proto : builder;
-    return (p.getLastHealthReport());
-  }
-  
-  @Override
-  public void setLastHealthReport(long lastHealthReport) {
-    maybeInitBuilder();
-    builder.setLastHealthReport((lastHealthReport));
-  }
-  
   private ContainerListProto convertToProtoFormat(List<Container> src) {
     ContainerListProto.Builder ret = ContainerListProto.newBuilder();
     for (Container c : src) {
@@ -294,5 +268,13 @@ public class NodeStatusPBImpl extends Pr
   private NodeId convertFromProtoFormat(NodeIdProto proto) {
     return new NodeIdPBImpl(proto);
   }
-  
+
+  private NodeHealthStatusProto convertToProtoFormat(
+      NodeHealthStatus healthStatus) {
+    return ((NodeHealthStatusPBImpl) healthStatus).getProto();
+  }
+
+  private NodeHealthStatus convertFromProtoFormat(NodeHealthStatusProto proto) {
+    return new NodeHealthStatusPBImpl(proto);
+  }
 }  

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/proto/yarn_server_common_protos.proto
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/proto/yarn_server_common_protos.proto?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/proto/yarn_server_common_protos.proto (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/proto/yarn_server_common_protos.proto Tue Apr  5 08:40:05 2011
@@ -9,14 +9,18 @@ message NodeIdProto {
   optional int32 id = 1;
 }
 
+message NodeHealthStatusProto {
+  optional bool is_node_healthy = 1;
+  optional string health_report = 2;
+  optional int64 last_health_report_time = 3;
+}
+
 message NodeStatusProto {
   optional NodeIdProto node_id = 1;
   optional int32 response_id = 2;
   optional int64 last_seen = 3;
   repeated StringContainerListMapProto containers = 4;
-  optional bool is_node_healthy = 5;
-  optional string health_report = 6;
-  optional int64 last_health_report = 7;
+  optional NodeHealthStatusProto nodeHealthStatus = 5;
 }
 
 message RegistrationResponseProto {

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/resources/yarn-default.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/resources/yarn-default.xml?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/resources/yarn-default.xml (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/main/resources/yarn-default.xml Tue Apr  5 08:40:05 2011
@@ -73,6 +73,37 @@
 
 <property><name>yarn.server.nodemanager.connect.rm</name><value>true</value></property> 
 
+<!-- All NodeManager health-checker related configuration properties -->
+
+  <property>
+    <name>yarn.server.nodemanager.healthchecker.script.path</name>
+    <value></value>
+    <description>Location of the node's health-check script on the local
+    file-system.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.server.nodemanager.healthchecker.interval</name>
+    <value>600000</value>
+    <description>Frequency of the health-check run by the NodeManager
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.server.nodemanager.healthchecker.script.timeout</name>
+    <value>1200000</value>
+    <description>Timeout for the health-check run by the NodeManager
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.server.nodemanager.healthchecker.script.args</name>
+    <value></value>
+    <description>Arguments to be passed to the health-check script run
+    by the NodeManager</description>
+  </property>
+
 <!-- All MRAppMaster related configuration properties -->
 
   <property>

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/test/java/org/apache/hadoop/TestNodeHealthService.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/test/java/org/apache/hadoop/TestNodeHealthService.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/test/java/org/apache/hadoop/TestNodeHealthService.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-common/src/test/java/org/apache/hadoop/TestNodeHealthService.java Tue Apr  5 08:40:05 2011
@@ -29,6 +29,9 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.factories.RecordFactory;
+import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -106,7 +109,9 @@ public class TestNodeHealthService {
 
   @Test
   public void testNodeHealthScript() throws Exception {
-    NodeHealthStatus healthStatus = new NodeHealthStatus();
+    RecordFactory factory = RecordFactoryProvider.getRecordFactory(null);
+    NodeHealthStatus healthStatus =
+        factory.newRecordInstance(NodeHealthStatus.class);
     String errorScript = "echo ERROR\n echo \"Tracker not healthy\"";
     String normalScript = "echo \"I am all fine\"";
     String timeOutScript = "sleep 4\n echo\"I am fine\"";
@@ -124,7 +129,7 @@ public class TestNodeHealthService {
     LOG.info("Checking initial healthy condition");
     // Check proper report conditions.
     Assert.assertTrue("Node health status reported unhealthy", healthStatus
-        .isNodeHealthy());
+        .getIsNodeHealthy());
     Assert.assertTrue("Node health status reported unhealthy", healthStatus
         .getHealthReport().isEmpty());
 
@@ -137,7 +142,7 @@ public class TestNodeHealthService {
     nodeHealthChecker.setHealthStatus(healthStatus);
     LOG.info("Checking Healthy--->Unhealthy");
     Assert.assertFalse("Node health status reported healthy", healthStatus
-        .isNodeHealthy());
+        .getIsNodeHealthy());
     Assert.assertFalse("Node health status reported healthy", healthStatus
         .getHealthReport().isEmpty());
     
@@ -148,7 +153,7 @@ public class TestNodeHealthService {
     LOG.info("Checking UnHealthy--->healthy");
     // Check proper report conditions.
     Assert.assertTrue("Node health status reported unhealthy", healthStatus
-        .isNodeHealthy());
+        .getIsNodeHealthy());
     Assert.assertTrue("Node health status reported unhealthy", healthStatus
         .getHealthReport().isEmpty());
 
@@ -158,7 +163,7 @@ public class TestNodeHealthService {
     nodeHealthChecker.setHealthStatus(healthStatus);
     LOG.info("Checking Healthy--->timeout");
     Assert.assertFalse("Node health status reported healthy even after timeout",
-        healthStatus.isNodeHealthy());
+        healthStatus.getIsNodeHealthy());
     Assert.assertEquals("Node time out message not propogated", healthStatus
         .getHealthReport(),
         NodeHealthCheckerService.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG);

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java Tue Apr  5 08:40:05 2011
@@ -22,7 +22,7 @@ import java.util.concurrent.ConcurrentMa
 
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.NodeHealthStatus;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java Tue Apr  5 08:40:05 2011
@@ -28,7 +28,6 @@ import java.util.concurrent.ConcurrentMa
 import java.util.concurrent.ConcurrentSkipListMap;
 
 import org.apache.hadoop.NodeHealthCheckerService;
-import org.apache.hadoop.NodeHealthStatus;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.util.ReflectionUtils;
@@ -38,7 +37,10 @@ import org.apache.hadoop.yarn.api.record
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.AsyncDispatcher;
 import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.factories.RecordFactory;
+import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.YarnServerConfig;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
@@ -154,9 +156,13 @@ public class NodeManager extends Composi
             }
           });
 
-    private final NodeHealthStatus nodeHealthStatus = new NodeHealthStatus();
+    private final NodeHealthStatus nodeHealthStatus = RecordFactoryProvider
+        .getRecordFactory(null).newRecordInstance(NodeHealthStatus.class);
 
     public NMContext() {
+      this.nodeHealthStatus.setIsNodeHealthy(true);
+      this.nodeHealthStatus.setHealthReport("Healthy");
+      this.nodeHealthStatus.setLastHealthReportTime(System.currentTimeMillis());
     }
 
     @Override

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java Tue Apr  5 08:40:05 2011
@@ -29,7 +29,6 @@ import org.apache.avro.AvroRuntimeExcept
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.NodeHealthCheckerService;
-import org.apache.hadoop.NodeHealthStatus;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.net.NetUtils;
@@ -50,6 +49,7 @@ import org.apache.hadoop.yarn.server.api
 import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
 import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
 import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
 import org.apache.hadoop.yarn.server.api.records.NodeId;
 import org.apache.hadoop.yarn.server.api.records.NodeStatus;
 import org.apache.hadoop.yarn.server.api.records.RegistrationResponse;
@@ -209,13 +209,13 @@ public class NodeStatusUpdaterImpl exten
     LOG.debug(this.nodeName + " sending out status for " + numActiveContainers
         + " containers");
 
+    NodeHealthStatus nodeHealthStatus = this.context.getNodeHealthStatus();
     if (this.healthChecker != null) {
-      NodeHealthStatus nodeHealthStatus = this.context.getNodeHealthStatus();
       this.healthChecker.setHealthStatus(nodeHealthStatus);
-      status.setIsNodeHealthy(nodeHealthStatus.isNodeHealthy());
-      status.setHealthReport(nodeHealthStatus.getHealthReport());
-      status.setLastHealthReport(nodeHealthStatus.getLastReported());
     }
+    LOG.debug("Node's health-status : " + nodeHealthStatus.getIsNodeHealthy()
+        + ", " + nodeHealthStatus.getHealthReport());
+    status.setNodeHealthStatus(nodeHealthStatus);
 
     return status;
   }

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeInfo.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeInfo.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeInfo.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeInfo.java Tue Apr  5 08:40:05 2011
@@ -19,6 +19,7 @@
 package org.apache.hadoop.yarn.server.resourcemanager.resourcetracker;
 
 import org.apache.hadoop.net.Node;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
 import org.apache.hadoop.yarn.server.api.records.NodeId;
 
 /**
@@ -38,6 +39,11 @@ public interface NodeInfo {
    */
   public String getHostName();
   /**
+   * the health-status for this node
+   * @return the health-status for this node.
+   */
+  public NodeHealthStatus getNodeHealthStatus();
+  /**
    * the total available resource.
    * @return the total available resource.
    */

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeInfoTracker.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeInfoTracker.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeInfoTracker.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeInfoTracker.java Tue Apr  5 08:40:05 2011
@@ -18,9 +18,8 @@
 
 package org.apache.hadoop.yarn.server.resourcemanager.resourcetracker;
 
-import org.apache.hadoop.yarn.factories.RecordFactory;
-import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeManager;
 
 /**
  * Track the node info and heart beat responses for this node.
@@ -28,35 +27,33 @@ import org.apache.hadoop.yarn.server.api
  *
  */
 class NodeInfoTracker {
-  private final NodeInfo node;
-  private final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
+  private final NodeManager node;
   HeartbeatResponse lastHeartBeatResponse;
-  private org.apache.hadoop.yarn.server.api.records.NodeStatus nodeStatus = recordFactory.newRecordInstance(org.apache.hadoop.yarn.server.api.records.NodeStatus.class);
+  private long lastSeen;
 
-  public NodeInfoTracker(NodeInfo node, HeartbeatResponse lastHeartBeatResponse) {
+  public NodeInfoTracker(NodeManager node, HeartbeatResponse lastHeartBeatResponse) {
     this.node = node;
     this.lastHeartBeatResponse = lastHeartBeatResponse;
-    this.nodeStatus.setNodeId(node.getNodeID());
-    this.nodeStatus.setLastSeen(System.currentTimeMillis());
+    this.lastSeen = System.currentTimeMillis();
   }
 
-  public synchronized NodeInfo getNodeInfo() {
+  public synchronized NodeManager getNodeManager() {
     return this.node;
   }
 
-  public synchronized HeartbeatResponse getLastHeartBeatResponse() {
-    return this.lastHeartBeatResponse;
+  public synchronized void updateLastSeen(long lastSeen) {
+    this.lastSeen = lastSeen;
   }
 
-  public synchronized void refreshHeartBeatResponse(HeartbeatResponse heartBeatResponse) {
-    this.lastHeartBeatResponse = heartBeatResponse;
+  public synchronized long getNodeLastSeen() {
+    return this.lastSeen;
   }
 
-  public synchronized void updateNodeStatus(org.apache.hadoop.yarn.server.api.records.NodeStatus nodeStatus) {
-    this.nodeStatus = nodeStatus;
+  public synchronized HeartbeatResponse getLastHeartBeatResponse() {
+    return this.lastHeartBeatResponse;
   }
 
-  public synchronized org.apache.hadoop.yarn.server.api.records.NodeStatus getNodeStatus() {
-    return this.nodeStatus;
+  public synchronized void refreshHeartBeatResponse(HeartbeatResponse heartBeatResponse) {
+    this.lastHeartBeatResponse = heartBeatResponse;
   }
 }
\ No newline at end of file

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeStatus.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeStatus.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeStatus.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/NodeStatus.java Tue Apr  5 08:40:05 2011
@@ -22,16 +22,39 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
 import org.apache.hadoop.yarn.server.api.records.NodeId;
 
 
+/**
+ * Factory helper for {@link org.apache.hadoop.yarn.server.api.records.NodeStatus}
+ * records.
+ */
 public class NodeStatus {
+  /**
+   * Builds a node-status record for the given node, marked healthy and
+   * stamped with the current time as its last-seen value.
+   *
+   * @param nodeId id stored in the record
+   * @param containers containers added to the record
+   * @return the populated record
+   */
   public static org.apache.hadoop.yarn.server.api.records.NodeStatus createNodeStatus(
       NodeId nodeId, Map<String, List<Container>> containers) {
-    org.apache.hadoop.yarn.server.api.records.NodeStatus nodeStatus = RecordFactoryProvider.getRecordFactory(null).newRecordInstance(org.apache.hadoop.yarn.server.api.records.NodeStatus.class);
+    RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
+    org.apache.hadoop.yarn.server.api.records.NodeStatus nodeStatus = recordFactory.newRecordInstance(org.apache.hadoop.yarn.server.api.records.NodeStatus.class);
     nodeStatus.setNodeId(nodeId);
     nodeStatus.addAllContainers(containers);
+    // Populate the health record before attaching it: the getter on the
+    // protobuf-backed NodeStatus returns a new wrapper each call, so flags set
+    // through nodeStatus.getNodeHealthStatus() would not reach the record.
+    NodeHealthStatus healthStatus =
+        recordFactory.newRecordInstance(NodeHealthStatus.class);
+    healthStatus.setIsNodeHealthy(true);
+    nodeStatus.setNodeHealthStatus(healthStatus);
+    nodeStatus.setLastSeen(System.currentTimeMillis());
     return nodeStatus;
   }
 }

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/RMResourceTrackerImpl.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/RMResourceTrackerImpl.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/RMResourceTrackerImpl.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/RMResourceTrackerImpl.java Tue Apr  5 08:40:05 2011
@@ -56,8 +56,11 @@ import org.apache.hadoop.yarn.server.api
 import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
 import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse;
 import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
 import org.apache.hadoop.yarn.server.api.records.NodeId;
 import org.apache.hadoop.yarn.server.api.records.RegistrationResponse;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeManager;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeManagerImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeResponse;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceListener;
 import org.apache.hadoop.yarn.server.security.ContainerTokenSecretManager;
@@ -82,17 +85,21 @@ ResourceTracker, ResourceContext {
   
   private static final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
   
-  private final TreeSet<org.apache.hadoop.yarn.server.api.records.NodeStatus> nmExpiryQueue =
-      new TreeSet<org.apache.hadoop.yarn.server.api.records.NodeStatus>(
-          new Comparator<org.apache.hadoop.yarn.server.api.records.NodeStatus>() {
-            public int compare(org.apache.hadoop.yarn.server.api.records.NodeStatus p1, org.apache.hadoop.yarn.server.api.records.NodeStatus p2) {
-              if (p1.getLastSeen() < p2.getLastSeen()) {
+  private final TreeSet<NodeId> nmExpiryQueue =
+    new TreeSet<NodeId>(
+        new Comparator<NodeId>() {
+          public int compare(NodeId n1, NodeId n2) {
+            NodeInfoTracker nit1 = nodeManagers.get(n1);
+            NodeInfoTracker nit2 = nodeManagers.get(n2);
+            long p1LastSeen = nit1.getNodeLastSeen();
+            long p2LastSeen = nit2.getNodeLastSeen();
+            if (p1LastSeen < p2LastSeen) {
                 return -1;
-              } else if (p1.getLastSeen() > p2.getLastSeen()) {
+            } else if (p1LastSeen > p2LastSeen) {
                 return 1;
               } else {
-                return (p1.getNodeId().getId() -
-                    p2.getNodeId().getId());
+                return (nit1.getNodeManager().getNodeID().getId() -
+                    nit2.getNodeManager().getNodeID().getId());
               }
             }
           }
@@ -164,21 +171,22 @@ ResourceTracker, ResourceContext {
     synchronized(nodeManagers) {
       if (!nodeManagers.containsKey(nodeId)) {
         /* we do the resolving here, so that scheduler does not have to do it */
-        NodeInfo nodeManager = resourceListener.addNode(nodeId, node.toString(),
-            resolve(node.toString()),
-            capability);
+        NodeManager nodeManager =
+            new NodeManagerImpl(nodeId, node.toString(),
+                resolve(node.toString()),
+                capability);
+        // Inform the scheduler
+        resourceListener.addNode(nodeManager);
         HeartbeatResponse response = recordFactory.newRecordInstance(HeartbeatResponse.class);
         response.setResponseId(0);
         nTracker = new NodeInfoTracker(nodeManager, response);
         nodeManagers.put(nodeId, nTracker);
       } else {
         nTracker = nodeManagers.get(nodeId);
-        org.apache.hadoop.yarn.server.api.records.NodeStatus status = nTracker.getNodeStatus();
-        status.setLastSeen(System.currentTimeMillis());
-        nTracker.updateNodeStatus(status);
+        nTracker.updateLastSeen(System.currentTimeMillis());
       }
     }
-    addForTracking(nTracker.getNodeStatus());
+    addForTracking(nodeId);
     LOG.info("NodeManager from node " + node + " registered with capability: " + 
         capability.getMemory() + ", assigned nodeId " + nodeId.getId());
 
@@ -194,35 +202,38 @@ ResourceTracker, ResourceContext {
 
   @Override
   public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) throws YarnRemoteException {
-    org.apache.hadoop.yarn.server.api.records.NodeStatus nodeStatus = request.getNodeStatus();
-    nodeStatus.setLastSeen(System.currentTimeMillis());
+    org.apache.hadoop.yarn.server.api.records.NodeStatus remoteNodeStatus = request.getNodeStatus();
+    remoteNodeStatus.setLastSeen(System.currentTimeMillis());
     NodeInfoTracker nTracker = null;
     NodeHeartbeatResponse nodeHbResponse = recordFactory.newRecordInstance(NodeHeartbeatResponse.class);
     synchronized(nodeManagers) {
-      nTracker = nodeManagers.get(nodeStatus.getNodeId());
+      nTracker = nodeManagers.get(remoteNodeStatus.getNodeId());
     }
     if (nTracker == null) {
       /* node does not exist */
-      LOG.info("Node not found rebooting " + nodeStatus.getNodeId());
+      LOG.info("Node not found rebooting " + remoteNodeStatus.getNodeId());
       nodeHbResponse.setHeartbeatResponse(reboot);
       return nodeHbResponse;
     }
 
-    NodeInfo nodeInfo = nTracker.getNodeInfo();
+    NodeManager nodeManager = nTracker.getNodeManager();
     /* check to see if its an old heartbeat */    
-    if (nodeStatus.getResponseId() + 1 == nTracker.getLastHeartBeatResponse().getResponseId()) {
+    if (remoteNodeStatus.getResponseId() + 1 == nTracker
+        .getLastHeartBeatResponse().getResponseId()) {
       nodeHbResponse.setHeartbeatResponse(nTracker.getLastHeartBeatResponse());
       return nodeHbResponse;
-    } else if (nodeStatus.getResponseId() + 1 < nTracker.getLastHeartBeatResponse().getResponseId()) {
-      LOG.info("Too far behind rm response id:" + 
-          nTracker.lastHeartBeatResponse.getResponseId() + " nm response id:" + nodeStatus.getResponseId());
+    } else if (remoteNodeStatus.getResponseId() + 1 < nTracker
+        .getLastHeartBeatResponse().getResponseId()) {
+      LOG.info("Too far behind rm response id:" +
+          nTracker.lastHeartBeatResponse.getResponseId() + " nm response id:"
+          + remoteNodeStatus.getResponseId());
       nodeHbResponse.setHeartbeatResponse(reboot);
       return nodeHbResponse;
     }
 
     /* inform any listeners of node heartbeats */
     NodeResponse nodeResponse = resourceListener.nodeUpdate(
-        nodeInfo, nodeStatus.getAllContainers());
+        nodeManager, remoteNodeStatus.getAllContainers());
 
     
     HeartbeatResponse response = recordFactory.newRecordInstance(HeartbeatResponse.class);
@@ -232,15 +243,43 @@ ResourceTracker, ResourceContext {
     response.setResponseId(nTracker.getLastHeartBeatResponse().getResponseId() + 1);
 
     nTracker.refreshHeartBeatResponse(response);
-    nTracker.updateNodeStatus(nodeStatus);
+    nTracker.updateLastSeen(remoteNodeStatus.getLastSeen());
+    boolean prevHealthStatus =
+        nTracker.getNodeManager().getNodeHealthStatus().getIsNodeHealthy();
+    NodeHealthStatus remoteNodeHealthStatus =
+        remoteNodeStatus.getNodeHealthStatus();
+    nTracker.getNodeManager().updateHealthStatus(
+        remoteNodeHealthStatus);
+//    boolean prevHealthStatus = nodeHbResponse.
     nodeHbResponse.setHeartbeatResponse(response);
+
+    // Take care of node-health
+    if (prevHealthStatus != remoteNodeHealthStatus
+        .getIsNodeHealthy()) {
+      // Node's health-status changed.
+      if (!remoteNodeHealthStatus.getIsNodeHealthy()) {
+        // TODO: Node has become unhealthy, remove?
+//        LOG.info("Node " + nodeManager.getNodeID()
+//            + " has become unhealthy. Health-check report: "
+//            + remoteNodeStatus.nodeHealthStatus.healthReport
+//            + "Removing it from the scheduler.");
+//        resourceListener.removeNode(nodeManager);
+      } else {
+        // TODO: Node has become healthy back again, add?
+//        LOG.info("Node " + nodeManager.getNodeID()
+//            + " has become healthy back again. Health-check report: "
+//            + remoteNodeStatus.nodeHealthStatus.healthReport
+//            + " Adding it to the scheduler.");
+//        this.resourceListener.addNode(nodeManager);
+      }
+    }
     return nodeHbResponse;
   }
 
   @Private
   public synchronized NodeInfo getNodeManager(NodeId nodeId) {
     NodeInfoTracker ntracker = nodeManagers.get(nodeId);
-    return (ntracker == null ? null: ntracker.getNodeInfo());
+    return (ntracker == null ? null: ntracker.getNodeManager());
   }
 
   private synchronized NodeId getNodeId(String node) {
@@ -274,22 +313,22 @@ ResourceTracker, ResourceContext {
     List<NodeInfo> infoList = new ArrayList<NodeInfo>();
     synchronized (nodeManagers) {
       for (NodeInfoTracker t : nodeManagers.values()) {
-        infoList.add(t.getNodeInfo());
+        infoList.add(t.getNodeManager());
       }
     }
     return infoList;
   }
 
-  protected void addForTracking(org.apache.hadoop.yarn.server.api.records.NodeStatus status) {
+  protected void addForTracking(NodeId nodeID) {
     synchronized(nmExpiryQueue) {
-      nmExpiryQueue.add(status);
+      nmExpiryQueue.add(nodeID);
     }
   }
   
   protected void expireNMs(List<NodeId> nodes) {
     for (NodeId id: nodes) {
       synchronized (nodeManagers) {
-        NodeInfo nInfo = nodeManagers.get(id).getNodeInfo();
+        NodeInfo nInfo = nodeManagers.get(id).getNodeManager();
         nodeManagers.remove(id);
         resourceListener.removeNode(nInfo);
       }
@@ -319,27 +358,31 @@ ResourceTracker, ResourceContext {
       LOG.info("Starting expiring thread with interval " + nmExpiryInterval);
       
       while (!stop) {
-        org.apache.hadoop.yarn.server.api.records.NodeStatus leastRecent;
         long now = System.currentTimeMillis();
         expired.clear();
         synchronized(nmExpiryQueue) {
+          NodeId leastRecent;
           while ((nmExpiryQueue.size() > 0) &&
-              (leastRecent = nmExpiryQueue.first()) != null &&
-              ((now - leastRecent.getLastSeen()) > 
-              nmExpiryInterval)) {
+              (leastRecent = nmExpiryQueue.first()) != null) {
             nmExpiryQueue.remove(leastRecent);
             NodeInfoTracker info;
             synchronized(nodeManagers) {
-              info = nodeManagers.get(leastRecent.getNodeId());
+              info = nodeManagers.get(leastRecent);
             }
             if (info == null) {
               continue;
             }
-            org.apache.hadoop.yarn.server.api.records.NodeStatus status = info.getNodeStatus();
-            if ((now - status.getLastSeen()) > nmExpiryInterval) {
-              expired.add(status.getNodeId());
+            NodeId nodeID = info.getNodeManager().getNodeID();
+            if ((now - info.getNodeLastSeen()) > nmExpiryInterval) {
+              LOG.info("Going to expire the node-manager " + nodeID
+                  + " because of no updates for "
+                  + (now - info.getNodeLastSeen())
+                  + " seconds ( > expiry interval of " + nmExpiryInterval
+                  + ").");
+              expired.add(nodeID);
             } else {
-              nmExpiryQueue.add(status);
+              nmExpiryQueue.add(nodeID);
+              break;
             }
           }
         }

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/NodeManager.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/NodeManager.java?rev=1088923&r1=1088922&r2=1088923&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/NodeManager.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/NodeManager.java Tue Apr  5 08:40:05 2011
@@ -1,321 +1,26 @@
-/**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
 package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
 
-import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-import java.util.TreeSet;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.classification.InterfaceAudience.Private;
-import org.apache.hadoop.classification.InterfaceStability.Unstable;
-import org.apache.hadoop.net.Node;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.Container;
-import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.api.records.ContainerState;
-import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.factories.RecordFactory;
-import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
-import org.apache.hadoop.yarn.server.api.records.NodeId;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
 import org.apache.hadoop.yarn.server.resourcemanager.resourcetracker.NodeInfo;
 
-/**
- * This class is used by ClusterInfo to keep track of all the applications/containers
- * running on a node.
- *
- */
-@Private
-@Unstable
-public class NodeManager implements NodeInfo {
-  private static final Log LOG = LogFactory.getLog(NodeManager.class);
-  private static final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
-  private final NodeId nodeId;
-  private final String hostName;
-  private Resource totalCapability;
-  private Resource availableResource = recordFactory.newRecordInstance(Resource.class);
-  private Resource usedResource = recordFactory.newRecordInstance(Resource.class);
-  private final Node node;
-  
-  
-  private static final Container[] EMPTY_CONTAINER_ARRAY = new Container[] {};
-  private static final List<Container> EMPTY_CONTAINER_LIST = Arrays.asList(EMPTY_CONTAINER_ARRAY);
-  private static final ApplicationId[] EMPTY_APPLICATION_ARRAY = new ApplicationId[]{};
-  private static final List<ApplicationId> EMPTY_APPLICATION_LIST = Arrays.asList(EMPTY_APPLICATION_ARRAY);
-  
-  public static final String ANY = "*";  
-  /* set of containers that are allocated containers */
-  private final Map<ContainerId, Container> allocatedContainers = 
-    new TreeMap<ContainerId, Container>();
-    
-  /* set of containers that are currently active on a node manager */
-  private final Map<ContainerId, Container> activeContainers =
-    new TreeMap<ContainerId, Container>();
-  
-  /* set of containers that need to be cleaned */
-  private final Set<Container> containersToClean = 
-    new TreeSet<Container>(new org.apache.hadoop.yarn.server.resourcemanager.resource.Container.Comparator());
-
-  
-  /* the list of applications that have finished and need to be purged */
-  private final List<ApplicationId> finishedApplications = new ArrayList<ApplicationId>();
-  
-  private volatile int numContainers;
-  
-  public NodeManager(NodeId nodeId, String hostname, 
-      Node node, Resource capability) {
-    this.nodeId = nodeId;   
-    this.totalCapability = capability; 
-    this.hostName = hostname;
-    org.apache.hadoop.yarn.server.resourcemanager.resource.Resource.addResource(
-        availableResource, capability);
-    this.node = node;
-  }
-
-  /**
-   * NodeInfo for this node.
-   * @return the {@link NodeInfo} for this node.
-   */
-  public NodeInfo getNodeInfo() {
-    return this;
-  }
-  
-  /**
-   * The Scheduler has allocated containers on this node to the 
-   * given application.
-   * 
-   * @param applicationId application
-   * @param containers allocated containers
-   */
-  public synchronized void allocateContainer(ApplicationId applicationId, 
-      List<Container> containers) {
-    if (containers == null) {
-      LOG.error("Adding null containers for application " + applicationId);
-      return;
-    }   
-    for (Container container : containers) {
-      allocateContainer(container);
-    }
-
-    LOG.info("addContainers:" +
-        " node=" + getHostName() + 
-        " #containers=" + containers.size() + 
-        " available=" + getAvailableResource().getMemory() + 
-        " used=" + getUsedResource().getMemory());
-  }
-
-  /**
-   * Status update from the NodeManager
-   * @param nodeStatus node status
-   * @return the set of containers no longer should be used by the
-   * node manager.
-   */
-  public synchronized NodeResponse 
-    statusUpdate(Map<String,List<Container>> allContainers) {
-
-    if (allContainers == null) {
-      return new NodeResponse(EMPTY_APPLICATION_LIST, EMPTY_CONTAINER_LIST,
-          EMPTY_CONTAINER_LIST);
-    }
-       
-    List<Container> listContainers = new ArrayList<Container>();
-    // Iterate through the running containers and update their status
-    for (Map.Entry<String, List<Container>> e : 
-      allContainers.entrySet()) {
-      listContainers.addAll(e.getValue());
-    }
-    NodeResponse statusCheck = update(listContainers);
-    return statusCheck;
-  }
-  
-  /**
-   * Status update for an application running on a given node
-   * @param node node
-   * @param containers containers update.
-   * @return containers that are completed or need to be preempted.
-   */
-  private synchronized NodeResponse update(List<Container> containers) {
-    List<Container> completedContainers = new ArrayList<Container>();
-    List<Container> containersToCleanUp = new ArrayList<Container>();
-    List<ApplicationId> lastfinishedApplications = new ArrayList<ApplicationId>();
-    
-    for (Container container : containers) {
-      if (allocatedContainers.remove(container.getId()) != null) {
-        activeContainers.put(container.getId(), container);
-        LOG.info("Activated container " + container.getId() + " on node " + 
-         getHostName());
-      }
-
-      if (container.getState() == ContainerState.COMPLETE) {
-        if (activeContainers.remove(container.getId()) != null) {
-          updateResource(container);
-          LOG.info("Completed container " + container);
-        }
-        completedContainers.add(container);
-        LOG.info("Removed completed container " + container.getId() + " on node " + 
-            getHostName());
-      }
-      else if (container.getState() != ContainerState.COMPLETE && 
-          (!allocatedContainers.containsKey(container.getId())) && 
-          !activeContainers.containsKey(container.getId())) {
-        containersToCleanUp.add(container);
-      }
-    }
-    containersToCleanUp.addAll(containersToClean);
-    /* clear out containers to clean */
-    containersToClean.clear();
-    lastfinishedApplications.addAll(finishedApplications);
-    return new NodeResponse(lastfinishedApplications, completedContainers, 
-        containersToCleanUp);
-  }
-  
-  private synchronized void allocateContainer(Container container) {
-    deductAvailableResource(container.getResource());
-    ++numContainers;
-    
-    allocatedContainers.put(container.getId(), container);
-    LOG.info("Allocated container " + container.getId() + 
-        " to node " + getHostName());
-    
-    LOG.info("Assigned container " + container.getId() + 
-        " of capacity " + container.getResource() + " on host " + getHostName() + 
-        ", which currently has " + numContainers + " containers, " + 
-        getUsedResource() + " used and " + 
-        getAvailableResource() + " available");
-  }
-
-  private synchronized boolean isValidContainer(Container c) {    
-    if (activeContainers.containsKey(c.getId()) || allocatedContainers.containsKey(c.getId()))
-      return true;
-    return false;
-  }
-
-  private synchronized void updateResource(Container container) {
-    addAvailableResource(container.getResource());
-    --numContainers;
-  }
-  
-  /**
-   * Release an allocated container on this node.
-   * @param container container to be released
-   * @return <code>true</code> iff the container was unused, 
-   *         <code>false</code> otherwise
-   */
-  public synchronized boolean releaseContainer(Container container) {
-    if (!isValidContainer(container)) {
-      LOG.error("Invalid container released " + container);
-      return false;
-    }
-    
-    /* remove the containers from the nodemanger */
-    
-    // Was this container launched?
-    activeContainers.remove(container.getId());
-    allocatedContainers.remove(container.getId());
-    containersToClean.add(container);
-    updateResource(container);
-
-    LOG.info("Released container " + container.getId() + 
-        " of capacity " + container.getResource() + " on host " + getHostName() + 
-        ", which currently has " + numContainers + " containers, " + 
-        getUsedResource() + " used and " + getAvailableResource()
-        + " available" + ", release resources=" + true);
-    return true;
-  }
-
-  @Override
-  public NodeId getNodeID() {
-    return this.nodeId;
-  }
-
-  @Override
-  public String getHostName() {
-    return this.hostName;
-  }
-
-  @Override
-  public Resource getTotalCapability() {
-   return this.totalCapability;
-  }
-
-  @Override
-  public String getRackName() {
-    return node.getNetworkLocation();
-  }
+public interface NodeManager extends NodeInfo {
 
-  @Override
-  public Node getNode() {
-    return this.node;
-  }
+  public static final String ANY = "*";
 
-  @Override
-  public synchronized Resource getAvailableResource() {
-    return this.availableResource;
-  }
+  void allocateContainer(ApplicationId applicationId,
+      List<Container> containers);
 
-  @Override
-  public synchronized Resource getUsedResource() {
-    return this.usedResource;
-  }
+  boolean releaseContainer(Container container);
 
-  public synchronized void addAvailableResource(Resource resource) {
-    if (resource == null) {
-      LOG.error("Invalid resource addition of null resource for " + this.hostName);
-      return;
-    }
-    org.apache.hadoop.yarn.server.resourcemanager.resource.Resource.addResource(
-        availableResource, resource);
-    org.apache.hadoop.yarn.server.resourcemanager.resource.Resource.subtractResource(
-        usedResource, resource);
-  }
+  void updateHealthStatus(NodeHealthStatus healthStatus);
 
-  public synchronized void deductAvailableResource(Resource resource) {
-    if (resource == null) {
-      LOG.error("Invalid deduction of null resource for "+ this.hostName);
-    }
-    org.apache.hadoop.yarn.server.resourcemanager.resource.Resource.subtractResource(
-        availableResource, resource);
-    org.apache.hadoop.yarn.server.resourcemanager.resource.Resource.addResource(
-        usedResource, resource);
-  }
+  NodeResponse statusUpdate(Map<String, List<Container>> containers);
 
-  public synchronized void notifyFinishedApplication(ApplicationId applicationId) {  
-    finishedApplications.add(applicationId);
-    /* make sure to iterate through the list and remove all the containers that 
-     * belong to this application.
-     */
-  }
+  void notifyFinishedApplication(ApplicationId applicationId);
 
-  @Override
-  public int getNumContainers() {
-    return numContainers;
-  }
-  
-  @Override
-  public String toString() {
-    return "host: " + getHostName() + " #containers=" + getNumContainers() +  
-      " available=" + getAvailableResource().getMemory() + 
-      " used=" + getUsedResource().getMemory();
-  }
- }
+}