You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by vi...@apache.org on 2011/05/04 12:00:23 UTC

svn commit: r1099387 - in /hadoop/mapreduce/branches/MR-279/yarn: yarn-common/src/main/java/org/apache/hadoop/yarn/conf/ yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ yarn-server/yarn-server-resour...

Author: vinodkv
Date: Wed May  4 10:00:22 2011
New Revision: 1099387

URL: http://svn.apache.org/viewvc?rev=1099387&view=rev
Log:
Fixing three tight loops in the RM that are causing high CPU usage. Contributed by Vinod Kumar Vavilapalli.

Modified:
    hadoop/mapreduce/branches/MR-279/yarn/yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMConfig.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/AMTracker.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/ApplicationMasterLauncher.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/StoreFactory.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKStore.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/RMResourceTrackerImpl.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/tools/RMAdmin.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMLaunchFailure.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMExpiry.java
    hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java?rev=1099387&r1=1099386&r2=1099387&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java Wed May  4 10:00:22 2011
@@ -27,37 +27,14 @@ public class YarnConfiguration extends C
   
   public static final String AM_FAIL_COUNT_STRING = "<FAILCOUNT>";
   
-  public static final String ZK_ADDRESS = RM_PREFIX + "zookeeper.address";
-  
-  public static final String ZK_SESSION_TIMEOUT = RM_PREFIX 
-    + "zookeeper.session.timeout";
-  
   public static final String SCHEDULER_ADDRESS = RM_PREFIX
       + "scheduler.address";    
   
-  public static final String ADMIN_ADDRESS = RM_PREFIX + "admin.address";
-
   public static final String AM_EXPIRY_INTERVAL = RM_PREFIX
   + "application.expiry.interval";
   
-  public static final String AM_MAX_RETRIES = RM_PREFIX 
-  + "application.max.retries";
-  
-  public static final int DEFAULT_ZK_TIMEOUT = 60000;
-  
-  public static final int DEFAULT_AM_MAX_RETRIES = 3;
-  
-  public static final long DEFAULT_AM_EXPIRY_INTERVAL = 60000L;
-  
-  public static final String NM_EXPIRY_INTERVAL = RM_PREFIX 
-  + "nodemanager.expiry.interval";
-  
-  public static final long DEFAULT_NM_EXPIRY_INTERVAL = 600000L;
-  
   public static final String DEFAULT_SCHEDULER_BIND_ADDRESS = "0.0.0.0:8030";
 
-  public static final String DEFAULT_ADMIN_BIND_ADDRESS = "0.0.0.0:8141";
-
   public static final String APPSMANAGER_ADDRESS = RM_PREFIX
       + "appsManager.address";
 
@@ -109,10 +86,6 @@ public class YarnConfiguration extends C
   public static final String APPLICATION_CLIENT_SECRET_ENV_NAME =
       "AppClientTokenEnv";
 
-  public static final String RESOURCE_SCHEDULER = RM_PREFIX + "scheduler";
-  
-  public static final String RM_STORE = RM_PREFIX + "store";
-  
   public YarnConfiguration() {
     super();
   }

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java?rev=1099387&r1=1099386&r2=1099387&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java Wed May  4 10:00:22 2011
@@ -13,7 +13,6 @@ import org.apache.hadoop.security.Securi
 import org.apache.hadoop.yarn.server.resourcemanager.api.RMAdminProtocol;
 import org.apache.hadoop.yarn.server.resourcemanager.api.protocolrecords.RefreshQueuesRequest;
 import org.apache.hadoop.yarn.server.resourcemanager.api.protocolrecords.RefreshQueuesResponse;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
@@ -45,8 +44,8 @@ public class AdminService extends Abstra
   @Override
   public void init(Configuration conf) {
     String bindAddress =
-      conf.get(YarnConfiguration.ADMIN_ADDRESS,
-          YarnConfiguration.DEFAULT_ADMIN_BIND_ADDRESS);
+      conf.get(RMConfig.ADMIN_ADDRESS,
+          RMConfig.DEFAULT_ADMIN_BIND_ADDRESS);
     masterServiceAddress =  NetUtils.createSocketAddr(bindAddress);
     super.init(conf);
   }

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMConfig.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMConfig.java?rev=1099387&r1=1099386&r2=1099387&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMConfig.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMConfig.java Wed May  4 10:00:22 2011
@@ -23,4 +23,30 @@ import org.apache.hadoop.yarn.conf.YarnC
 public class RMConfig {
   public static final String RM_KEYTAB = YarnConfiguration.RM_PREFIX
       + "keytab";
+  public static final String ZK_ADDRESS = YarnConfiguration.RM_PREFIX
+      + "zookeeper.address";
+  public static final String ZK_SESSION_TIMEOUT = YarnConfiguration.RM_PREFIX
+      + "zookeeper.session.timeout";
+  public static final String ADMIN_ADDRESS = YarnConfiguration.RM_PREFIX
+      + "admin.address";
+  public static final String AM_MAX_RETRIES = YarnConfiguration.RM_PREFIX
+      + "application.max.retries";
+  public static final int DEFAULT_ZK_TIMEOUT = 60000;
+  public static final int DEFAULT_AM_MAX_RETRIES = 3;
+  public static final long DEFAULT_AM_EXPIRY_INTERVAL = 60000L;
+  public static final String NM_EXPIRY_INTERVAL = YarnConfiguration.RM_PREFIX
+      + "nodemanager.expiry.interval";
+  public static final long DEFAULT_NM_EXPIRY_INTERVAL = 600000L;
+  public static final String DEFAULT_ADMIN_BIND_ADDRESS = "0.0.0.0:8141";
+  public static final String RESOURCE_SCHEDULER = YarnConfiguration.RM_PREFIX
+      + "scheduler";
+  public static final String RM_STORE = YarnConfiguration.RM_PREFIX + "store";
+  public static final String AMLIVELINESS_MONITORING_INTERVAL =
+      YarnConfiguration.RM_PREFIX
+          + "amliveliness-monitor.monitoring-interval";
+  public static final long DEFAULT_AMLIVELINESS_MONITORING_INTERVAL = 1000;
+  public static final String NMLIVELINESS_MONITORING_INTERVAL =
+      YarnConfiguration.RM_PREFIX
+          + "nmliveliness-monitor.monitoring-interval";
+  public static final long DEFAULT_NMLIVELINESS_MONITORING_INTERVAL = 1000;
 }

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java?rev=1099387&r1=1099386&r2=1099387&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java Wed May  4 10:00:22 2011
@@ -128,7 +128,7 @@ public class ResourceManager extends Com
     // Initialize the scheduler
     this.scheduler = 
       ReflectionUtils.newInstance(
-          conf.getClass(YarnConfiguration.RESOURCE_SCHEDULER, 
+          conf.getClass(RMConfig.RESOURCE_SCHEDULER, 
               FifoScheduler.class, ResourceScheduler.class), 
           this.conf);
   

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/AMTracker.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/AMTracker.java?rev=1099387&r1=1099386&r2=1099387&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/AMTracker.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/AMTracker.java Wed May  4 10:00:22 2011
@@ -43,6 +43,7 @@ import org.apache.hadoop.yarn.conf.YarnC
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.hadoop.yarn.server.resourcemanager.RMConfig;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.applicationsmanager.events.ASMEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.applicationsmanager.events.ApplicationMasterEvents.ApplicationEventType;
@@ -63,7 +64,7 @@ import org.apache.hadoop.yarn.service.Ab
 public class AMTracker extends AbstractService  implements EventHandler<ASMEvent
 <ApplicationEventType>>, Recoverable {
   private static final Log LOG = LogFactory.getLog(AMTracker.class);
-  private HeartBeatThread heartBeatThread;
+  private AMLivelinessMonitor amLivelinessMonitor;
   private long amExpiryInterval; 
   @SuppressWarnings("rawtypes")
   private EventHandler handler;
@@ -95,7 +96,7 @@ public class AMTracker extends AbstractS
 
   public AMTracker(RMContext rmContext) {
     super(AMTracker.class.getName());
-    this.heartBeatThread = new HeartBeatThread();
+    this.amLivelinessMonitor = new AMLivelinessMonitor();
     this.rmContext = rmContext;
     this.appsStore = rmContext.getApplicationsStore();
   }
@@ -105,33 +106,43 @@ public class AMTracker extends AbstractS
     super.init(conf);
     this.handler = rmContext.getDispatcher().getEventHandler();
     this.amExpiryInterval = conf.getLong(YarnConfiguration.AM_EXPIRY_INTERVAL, 
-        YarnConfiguration.DEFAULT_AM_EXPIRY_INTERVAL);
+        RMConfig.DEFAULT_AM_EXPIRY_INTERVAL);
     LOG.info("AM expiry interval: " + this.amExpiryInterval);
-    this.amMaxRetries =  conf.getInt(YarnConfiguration.AM_MAX_RETRIES, 
-        YarnConfiguration.DEFAULT_AM_MAX_RETRIES);
+    this.amMaxRetries =  conf.getInt(RMConfig.AM_MAX_RETRIES, 
+        RMConfig.DEFAULT_AM_MAX_RETRIES);
     LOG.info("AM max retries: " + this.amMaxRetries);
+    this.amLivelinessMonitor.setMonitoringInterval(conf.getLong(
+        RMConfig.AMLIVELINESS_MONITORING_INTERVAL,
+        RMConfig.DEFAULT_AMLIVELINESS_MONITORING_INTERVAL));
     this.rmContext.getDispatcher().register(ApplicationEventType.class, this);
   }
 
   @Override
   public void start() {   
     super.start();
-    heartBeatThread.start();
+    amLivelinessMonitor.start();
   }
 
   /**
    * This class runs continuosly to track the application masters
    * that might be dead.
    */
-  private class HeartBeatThread extends Thread {
+  private class AMLivelinessMonitor extends Thread {
     private volatile boolean stop = false;
+    private long monitoringInterval =
+        RMConfig.DEFAULT_AMLIVELINESS_MONITORING_INTERVAL;
 
-    public HeartBeatThread() {
-      super("ApplicationsManager:" + HeartBeatThread.class.getName());
+    public AMLivelinessMonitor() {
+      super("ApplicationsManager:" + AMLivelinessMonitor.class.getName());
+    }
+
+    public void setMonitoringInterval(long interval) {
+      this.monitoringInterval = interval;
     }
 
     @Override
     public void run() {
+
       /* the expiry queue does not need to be in sync with applications,
        * if an applications in the expiry queue cannot be found in applications
        * its alright. We do not want to hold a hold on applications while going
@@ -164,6 +175,12 @@ public class AMTracker extends AbstractS
           }
         }
         expireAMs(expired);
+        try {
+          Thread.sleep(this.monitoringInterval);
+        } catch (InterruptedException e) {
+          LOG.warn(this.getClass().getName() + " interrupted. Returning.");
+          return;
+        }
       }
     }
 
@@ -185,13 +202,14 @@ public class AMTracker extends AbstractS
 
   @Override
   public void stop() {
-    heartBeatThread.interrupt();
-    heartBeatThread.shutdown();
+    amLivelinessMonitor.interrupt();
+    amLivelinessMonitor.shutdown();
     try {
-      heartBeatThread.join(1000);
+      amLivelinessMonitor.join();
     } catch (InterruptedException ie) {
-      LOG.info(heartBeatThread.getName() + " interrupted during join ", 
-          ie);    }
+      LOG.info(amLivelinessMonitor.getName() + " interrupted during join ",
+          ie);
+    }
     super.stop();
   }
 

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/ApplicationMasterLauncher.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/ApplicationMasterLauncher.java?rev=1099387&r1=1099386&r2=1099387&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/ApplicationMasterLauncher.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/ApplicationMasterLauncher.java Wed May  4 10:00:22 2011
@@ -17,7 +17,7 @@
 */
 
 package org.apache.hadoop.yarn.server.resourcemanager.applicationsmanager;
-import java.util.Queue;
+import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
@@ -38,9 +38,9 @@ class ApplicationMasterLauncher extends 
       ApplicationMasterLauncher.class);
   private final ThreadPoolExecutor launcherPool;
   private final EventHandler handler;
-  private Thread launcherHandlingThread;
+  private LauncherThread launcherHandlingThread;
   
-  private final Queue<Runnable> masterEvents
+  private final BlockingQueue<Runnable> masterEvents
     = new LinkedBlockingQueue<Runnable>();
   
   private ApplicationTokenSecretManager applicationTokenSecretManager;
@@ -82,7 +82,7 @@ class ApplicationMasterLauncher extends 
   public void stop() {
     launcherHandlingThread.interrupt();
     try {
-      launcherHandlingThread.join(1000);
+      launcherHandlingThread.join();
     } catch (InterruptedException ie) {
       LOG.info(launcherHandlingThread.getName() + " interrupted during join ", 
           ie);    }
@@ -94,9 +94,13 @@ class ApplicationMasterLauncher extends 
     @Override
     public void run() {
       while (!this.isInterrupted()) {
-        Runnable toLaunch = masterEvents.poll();
-        if (toLaunch != null) {
+        Runnable toLaunch;
+        try {
+          toLaunch = masterEvents.take();
           launcherPool.execute(toLaunch);
+        } catch (InterruptedException e) {
+          LOG.warn(this.getClass().getName() + " interrupted. Returning.");
+          return;
         }
       }
     }

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/StoreFactory.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/StoreFactory.java?rev=1099387&r1=1099386&r2=1099387&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/StoreFactory.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/StoreFactory.java Wed May  4 10:00:22 2011
@@ -23,14 +23,14 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.yarn.api.records.ApplicationMaster;
 import org.apache.hadoop.yarn.api.records.Container;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.server.resourcemanager.RMConfig;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
 
 public class StoreFactory {
   
   public static Store getStore(Configuration conf) {
     Store store = ReflectionUtils.newInstance(
-        conf.getClass(YarnConfiguration.RM_STORE, 
+        conf.getClass(RMConfig.RM_STORE, 
             MemStore.class, Store.class), 
             conf);
     return store;

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKStore.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKStore.java?rev=1099387&r1=1099386&r2=1099387&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKStore.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKStore.java Wed May  4 10:00:22 2011
@@ -39,7 +39,6 @@ import org.apache.hadoop.yarn.api.record
 import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;
 import org.apache.hadoop.yarn.api.records.impl.pb.NodeIdPBImpl;
 import org.apache.hadoop.yarn.api.records.impl.pb.NodeManagerInfoPBImpl;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationMasterProto;
@@ -47,6 +46,7 @@ import org.apache.hadoop.yarn.proto.Yarn
 import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto;
 import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdProto;
 import org.apache.hadoop.yarn.proto.YarnProtos.NodeManagerInfoProto;
+import org.apache.hadoop.yarn.server.resourcemanager.RMConfig;
 import org.apache.hadoop.yarn.server.resourcemanager.resourcetracker.RMResourceTrackerImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeManager;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeManagerImpl;
@@ -89,9 +89,9 @@ public class ZKStore implements Store {
 
   public ZKStore(Configuration conf) throws IOException {
     this.conf = conf;
-    this.ZK_ADDRESS = conf.get(YarnConfiguration.ZK_ADDRESS);
-    this.ZK_TIMEOUT = conf.getInt(YarnConfiguration.ZK_SESSION_TIMEOUT,
-        YarnConfiguration.DEFAULT_ZK_TIMEOUT);
+    this.ZK_ADDRESS = conf.get(RMConfig.ZK_ADDRESS);
+    this.ZK_TIMEOUT = conf.getInt(RMConfig.ZK_SESSION_TIMEOUT,
+        RMConfig.DEFAULT_ZK_TIMEOUT);
     zkClient = new ZooKeeper(this.ZK_ADDRESS, 
         this.ZK_TIMEOUT,
         createZKWatcher() 

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/RMResourceTrackerImpl.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/RMResourceTrackerImpl.java?rev=1099387&r1=1099386&r2=1099387&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/RMResourceTrackerImpl.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/RMResourceTrackerImpl.java Wed May  4 10:00:22 2011
@@ -47,7 +47,6 @@ import org.apache.hadoop.yarn.api.record
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.YarnClusterMetrics;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
@@ -63,6 +62,7 @@ import org.apache.hadoop.yarn.server.api
 import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse;
 import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
 import org.apache.hadoop.yarn.server.api.records.RegistrationResponse;
+import org.apache.hadoop.yarn.server.resourcemanager.RMConfig;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.NodeStore;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store.ApplicationInfo;
@@ -89,7 +89,7 @@ ResourceTracker, ClusterTracker {
   private Map<String, NodeId> nodes = new ConcurrentHashMap<String, NodeId>();
   private final Map<NodeId, NodeInfoTracker> nodeManagers = 
     new ConcurrentHashMap<NodeId, NodeInfoTracker>();
-  private final HeartBeatThread heartbeatThread;
+  private final NMLivelinessMonitor nmLivelinessMonitor;
 
   private static final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
 
@@ -127,7 +127,7 @@ ResourceTracker, ClusterTracker {
     super(RMResourceTrackerImpl.class.getName());
     reboot.setReboot(true);
     this.containerTokenSecretManager = containerTokenSecretManager;
-    this.heartbeatThread = new HeartBeatThread();
+    this.nmLivelinessMonitor = new NMLivelinessMonitor();
     this.rmContext = context;
     this.nodeStore = context.getNodeStore();
   }
@@ -138,8 +138,11 @@ ResourceTracker, ClusterTracker {
       conf.get(YarnServerConfig.RESOURCETRACKER_ADDRESS,
           YarnServerConfig.DEFAULT_RESOURCETRACKER_BIND_ADDRESS);
     resourceTrackerAddress = NetUtils.createSocketAddr(resourceTrackerBindAddress);
-    this.nmExpiryInterval =  conf.getLong(YarnConfiguration.NM_EXPIRY_INTERVAL, 
-        YarnConfiguration.DEFAULT_NM_EXPIRY_INTERVAL);
+    this.nmExpiryInterval =  conf.getLong(RMConfig.NM_EXPIRY_INTERVAL, 
+        RMConfig.DEFAULT_NM_EXPIRY_INTERVAL);
+    this.nmLivelinessMonitor.setMonitoringInterval(conf.getLong(
+        RMConfig.NMLIVELINESS_MONITORING_INTERVAL,
+        RMConfig.DEFAULT_NMLIVELINESS_MONITORING_INTERVAL));
     super.init(conf);
   }
 
@@ -161,7 +164,7 @@ ResourceTracker, ClusterTracker {
       rpc.getServer(ResourceTracker.class, this, resourceTrackerAddress,
           rtServerConf, null);
     this.server.start();
-    this.heartbeatThread.start();
+    this.nmLivelinessMonitor.start();
     LOG.info("Expiry interval of NodeManagers set to " + nmExpiryInterval);
     super.start();
   }
@@ -373,6 +376,14 @@ ResourceTracker, ClusterTracker {
 
   @Override
   public void stop() {
+    this.nmLivelinessMonitor.interrupt();
+    this.nmLivelinessMonitor.shutdown();
+    try {
+      this.nmLivelinessMonitor.join();
+    } catch (InterruptedException ie) {
+      LOG.info(this.nmLivelinessMonitor.getName() + " interrupted during join ",
+          ie);
+    }
     if (this.server != null) {
       this.server.close();
     }
@@ -410,11 +421,17 @@ ResourceTracker, ClusterTracker {
    * This class runs continuosly to track the nodemanagers
    * that might be dead.
    */
-  private class HeartBeatThread extends Thread {
+  private class NMLivelinessMonitor extends Thread {
     private volatile boolean stop = false;
+    private long monitoringInterval =
+        RMConfig.DEFAULT_NMLIVELINESS_MONITORING_INTERVAL;
+
+    public NMLivelinessMonitor() {
+      super("RMResourceTrackerImpl:" + NMLivelinessMonitor.class.getName());
+    }
 
-    public HeartBeatThread() {
-      super("RMResourceTrackerImpl:" + HeartBeatThread.class.getName());
+    public void setMonitoringInterval(long interval) {
+      this.monitoringInterval = interval;
     }
 
     @Override
@@ -458,8 +475,18 @@ ResourceTracker, ClusterTracker {
           }
         }
         expireNMs(expired);
+        try {
+          Thread.sleep(this.monitoringInterval);
+        } catch (InterruptedException e) {
+          LOG.warn(this.getClass().getName() + " interrupted. Returning.");
+          return;
+        }
       }
     }
+
+    public void shutdown() {
+      this.stop = true;
+    }
   }
 
   @Override

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/tools/RMAdmin.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/tools/RMAdmin.java?rev=1099387&r1=1099386&r2=1099387&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/tools/RMAdmin.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/tools/RMAdmin.java Wed May  4 10:00:22 2011
@@ -17,6 +17,7 @@ import org.apache.hadoop.yarn.factories.
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.ipc.YarnRPC;
 import org.apache.hadoop.yarn.security.admin.AdminSecurityInfo;
+import org.apache.hadoop.yarn.server.resourcemanager.RMConfig;
 import org.apache.hadoop.yarn.server.resourcemanager.api.RMAdminProtocol;
 import org.apache.hadoop.yarn.server.resourcemanager.api.protocolrecords.RefreshQueuesRequest;
 
@@ -87,8 +88,8 @@ public class RMAdmin extends Configured 
 
     // Create the client
     final String adminAddress =
-      conf.get(YarnConfiguration.ADMIN_ADDRESS,
-          YarnConfiguration.DEFAULT_ADMIN_BIND_ADDRESS);
+      conf.get(RMConfig.ADMIN_ADDRESS,
+          RMConfig.DEFAULT_ADMIN_BIND_ADDRESS);
     final YarnRPC rpc = YarnRPC.create(conf);
     
     if (UserGroupInformation.isSecurityEnabled()) {

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMLaunchFailure.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMLaunchFailure.java?rev=1099387&r1=1099386&r2=1099387&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMLaunchFailure.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMLaunchFailure.java Wed May  4 10:00:22 2011
@@ -42,6 +42,7 @@ import org.apache.hadoop.yarn.event.Even
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.security.ApplicationTokenSecretManager;
+import org.apache.hadoop.yarn.server.resourcemanager.RMConfig;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.applicationsmanager.events.ASMEvent;
@@ -176,7 +177,7 @@ public class TestAMLaunchFailure extends
     asmImpl = new ExtApplicationsManagerImpl(applicationTokenSecretManager, scheduler);
     new DummyApplicationTracker();
     conf.setLong(YarnConfiguration.AM_EXPIRY_INTERVAL, 3000L);
-    conf.setInt(YarnConfiguration.AM_MAX_RETRIES, 1);
+    conf.setInt(RMConfig.AM_MAX_RETRIES, 1);
     asmImpl.init(conf);
     asmImpl.start();  
   }

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java?rev=1099387&r1=1099386&r2=1099387&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java Wed May  4 10:00:22 2011
@@ -30,6 +30,7 @@ import org.apache.hadoop.yarn.event.Even
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.security.ApplicationTokenSecretManager;
+import org.apache.hadoop.yarn.server.resourcemanager.RMConfig;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.applicationsmanager.events.ASMEvent;
@@ -224,7 +225,7 @@ public class TestAMRestart extends TestC
     appImpl = new ExtApplicationsManagerImpl(appTokenSecretManager, scheduler, asmContext);
     
     conf.setLong(YarnConfiguration.AM_EXPIRY_INTERVAL, 1000L);
-    conf.setInt(YarnConfiguration.AM_MAX_RETRIES, maxFailures);
+    conf.setInt(RMConfig.AM_MAX_RETRIES, maxFailures);
     appImpl.init(conf);
     appImpl.start();
   }

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMExpiry.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMExpiry.java?rev=1099387&r1=1099386&r2=1099387&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMExpiry.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMExpiry.java Wed May  4 10:00:22 2011
@@ -33,13 +33,13 @@ import org.apache.hadoop.yarn.api.record
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
 import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
 import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse;
 import org.apache.hadoop.yarn.server.api.records.RegistrationResponse;
+import org.apache.hadoop.yarn.server.resourcemanager.RMConfig;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
@@ -113,7 +113,7 @@ public class TestNMExpiry extends TestCa
        context);
     resourceTracker.addListener(new VoidResourceListener());
     
-    conf.setLong(YarnConfiguration.NM_EXPIRY_INTERVAL, 1000);
+    conf.setLong(RMConfig.NM_EXPIRY_INTERVAL, 1000);
     resourceTracker.init(conf);
     resourceTracker.start();
   }

Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java?rev=1099387&r1=1099386&r2=1099387&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java Wed May  4 10:00:22 2011
@@ -27,8 +27,8 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.net.NetworkTopology;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.server.resourcemanager.Application;
+import org.apache.hadoop.yarn.server.resourcemanager.RMConfig;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.Task;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store;
@@ -58,7 +58,7 @@ public class TestCapacityScheduler exten
     resourceManager = new ResourceManager(store);
     CapacitySchedulerConfiguration csConf = 
       new CapacitySchedulerConfiguration();
-    csConf.setClass(YarnConfiguration.RESOURCE_SCHEDULER, 
+    csConf.setClass(RMConfig.RESOURCE_SCHEDULER, 
         CapacityScheduler.class, ResourceScheduler.class);
     setupQueueConfiguration(csConf);
     resourceManager.init(csConf);