You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by jh...@apache.org on 2019/03/27 18:48:48 UTC

[hadoop] 02/11: YARN-6620. Add support in NodeManager to isolate GPU devices by using CGroups. Contributed by Wangda Tan.

This is an automated email from the ASF dual-hosted git repository.

jhung pushed a commit to branch YARN-8200.branch3
in repository https://gitbox.apache.org/repos/asf/hadoop.git

commit 49df5151cf1094f9d5a9fcd783577cbe2691544e
Author: Sunil G <su...@apache.org>
AuthorDate: Wed Oct 11 23:44:33 2017 +0530

    YARN-6620. Add support in NodeManager to isolate GPU devices by using CGroups. Contributed by Wangda Tan.
    
    (cherry-picked from commit fa5cfc68f37c78b6cf26ce13247b9ff34da806cd)
---
 .../yarn/api/records/ResourceInformation.java      |  10 +
 .../apache/hadoop/yarn/conf/YarnConfiguration.java |  33 ++
 .../hadoop/yarn/util/resource/ResourceUtils.java   |  51 +-
 .../src/main/resources/yarn-default.xml            |  39 ++
 .../yarn/util/resource/TestResourceUtils.java      |  17 +
 .../yarn/server/nodemanager/ContainerExecutor.java |   3 +-
 .../hadoop/yarn/server/nodemanager/Context.java    |   3 +
 .../nodemanager/DefaultContainerExecutor.java      |   2 +-
 .../server/nodemanager/LinuxContainerExecutor.java |  10 +-
 .../yarn/server/nodemanager/NodeManager.java       |  92 ++--
 .../server/nodemanager/NodeStatusUpdaterImpl.java  |  38 +-
 .../linux/privileged/PrivilegedOperation.java      |   1 +
 .../linux/resources/ResourceHandlerChain.java      |   4 +-
 .../linux/resources/ResourceHandlerModule.java     |  42 +-
 .../linux/resources/gpu/GpuResourceAllocator.java  | 242 +++++++++
 .../resources/gpu/GpuResourceHandlerImpl.java      | 153 ++++++
 .../resourceplugin/NodeResourceUpdaterPlugin.java  |  52 ++
 .../resourceplugin/ResourcePlugin.java             |  83 ++++
 .../resourceplugin/ResourcePluginManager.java      | 106 ++++
 .../resourceplugin/gpu/GpuDiscoverer.java          | 254 ++++++++++
 .../gpu/GpuNodeResourceUpdateHandler.java          |  66 +++
 .../resourceplugin/gpu/GpuResourcePlugin.java      |  61 +++
 .../webapp/dao/gpu/GpuDeviceInformation.java       |  72 +++
 .../webapp/dao/gpu/GpuDeviceInformationParser.java |  87 ++++
 .../webapp/dao/gpu/PerGpuDeviceInformation.java    | 165 +++++++
 .../webapp/dao/gpu/PerGpuMemoryUsage.java          |  58 +++
 .../webapp/dao/gpu/PerGpuTemperature.java          |  80 +++
 .../webapp/dao/gpu/PerGpuUtilizations.java         |  50 ++
 .../server/nodemanager/NodeManagerTestBase.java    | 164 ++++++
 .../nodemanager/TestDefaultContainerExecutor.java  |   4 +-
 .../nodemanager/TestLinuxContainerExecutor.java    |   2 +-
 .../TestLinuxContainerExecutorWithMocks.java       |   2 +-
 .../yarn/server/nodemanager/TestNodeManager.java   |   2 +-
 .../server/nodemanager/TestNodeStatusUpdater.java  | 100 +---
 .../nodemanager/amrmproxy/BaseAMRMProxyTest.java   |  46 +-
 .../linux/resources/TestResourceHandlerModule.java |   8 +-
 .../resources/gpu/TestGpuResourceHandler.java      | 382 ++++++++++++++
 .../TestContainersMonitorResourceChange.java       |   2 +-
 .../resourceplugin/TestResourcePluginManager.java  | 261 ++++++++++
 .../resourceplugin/gpu/TestGpuDiscoverer.java      | 123 +++++
 .../dao/gpu/TestGpuDeviceInformationParser.java    |  50 ++
 .../test/resources/nvidia-smi-sample-xml-output    | 547 +++++++++++++++++++++
 42 files changed, 3363 insertions(+), 204 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java
index e8280ba..67592cc 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java
@@ -18,10 +18,13 @@
 
 package org.apache.hadoop.yarn.api.records;
 
+import com.google.common.collect.ImmutableMap;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes;
 import org.apache.hadoop.yarn.util.UnitsConversionUtil;
 
+import java.util.Map;
+
 /**
  * Class to encapsulate information about a Resource - the name of the resource,
  * the units(milli, micro, etc), the type(countable), and the value.
@@ -35,13 +38,20 @@ public class ResourceInformation implements Comparable<ResourceInformation> {
   private long minimumAllocation;
   private long maximumAllocation;
 
+  // Known resource types
   public static final String MEMORY_URI = "memory-mb";
   public static final String VCORES_URI = "vcores";
+  public static final String GPU_URI = "yarn.io/gpu";
 
   public static final ResourceInformation MEMORY_MB =
       ResourceInformation.newInstance(MEMORY_URI, "Mi");
   public static final ResourceInformation VCORES =
       ResourceInformation.newInstance(VCORES_URI);
+  public static final ResourceInformation GPUS =
+      ResourceInformation.newInstance(GPU_URI);
+
+  public static final Map<String, ResourceInformation> MANDATORY_RESOURCES =
+      ImmutableMap.of(MEMORY_URI, MEMORY_MB, VCORES_URI, VCORES, GPU_URI, GPUS);
 
   /**
    * Get the name for the resource.
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 6c65b19..4bde7200 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -1430,6 +1430,39 @@ public class YarnConfiguration extends Configuration {
   public static final String NM_NETWORK_RESOURCE_OUTBOUND_BANDWIDTH_YARN_MBIT =
       NM_NETWORK_RESOURCE_PREFIX + "outbound-bandwidth-yarn-mbit";
 
+  /**
+   * Prefix for computation resources; examples of computation resources are
+   * GPU / FPGA / TPU, etc.
+   */
+  @Private
+  public static final String NM_RESOURCE_PLUGINS =
+      NM_PREFIX + "resource-plugins";
+
+  /**
+   * Prefix for gpu configurations. Work in progress: This configuration
+   * parameter may be changed/removed in the future.
+   */
+  @Private
+  public static final String NM_GPU_RESOURCE_PREFIX =
+      NM_RESOURCE_PLUGINS + ".gpu.";
+
+  @Private
+  public static final String NM_GPU_ALLOWED_DEVICES =
+      NM_GPU_RESOURCE_PREFIX + "allowed-gpu-devices";
+  @Private
+  public static final String AUTOMATICALLY_DISCOVER_GPU_DEVICES = "auto";
+
+  /**
+   * This setting controls where and how to invoke GPU binaries.
+   */
+  @Private
+  public static final String NM_GPU_PATH_TO_EXEC =
+      NM_GPU_RESOURCE_PREFIX + "path-to-discovery-executables";
+
+  @Private
+  public static final String DEFAULT_NM_GPU_PATH_TO_EXEC = "";
+
+
   /** NM Webapp address.**/
   public static final String NM_WEBAPP_ADDRESS = NM_PREFIX + "webapp.address";
   public static final int DEFAULT_NM_WEBAPP_PORT = 8042;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java
index b1d0b75..b945183 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java
@@ -47,6 +47,8 @@ import java.util.concurrent.ConcurrentHashMap;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import static org.apache.hadoop.yarn.api.records.ResourceInformation.GPU_URI;
+
 /**
  * Helper class to read the resource-types to be supported by the system.
  */
@@ -89,33 +91,32 @@ public class ResourceUtils {
      */
     String key = "memory";
     if (resourceInformationMap.containsKey(key)) {
-      LOG.warn("Attempt to define resource '" + key +
-          "', but it is not allowed.");
-      throw new YarnRuntimeException("Attempt to re-define mandatory resource '"
-          + key + "'.");
+      LOG.warn(
+          "Attempt to define resource '" + key + "', but it is not allowed.");
+      throw new YarnRuntimeException(
+          "Attempt to re-define mandatory resource '" + key + "'.");
     }
 
-    if (resourceInformationMap.containsKey(MEMORY)) {
-      ResourceInformation memInfo = resourceInformationMap.get(MEMORY);
-      String memUnits = ResourceInformation.MEMORY_MB.getUnits();
-      ResourceTypes memType = ResourceInformation.MEMORY_MB.getResourceType();
-      if (!memInfo.getUnits().equals(memUnits) || !memInfo.getResourceType()
-          .equals(memType)) {
-        throw new YarnRuntimeException(
-            "Attempt to re-define mandatory resource 'memory-mb'. It can only"
-                + " be of type 'COUNTABLE' and have units 'Mi'.");
-      }
-    }
-
-    if (resourceInformationMap.containsKey(VCORES)) {
-      ResourceInformation vcoreInfo = resourceInformationMap.get(VCORES);
-      String vcoreUnits = ResourceInformation.VCORES.getUnits();
-      ResourceTypes vcoreType = ResourceInformation.VCORES.getResourceType();
-      if (!vcoreInfo.getUnits().equals(vcoreUnits) || !vcoreInfo
-          .getResourceType().equals(vcoreType)) {
-        throw new YarnRuntimeException(
-            "Attempt to re-define mandatory resource 'vcores'. It can only be"
-                + " of type 'COUNTABLE' and have units ''(no units).");
+    for (Map.Entry<String, ResourceInformation> mandatoryResourceEntry :
+        ResourceInformation.MANDATORY_RESOURCES.entrySet()) {
+      key = mandatoryResourceEntry.getKey();
+      ResourceInformation mandatoryRI = mandatoryResourceEntry.getValue();
+
+      ResourceInformation newDefinedRI = resourceInformationMap.get(key);
+      if (newDefinedRI != null) {
+        String expectedUnit = mandatoryRI.getUnits();
+        ResourceTypes expectedType = mandatoryRI.getResourceType();
+        String actualUnit = newDefinedRI.getUnits();
+        ResourceTypes actualType = newDefinedRI.getResourceType();
+
+        if (!expectedUnit.equals(actualUnit) || !expectedType.equals(
+            actualType)) {
+          throw new YarnRuntimeException("Defined mandatory resource type="
+              + key + " inside resource-types.xml, however its type or "
+              + "unit is conflict to mandatory resource types, expected type="
+              + expectedType + ", unit=" + expectedUnit + "; actual type="
+              + actualType + " actual unit=" + actualUnit);
+        }
       }
     }
   }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 6d69a10..91935ad 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -3444,6 +3444,45 @@
 
   <property>
     <description>
+      When yarn.nodemanager.resource-plugins.gpu.allowed-gpu-devices=auto is
+      specified, the YARN NodeManager needs to run a GPU discovery binary
+      (currently only nvidia-smi is supported) to get GPU-related information.
+      When the value is empty (default), the YARN NodeManager will try to
+      locate the discovery executable itself.
+      An example of the config value is: /usr/local/bin/nvidia-smi
+    </description>
+    <name>yarn.nodemanager.resource-plugins.gpu.path-to-discovery-executables</name>
+    <value></value>
+  </property>
+
+  <property>
+    <description>
+      Enable additional discovery/isolation of resources on the NodeManager,
+      split by comma. By default, this is empty. Acceptable values: { "yarn.io/gpu" }.
+    </description>
+    <name>yarn.nodemanager.resource-plugins</name>
+    <value></value>
+  </property>
+
+  <property>
+    <description>
+      Specify GPU devices which can be managed by YARN NodeManager, split by
+      comma. The number of GPU devices will be reported to the RM to make
+      scheduling decisions. Set to auto (default) to let YARN automatically
+      discover GPU resources from the system.
+      Manually specify GPU devices if automatic GPU detection fails or the admin
+      only wants a subset of GPU devices managed by YARN. A GPU device is
+      identified by its minor device number. A common way to get the minor
+      device number of GPUs is to run "nvidia-smi -q" and search for
+      "Minor Number" in the output. An example of manual specification is
+      "0,1,2,4", which lets YARN NodeManager manage GPUs with minor number 0/1/2/4.
+    </description>
+    <name>yarn.nodemanager.resource-plugins.gpu.allowed-gpu-devices</name>
+    <value>auto</value>
+  </property>
+
+  <property>
+    <description>
       Provides an option for client to load supported resource types from RM
       instead of depending on local resource-types.xml file.
     </description>
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceUtils.java
index 0e5e8a8..0ad029c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceUtils.java
@@ -53,6 +53,23 @@ public class TestResourceUtils {
     }
   }
 
+  public static void addNewTypesToResources(String... resourceTypes) {
+    // Initialize resource map
+    Map<String, ResourceInformation> riMap = new HashMap<>();
+
+    // Initialize mandatory resources
+    riMap.put(ResourceInformation.MEMORY_URI, ResourceInformation.MEMORY_MB);
+    riMap.put(ResourceInformation.VCORES_URI, ResourceInformation.VCORES);
+
+    for (String newResource : resourceTypes) {
+      riMap.put(newResource, ResourceInformation
+          .newInstance(newResource, "", 0, ResourceTypes.COUNTABLE, 0,
+              Integer.MAX_VALUE));
+    }
+
+    ResourceUtils.initializeResourcesFromResourceInformationMap(riMap);
+  }
+
   @Before
   public void setup() {
     ResourceUtils.resetResourceTypes();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
index 3b532c9..f43b1ee 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
@@ -113,9 +113,10 @@ public abstract class ContainerExecutor implements Configurable {
    * Run the executor initialization steps.
    * Verify that the necessary configs and permissions are in place.
    *
+   * @param nmContext Context of NM
    * @throws IOException if initialization fails
    */
-  public abstract void init() throws IOException;
+  public abstract void init(Context nmContext) throws IOException;
 
   /**
    * This function localizes the JAR file on-demand.
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
index a2d00a4..a1c474f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManag
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager;
 import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
 import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
@@ -122,4 +123,6 @@ public interface Context {
   ContainerExecutor getContainerExecutor();
 
   ContainerStateTransitionListener getContainerStateTransitionListener();
+
+  ResourcePluginManager getResourcePluginManager();
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java
index ac88e8c..5772403 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java
@@ -133,7 +133,7 @@ public class DefaultContainerExecutor extends ContainerExecutor {
   }
 
   @Override
-  public void init() throws IOException {
+  public void init(Context nmContext) throws IOException {
     // nothing to do or verify here
   }
 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
index 64f3d58..da1989e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.nodemanager;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Optional;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerChain;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -282,7 +283,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
   }
 
   @Override
-  public void init() throws IOException {
+  public void init(Context nmContext) throws IOException {
     Configuration conf = super.getConf();
 
     // Send command to executor which will just start up,
@@ -306,7 +307,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
 
     try {
       resourceHandlerChain = ResourceHandlerModule
-          .getConfiguredResourceHandlerChain(conf);
+          .getConfiguredResourceHandlerChain(conf, nmContext);
       if (LOG.isDebugEnabled()) {
         LOG.debug("Resource handler chain enabled = " + (resourceHandlerChain
             != null));
@@ -871,4 +872,9 @@ public class LinuxContainerExecutor extends ContainerExecutor {
           e);
     }
   }
+
+  @VisibleForTesting
+  public ResourceHandler getResourceHandler() {
+    return resourceHandlerChain;
+  }
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
index 44133df..c7edec2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
@@ -18,23 +18,7 @@
 
 package org.apache.hadoop.yarn.server.nodemanager;
 
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ConcurrentLinkedQueue;
-import java.util.concurrent.ConcurrentMap;
-import java.util.concurrent.ConcurrentSkipListMap;
-import java.util.concurrent.atomic.AtomicBoolean;
-
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
-import org.apache.hadoop.yarn.state.MultiStateTransitionListener;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -65,12 +49,16 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
 import org.apache.hadoop.yarn.server.api.records.AppCollectorData;
 import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManager;
 import org.apache.hadoop.yarn.server.nodemanager.collectormanager.NMCollectorService;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManager;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager;
 import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
 import org.apache.hadoop.yarn.server.nodemanager.nodelabels.ConfigurationNodeLabelsProvider;
 import org.apache.hadoop.yarn.server.nodemanager.nodelabels.NodeLabelsProvider;
@@ -78,14 +66,25 @@ import org.apache.hadoop.yarn.server.nodemanager.nodelabels.ScriptBasedNodeLabel
 import org.apache.hadoop.yarn.server.nodemanager.recovery.NMLeveldbStateStoreService;
 import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
 import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
-import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM;
 import org.apache.hadoop.yarn.server.nodemanager.timelineservice.NMTimelinePublisher;
 import org.apache.hadoop.yarn.server.nodemanager.webapp.WebServer;
+import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
+import org.apache.hadoop.yarn.state.MultiStateTransitionListener;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
-import com.google.common.annotations.VisibleForTesting;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.ConcurrentSkipListMap;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 public class NodeManager extends CompositeService 
     implements EventHandler<NodeManagerEvent> {
@@ -332,6 +331,18 @@ public class NodeManager extends CompositeService
         nmCheckintervalTime, scriptTimeout, scriptArgs);
   }
 
+  @VisibleForTesting
+  protected ResourcePluginManager createResourcePluginManager() {
+    return new ResourcePluginManager();
+  }
+
+  @VisibleForTesting
+  protected ContainerExecutor createContainerExecutor(Configuration conf) {
+    return ReflectionUtils.newInstance(
+        conf.getClass(YarnConfiguration.NM_CONTAINER_EXECUTOR,
+            DefaultContainerExecutor.class, ContainerExecutor.class), conf);
+  }
+
   @Override
   protected void serviceInit(Configuration conf) throws Exception {
     rmWorkPreservingRestartEnabled = conf.getBoolean(YarnConfiguration
@@ -357,11 +368,20 @@ public class NodeManager extends CompositeService
     
     this.aclsManager = new ApplicationACLsManager(conf);
 
-    ContainerExecutor exec = ReflectionUtils.newInstance(
-        conf.getClass(YarnConfiguration.NM_CONTAINER_EXECUTOR,
-          DefaultContainerExecutor.class, ContainerExecutor.class), conf);
+    boolean isDistSchedulingEnabled =
+        conf.getBoolean(YarnConfiguration.DIST_SCHEDULING_ENABLED,
+            YarnConfiguration.DEFAULT_DIST_SCHEDULING_ENABLED);
+
+    this.context = createNMContext(containerTokenSecretManager,
+        nmTokenSecretManager, nmStore, isDistSchedulingEnabled, conf);
+
+    ResourcePluginManager pluginManager = createResourcePluginManager();
+    pluginManager.initialize(context);
+    ((NMContext)context).setResourcePluginManager(pluginManager);
+
+    ContainerExecutor exec = createContainerExecutor(conf);
     try {
-      exec.init();
+      exec.init(context);
     } catch (IOException e) {
       throw new YarnRuntimeException("Failed to initialize container executor", e);
     }    
@@ -377,13 +397,6 @@ public class NodeManager extends CompositeService
             getNodeHealthScriptRunner(conf), dirsHandler);
     addService(nodeHealthChecker);
 
-    boolean isDistSchedulingEnabled =
-        conf.getBoolean(YarnConfiguration.DIST_SCHEDULING_ENABLED,
-            YarnConfiguration.DEFAULT_DIST_SCHEDULING_ENABLED);
-
-    this.context = createNMContext(containerTokenSecretManager,
-        nmTokenSecretManager, nmStore, isDistSchedulingEnabled, conf);
-
 
     ((NMContext)context).setContainerExecutor(exec);
 
@@ -457,6 +470,12 @@ public class NodeManager extends CompositeService
     try {
       super.serviceStop();
       DefaultMetricsSystem.shutdown();
+
+      // Cleanup ResourcePluginManager
+      ResourcePluginManager rpm = context.getResourcePluginManager();
+      if (rpm != null) {
+        rpm.cleanup();
+      }
     } finally {
       // YARN-3641: NM's services stop get failed shouldn't block the
       // release of NMLevelDBStore.
@@ -604,6 +623,8 @@ public class NodeManager extends CompositeService
 
     private ContainerStateTransitionListener containerStateTransitionListener;
 
+    private ResourcePluginManager resourcePluginManager;
+
     public NMContext(NMContainerTokenSecretManager containerTokenSecretManager,
         NMTokenSecretManagerInNM nmTokenSecretManager,
         LocalDirsHandlerService dirsHandler, ApplicationACLsManager aclsManager,
@@ -804,6 +825,15 @@ public class NodeManager extends CompositeService
         ContainerStateTransitionListener transitionListener) {
       this.containerStateTransitionListener = transitionListener;
     }
+
+    public ResourcePluginManager getResourcePluginManager() {
+      return resourcePluginManager;
+    }
+
+    public void setResourcePluginManager(
+        ResourcePluginManager resourcePluginManager) {
+      this.resourcePluginManager = resourcePluginManager;
+    }
   }
 
   /**
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
index ee85042..91217dd 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
@@ -33,6 +33,9 @@ import java.util.Map.Entry;
 import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.ConcurrentLinkedQueue;
+
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -178,14 +181,15 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
     long memoryMb = totalResource.getMemorySize();
     float vMemToPMem =
         conf.getFloat(
-            YarnConfiguration.NM_VMEM_PMEM_RATIO, 
-            YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO); 
+            YarnConfiguration.NM_VMEM_PMEM_RATIO,
+            YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO);
     long virtualMemoryMb = (long)Math.ceil(memoryMb * vMemToPMem);
-    
     int virtualCores = totalResource.getVirtualCores();
-    LOG.info("Nodemanager resources: memory set to " + memoryMb + "MB.");
-    LOG.info("Nodemanager resources: vcores set to " + virtualCores + ".");
-    LOG.info("Nodemanager resources: " + totalResource);
+
+    // Update configured resources via plugins.
+    updateConfiguredResourcesViaPlugins(totalResource);
+
+    LOG.info("Nodemanager resources is set to: " + totalResource);
 
     metrics.addResource(totalResource);
 
@@ -342,12 +346,27 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
     return ServerRMProxy.createRMProxy(conf, ResourceTracker.class);
   }
 
+  private void updateConfiguredResourcesViaPlugins(
+      Resource configuredResource) throws YarnException {
+    ResourcePluginManager pluginManager = context.getResourcePluginManager();
+    if (pluginManager != null && pluginManager.getNameToPlugins() != null) {
+      // Update configured resource
+      for (ResourcePlugin resourcePlugin : pluginManager.getNameToPlugins()
+          .values()) {
+        if (resourcePlugin.getNodeResourceHandlerInstance() != null) {
+          resourcePlugin.getNodeResourceHandlerInstance()
+              .updateConfiguredResource(configuredResource);
+        }
+      }
+    }
+  }
+
   @VisibleForTesting
   protected void registerWithRM()
       throws YarnException, IOException {
     RegisterNodeManagerResponse regNMResponse;
     Set<NodeLabel> nodeLabels = nodeLabelsHandler.getNodeLabelsForRegistration();
- 
+
     // Synchronize NM-RM registration with
     // ContainerManagerImpl#increaseContainersResource and
     // ContainerManagerImpl#startContainers to avoid race condition
@@ -358,6 +377,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
           RegisterNodeManagerRequest.newInstance(nodeId, httpPort, totalResource,
               nodeManagerVersionId, containerReports, getRunningApplications(),
               nodeLabels, physicalResource);
+
       if (containerReports != null) {
         LOG.info("Registering with RM using containers :" + containerReports);
       }
@@ -406,7 +426,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
     if (masterKey != null) {
       this.context.getContainerTokenSecretManager().setMasterKey(masterKey);
     }
-    
+
     masterKey = regNMResponse.getNMTokenMasterKey();
     if (masterKey != null) {
       this.context.getNMTokenSecretManager().setMasterKey(masterKey);
@@ -738,7 +758,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
       }
     }
   }
-  
+
   @Override
   public long getRMIdentifier() {
     return this.rmIdentifier;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java
index 8402a16..db0b225 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java
@@ -51,6 +51,7 @@ public class PrivilegedOperation {
     TC_READ_STATS("--tc-read-stats"),
     ADD_PID_TO_CGROUP(""), //no CLI switch supported yet.
     RUN_DOCKER_CMD("--run-docker"),
+    GPU("--module-gpu"),
     LIST_AS_USER(""); //no CLI switch supported yet.
 
     private final String option;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerChain.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerChain.java
index 955d216..72bf30c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerChain.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerChain.java
@@ -20,6 +20,7 @@
 
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources;
 
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
@@ -135,7 +136,8 @@ public class ResourceHandlerChain implements ResourceHandler {
     return allOperations;
   }
 
-  List<ResourceHandler> getResourceHandlerList() {
+  @VisibleForTesting
+  public List<ResourceHandler> getResourceHandlerList() {
     return Collections.unmodifiableList(resourceHandlers);
   }
 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java
index 3c61cd4..ce850ab 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java
@@ -21,25 +21,28 @@
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources;
 
 import com.google.common.annotations.VisibleForTesting;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager;
 import org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler;
 import org.apache.hadoop.yarn.server.nodemanager.util.DefaultLCEResourcesHandler;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.File;
 import java.io.IOException;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.Arrays;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
+import java.util.Set;
 
 /**
  * Provides mechanisms to get various resource handlers - cpu, memory, network,
@@ -206,22 +209,41 @@ public class ResourceHandlerModule {
   }
 
   private static void initializeConfiguredResourceHandlerChain(
-      Configuration conf) throws ResourceHandlerException {
+      Configuration conf, Context nmContext)
+      throws ResourceHandlerException {
     ArrayList<ResourceHandler> handlerList = new ArrayList<>();
 
     addHandlerIfNotNull(handlerList, getOutboundBandwidthResourceHandler(conf));
     addHandlerIfNotNull(handlerList, getDiskResourceHandler(conf));
     addHandlerIfNotNull(handlerList, getMemoryResourceHandler(conf));
     addHandlerIfNotNull(handlerList, getCGroupsCpuResourceHandler(conf));
+    addHandlersFromConfiguredResourcePlugins(handlerList, conf, nmContext);
     resourceHandlerChain = new ResourceHandlerChain(handlerList);
   }
 
+  private static void addHandlersFromConfiguredResourcePlugins(
+      List<ResourceHandler> handlerList, Configuration conf,
+      Context nmContext) throws ResourceHandlerException {
+    ResourcePluginManager pluginManager = nmContext.getResourcePluginManager();
+    if (pluginManager != null) {
+       Map<String, ResourcePlugin> pluginMap = pluginManager.getNameToPlugins();
+       if (pluginMap != null) {
+        for (ResourcePlugin plugin : pluginMap.values()) {
+          addHandlerIfNotNull(handlerList, plugin
+              .createResourceHandler(nmContext,
+                  getInitializedCGroupsHandler(conf),
+                  PrivilegedOperationExecutor.getInstance(conf)));
+        }
+      }
+    }
+  }
+
   public static ResourceHandlerChain getConfiguredResourceHandlerChain(
-      Configuration conf) throws ResourceHandlerException {
+      Configuration conf, Context nmContext) throws ResourceHandlerException {
     if (resourceHandlerChain == null) {
       synchronized (ResourceHandlerModule.class) {
         if (resourceHandlerChain == null) {
-          initializeConfiguredResourceHandlerChain(conf);
+          initializeConfiguredResourceHandlerChain(conf, nmContext);
         }
       }
     }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceAllocator.java
new file mode 100644
index 0000000..d6bae09
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceAllocator.java
@@ -0,0 +1,242 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Sets;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import org.apache.hadoop.yarn.exceptions.ResourceNotFoundException;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import static org.apache.hadoop.yarn.api.records.ResourceInformation.GPU_URI;
+
+/**
+ * Allocate GPU resources according to requirements
+ */
+public class GpuResourceAllocator {
+  final static Log LOG = LogFactory.getLog(GpuResourceAllocator.class);
+
+  private Set<Integer> allowedGpuDevices = new TreeSet<>();
+  private Map<Integer, ContainerId> usedDevices = new TreeMap<>();
+  private Context nmContext;
+
+  public GpuResourceAllocator(Context ctx) {
+    this.nmContext = ctx;
+  }
+
+  /**
+   * Contains allowed and denied devices with minor number.
+   * Denied devices will be useful for cgroups devices module to do blacklisting
+   */
+  static class GpuAllocation {
+    private Set<Integer> allowed = Collections.emptySet();
+    private Set<Integer> denied = Collections.emptySet();
+
+    GpuAllocation(Set<Integer> allowed, Set<Integer> denied) {
+      if (allowed != null) {
+        this.allowed = ImmutableSet.copyOf(allowed);
+      }
+      if (denied != null) {
+        this.denied = ImmutableSet.copyOf(denied);
+      }
+    }
+
+    public Set<Integer> getAllowedGPUs() {
+      return allowed;
+    }
+
+    public Set<Integer> getDeniedGPUs() {
+      return denied;
+    }
+  }
+
+  /**
+   * Add GPU to allowed list
+   * @param minorNumber minor number of the GPU device.
+   */
+  public synchronized void addGpu(int minorNumber) {
+    allowedGpuDevices.add(minorNumber);
+  }
+
+  private String getResourceHandlerExceptionMessage(int numRequestedGpuDevices,
+      ContainerId containerId) {
+    return "Failed to find enough GPUs, requestor=" + containerId
+        + ", #RequestedGPUs=" + numRequestedGpuDevices + ", #availableGpus="
+        + getAvailableGpus();
+  }
+
+  @VisibleForTesting
+  public synchronized int getAvailableGpus() {
+    return allowedGpuDevices.size() - usedDevices.size();
+  }
+
+  /** Marks the GPUs in the container's resource mappings as used by it. */
+  public synchronized void recoverAssignedGpus(ContainerId containerId)
+      throws ResourceHandlerException {
+    Container c = nmContext.getContainers().get(containerId);
+    if (null == c) {
+      throw new ResourceHandlerException(
+          "This shouldn't happen, cannot find container with id="
+              + containerId);
+    }
+
+    for (Serializable deviceId : c.getResourceMappings().getAssignedResources(
+        GPU_URI)){
+      if (!(deviceId instanceof String)) {
+        throw new ResourceHandlerException(
+            "Trying to recover device id, however it"
+                + " is not String, this shouldn't happen");
+      }
+
+      int devId;
+      try {
+        devId = Integer.parseInt((String)deviceId);
+      } catch (NumberFormatException e) {
+        throw new ResourceHandlerException("Failed to recover device id because"
+            + " it is not a valid integer, devId:" + deviceId);
+      }
+
+      // Make sure it is in allowed GPU device.
+      if (!allowedGpuDevices.contains(devId)) {
+        throw new ResourceHandlerException("Try to recover device id = " + devId
+            + " however it is not in allowed device list:" + StringUtils
+            .join(",", allowedGpuDevices));
+      }
+
+      // Make sure it is not occupied by anybody else
+      if (usedDevices.containsKey(devId)) {
+        throw new ResourceHandlerException("Try to recover device id = " + devId
+            + " however it is already assigned to container=" + usedDevices
+            .get(devId) + ", please double check what happened.");
+      }
+
+      usedDevices.put(devId, containerId);
+    }
+  }
+
+  private int getRequestedGpus(Resource requestedResource) {
+    try {
+      return Long.valueOf(requestedResource.getResourceValue(
+          GPU_URI)).intValue();
+    } catch (ResourceNotFoundException e) {
+      return 0;
+    }
+  }
+
+  /**
+   * Assign free GPUs to the container and persist them in the NM state store.
+   * @param container container to allocate
+   * @return allowed (assigned) and denied GPU minor numbers for the container
+   * @throws ResourceHandlerException if too few GPUs are free or store fails
+   */
+  public synchronized GpuAllocation assignGpus(Container container)
+      throws ResourceHandlerException {
+    Resource requestedResource = container.getResource();
+    ContainerId containerId = container.getContainerId();
+    int numRequestedGpuDevices = getRequestedGpus(requestedResource);
+    // Assign GPUs to the container only if it requested some.
+    if (numRequestedGpuDevices > 0) {
+      if (numRequestedGpuDevices > getAvailableGpus()) {
+        throw new ResourceHandlerException(
+            getResourceHandlerExceptionMessage(numRequestedGpuDevices,
+                containerId));
+      }
+
+      Set<Integer> assignedGpus = new HashSet<>();
+
+      for (int deviceNum : allowedGpuDevices) {
+        if (!usedDevices.containsKey(deviceNum)) {
+          usedDevices.put(deviceNum, containerId);
+          assignedGpus.add(deviceNum);
+          if (assignedGpus.size() == numRequestedGpuDevices) {
+            break;
+          }
+        }
+      }
+
+      // Record in state store if we allocated anything
+      if (!assignedGpus.isEmpty()) {
+        List<Serializable> allocatedDevices = new ArrayList<>();
+        for (int gpu : assignedGpus) {
+          allocatedDevices.add(String.valueOf(gpu));
+        }
+        try {
+          // Update Container#getResourceMapping.
+          ResourceMappings.AssignedResources assignedResources =
+              new ResourceMappings.AssignedResources();
+          assignedResources.updateAssignedResources(allocatedDevices);
+          container.getResourceMappings().addAssignedResources(GPU_URI,
+              assignedResources);
+
+          // Update state store.
+          nmContext.getNMStateStore().storeAssignedResources(containerId,
+              GPU_URI, allocatedDevices);
+        } catch (IOException e) {
+          cleanupAssignGpus(containerId);
+          throw new ResourceHandlerException(e);
+        }
+      }
+
+      return new GpuAllocation(assignedGpus,
+          Sets.difference(allowedGpuDevices, assignedGpus));
+    }
+    return new GpuAllocation(null, allowedGpuDevices);
+  }
+
+  /**
+   * Clean up all Gpus assigned to containerId
+   * @param containerId containerId
+   */
+  public synchronized void cleanupAssignGpus(ContainerId containerId) {
+    Iterator<Map.Entry<Integer, ContainerId>> iter =
+        usedDevices.entrySet().iterator();
+    while (iter.hasNext()) {
+      if (iter.next().getValue().equals(containerId)) {
+        iter.remove();
+      }
+    }
+  }
+
+  @VisibleForTesting
+  public synchronized Map<Integer, ContainerId> getDeviceAllocationMapping() {
+     return new HashMap<>(usedDevices);
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
new file mode 100644
index 0000000..7144bb2
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
@@ -0,0 +1,153 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import org.apache.hadoop.yarn.exceptions.ResourceNotFoundException;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDiscoverer;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+public class GpuResourceHandlerImpl implements ResourceHandler {
+  final static Log LOG = LogFactory
+      .getLog(GpuResourceHandlerImpl.class);
+
+  // This will be used by container-executor to add necessary clis
+  public static final String EXCLUDED_GPUS_CLI_OPTION = "--excluded_gpus";
+  public static final String CONTAINER_ID_CLI_OPTION = "--container_id";
+
+  private GpuResourceAllocator gpuAllocator;
+  private CGroupsHandler cGroupsHandler;
+  private PrivilegedOperationExecutor privilegedOperationExecutor;
+
+  public GpuResourceHandlerImpl(Context nmContext,
+      CGroupsHandler cGroupsHandler,
+      PrivilegedOperationExecutor privilegedOperationExecutor) {
+    this.cGroupsHandler = cGroupsHandler;
+    this.privilegedOperationExecutor = privilegedOperationExecutor;
+    gpuAllocator = new GpuResourceAllocator(nmContext);
+  }
+
+  @Override
+  public List<PrivilegedOperation> bootstrap(Configuration configuration)
+      throws ResourceHandlerException {
+    List<Integer> minorNumbersOfUsableGpus;
+    try {
+      minorNumbersOfUsableGpus = GpuDiscoverer.getInstance()
+          .getMinorNumbersOfGpusUsableByYarn();
+    } catch (YarnException e) {
+      LOG.error("Exception when trying to get usable GPU device", e);
+      throw new ResourceHandlerException(e);
+    }
+
+    for (int minorNumber : minorNumbersOfUsableGpus) {
+      gpuAllocator.addGpu(minorNumber);
+    }
+
+    // And initialize cgroups
+    this.cGroupsHandler.initializeCGroupController(
+        CGroupsHandler.CGroupController.DEVICES);
+
+    return null;
+  }
+
+  @Override
+  public synchronized List<PrivilegedOperation> preStart(Container container)
+      throws ResourceHandlerException {
+    String containerIdStr = container.getContainerId().toString();
+
+    // Assign Gpus to container if requested some.
+    GpuResourceAllocator.GpuAllocation allocation = gpuAllocator.assignGpus(
+        container);
+
+    // Create device cgroups for the container
+    cGroupsHandler.createCGroup(CGroupsHandler.CGroupController.DEVICES,
+        containerIdStr);
+    try {
+      // Execute c-e to setup GPU isolation before launch the container
+      PrivilegedOperation privilegedOperation = new PrivilegedOperation(
+          PrivilegedOperation.OperationType.GPU, Arrays
+          .asList(CONTAINER_ID_CLI_OPTION, containerIdStr));
+      if (!allocation.getDeniedGPUs().isEmpty()) {
+        privilegedOperation.appendArgs(Arrays.asList(EXCLUDED_GPUS_CLI_OPTION,
+            StringUtils.join(",", allocation.getDeniedGPUs())));
+      }
+
+      privilegedOperationExecutor.executePrivilegedOperation(
+          privilegedOperation, true);
+    } catch (PrivilegedOperationException e) {
+      cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.DEVICES,
+          containerIdStr);
+      LOG.warn("Could not update cgroup for container", e);
+      throw new ResourceHandlerException(e);
+    }
+
+    List<PrivilegedOperation> ret = new ArrayList<>();
+    ret.add(new PrivilegedOperation(
+        PrivilegedOperation.OperationType.ADD_PID_TO_CGROUP,
+        PrivilegedOperation.CGROUP_ARG_PREFIX
+            + cGroupsHandler.getPathForCGroupTasks(
+            CGroupsHandler.CGroupController.DEVICES, containerIdStr)));
+
+    return ret;
+  }
+
+  @VisibleForTesting
+  public GpuResourceAllocator getGpuAllocator() {
+    return gpuAllocator;
+  }
+
+  @Override
+  public List<PrivilegedOperation> reacquireContainer(ContainerId containerId)
+      throws ResourceHandlerException {
+    gpuAllocator.recoverAssignedGpus(containerId);
+    return null;
+  }
+
+  @Override
+  public synchronized List<PrivilegedOperation> postComplete(
+      ContainerId containerId) throws ResourceHandlerException {
+    gpuAllocator.cleanupAssignGpus(containerId);
+    cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.DEVICES,
+        containerId.toString());
+    return null;
+  }
+
+  @Override
+  public List<PrivilegedOperation> teardown() throws ResourceHandlerException {
+    return null;
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/NodeResourceUpdaterPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/NodeResourceUpdaterPlugin.java
new file mode 100644
index 0000000..88f77ed
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/NodeResourceUpdaterPlugin.java
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin;
+
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+
+/**
+ * Plugins to handle resources on a node. This will be used by
+ * {@link org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater}
+ */
+public abstract class NodeResourceUpdaterPlugin {
+  /**
+   * Update configured resource for the given component.
+   * @param res resource passed in by external module (such as
+   *            {@link org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater})
+   * @throws YarnException when any issue happens.
+   */
+  public abstract void updateConfiguredResource(Resource res)
+      throws YarnException;
+
+  /**
+   * This method will be called when the node's resource is loaded from
+   * dynamic-resources.xml in ResourceManager.
+   *
+   * @param newResource newResource reported by RM
+   * @throws YarnException when any mismatch between NM/RM
+   */
+  public void handleUpdatedResourceFromRM(Resource newResource) throws
+      YarnException {
+    // by default do nothing, subclass should implement this method when any
+    // special activities required upon new resource reported by RM.
+  }
+
+  // TODO: add implementation to update node attribute once YARN-3409 merged.
+}
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePlugin.java
new file mode 100644
index 0000000..6e134b3
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePlugin.java
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin;
+
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerChain;
+
+/**
+ * {@link ResourcePlugin} is an interface that makes it easier for the node
+ * manager to support discovery/management/isolation of new resource types.
+ *
+ * <p>
+ * It has two major parts: {@link ResourcePlugin#createResourceHandler(Context,
+ * CGroupsHandler, PrivilegedOperationExecutor)} and
+ * {@link ResourcePlugin#getNodeResourceHandlerInstance()}, see javadocs below
+ * for more details.
+ * </p>
+ */
+public interface ResourcePlugin {
+  /**
+   * Initialize the plugin, this will be invoked during NM startup.
+   * @param context NM Context
+   * @throws YarnException when any issue occurs
+   */
+  void initialize(Context context) throws YarnException;
+
+  /**
+   * Plugin needs to return {@link ResourceHandler} when any special isolation
+   * is required for the resource type. This will be added to
+   * {@link ResourceHandlerChain} during NodeManager startup. When no special
+   * isolation is needed, return null.
+   *
+   * @param nmContext NodeManager context.
+   * @param cGroupsHandler CGroupsHandler
+   * @param privilegedOperationExecutor Privileged Operation Executor.
+   * @return ResourceHandler
+   */
+  ResourceHandler createResourceHandler(Context nmContext,
+      CGroupsHandler cGroupsHandler,
+      PrivilegedOperationExecutor privilegedOperationExecutor);
+
+  /**
+   * Plugin needs to return {@link NodeResourceUpdaterPlugin} when any discovery
+   * mechanism is required for the resource type. For example, if we want to set
+   * resource-value during NM registration or send update in NM-RM heartbeat,
+   * we can implement a {@link NodeResourceUpdaterPlugin} and update fields of
+   * {@link org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest}
+   * or {@link org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest}.
+   *
+   * This will be invoked during every node status update or node registration,
+   * please avoid creating a new instance every time.
+   *
+   * @return NodeResourceUpdaterPlugin, could be null when no discovery needed.
+   */
+  NodeResourceUpdaterPlugin getNodeResourceHandlerInstance();
+
+  /**
+   * Do cleanup of the plugin, this will be invoked when
+   * {@link org.apache.hadoop.yarn.server.nodemanager.NodeManager} stops.
+   * @throws YarnException if any issue occurs
+   */
+  void cleanup() throws YarnException;
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java
new file mode 100644
index 0000000..73d6038
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin;
+
+import com.google.common.collect.ImmutableSet;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuResourcePlugin;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import static org.apache.hadoop.yarn.api.records.ResourceInformation.GPU_URI;
+
+/**
+ * Manages {@link ResourcePlugin} configured on this NodeManager.
+ */
+public class ResourcePluginManager {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(ResourcePluginManager.class);
+  private static final Set<String> SUPPORTED_RESOURCE_PLUGINS = ImmutableSet.of(
+      GPU_URI);
+
+  private Map<String, ResourcePlugin> configuredPlugins =
+      Collections.emptyMap();
+
+  /**
+   * Instantiate and initialize every plugin listed under
+   * {@link YarnConfiguration#NM_RESOURCE_PLUGINS}; duplicates are ignored.
+   * @throws YarnException for an unsupported name or a failing plugin
+   */
+  public synchronized void initialize(Context context)
+      throws YarnException {
+    String[] plugins =
+        context.getConf().getStrings(YarnConfiguration.NM_RESOURCE_PLUGINS);
+    if (plugins == null) {
+      return;
+    }
+    Map<String, ResourcePlugin> pluginMap = new HashMap<>();
+    for (String resourceName : plugins) {
+      resourceName = resourceName.trim();
+      if (!SUPPORTED_RESOURCE_PLUGINS.contains(resourceName)) {
+        // commons-lang3 join takes the collection first, the separator second.
+        String msg =
+            "Trying to initialize resource plugin with name=" + resourceName
+                + ", it is not supported, list of supported plugins:"
+                + StringUtils.join(SUPPORTED_RESOURCE_PLUGINS, ",");
+        LOG.error(msg);
+        throw new YarnException(msg);
+      }
+      if (pluginMap.containsKey(resourceName)) {
+        // Duplicated items, ignore ...
+        continue;
+      }
+      ResourcePlugin plugin = null;
+      if (resourceName.equals(GPU_URI)) {
+        plugin = new GpuResourcePlugin();
+      }
+      if (plugin == null) {
+        throw new YarnException("This shouldn't happen, plugin="
+            + resourceName + " should be loaded and initialized");
+      }
+      plugin.initialize(context);
+      pluginMap.put(resourceName, plugin);
+    }
+    configuredPlugins = Collections.unmodifiableMap(pluginMap);
+  }
+
+  /** Invoke cleanup() on every configured plugin; stops at first failure. */
+  public synchronized void cleanup() throws YarnException {
+    for (ResourcePlugin plugin : configuredPlugins.values()) {
+      plugin.cleanup();
+    }
+  }
+
+  /**
+   * Get resource name (such as gpu/fpga) to plugin references.
+   * @return read-only map of resource name to plugins.
+   */
+  public synchronized Map<String, ResourcePlugin> getNameToPlugins() {
+    return configuredPlugins;
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
new file mode 100644
index 0000000..61b8ce5
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
@@ -0,0 +1,254 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.Shell;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformationParser;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.PerGpuDeviceInformation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public class GpuDiscoverer {
+  public static final Logger LOG = LoggerFactory.getLogger(
+      GpuDiscoverer.class);
+  @VisibleForTesting
+  protected static final String DEFAULT_BINARY_NAME = "nvidia-smi";
+
+  // When the executable path is not set or does not exist, search the default
+  // dirs: /usr/bin, /bin, and /usr/local/nvidia/bin (when launched by
+  // nvidia-docker).
+  private static final Set<String> DEFAULT_BINARY_SEARCH_DIRS = ImmutableSet.of(
+      "/usr/bin", "/bin", "/usr/local/nvidia/bin");
+
+  // command should not run more than 10 sec.
+  private static final int MAX_EXEC_TIMEOUT_MS = 10 * 1000;
+  private static final int MAX_REPEATED_ERROR_ALLOWED = 10;
+  private static GpuDiscoverer instance;
+
+  static {
+    instance = new GpuDiscoverer();
+  }
+
+  private Configuration conf = null;
+  private String pathOfGpuBinary = null;
+  private Map<String, String> environment = new HashMap<>();
+  private GpuDeviceInformationParser parser = new GpuDeviceInformationParser();
+
+  private int numOfErrorExecutionSinceLastSucceed = 0;
+  GpuDeviceInformation lastDiscoveredGpuInformation = null;
+
+  private void validateConfOrThrowException() throws YarnException {
+    if (conf == null) {
+      throw new YarnException("Please initialize (call initialize) before use "
+          + GpuDiscoverer.class.getSimpleName());
+    }
+  }
+
+  /**
+   * Get GPU device information from system.
+   * This need to be called after initialize.
+   *
+   * Please note that this only works on *NIX platform, so external caller
+   * need to make sure this.
+   *
+   * @return GpuDeviceInformation
+   * @throws YarnException when any error happens
+   */
+  public synchronized GpuDeviceInformation getGpuDeviceInformation()
+      throws YarnException {
+    validateConfOrThrowException();
+
+    if (null == pathOfGpuBinary) {
+      throw new YarnException(
+          "Failed to find GPU discovery executable, please double check "
+              + YarnConfiguration.NM_GPU_PATH_TO_EXEC + " setting.");
+    }
+
+    // Give up after too many consecutive failures. Once this trips the binary
+    // is no longer invoked, so only initialize() can reset the counter.
+    if (numOfErrorExecutionSinceLastSucceed >= MAX_REPEATED_ERROR_ALLOWED) {
+      String msg =
+          "Failed to execute GPU device information detection script for "
+              + MAX_REPEATED_ERROR_ALLOWED
+              + " times, skip following executions.";
+      LOG.error(msg);
+      throw new YarnException(msg);
+    }
+
+    try {
+      String output = Shell.execCommand(environment,
+          new String[] { pathOfGpuBinary, "-x", "-q" }, MAX_EXEC_TIMEOUT_MS);
+      GpuDeviceInformation info = parser.parseXml(output);
+      numOfErrorExecutionSinceLastSucceed = 0;
+      lastDiscoveredGpuInformation = info;
+      return info;
+    } catch (IOException e) {
+      numOfErrorExecutionSinceLastSucceed++;
+      String msg = "Failed to execute " + pathOfGpuBinary
+          + " exception message:" + e.getMessage() + ", continue ...";
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(msg);
+      }
+      throw new YarnException(e);
+    } catch (YarnException e) {
+      numOfErrorExecutionSinceLastSucceed++;
+      String msg = "Failed to parse xml output: " + e.getMessage();
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(msg, e);
+      }
+      throw e;
+    }
+  }
+
+  /**
+   * Get list of minor device numbers of Gpu devices usable by YARN.
+   * @return List of minor device numbers of Gpu devices.
+   * @throws YarnException if not initialized, discovery failed, or bad config
+   */
+  public synchronized List<Integer> getMinorNumbersOfGpusUsableByYarn()
+      throws YarnException {
+    validateConfOrThrowException();
+    String allowedDevicesStr = conf.get(
+        YarnConfiguration.NM_GPU_ALLOWED_DEVICES,
+        YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
+    List<Integer> minorNumbers = new ArrayList<>();
+    if (allowedDevicesStr.equals(
+        YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES)) {
+      // Auto mode uses the result cached by getGpuDeviceInformation().
+      if (null == lastDiscoveredGpuInformation) {
+        String msg = YarnConfiguration.NM_GPU_ALLOWED_DEVICES + " is set to "
+            + YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES
+            + ", however automatically discovering "
+            + "GPU information failed, please check NodeManager log for more"
+            + " details, as an alternative, admin can specify "
+            + YarnConfiguration.NM_GPU_ALLOWED_DEVICES
+            + " manually to enable GPU isolation.";
+        LOG.error(msg);
+        throw new YarnException(msg);
+      }
+      if (lastDiscoveredGpuInformation.getGpus() != null) {
+        for (PerGpuDeviceInformation gpu : lastDiscoveredGpuInformation
+            .getGpus()) {
+          minorNumbers.add(gpu.getMinorNumber());
+        }
+      }
+    } else{
+      for (String s : allowedDevicesStr.split(",")) {
+        String minor = s.trim();
+        if (!minor.isEmpty()) {
+          try {
+            minorNumbers.add(Integer.valueOf(minor));
+          } catch (NumberFormatException e) {
+            throw new YarnException("Invalid GPU minor number '" + minor
+                + "' in " + YarnConfiguration.NM_GPU_ALLOWED_DEVICES, e);
+          }
+        }
+      }
+      LOG.info("Allowed GPU devices with minor numbers:" + allowedDevicesStr);
+    }
+    return minorNumbers;
+  }
+
+  public synchronized void initialize(Configuration conf) throws YarnException {
+    this.conf = conf;
+    numOfErrorExecutionSinceLastSucceed = 0;
+    // Drop state from any earlier initialize() so a failed lookup below cannot
+    // leave a stale binary path or stale device list in use.
+    pathOfGpuBinary = null;
+    lastDiscoveredGpuInformation = null;
+    String pathToExecutable = conf.get(YarnConfiguration.NM_GPU_PATH_TO_EXEC,
+        YarnConfiguration.DEFAULT_NM_GPU_PATH_TO_EXEC);
+    if (pathToExecutable.isEmpty()) {
+      pathToExecutable = DEFAULT_BINARY_NAME;
+    }
+
+    File binaryPath = new File(pathToExecutable);
+    if (!binaryPath.exists()) {
+      // Configured path is missing: probe the default directories.
+      for (String dir : DEFAULT_BINARY_SEARCH_DIRS) {
+        File candidate = new File(dir, DEFAULT_BINARY_NAME);
+        if (candidate.exists()) {
+          pathOfGpuBinary = candidate.getAbsolutePath();
+          break;
+        }
+      }
+      if (pathOfGpuBinary == null) {
+        // Report the configured path, not the last directory probed.
+        LOG.warn("Failed to locate binary at:" + binaryPath.getAbsolutePath()
+            + ", please double check [" + YarnConfiguration.NM_GPU_PATH_TO_EXEC
+            + "] setting. Also searched " + DEFAULT_BINARY_SEARCH_DIRS
+            + " for " + DEFAULT_BINARY_NAME + ", none found.");
+      }
+    } else{
+      // If path specified by user is a directory, use the default binary in it
+      if (binaryPath.isDirectory()) {
+        binaryPath = new File(binaryPath, DEFAULT_BINARY_NAME);
+        LOG.warn("Specified path is a directory, use " + DEFAULT_BINARY_NAME
+            + " under the directory, updated path-to-executable:" + binaryPath
+            .getAbsolutePath());
+      }
+      pathOfGpuBinary = binaryPath.getAbsolutePath();
+    }
+
+    // Try to discover GPU information once and print
+    try {
+      LOG.info("Trying to discover GPU information ...");
+      GpuDeviceInformation info = getGpuDeviceInformation();
+      LOG.info(info.toString());
+    } catch (YarnException e) {
+      String msg =
+          "Failed to discover GPU information from system, exception message:"
+              + e.getMessage() + " continue...";
+      LOG.warn(msg);
+    }
+  }
+
+  @VisibleForTesting
+  protected Map<String, String> getEnvironmentToRunCommand() {
+    return environment;
+  }
+
+  @VisibleForTesting
+  protected String getPathOfGpuBinary() {
+    return pathOfGpuBinary;
+  }
+
+  public static GpuDiscoverer getInstance() {
+    return instance;
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java
new file mode 100644
index 0000000..f6bf506
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.NodeResourceUpdaterPlugin;
+import org.apache.hadoop.yarn.util.resource.ResourceUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+import java.util.Map;
+
+import static org.apache.hadoop.yarn.api.records.ResourceInformation.GPU_URI;
+
+public class GpuNodeResourceUpdateHandler extends NodeResourceUpdaterPlugin {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(GpuNodeResourceUpdateHandler.class);
+
+  /**
+   * Set the GPU count of {@code res} to the number of GPUs usable by YARN.
+   * Leaves {@code res} untouched when no usable GPU is reported.
+   * @throws YarnException if GPUs exist but the GPU resource type is missing
+   */
+  @Override
+  public void updateConfiguredResource(Resource res) throws YarnException {
+    LOG.info("Initializing configured GPU resources for the NodeManager.");
+    List<Integer> gpuMinors =
+        GpuDiscoverer.getInstance().getMinorNumbersOfGpusUsableByYarn();
+    if (gpuMinors == null || gpuMinors.isEmpty()) {
+      LOG.info("Didn't find any usable GPUs on the NodeManager.");
+      return;
+    }
+
+    long gpuCount = gpuMinors.size();
+    Map<String, ResourceInformation> knownTypes =
+        ResourceUtils.getResourceTypes();
+    if (!knownTypes.containsKey(GPU_URI)) {
+      throw new YarnException("Found " + gpuCount + " usable GPUs, however "
+          + GPU_URI + " resource-type is not configured inside"
+          + " resource-types.xml, please configure it to enable GPU feature or"
+          + " remove " + GPU_URI + " from "
+          + YarnConfiguration.NM_RESOURCE_PLUGINS);
+    }
+    res.setResourceValue(GPU_URI, gpuCount);
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
new file mode 100644
index 0000000..9576ce7
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu.GpuResourceHandlerImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.NodeResourceUpdaterPlugin;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
+
+public class GpuResourcePlugin implements ResourcePlugin {
+  private ResourceHandler gpuResourceHandler; // built on first request
+  private GpuNodeResourceUpdateHandler resourceDiscoverHandler;
+
+  @Override
+  public synchronized void initialize(Context context) throws YarnException {
+    resourceDiscoverHandler = new GpuNodeResourceUpdateHandler();
+    GpuDiscoverer.getInstance().initialize(context.getConf());
+  }
+
+  @Override
+  public synchronized ResourceHandler createResourceHandler(
+      Context context, CGroupsHandler cGroupsHandler,
+      PrivilegedOperationExecutor privilegedOperationExecutor) {
+    if (gpuResourceHandler != null) {
+      return gpuResourceHandler;
+    }
+    gpuResourceHandler = new GpuResourceHandlerImpl(context, cGroupsHandler,
+        privilegedOperationExecutor);
+    return gpuResourceHandler;
+  }
+
+  @Override
+  public synchronized NodeResourceUpdaterPlugin getNodeResourceHandlerInstance() {
+    return resourceDiscoverHandler;
+  }
+
+  @Override
+  public void cleanup() throws YarnException {
+    // Do nothing.
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/GpuDeviceInformation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/GpuDeviceInformation.java
new file mode 100644
index 0000000..977032a
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/GpuDeviceInformation.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+import javax.xml.bind.annotation.XmlRootElement;
+import java.util.List;
+
+/**
+ * All GPU Device Information in the system.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+@XmlRootElement(name = "nvidia_smi_log")
+public class GpuDeviceInformation {
+  List<PerGpuDeviceInformation> gpus;
+
+  String driverVersion = "N/A";
+
+  // More fields like topology information could be added when needed.
+  // ...
+
+  @javax.xml.bind.annotation.XmlElement(name = "gpu")
+  public List<PerGpuDeviceInformation> getGpus() {
+    return gpus;
+  }
+
+  public void setGpus(List<PerGpuDeviceInformation> gpus) {
+    this.gpus = gpus;
+  }
+
+  @javax.xml.bind.annotation.XmlElement(name = "driver_version")
+  public String getDriverVersion() {
+    return driverVersion;
+  }
+
+  public void setDriverVersion(String driverVersion) {
+    this.driverVersion = driverVersion;
+  }
+
+  /** Multi-line summary: header, driver version, then one GPU per line. */
+  @Override
+  public String toString() {
+    StringBuilder text = new StringBuilder("=== Gpus in the system ===\n");
+    text.append("\tDriver Version:").append(getDriverVersion()).append("\n");
+    if (gpus == null) {
+      return text.toString();
+    }
+    for (PerGpuDeviceInformation gpu : gpus) {
+      text.append("\t").append(gpu.toString()).append("\n");
+    }
+    return text.toString();
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/GpuDeviceInformationParser.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/GpuDeviceInformationParser.java
new file mode 100644
index 0000000..1bd92f6
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/GpuDeviceInformationParser.java
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+import javax.xml.bind.JAXBContext;
+import javax.xml.bind.JAXBException;
+import javax.xml.bind.Unmarshaller;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParserFactory;
+import javax.xml.transform.sax.SAXSource;
+import java.io.StringReader;
+
+/**
+ * Parse XML and get GPU device information
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public class GpuDeviceInformationParser {
+  private static final Logger LOG = LoggerFactory.getLogger(
+      GpuDeviceInformationParser.class);
+  private Unmarshaller unmarshaller = null;
+  private XMLReader xmlReader = null;
+
+  private void init()
+      throws SAXException, ParserConfigurationException, JAXBException {
+    SAXParserFactory spf = SAXParserFactory.newInstance();
+    // nvidia-smi emits <!DOCTYPE nvidia_smi_log SYSTEM "nvsmi_device_v8.dtd">;
+    // keep DOCTYPE allowed but never load the external DTD or entities (XXE).
+    spf.setFeature(
+        "http://apache.org/xml/features/nonvalidating/load-external-dtd",
+        false);
+    spf.setFeature("http://xml.org/sax/features/validation", false);
+    spf.setFeature(
+        "http://xml.org/sax/features/external-general-entities", false);
+    spf.setFeature(
+        "http://xml.org/sax/features/external-parameter-entities", false);
+    this.xmlReader = spf.newSAXParser().getXMLReader();
+    this.unmarshaller = JAXBContext.newInstance(GpuDeviceInformation.class)
+        .createUnmarshaller();
+  }
+
+  /** Parse nvidia-smi XML output; the parser is built on first use. */
+  public synchronized GpuDeviceInformation parseXml(String xmlContent)
+      throws YarnException {
+    if (unmarshaller == null) {
+      try {
+        init();
+      } catch (SAXException | ParserConfigurationException | JAXBException e) {
+        LOG.error("Exception while initialize parser", e);
+        throw new YarnException(e);
+      }
+    }
+    InputSource inputSource = new InputSource(new StringReader(xmlContent));
+    SAXSource source = new SAXSource(xmlReader, inputSource);
+    try {
+      return (GpuDeviceInformation) unmarshaller.unmarshal(source);
+    } catch (JAXBException e) {
+      LOG.error("Exception while parsing xml", e);
+      throw new YarnException(e);
+    }
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuDeviceInformation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuDeviceInformation.java
new file mode 100644
index 0000000..f315313
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuDeviceInformation.java
@@ -0,0 +1,165 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+import javax.xml.bind.annotation.adapters.XmlAdapter;
+
+/**
+ * Capture single GPU device information such as memory size, temperature,
+ * utilization.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+@XmlRootElement(name = "gpu")
+public class PerGpuDeviceInformation {
+
+  private String productName = "N/A";
+  private String uuid = "N/A";
+  private int minorNumber = -1;
+
+  private PerGpuUtilizations gpuUtilizations;
+  private PerGpuMemoryUsage gpuMemoryUsage;
+  private PerGpuTemperature temperature;
+
+  /**
+   * Convert formats like "34 C", "75.6 %" to float.
+   */
+  @InterfaceAudience.Private
+  @InterfaceStability.Unstable
+  static class StrToFloatBeforeSpaceAdapter extends
+      XmlAdapter<String, Float> {
+    /**
+     * Render {@code v} as text; {@code null} becomes the empty string.
+     */
+    @Override
+    public String marshal(Float v) throws Exception {
+      return v == null ? "" : String.valueOf(v);
+    }
+
+    /**
+     * Parse the token before the first space, e.g. "34 C" gives 34.
+     * {@code null} gives -1.
+     */
+    @Override
+    public Float unmarshal(String v) throws Exception {
+      return v == null ? Float.valueOf(-1f) : Float.valueOf(v.split(" ")[0]);
+    }
+  }
+
+  /**
+   * Convert formats like "725 MiB" to long.
+   */
+  @InterfaceAudience.Private
+  @InterfaceStability.Unstable
+  static class StrToMemAdapter extends XmlAdapter<String, Long> {
+    /**
+     * Render {@code v} followed by " MiB"; {@code null} becomes "".
+     */
+    @Override
+    public String marshal(Long v) throws Exception {
+      return v == null ? "" : String.valueOf(v) + " MiB";
+    }
+
+    /**
+     * Token before the first space as a long, e.g. "725 MiB"; null gives -1.
+     */
+    @Override
+    public Long unmarshal(String v) throws Exception {
+      return v == null ? Long.valueOf(-1L) : Long.valueOf(v.split(" ")[0]);
+    }
+  }
+
+  @XmlElement(name = "temperature")
+  public PerGpuTemperature getTemperature() {
+    return temperature;
+  }
+
+  public void setTemperature(PerGpuTemperature temperature) {
+    this.temperature = temperature;
+  }
+
+  @XmlElement(name = "uuid")
+  public String getUuid() {
+    return uuid;
+  }
+
+  public void setUuid(String uuid) {
+    this.uuid = uuid;
+  }
+
+  @XmlElement(name = "product_name")
+  public String getProductName() {
+    return productName;
+  }
+
+  public void setProductName(String productName) {
+    this.productName = productName;
+  }
+
+  @XmlElement(name = "minor_number")
+  public int getMinorNumber() {
+    return minorNumber;
+  }
+
+  public void setMinorNumber(int minorNumber) {
+    this.minorNumber = minorNumber;
+  }
+
+  @XmlElement(name = "utilization")
+  public PerGpuUtilizations getGpuUtilizations() {
+    return gpuUtilizations;
+  }
+
+  public void setGpuUtilizations(PerGpuUtilizations utilizations) {
+    this.gpuUtilizations = utilizations;
+  }
+
+  @XmlElement(name = "bar1_memory_usage")
+  public PerGpuMemoryUsage getGpuMemoryUsage() {
+    return gpuMemoryUsage;
+  }
+
+  public void setGpuMemoryUsage(PerGpuMemoryUsage gpuMemoryUsage) {
+    this.gpuMemoryUsage = gpuMemoryUsage;
+  }
+
+
+  /** Summary text; memory and utilization are appended only when present. */
+  @Override
+  public String toString() {
+    StringBuilder text = new StringBuilder("ProductName=");
+    text.append(productName).append(", MinorNumber=").append(minorNumber);
+    PerGpuMemoryUsage memory = getGpuMemoryUsage();
+    if (memory != null) {
+      text.append(", TotalMemory=").append(memory.getTotalMemoryMiB());
+      text.append("MiB");
+    }
+    PerGpuUtilizations utilizations = getGpuUtilizations();
+    if (utilizations != null) {
+      text.append(", Utilization=")
+          .append(utilizations.getOverallGpuUtilization()).append("%");
+    }
+    return text.toString();
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuMemoryUsage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuMemoryUsage.java
new file mode 100644
index 0000000..3964c4e
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuMemoryUsage.java
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+import javax.xml.bind.annotation.adapters.XmlJavaTypeAdapter;
+
+/**
+ * Memory usage of one GPU, bound to the {@code bar1_memory_usage} XML
+ * element. Amounts are kept in MiB; each field holds {@code -1} until its
+ * setter is called.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+@XmlRootElement(name = "bar1_memory_usage")
+public class PerGpuMemoryUsage {
+  /** Marker stored in a field whose amount has not been set. */
+  private static final long UNKNOWN_MIB = -1L;
+
+  long usedMemoryMiB = UNKNOWN_MIB;
+  long availMemoryMiB = UNKNOWN_MIB;
+
+  /**
+   * Returns the used memory; bound to the {@code used} XML element.
+   *
+   * @return used memory in MiB, {@code -1} while unset
+   */
+  @XmlJavaTypeAdapter(PerGpuDeviceInformation.StrToMemAdapter.class)
+  @XmlElement(name = "used")
+  public Long getUsedMemoryMiB() {
+    return usedMemoryMiB;
+  }
+
+  /**
+   * Stores the used memory.
+   *
+   * @param usedMemoryMiB used memory in MiB; must not be null because the
+   *                      value is unboxed into a primitive field
+   */
+  public void setUsedMemoryMiB(Long usedMemoryMiB) {
+    this.usedMemoryMiB = usedMemoryMiB;
+  }
+
+  /**
+   * Returns the free memory; bound to the {@code free} XML element.
+   *
+   * @return free memory in MiB, {@code -1} while unset
+   */
+  @XmlJavaTypeAdapter(PerGpuDeviceInformation.StrToMemAdapter.class)
+  @XmlElement(name = "free")
+  public Long getAvailMemoryMiB() {
+    return availMemoryMiB;
+  }
+
+  /**
+   * Stores the free memory.
+   *
+   * @param availMemoryMiB free memory in MiB; must not be null because the
+   *                       value is unboxed into a primitive field
+   */
+  public void setAvailMemoryMiB(Long availMemoryMiB) {
+    this.availMemoryMiB = availMemoryMiB;
+  }
+
+  /**
+   * Returns used plus free memory.
+   *
+   * @return total memory in MiB, or {@code -1} when either amount is
+   *         negative (unset); summing the markers would otherwise report a
+   *         meaningless total such as {@code -2}
+   */
+  public long getTotalMemoryMiB() {
+    if (usedMemoryMiB < 0 || availMemoryMiB < 0) {
+      return UNKNOWN_MIB;
+    }
+    return usedMemoryMiB + availMemoryMiB;
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuTemperature.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuTemperature.java
new file mode 100644
index 0000000..ccd60cb
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuTemperature.java
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+import javax.xml.bind.annotation.adapters.XmlJavaTypeAdapter;
+
+/**
+ * Temperature readings of one GPU, bound to the {@code temperature} XML
+ * element.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+@XmlRootElement(name = "temperature")
+public class PerGpuTemperature {
+  // Every reading starts out as Float.MIN_VALUE. Note that this constant is
+  // the smallest positive float, not a negative number.
+  private float currentGpuTemp = Float.MIN_VALUE;
+  private float maxGpuTemp = Float.MIN_VALUE;
+  private float slowThresholdGpuTemp = Float.MIN_VALUE;
+
+  /**
+   * Current GPU temperature in celsius; bound to the {@code gpu_temp} XML
+   * element.
+   *
+   * @return current temperature
+   */
+  @XmlElement(name = "gpu_temp")
+  @XmlJavaTypeAdapter(PerGpuDeviceInformation.StrToFloatBeforeSpaceAdapter.class)
+  public Float getCurrentGpuTemp() {
+    return Float.valueOf(this.currentGpuTemp);
+  }
+
+  /**
+   * Stores the current GPU temperature.
+   *
+   * @param currentGpuTemp temperature to store; must not be null
+   */
+  public void setCurrentGpuTemp(Float currentGpuTemp) {
+    this.currentGpuTemp = currentGpuTemp.floatValue();
+  }
+
+  /**
+   * Maximum possible GPU temperature in celsius; bound to the
+   * {@code gpu_temp_max_threshold} XML element.
+   *
+   * @return maximum temperature
+   */
+  @XmlElement(name = "gpu_temp_max_threshold")
+  @XmlJavaTypeAdapter(PerGpuDeviceInformation.StrToFloatBeforeSpaceAdapter.class)
+  public Float getMaxGpuTemp() {
+    return Float.valueOf(this.maxGpuTemp);
+  }
+
+  /**
+   * Stores the maximum possible GPU temperature.
+   *
+   * @param maxGpuTemp temperature to store; must not be null
+   */
+  public void setMaxGpuTemp(Float maxGpuTemp) {
+    this.maxGpuTemp = maxGpuTemp.floatValue();
+  }
+
+  /**
+   * GPU temperature in celsius at which the GPU may start running slower;
+   * bound to the {@code gpu_temp_slow_threshold} XML element.
+   *
+   * @return slow-down threshold temperature
+   */
+  @XmlElement(name = "gpu_temp_slow_threshold")
+  @XmlJavaTypeAdapter(PerGpuDeviceInformation.StrToFloatBeforeSpaceAdapter.class)
+  public Float getSlowThresholdGpuTemp() {
+    return Float.valueOf(this.slowThresholdGpuTemp);
+  }
+
+  /**
+   * Stores the slow-down threshold temperature.
+   *
+   * @param slowThresholdGpuTemp temperature to store; must not be null
+   */
+  public void setSlowThresholdGpuTemp(Float slowThresholdGpuTemp) {
+    this.slowThresholdGpuTemp = slowThresholdGpuTemp.floatValue();
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuUtilizations.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuUtilizations.java
new file mode 100644
index 0000000..4ef218b
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuUtilizations.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+import javax.xml.bind.annotation.adapters.XmlJavaTypeAdapter;
+
+/**
+ * Utilization figures of one GPU, bound to the {@code utilization} XML
+ * element.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+@XmlRootElement(name = "utilization")
+public class PerGpuUtilizations {
+  // Left at the Java default of 0 until the setter is called.
+  private float overallGpuUtilization;
+
+  /**
+   * Overall GPU utilization in percent; bound to the {@code gpu_util} XML
+   * element.
+   *
+   * @return overall utilization
+   */
+  @XmlElement(name = "gpu_util")
+  @XmlJavaTypeAdapter(PerGpuDeviceInformation.StrToFloatBeforeSpaceAdapter.class)
+  public Float getOverallGpuUtilization() {
+    return Float.valueOf(this.overallGpuUtilization);
+  }
+
+  /**
+   * Stores the overall GPU utilization.
+   *
+   * @param overallGpuUtilization utilization to store; must not be null
+   */
+  public void setOverallGpuUtilization(Float overallGpuUtilization) {
+    this.overallGpuUtilization = overallGpuUtilization.floatValue();
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/NodeManagerTestBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/NodeManagerTestBase.java
new file mode 100644
index 0000000..13b3ee9
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/NodeManagerTestBase.java
@@ -0,0 +1,164 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.net.ServerSocketUtil;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.factories.RecordFactory;
+import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.hadoop.yarn.server.api.ResourceTracker;
+import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
+import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
+import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
+import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse;
+import org.apache.hadoop.yarn.server.api.protocolrecords.UnRegisterNodeManagerRequest;
+import org.apache.hadoop.yarn.server.api.protocolrecords.UnRegisterNodeManagerResponse;
+import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.NodeHeartbeatResponsePBImpl;
+import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RegisterNodeManagerResponsePBImpl;
+import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.UnRegisterNodeManagerResponsePBImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
+import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
+import org.junit.Assert;
+import org.junit.Before;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+
+/**
+ * Common fixture for NodeManager tests. It supplies the working directories,
+ * a NodeManager configuration bound to localhost, and small stub
+ * implementations that individual tests build on.
+ */
+public class NodeManagerTestBase {
+  // temp fix until metrics system can auto-detect itself running in unit test:
+  static {
+    DefaultMetricsSystem.setMiniClusterMode(true);
+  }
+
+  // The logger and the base directory are both named after
+  // TestNodeStatusUpdater rather than after this class.
+  protected static final Logger LOG =
+      LoggerFactory.getLogger(TestNodeStatusUpdater.class);
+  protected static final File basedir =
+      new File("target", TestNodeStatusUpdater.class.getName());
+  protected static final File nmLocalDir = new File(basedir, "nm0");
+  protected static final File tmpDir = new File(basedir, "tmpDir");
+  protected static final File remoteLogsDir = new File(basedir, "remotelogs");
+  protected static final File logsDir = new File(basedir, "logs");
+  protected static final RecordFactory recordFactory =
+      RecordFactoryProvider.getRecordFactory(null);
+  protected Configuration conf;
+
+  /**
+   * Creates a NodeManager configuration whose NM port is picked by
+   * {@code ServerSocketUtil.getPort(49170, 10)}.
+   *
+   * @return the new configuration
+   * @throws IOException declared by the port lookup
+   */
+  protected YarnConfiguration createNMConfig() throws IOException {
+    int nmPort = ServerSocketUtil.getPort(49170, 10);
+    return createNMConfig(nmPort);
+  }
+
+  /**
+   * Creates a NodeManager configuration that uses the canonical localhost
+   * name, the given NM port and the directories of this fixture. The test
+   * fails if localhost cannot be resolved.
+   *
+   * @param port port for the NodeManager address
+   * @return the new configuration
+   * @throws IOException declared by the localizer port lookup
+   */
+  protected YarnConfiguration createNMConfig(int port) throws IOException {
+    YarnConfiguration nmConf = new YarnConfiguration();
+    String localhostAddress = null;
+    try {
+      InetAddress localhost = InetAddress.getByName("localhost");
+      localhostAddress = localhost.getCanonicalHostName();
+    } catch (UnknownHostException e) {
+      Assert.fail("Unable to get localhost address: " + e.getMessage());
+    }
+    nmConf.setInt(YarnConfiguration.NM_PMEM_MB, 5 * 1024); // 5GB
+    nmConf.set(YarnConfiguration.NM_ADDRESS, localhostAddress + ":" + port);
+    int localizerPort = ServerSocketUtil.getPort(49160, 10);
+    nmConf.set(YarnConfiguration.NM_LOCALIZER_ADDRESS,
+        localhostAddress + ":" + localizerPort);
+    nmConf.set(YarnConfiguration.NM_LOG_DIRS, logsDir.getAbsolutePath());
+    nmConf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
+        remoteLogsDir.getAbsolutePath());
+    nmConf.set(YarnConfiguration.NM_LOCAL_DIRS, nmLocalDir.getAbsolutePath());
+    nmConf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1);
+    return nmConf;
+  }
+
+  /**
+   * Resource tracker stub that answers every call with a freshly created,
+   * unpopulated response record.
+   */
+  public static class BaseResourceTrackerForTest implements ResourceTracker {
+    @Override
+    public RegisterNodeManagerResponse registerNodeManager(
+        RegisterNodeManagerRequest request) throws YarnException, IOException {
+      RegisterNodeManagerResponse response =
+          new RegisterNodeManagerResponsePBImpl();
+      return response;
+    }
+
+    @Override
+    public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request)
+        throws YarnException, IOException {
+      NodeHeartbeatResponse response = new NodeHeartbeatResponsePBImpl();
+      return response;
+    }
+
+    @Override
+    public UnRegisterNodeManagerResponse unRegisterNodeManager(
+        UnRegisterNodeManagerRequest request)
+        throws YarnException, IOException {
+      UnRegisterNodeManagerResponse response =
+          new UnRegisterNodeManagerResponsePBImpl();
+      return response;
+    }
+  }
+
+  /**
+   * Node status updater that hands out the resource tracker supplied at
+   * construction time as its RM client and does nothing when asked to stop
+   * the RM proxy.
+   */
+  protected static class BaseNodeStatusUpdaterForTest
+      extends NodeStatusUpdaterImpl {
+    public ResourceTracker resourceTracker;
+    protected Context context;
+
+    public BaseNodeStatusUpdaterForTest(Context context, Dispatcher dispatcher,
+        NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics,
+        ResourceTracker resourceTracker) {
+      super(context, dispatcher, healthChecker, metrics);
+      this.resourceTracker = resourceTracker;
+      this.context = context;
+    }
+
+    @Override
+    protected ResourceTracker getRMClient() {
+      return this.resourceTracker;
+    }
+
+    @Override
+    protected void stopRMProxy() {
+      // Intentionally empty.
+    }
+  }
+
+  /**
+   * Container manager that only records whether a SIGNAL_CONTAINERS event
+   * has been handled; no event is passed on to the parent class.
+   */
+  public class MyContainerManager extends ContainerManagerImpl {
+    public boolean signaled = false;
+
+    public MyContainerManager(Context context, ContainerExecutor exec,
+        DeletionService deletionContext, NodeStatusUpdater nodeStatusUpdater,
+        NodeManagerMetrics metrics,
+        LocalDirsHandlerService dirsHandler) {
+      super(context, exec, deletionContext, nodeStatusUpdater,
+          metrics, dirsHandler);
+    }
+
+    @Override
+    public void handle(ContainerManagerEvent event) {
+      boolean isSignal =
+          event.getType() == ContainerManagerEventType.SIGNAL_CONTAINERS;
+      if (isSignal) {
+        signaled = true;
+      }
+    }
+  }
+
+  /**
+   * Creates the working directories and a fresh configuration before each
+   * test.
+   *
+   * @throws IOException declared by createNMConfig()
+   */
+  @Before
+  public void setUp() throws IOException {
+    File[] workDirs = {nmLocalDir, tmpDir, logsDir, remoteLogsDir};
+    for (File dir : workDirs) {
+      dir.mkdirs();
+    }
+    conf = createNMConfig();
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDefaultContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDefaultContainerExecutor.java
index 2e9eff5..9b180c7 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDefaultContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDefaultContainerExecutor.java
@@ -178,7 +178,7 @@ public class TestDefaultContainerExecutor {
     FileContext lfs = FileContext.getLocalFSFileContext(conf);
     DefaultContainerExecutor executor = new DefaultContainerExecutor(lfs);
     executor.setConf(conf);
-    executor.init();
+    executor.init(null);
 
     try {
       executor.createUserLocalDirs(localDirs, user);
@@ -317,7 +317,7 @@ public class TestDefaultContainerExecutor {
       Path workDir = localDir;
       Path pidFile = new Path(workDir, "pid.txt");
 
-      mockExec.init();
+      mockExec.init(null);
       mockExec.activateContainer(cId, pidFile);
       int ret = mockExec.launchContainer(new ContainerStartContext.Builder()
           .setContainer(container)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
index d4db6b0..dcec4c3 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
@@ -628,7 +628,7 @@ public class TestLinuxContainerExecutor {
     LinuxContainerExecutor lce = new LinuxContainerExecutor();
     lce.setConf(conf);
     try {
-      lce.init();
+      lce.init(null);
     } catch (IOException e) {
       // expected if LCE isn't setup right, but not necessary for this test
     }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java
index 7fbc108..8bf9d2e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java
@@ -427,7 +427,7 @@ public class TestLinuxContainerExecutorWithMocks {
   @Test
   public void testInit() throws Exception {
 
-    mockExec.init();
+    mockExec.init(mock(Context.class));
     assertEquals(Arrays.asList("--checksetup"), readMockParams());
     
   }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManager.java
index 9279711..b31215b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManager.java
@@ -37,7 +37,7 @@ public class TestNodeManager {
   public static final class InvalidContainerExecutor extends
       DefaultContainerExecutor {
     @Override
-    public void init() throws IOException {
+    public void init(Context nmContext) throws IOException {
       throw new IOException("dummy executor init called");
     }
   }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
index 11c3c35..8435340 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
@@ -20,16 +20,14 @@ package org.apache.hadoop.yarn.server.nodemanager;
 
 import static org.apache.hadoop.yarn.server.utils.YarnServerBuilderUtils.newNodeHeartbeatResponse;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.io.EOFException;
 import java.io.File;
 import java.io.IOException;
 import java.net.InetAddress;
 import java.net.InetSocketAddress;
-import java.net.UnknownHostException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -81,8 +79,6 @@ import org.apache.hadoop.yarn.event.Event;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
-import org.apache.hadoop.yarn.factories.RecordFactory;
-import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.NodeHeartbeatResponseProto;
 import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
 import org.apache.hadoop.yarn.server.api.ResourceTracker;
@@ -118,41 +114,14 @@ import org.junit.Before;
 import org.junit.Test;
 
 @SuppressWarnings("rawtypes")
-public class TestNodeStatusUpdater {
-
-  // temp fix until metrics system can auto-detect itself running in unit test:
-  static {
-    DefaultMetricsSystem.setMiniClusterMode(true);
-  }
-
-  static final Logger LOG =
-       LoggerFactory.getLogger(TestNodeStatusUpdater.class);
-  static final File basedir =
-      new File("target", TestNodeStatusUpdater.class.getName());
-  static final File nmLocalDir = new File(basedir, "nm0");
-  static final File tmpDir = new File(basedir, "tmpDir");
-  static final File remoteLogsDir = new File(basedir, "remotelogs");
-  static final File logsDir = new File(basedir, "logs");
-  private static final RecordFactory recordFactory = RecordFactoryProvider
-      .getRecordFactory(null);
-
+public class TestNodeStatusUpdater extends NodeManagerTestBase {
   volatile int heartBeatID = 0;
   volatile Throwable nmStartError = null;
   private final List<NodeId> registeredNodes = new ArrayList<NodeId>();
   private boolean triggered = false;
-  private Configuration conf;
   private NodeManager nm;
   private AtomicBoolean assertionFailedInThread = new AtomicBoolean(false);
 
-  @Before
-  public void setUp() throws IOException {
-    nmLocalDir.mkdirs();
-    tmpDir.mkdirs();
-    logsDir.mkdirs();
-    remoteLogsDir.mkdirs();
-    conf = createNMConfig();
-  }
-
   @After
   public void tearDown() {
     this.registeredNodes.clear();
@@ -334,29 +303,7 @@ public class TestNodeStatusUpdater {
     }
   }
 
-  private class MyContainerManager extends ContainerManagerImpl {
-    public boolean signaled = false;
-
-    public MyContainerManager(Context context, ContainerExecutor exec,
-        DeletionService deletionContext, NodeStatusUpdater nodeStatusUpdater,
-        NodeManagerMetrics metrics,
-        LocalDirsHandlerService dirsHandler) {
-      super(context, exec, deletionContext, nodeStatusUpdater,
-          metrics, dirsHandler);
-    }
-
-    @Override
-    public void handle(ContainerManagerEvent event) {
-      if (event.getType() == ContainerManagerEventType.SIGNAL_CONTAINERS) {
-        signaled = true;
-      }
-    }
-  }
-
-  private class MyNodeStatusUpdater extends NodeStatusUpdaterImpl {
-    public ResourceTracker resourceTracker;
-    private Context context;
-
+  private class MyNodeStatusUpdater extends BaseNodeStatusUpdaterForTest {
     public MyNodeStatusUpdater(Context context, Dispatcher dispatcher,
         NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) {
       this(context, dispatcher, healthChecker, metrics, false);
@@ -365,19 +312,8 @@ public class TestNodeStatusUpdater {
     public MyNodeStatusUpdater(Context context, Dispatcher dispatcher,
         NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics,
         boolean signalContainer) {
-      super(context, dispatcher, healthChecker, metrics);
-      this.context = context;
-      resourceTracker = new MyResourceTracker(this.context, signalContainer);
-    }
-
-    @Override
-    protected ResourceTracker getRMClient() {
-      return resourceTracker;
-    }
-
-    @Override
-    protected void stopRMProxy() {
-      return;
+      super(context, dispatcher, healthChecker, metrics,
+          new MyResourceTracker(context, signalContainer));
     }
   }
 
@@ -1820,7 +1756,6 @@ public class TestNodeStatusUpdater {
     Assert.assertTrue("Test failed with exception(s)" + exceptions,
         exceptions.isEmpty());
   }
-
   // Add new containers info into NM context each time node heart beats.
   private class MyNMContext extends NMContext {
 
@@ -1924,31 +1859,6 @@ public class TestNodeStatusUpdater {
         this.registeredNodes.size());
   }
 
-  private YarnConfiguration createNMConfig(int port) throws IOException {
-    YarnConfiguration conf = new YarnConfiguration();
-    String localhostAddress = null;
-    try {
-      localhostAddress = InetAddress.getByName("localhost")
-          .getCanonicalHostName();
-    } catch (UnknownHostException e) {
-      Assert.fail("Unable to get localhost address: " + e.getMessage());
-    }
-    conf.setInt(YarnConfiguration.NM_PMEM_MB, 5 * 1024); // 5GB
-    conf.set(YarnConfiguration.NM_ADDRESS, localhostAddress + ":" + port);
-    conf.set(YarnConfiguration.NM_LOCALIZER_ADDRESS, localhostAddress + ":"
-        + ServerSocketUtil.getPort(49160, 10));
-    conf.set(YarnConfiguration.NM_LOG_DIRS, logsDir.getAbsolutePath());
-    conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
-      remoteLogsDir.getAbsolutePath());
-    conf.set(YarnConfiguration.NM_LOCAL_DIRS, nmLocalDir.getAbsolutePath());
-    conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1);
-    return conf;
-  }
-
-  private YarnConfiguration createNMConfig() throws IOException {
-    return createNMConfig(ServerSocketUtil.getPort(49170, 10));
-  }
-
   private NodeManager getNodeManager(final NodeAction nodeHeartBeatAction) {
     return new NodeManager() {
       @Override
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java
index 0838f1e..3c57496 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java
@@ -18,26 +18,6 @@
 
 package org.apache.hadoop.yarn.server.nodemanager.amrmproxy;
 
-import java.io.IOException;
-import java.security.PrivilegedExceptionAction;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeSet;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ConcurrentLinkedQueue;
-import java.util.concurrent.ConcurrentMap;
-import java.util.concurrent.ExecutorCompletionService;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.hadoop.yarn.server.nodemanager.ContainerStateTransitionListener;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -64,6 +44,7 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
 import org.apache.hadoop.yarn.server.api.records.AppCollectorData;
 import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.ContainerStateTransitionListener;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
 import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext;
@@ -72,17 +53,36 @@ import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManager;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager;
 import org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService;
 import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
-import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM;
 import org.apache.hadoop.yarn.server.nodemanager.timelineservice.NMTimelinePublisher;
+import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
 import org.apache.hadoop.yarn.util.Records;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
 
 /**
  * Base class for all the AMRMProxyService test cases. It provides utility
@@ -773,5 +773,9 @@ public abstract class BaseAMRMProxyTest {
         getContainerStateTransitionListener() {
       return null;
     }
+
+    /**
+     * Stub accessor: always returns null, so this context offers no resource
+     * plugin manager.
+     *
+     * @return always null
+     */
+    public ResourcePluginManager getResourcePluginManager() {
+      return null;
+    }
   }
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestResourceHandlerModule.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestResourceHandlerModule.java
index e5414a5..0563694 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestResourceHandlerModule.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestResourceHandlerModule.java
@@ -22,6 +22,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resourc
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
@@ -30,6 +31,8 @@ import org.slf4j.LoggerFactory;
 
 import java.util.List;
 
+import static org.mockito.Mockito.mock;
+
 public class TestResourceHandlerModule {
   private static final Logger LOG =
        LoggerFactory.getLogger(TestResourceHandlerModule.class);
@@ -62,7 +65,7 @@ public class TestResourceHandlerModule {
 
       //Ensure that outbound bandwidth resource handler is present in the chain
       ResourceHandlerChain resourceHandlerChain = ResourceHandlerModule
-          .getConfiguredResourceHandlerChain(networkEnabledConf);
+          .getConfiguredResourceHandlerChain(networkEnabledConf, mock(Context.class));
       List<ResourceHandler> resourceHandlers = resourceHandlerChain
           .getResourceHandlerList();
       //Exactly one resource handler in chain
@@ -88,7 +91,8 @@ public class TestResourceHandlerModule {
     Assert.assertNotNull(handler);
 
     ResourceHandlerChain resourceHandlerChain =
-        ResourceHandlerModule.getConfiguredResourceHandlerChain(diskConf);
+        ResourceHandlerModule.getConfiguredResourceHandlerChain(diskConf,
+            mock(Context.class));
     List<ResourceHandler> resourceHandlers =
         resourceHandlerChain.getResourceHandlerList();
     // Exactly one resource handler in chain
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java
new file mode 100644
index 0000000..5c70f7a
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java
@@ -0,0 +1,382 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDiscoverer;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
+import org.apache.hadoop.yarn.util.resource.ResourceUtils;
+import org.apache.hadoop.yarn.util.resource.TestResourceUtils;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyList;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+public class TestGpuResourceHandler {
+  private CGroupsHandler mockCGroupsHandler;
+  private PrivilegedOperationExecutor mockPrivilegedExecutor;
+  private GpuResourceHandlerImpl gpuResourceHandler;
+  private NMStateStoreService mockNMStateStore;
+  private ConcurrentHashMap<ContainerId, Container> runningContainersMap;
+
+  @Before
+  public void setup() {
+    TestResourceUtils.addNewTypesToResources(ResourceInformation.GPU_URI);
+
+    mockCGroupsHandler = mock(CGroupsHandler.class);
+    mockPrivilegedExecutor = mock(PrivilegedOperationExecutor.class);
+    mockNMStateStore = mock(NMStateStoreService.class);
+
+    Context nmctx = mock(Context.class);
+    when(nmctx.getNMStateStore()).thenReturn(mockNMStateStore);
+    runningContainersMap = new ConcurrentHashMap<>();
+    when(nmctx.getContainers()).thenReturn(runningContainersMap);
+
+    gpuResourceHandler = new GpuResourceHandlerImpl(nmctx, mockCGroupsHandler,
+        mockPrivilegedExecutor);
+  }
+
+  @Test
+  public void testBootStrap() throws Exception {
+    Configuration conf = new YarnConfiguration();
+    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0");
+
+    GpuDiscoverer.getInstance().initialize(conf);
+
+    gpuResourceHandler.bootstrap(conf);
+    verify(mockCGroupsHandler, times(1)).initializeCGroupController(
+        CGroupsHandler.CGroupController.DEVICES);
+  }
+
+  private static ContainerId getContainerId(int id) {
+    return ContainerId.newContainerId(ApplicationAttemptId
+        .newInstance(ApplicationId.newInstance(1234L, 1), 1), id);
+  }
+
+  private static Container mockContainerWithGpuRequest(int id,
+      int numGpuRequest) {
+    // Base resource (1024, 1) plus the requested number of GPUs.
+    Resource requested = Resource.newInstance(1024, 1);
+    requested.setResourceValue(ResourceInformation.GPU_URI, numGpuRequest);
+
+    Container container = mock(Container.class);
+    when(container.getContainerId()).thenReturn(getContainerId(id));
+    when(container.getResource()).thenReturn(requested);
+    // Each mocked container gets its own empty ResourceMappings.
+    when(container.getResourceMappings()).thenReturn(new ResourceMappings());
+    return container;
+  }
+
+  private void verifyDeniedDevices(ContainerId containerId,
+      List<Integer> deniedDevices)
+      throws ResourceHandlerException, PrivilegedOperationException {
+    verify(mockCGroupsHandler, times(1)).createCGroup(
+        CGroupsHandler.CGroupController.DEVICES, containerId.toString());
+
+    if (null != deniedDevices && !deniedDevices.isEmpty()) {
+      verify(mockPrivilegedExecutor, times(1)).executePrivilegedOperation(
+          new PrivilegedOperation(PrivilegedOperation.OperationType.GPU, Arrays
+              .asList(GpuResourceHandlerImpl.CONTAINER_ID_CLI_OPTION,
+                  containerId.toString(),
+                  GpuResourceHandlerImpl.EXCLUDED_GPUS_CLI_OPTION,
+                  StringUtils.join(",", deniedDevices))), true);
+    }
+  }
+
+  @Test
+  public void testAllocation() throws Exception {
+    Configuration conf = new YarnConfiguration();
+    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0,1,3,4");
+    GpuDiscoverer.getInstance().initialize(conf);
+
+    gpuResourceHandler.bootstrap(conf);
+    Assert.assertEquals(4,
+        gpuResourceHandler.getGpuAllocator().getAvailableGpus());
+
+    /* Start container 1, which asks for 3 GPUs */
+    gpuResourceHandler.preStart(mockContainerWithGpuRequest(1, 3));
+
+    // Only device=4 will be blocked.
+    verifyDeniedDevices(getContainerId(1), Arrays.asList(4));
+
+    /* Start container 2, which asks for 2 GPUs. Expected to fail */
+    boolean failedToAllocate = false;
+    try {
+      gpuResourceHandler.preStart(mockContainerWithGpuRequest(2, 2));
+    } catch (ResourceHandlerException e) {
+      failedToAllocate = true;
+    }
+    Assert.assertTrue(failedToAllocate);
+
+    /* Start container 3, which asks for 1 GPU, succeeded */
+    gpuResourceHandler.preStart(mockContainerWithGpuRequest(3, 1));
+
+    // devices = 0/1/3 will be blocked
+    verifyDeniedDevices(getContainerId(3), Arrays.asList(0, 1, 3));
+
+    /* Start container 4, which asks for 0 GPUs, succeeded */
+    gpuResourceHandler.preStart(mockContainerWithGpuRequest(4, 0));
+
+    // All devices will be blocked
+    verifyDeniedDevices(getContainerId(4), Arrays.asList(0, 1, 3, 4));
+
+    /* Release container-1, expect its cgroup deleted and 3 GPUs freed */
+    gpuResourceHandler.postComplete(getContainerId(1));
+
+    verify(mockCGroupsHandler, times(1)).deleteCGroup(
+        CGroupsHandler.CGroupController.DEVICES, getContainerId(1).toString());
+    Assert.assertEquals(3,
+        gpuResourceHandler.getGpuAllocator().getAvailableGpus());
+
+    /* Release container-3, expect its cgroup deleted and all GPUs freed */
+    gpuResourceHandler.postComplete(getContainerId(3));
+
+    verify(mockCGroupsHandler, times(1)).deleteCGroup(
+        CGroupsHandler.CGroupController.DEVICES, getContainerId(3).toString());
+    Assert.assertEquals(4,
+        gpuResourceHandler.getGpuAllocator().getAvailableGpus());
+  }
+
+  @SuppressWarnings("unchecked")
+  @Test
+  public void testAssignedGpuWillBeCleanedupWhenStoreOpFails()
+      throws Exception {
+    Configuration conf = new YarnConfiguration();
+    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0,1,3,4");
+    GpuDiscoverer.getInstance().initialize(conf);
+
+    gpuResourceHandler.bootstrap(conf);
+    Assert.assertEquals(4,
+        gpuResourceHandler.getGpuAllocator().getAvailableGpus());
+
+    doThrow(new IOException("Exception ...")).when(mockNMStateStore)
+        .storeAssignedResources(
+        any(ContainerId.class), anyString(), anyList());
+
+    boolean exception = false;
+    /* Start container 1, which asks for 3 GPUs; the store op will throw */
+    try {
+      gpuResourceHandler.preStart(mockContainerWithGpuRequest(1, 3));
+    } catch (ResourceHandlerException e) {
+      exception = true;
+    }
+
+    Assert.assertTrue("preStart should throw exception", exception);
+
+    // All 4 GPUs must be available again because the store op failed.
+    Assert.assertEquals(4,
+        gpuResourceHandler.getGpuAllocator().getAvailableGpus());
+  }
+
+  @Test
+  public void testAllocationWithoutAllowedGpus() throws Exception {
+    Configuration conf = new YarnConfiguration();
+    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, " ");
+    GpuDiscoverer.getInstance().initialize(conf);
+
+    gpuResourceHandler.bootstrap(conf);
+    Assert.assertEquals(0,
+        gpuResourceHandler.getGpuAllocator().getAvailableGpus());
+
+    /* Start container 1, which asks for 0 GPUs */
+    gpuResourceHandler.preStart(mockContainerWithGpuRequest(1, 0));
+    verifyDeniedDevices(getContainerId(1), Collections.emptyList());
+
+    /* Start container 2, which asks for 1 GPU. Expected to fail */
+    boolean failedToAllocate = false;
+    try {
+      gpuResourceHandler.preStart(mockContainerWithGpuRequest(2, 1));
+    } catch (ResourceHandlerException e) {
+      failedToAllocate = true;
+    }
+    Assert.assertTrue(failedToAllocate);
+
+    /* Release container 1, expect its cgroup deleted */
+    gpuResourceHandler.postComplete(getContainerId(1));
+
+    verify(mockCGroupsHandler, times(1)).deleteCGroup(
+        CGroupsHandler.CGroupController.DEVICES, getContainerId(1).toString());
+    Assert.assertEquals(0,
+        gpuResourceHandler.getGpuAllocator().getAvailableGpus());
+  }
+
+  @Test
+  public void testAllocationStored() throws Exception {
+    Configuration conf = new YarnConfiguration();
+    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0,1,3,4");
+    GpuDiscoverer.getInstance().initialize(conf);
+
+    gpuResourceHandler.bootstrap(conf);
+    Assert.assertEquals(4,
+        gpuResourceHandler.getGpuAllocator().getAvailableGpus());
+
+    /* Start container 1, which asks for 3 GPUs */
+    Container container = mockContainerWithGpuRequest(1, 3);
+    gpuResourceHandler.preStart(container);
+
+    verify(mockNMStateStore).storeAssignedResources(getContainerId(1),
+        ResourceInformation.GPU_URI,
+        Arrays.asList("0", "1", "3"));
+
+    Assert.assertEquals(3, container.getResourceMappings()
+        .getAssignedResources(ResourceInformation.GPU_URI).size());
+
+    // Only device=4 will be blocked.
+    verifyDeniedDevices(getContainerId(1), Arrays.asList(4));
+
+    /* Start container 2, which asks for 0 GPUs, succeeded */
+    container = mockContainerWithGpuRequest(2, 0);
+    gpuResourceHandler.preStart(container);
+
+    verifyDeniedDevices(getContainerId(2), Arrays.asList(0, 1, 3, 4));
+    Assert.assertEquals(0, container.getResourceMappings()
+        .getAssignedResources(ResourceInformation.GPU_URI).size());
+
+    // storeAssignedResources must never be invoked for container 2.
+    verify(mockNMStateStore, never()).storeAssignedResources(
+        eq(getContainerId(2)), eq(ResourceInformation.GPU_URI), anyList());
+  }
+
+  @Test
+  public void testRecoverResourceAllocation() throws Exception {
+    Configuration conf = new YarnConfiguration();
+    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0,1,3,4");
+    GpuDiscoverer.getInstance().initialize(conf);
+
+    gpuResourceHandler.bootstrap(conf);
+    Assert.assertEquals(4,
+        gpuResourceHandler.getGpuAllocator().getAvailableGpus());
+
+    Container nmContainer = mock(Container.class);
+    ResourceMappings rmap = new ResourceMappings();
+    ResourceMappings.AssignedResources ar =
+        new ResourceMappings.AssignedResources();
+    ar.updateAssignedResources(Arrays.asList("1", "3"));
+    rmap.addAssignedResources(ResourceInformation.GPU_URI, ar);
+    when(nmContainer.getResourceMappings()).thenReturn(rmap);
+
+    runningContainersMap.put(getContainerId(1), nmContainer);
+
+    // TEST CASE
+    // Reacquire container restore state of GPU Resource Allocator.
+    gpuResourceHandler.reacquireContainer(getContainerId(1));
+
+    Map<Integer, ContainerId> deviceAllocationMapping =
+        gpuResourceHandler.getGpuAllocator().getDeviceAllocationMapping();
+    Assert.assertEquals(2, deviceAllocationMapping.size());
+    Assert.assertTrue(
+        deviceAllocationMapping.keySet().containsAll(Arrays.asList(1, 3)));
+    Assert.assertEquals(deviceAllocationMapping.get(1), getContainerId(1));
+
+    // TEST CASE: reacquire when a requested device is not in allowed list.
+    // NOTE(review): container 2 is registered, but container 1 is reacquired.
+    nmContainer = mock(Container.class);
+    rmap = new ResourceMappings();
+    ar = new ResourceMappings.AssignedResources();
+    // id=5 is not in allowed list.
+    ar.updateAssignedResources(Arrays.asList("4", "5"));
+    rmap.addAssignedResources(ResourceInformation.GPU_URI, ar);
+    when(nmContainer.getResourceMappings()).thenReturn(rmap);
+
+    runningContainersMap.put(getContainerId(2), nmContainer);
+
+    boolean caughtException = false;
+    try {
+      gpuResourceHandler.reacquireContainer(getContainerId(1));
+    } catch (ResourceHandlerException e) {
+      caughtException = true;
+    }
+    Assert.assertTrue(
+        "Should fail since requested device Id is not in allowed list",
+        caughtException);
+
+    // Make sure internal state not changed.
+    deviceAllocationMapping =
+        gpuResourceHandler.getGpuAllocator().getDeviceAllocationMapping();
+    Assert.assertEquals(2, deviceAllocationMapping.size());
+    Assert.assertTrue(
+        deviceAllocationMapping.keySet().containsAll(Arrays.asList(1, 3)));
+    Assert.assertEquals(deviceAllocationMapping.get(1), getContainerId(1));
+
+    // TEST CASE: reacquire when a requested device is already assigned.
+    // NOTE(review): container 2 is registered, but container 1 is reacquired.
+    nmContainer = mock(Container.class);
+    rmap = new ResourceMappings();
+    ar = new ResourceMappings.AssignedResources();
+    // id=3 is already assigned
+    ar.updateAssignedResources(Arrays.asList("4", "3"));
+    rmap.addAssignedResources(ResourceInformation.GPU_URI, ar);
+    when(nmContainer.getResourceMappings()).thenReturn(rmap);
+
+    runningContainersMap.put(getContainerId(2), nmContainer);
+
+    caughtException = false;
+    try {
+      gpuResourceHandler.reacquireContainer(getContainerId(1));
+    } catch (ResourceHandlerException e) {
+      caughtException = true;
+    }
+    Assert.assertTrue(
+        "Should fail since requested device Id is already assigned",
+        caughtException);
+
+    // Make sure internal state not changed.
+    deviceAllocationMapping =
+        gpuResourceHandler.getGpuAllocator().getDeviceAllocationMapping();
+    Assert.assertEquals(2, deviceAllocationMapping.size());
+    Assert.assertTrue(
+        deviceAllocationMapping.keySet().containsAll(Arrays.asList(1, 3)));
+    Assert.assertEquals(deviceAllocationMapping.get(1), getContainerId(1));
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
index e21eea0..2cca277 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
@@ -73,7 +73,7 @@ public class TestContainersMonitorResourceChange {
 
   private static class MockExecutor extends ContainerExecutor {
     @Override
-    public void init() throws IOException {
+    public void init(Context nmContext) throws IOException {
     }
     @Override
     public void startLocalizer(LocalizerStartContext ctx)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/TestResourcePluginManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/TestResourcePluginManager.java
new file mode 100644
index 0000000..bcadf76
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/TestResourcePluginManager.java
@@ -0,0 +1,261 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.service.ServiceOperations;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
+import org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
+import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService;
+import org.apache.hadoop.yarn.server.nodemanager.NodeManager;
+import org.apache.hadoop.yarn.server.nodemanager.NodeManagerTestBase;
+import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerChain;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.NodeResourceUpdaterPlugin;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager;
+import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+public class TestResourcePluginManager extends NodeManagerTestBase {
+  private NodeManager nm;
+
+  ResourcePluginManager stubResourcePluginmanager() {
+    // Stub ResourcePluginManager
+    final ResourcePluginManager rpm = mock(ResourcePluginManager.class);
+    Map<String, ResourcePlugin> plugins = new HashMap<>();
+
+    // First resource plugin
+    ResourcePlugin resourcePlugin = mock(ResourcePlugin.class);
+    NodeResourceUpdaterPlugin nodeResourceUpdaterPlugin = mock(
+        NodeResourceUpdaterPlugin.class);
+    when(resourcePlugin.getNodeResourceHandlerInstance()).thenReturn(
+        nodeResourceUpdaterPlugin);
+    plugins.put("resource1", resourcePlugin);
+
+    // Second resource plugin
+    resourcePlugin = mock(ResourcePlugin.class);
+    when(resourcePlugin.createResourceHandler(any(Context.class), any(
+        CGroupsHandler.class), any(PrivilegedOperationExecutor.class)))
+        .thenReturn(new CustomizedResourceHandler());
+    plugins.put("resource2", resourcePlugin);
+    when(rpm.getNameToPlugins()).thenReturn(plugins);
+    return rpm;
+  }
+
+  @After
+  public void tearDown() {
+    if (nm != null) {
+      try {
+        ServiceOperations.stop(nm);
+      } catch (Throwable t) {
+        // Ignore: failures while stopping the NM are deliberately swallowed.
+      }
+    }
+  }
+
+  private class CustomizedResourceHandler implements ResourceHandler {
+
+    @Override
+    public List<PrivilegedOperation> bootstrap(Configuration configuration)
+        throws ResourceHandlerException {
+      return null;
+    }
+
+    @Override
+    public List<PrivilegedOperation> preStart(Container container)
+        throws ResourceHandlerException {
+      return null;
+    }
+
+    @Override
+    public List<PrivilegedOperation> reacquireContainer(ContainerId containerId)
+        throws ResourceHandlerException {
+      return null;
+    }
+
+    @Override
+    public List<PrivilegedOperation> postComplete(ContainerId containerId)
+        throws ResourceHandlerException {
+      return null;
+    }
+
+    @Override
+    public List<PrivilegedOperation> teardown()
+        throws ResourceHandlerException {
+      return null;
+    }
+  }
+
+  private class MyMockNM extends NodeManager {
+    private final ResourcePluginManager rpm;
+
+    public MyMockNM(ResourcePluginManager rpm) {
+      this.rpm = rpm;
+    }
+
+    @Override
+    protected NodeStatusUpdater createNodeStatusUpdater(Context context,
+        Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
+      ((NodeManager.NMContext)context).setResourcePluginManager(rpm);
+      return new BaseNodeStatusUpdaterForTest(context, dispatcher, healthChecker,
+          metrics, new BaseResourceTrackerForTest());
+    }
+
+    @Override
+    protected ContainerManagerImpl createContainerManager(Context context,
+        ContainerExecutor exec, DeletionService del,
+        NodeStatusUpdater nodeStatusUpdater,
+        ApplicationACLsManager aclsManager,
+        LocalDirsHandlerService diskhandler) {
+      return new MyContainerManager(context, exec, del, nodeStatusUpdater,
+      metrics, diskhandler);
+    }
+
+    @Override
+    protected ResourcePluginManager createResourcePluginManager() {
+      return rpm;
+    }
+  }
+
+  public class MyLCE extends LinuxContainerExecutor {
+    private PrivilegedOperationExecutor poe = mock(PrivilegedOperationExecutor.class);
+
+    @Override
+    protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() {
+      return poe;
+    }
+  }
+
+  /*
+   * Make sure ResourcePluginManager is initialized during NM start up.
+   */
+  @Test(timeout = 30000)
+  public void testResourcePluginManagerInitialization() throws Exception {
+    final ResourcePluginManager rpm = stubResourcePluginmanager();
+    nm = new MyMockNM(rpm);
+
+    YarnConfiguration conf = createNMConfig();
+    nm.init(conf);
+    verify(rpm, times(1)).initialize(
+        any(Context.class));
+  }
+
+  /*
+   * Make sure ResourcePluginManager is invoked during NM update.
+   */
+  @Test(timeout = 30000)
+  public void testNodeStatusUpdaterWithResourcePluginsEnabled() throws Exception {
+    final ResourcePluginManager rpm = stubResourcePluginmanager();
+
+    nm = new MyMockNM(rpm);
+
+    YarnConfiguration conf = createNMConfig();
+    nm.init(conf);
+    nm.start();
+
+    NodeResourceUpdaterPlugin nodeResourceUpdaterPlugin =
+        rpm.getNameToPlugins().get("resource1")
+            .getNodeResourceHandlerInstance();
+
+    verify(nodeResourceUpdaterPlugin, times(1)).updateConfiguredResource(
+        any(Resource.class));
+  }
+
+  /*
+   * Make sure ResourcePluginManager is used to initialize ResourceHandlerChain
+   */
+  @Test(timeout = 30000)
+  public void testLinuxContainerExecutorWithResourcePluginsEnabled() throws Exception {
+    final ResourcePluginManager rpm = stubResourcePluginmanager();
+    final LinuxContainerExecutor lce = new MyLCE();
+
+    nm = new NodeManager() {
+      @Override
+      protected NodeStatusUpdater createNodeStatusUpdater(Context context,
+          Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
+        ((NMContext)context).setResourcePluginManager(rpm);
+        return new BaseNodeStatusUpdaterForTest(context, dispatcher, healthChecker,
+            metrics, new BaseResourceTrackerForTest());
+      }
+
+      @Override
+      protected ContainerManagerImpl createContainerManager(Context context,
+          ContainerExecutor exec, DeletionService del,
+          NodeStatusUpdater nodeStatusUpdater,
+          ApplicationACLsManager aclsManager,
+          LocalDirsHandlerService diskhandler) {
+        return new MyContainerManager(context, exec, del, nodeStatusUpdater,
+            metrics, diskhandler);
+      }
+
+      @Override
+      protected ContainerExecutor createContainerExecutor(Configuration conf) {
+        ((NMContext)this.getNMContext()).setResourcePluginManager(rpm);
+        lce.setConf(conf);
+        return lce;
+      }
+    };
+
+    YarnConfiguration conf = createNMConfig();
+
+    nm.init(conf);
+    nm.start();
+
+    ResourceHandler handler = lce.getResourceHandler();
+    Assert.assertNotNull(handler);
+    Assert.assertTrue(handler instanceof ResourceHandlerChain);
+
+    boolean newHandlerAdded = false;
+    for (ResourceHandler h : ((ResourceHandlerChain) handler)
+        .getResourceHandlerList()) {
+      if (h instanceof CustomizedResourceHandler) {
+        newHandlerAdded = true;
+        break;
+      }
+    }
+    Assert.assertTrue("New ResourceHandler should be added", newHandlerAdded);
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
new file mode 100644
index 0000000..83bace2
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation;
+import org.junit.Assert;
+import org.junit.Assume;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.List;
+
+public class TestGpuDiscoverer {
+  private String getTestParentFolder() {
+    File f = new File("target/temp/" + TestGpuDiscoverer.class.getName());
+    return f.getAbsolutePath();
+  }
+
+  private void touchFile(File f) throws IOException {
+    new FileOutputStream(f).close();
+  }
+
+  @Before
+  public void before() throws IOException {
+    String folder = getTestParentFolder();
+    File f = new File(folder);
+    FileUtils.deleteDirectory(f);
+    f.mkdirs();
+  }
+
+  @Test
+  public void testLinuxGpuResourceDiscoverPluginConfig() throws Exception {
+    // Only run this on demand.
+    Assume.assumeTrue(Boolean.valueOf(
+        System.getProperty("RunLinuxGpuResourceDiscoverPluginConfigTest")));
+
+    // test case 1, check default setting.
+    Configuration conf = new Configuration(false);
+    GpuDiscoverer plugin = new GpuDiscoverer();
+    plugin.initialize(conf);
+    Assert.assertEquals(GpuDiscoverer.DEFAULT_BINARY_NAME,
+        plugin.getPathOfGpuBinary());
+    Assert.assertNotNull(plugin.getEnvironmentToRunCommand().get("PATH"));
+    Assert.assertTrue(
+        plugin.getEnvironmentToRunCommand().get("PATH").contains("nvidia"));
+
+    // test case 2, check an explicitly configured path to the binary.
+    File fakeBinary = new File(getTestParentFolder(),
+        GpuDiscoverer.DEFAULT_BINARY_NAME);
+    touchFile(fakeBinary);
+    conf.set(YarnConfiguration.NM_GPU_PATH_TO_EXEC, getTestParentFolder());
+    plugin = new GpuDiscoverer();
+    plugin.initialize(conf);
+    Assert.assertEquals(fakeBinary.getAbsolutePath(),
+        plugin.getPathOfGpuBinary());
+    Assert.assertNull(plugin.getEnvironmentToRunCommand().get("PATH"));
+
+    // test case 3, path is explicitly configured but the binary doesn't exist,
+    // so the default binary name will be used.
+    fakeBinary.delete();
+    plugin = new GpuDiscoverer();
+    plugin.initialize(conf);
+    Assert.assertEquals(GpuDiscoverer.DEFAULT_BINARY_NAME,
+        plugin.getPathOfGpuBinary());
+    Assert.assertTrue(
+        plugin.getEnvironmentToRunCommand().get("PATH").contains("nvidia"));
+  }
+
+  @Test
+  public void testGpuDiscover() throws YarnException {
+    // This test expects at least one GPU on the local host, so only run it
+    // if runGpuDiscoverUnitTest is set (-DrunGpuDiscoverUnitTest=true)
+    Assume.assumeTrue(
+        Boolean.valueOf(System.getProperty("runGpuDiscoverUnitTest")));
+    Configuration conf = new Configuration(false);
+    GpuDiscoverer plugin = new GpuDiscoverer();
+    plugin.initialize(conf);
+    GpuDeviceInformation info = plugin.getGpuDeviceInformation();
+
+    Assert.assertTrue(info.getGpus().size() > 0);
+    Assert.assertEquals(plugin.getMinorNumbersOfGpusUsableByYarn().size(),
+        info.getGpus().size());
+  }
+
+  @Test
+  public void getNumberOfUsableGpusFromConfig() throws YarnException {
+    Configuration conf = new Configuration(false);
+    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0,1,2,4");
+    GpuDiscoverer plugin = new GpuDiscoverer();
+    plugin.initialize(conf);
+
+    List<Integer> minorNumbers = plugin.getMinorNumbersOfGpusUsableByYarn();
+    Assert.assertEquals(4, minorNumbers.size());
+
+    Assert.assertTrue(0 == minorNumbers.get(0));
+    Assert.assertTrue(1 == minorNumbers.get(1));
+    Assert.assertTrue(2 == minorNumbers.get(2));
+    Assert.assertTrue(4 == minorNumbers.get(3));
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/TestGpuDeviceInformationParser.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/TestGpuDeviceInformationParser.java
new file mode 100644
index 0000000..e22597d
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/TestGpuDeviceInformationParser.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.File;
+import java.io.IOException;
+
+public class TestGpuDeviceInformationParser {
+  @Test
+  public void testParse() throws IOException, YarnException {
+    File f = new File("src/test/resources/nvidia-smi-sample-xml-output");
+    String s = FileUtils.readFileToString(f, "UTF-8");
+
+    GpuDeviceInformationParser parser = new GpuDeviceInformationParser();
+
+    GpuDeviceInformation info = parser.parseXml(s);
+    Assert.assertEquals("375.66", info.getDriverVersion());
+    Assert.assertEquals(2, info.getGpus().size());
+    PerGpuDeviceInformation gpu1 = info.getGpus().get(1);
+    Assert.assertEquals("Tesla P100-PCIE-12GB", gpu1.getProductName());
+    Assert.assertEquals(16384, gpu1.getGpuMemoryUsage().getTotalMemoryMiB());
+    Assert.assertEquals(10.3f,
+        gpu1.getGpuUtilizations().getOverallGpuUtilization(), 1e-6);
+    Assert.assertEquals(34f, gpu1.getTemperature().getCurrentGpuTemp(), 1e-6);
+    Assert.assertEquals(85f, gpu1.getTemperature().getMaxGpuTemp(), 1e-6);
+    Assert.assertEquals(82f, gpu1.getTemperature().getSlowThresholdGpuTemp(),
+        1e-6);
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/resources/nvidia-smi-sample-xml-output b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/resources/nvidia-smi-sample-xml-output
new file mode 100644
index 0000000..5ccb722
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/resources/nvidia-smi-sample-xml-output
@@ -0,0 +1,547 @@
+<?xml version="1.0" ?>
+<!DOCTYPE nvidia_smi_log SYSTEM "nvsmi_device_v8.dtd">
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<nvidia_smi_log>
+	<timestamp>Wed Sep  6 21:52:51 2017</timestamp>
+	<driver_version>375.66</driver_version>
+	<attached_gpus>2</attached_gpus>
+	<gpu id="0000:04:00.0">
+		<product_name>Tesla P100-PCIE-12GB</product_name>
+		<product_brand>Tesla</product_brand>
+		<display_mode>Disabled</display_mode>
+		<display_active>Disabled</display_active>
+		<persistence_mode>Disabled</persistence_mode>
+		<accounting_mode>Disabled</accounting_mode>
+		<accounting_mode_buffer_size>1920</accounting_mode_buffer_size>
+		<driver_model>
+			<current_dm>N/A</current_dm>
+			<pending_dm>N/A</pending_dm>
+		</driver_model>
+		<serial>0320717030197</serial>
+		<uuid>GPU-28604e81-21ec-cc48-6759-bf2648b22e16</uuid>
+		<minor_number>0</minor_number>
+		<vbios_version>86.00.3A.00.02</vbios_version>
+		<multigpu_board>No</multigpu_board>
+		<board_id>0x400</board_id>
+		<gpu_part_number>900-2H400-0110-030</gpu_part_number>
+		<inforom_version>
+			<img_version>H400.0202.00.01</img_version>
+			<oem_object>1.1</oem_object>
+			<ecc_object>4.1</ecc_object>
+			<pwr_object>N/A</pwr_object>
+		</inforom_version>
+		<gpu_operation_mode>
+			<current_gom>N/A</current_gom>
+			<pending_gom>N/A</pending_gom>
+		</gpu_operation_mode>
+		<gpu_virtualization_mode>
+			<virtualization_mode>None</virtualization_mode>
+		</gpu_virtualization_mode>
+		<pci>
+			<pci_bus>04</pci_bus>
+			<pci_device>00</pci_device>
+			<pci_domain>0000</pci_domain>
+			<pci_device_id>15F710DE</pci_device_id>
+			<pci_bus_id>0000:04:00.0</pci_bus_id>
+			<pci_sub_system_id>11DA10DE</pci_sub_system_id>
+			<pci_gpu_link_info>
+				<pcie_gen>
+					<max_link_gen>3</max_link_gen>
+					<current_link_gen>3</current_link_gen>
+				</pcie_gen>
+				<link_widths>
+					<max_link_width>16x</max_link_width>
+					<current_link_width>16x</current_link_width>
+				</link_widths>
+			</pci_gpu_link_info>
+			<pci_bridge_chip>
+				<bridge_chip_type>N/A</bridge_chip_type>
+				<bridge_chip_fw>N/A</bridge_chip_fw>
+			</pci_bridge_chip>
+			<replay_counter>0</replay_counter>
+			<tx_util>0 KB/s</tx_util>
+			<rx_util>0 KB/s</rx_util>
+		</pci>
+		<fan_speed>N/A</fan_speed>
+		<performance_state>P0</performance_state>
+		<clocks_throttle_reasons>
+			<clocks_throttle_reason_gpu_idle>Active</clocks_throttle_reason_gpu_idle>
+			<clocks_throttle_reason_applications_clocks_setting>Not Active</clocks_throttle_reason_applications_clocks_setting>
+			<clocks_throttle_reason_sw_power_cap>Not Active</clocks_throttle_reason_sw_power_cap>
+			<clocks_throttle_reason_hw_slowdown>Not Active</clocks_throttle_reason_hw_slowdown>
+			<clocks_throttle_reason_sync_boost>Not Active</clocks_throttle_reason_sync_boost>
+			<clocks_throttle_reason_unknown>Not Active</clocks_throttle_reason_unknown>
+		</clocks_throttle_reasons>
+		<fb_memory_usage>
+			<total>12193 MiB</total>
+			<used>0 MiB</used>
+			<free>12193 MiB</free>
+		</fb_memory_usage>
+		<bar1_memory_usage>
+			<total>16384 MiB</total>
+			<used>2 MiB</used>
+			<free>16382 MiB</free>
+		</bar1_memory_usage>
+		<compute_mode>Default</compute_mode>
+		<utilization>
+			<gpu_util>0 %</gpu_util>
+			<memory_util>0 %</memory_util>
+			<encoder_util>0 %</encoder_util>
+			<decoder_util>0 %</decoder_util>
+		</utilization>
+		<encoder_stats>
+			<session_count>0</session_count>
+			<average_fps>0</average_fps>
+			<average_latency>0 ms</average_latency>
+		</encoder_stats>
+		<ecc_mode>
+			<current_ecc>Enabled</current_ecc>
+			<pending_ecc>Enabled</pending_ecc>
+		</ecc_mode>
+		<ecc_errors>
+			<volatile>
+				<single_bit>
+					<device_memory>0</device_memory>
+					<register_file>0</register_file>
+					<l1_cache>N/A</l1_cache>
+					<l2_cache>0</l2_cache>
+					<texture_memory>0</texture_memory>
+					<texture_shm>0</texture_shm>
+					<total>0</total>
+				</single_bit>
+				<double_bit>
+					<device_memory>0</device_memory>
+					<register_file>0</register_file>
+					<l1_cache>N/A</l1_cache>
+					<l2_cache>0</l2_cache>
+					<texture_memory>0</texture_memory>
+					<texture_shm>0</texture_shm>
+					<total>0</total>
+				</double_bit>
+			</volatile>
+			<aggregate>
+				<single_bit>
+					<device_memory>0</device_memory>
+					<register_file>0</register_file>
+					<l1_cache>N/A</l1_cache>
+					<l2_cache>0</l2_cache>
+					<texture_memory>0</texture_memory>
+					<texture_shm>0</texture_shm>
+					<total>0</total>
+				</single_bit>
+				<double_bit>
+					<device_memory>0</device_memory>
+					<register_file>0</register_file>
+					<l1_cache>N/A</l1_cache>
+					<l2_cache>0</l2_cache>
+					<texture_memory>0</texture_memory>
+					<texture_shm>0</texture_shm>
+					<total>0</total>
+				</double_bit>
+			</aggregate>
+		</ecc_errors>
+		<retired_pages>
+			<multiple_single_bit_retirement>
+				<retired_count>0</retired_count>
+				<retired_page_addresses>
+				</retired_page_addresses>
+			</multiple_single_bit_retirement>
+			<double_bit_retirement>
+				<retired_count>0</retired_count>
+				<retired_page_addresses>
+				</retired_page_addresses>
+			</double_bit_retirement>
+			<pending_retirement>No</pending_retirement>
+		</retired_pages>
+		<temperature>
+			<gpu_temp>31 C</gpu_temp>
+			<gpu_temp_max_threshold>85 C</gpu_temp_max_threshold>
+			<gpu_temp_slow_threshold>82 C</gpu_temp_slow_threshold>
+		</temperature>
+		<power_readings>
+			<power_state>P0</power_state>
+			<power_management>Supported</power_management>
+			<power_draw>24.84 W</power_draw>
+			<power_limit>250.00 W</power_limit>
+			<default_power_limit>250.00 W</default_power_limit>
+			<enforced_power_limit>250.00 W</enforced_power_limit>
+			<min_power_limit>125.00 W</min_power_limit>
+			<max_power_limit>250.00 W</max_power_limit>
+		</power_readings>
+		<clocks>
+			<graphics_clock>405 MHz</graphics_clock>
+			<sm_clock>405 MHz</sm_clock>
+			<mem_clock>715 MHz</mem_clock>
+			<video_clock>835 MHz</video_clock>
+		</clocks>
+		<applications_clocks>
+			<graphics_clock>1189 MHz</graphics_clock>
+			<mem_clock>715 MHz</mem_clock>
+		</applications_clocks>
+		<default_applications_clocks>
+			<graphics_clock>1189 MHz</graphics_clock>
+			<mem_clock>715 MHz</mem_clock>
+		</default_applications_clocks>
+		<max_clocks>
+			<graphics_clock>1328 MHz</graphics_clock>
+			<sm_clock>1328 MHz</sm_clock>
+			<mem_clock>715 MHz</mem_clock>
+			<video_clock>1328 MHz</video_clock>
+		</max_clocks>
+		<clock_policy>
+			<auto_boost>N/A</auto_boost>
+			<auto_boost_default>N/A</auto_boost_default>
+		</clock_policy>
+		<supported_clocks>
+			<supported_mem_clock>
+				<value>715 MHz</value>
+				<supported_graphics_clock>1328 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1316 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1303 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1290 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1278 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1265 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1252 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1240 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1227 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1215 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1202 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1189 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1177 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1164 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1151 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1139 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1126 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1113 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1101 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1088 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1075 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1063 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1050 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1037 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1025 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1012 MHz</supported_graphics_clock>
+				<supported_graphics_clock>999 MHz</supported_graphics_clock>
+				<supported_graphics_clock>987 MHz</supported_graphics_clock>
+				<supported_graphics_clock>974 MHz</supported_graphics_clock>
+				<supported_graphics_clock>961 MHz</supported_graphics_clock>
+				<supported_graphics_clock>949 MHz</supported_graphics_clock>
+				<supported_graphics_clock>936 MHz</supported_graphics_clock>
+				<supported_graphics_clock>923 MHz</supported_graphics_clock>
+				<supported_graphics_clock>911 MHz</supported_graphics_clock>
+				<supported_graphics_clock>898 MHz</supported_graphics_clock>
+				<supported_graphics_clock>885 MHz</supported_graphics_clock>
+				<supported_graphics_clock>873 MHz</supported_graphics_clock>
+				<supported_graphics_clock>860 MHz</supported_graphics_clock>
+				<supported_graphics_clock>847 MHz</supported_graphics_clock>
+				<supported_graphics_clock>835 MHz</supported_graphics_clock>
+				<supported_graphics_clock>822 MHz</supported_graphics_clock>
+				<supported_graphics_clock>810 MHz</supported_graphics_clock>
+				<supported_graphics_clock>797 MHz</supported_graphics_clock>
+				<supported_graphics_clock>784 MHz</supported_graphics_clock>
+				<supported_graphics_clock>772 MHz</supported_graphics_clock>
+				<supported_graphics_clock>759 MHz</supported_graphics_clock>
+				<supported_graphics_clock>746 MHz</supported_graphics_clock>
+				<supported_graphics_clock>734 MHz</supported_graphics_clock>
+				<supported_graphics_clock>721 MHz</supported_graphics_clock>
+				<supported_graphics_clock>708 MHz</supported_graphics_clock>
+				<supported_graphics_clock>696 MHz</supported_graphics_clock>
+				<supported_graphics_clock>683 MHz</supported_graphics_clock>
+				<supported_graphics_clock>670 MHz</supported_graphics_clock>
+				<supported_graphics_clock>658 MHz</supported_graphics_clock>
+				<supported_graphics_clock>645 MHz</supported_graphics_clock>
+				<supported_graphics_clock>632 MHz</supported_graphics_clock>
+				<supported_graphics_clock>620 MHz</supported_graphics_clock>
+				<supported_graphics_clock>607 MHz</supported_graphics_clock>
+				<supported_graphics_clock>594 MHz</supported_graphics_clock>
+				<supported_graphics_clock>582 MHz</supported_graphics_clock>
+				<supported_graphics_clock>569 MHz</supported_graphics_clock>
+				<supported_graphics_clock>556 MHz</supported_graphics_clock>
+				<supported_graphics_clock>544 MHz</supported_graphics_clock>
+			</supported_mem_clock>
+		</supported_clocks>
+		<processes>
+		</processes>
+		<accounted_processes>
+		</accounted_processes>
+	</gpu>
+
+	<gpu id="0000:82:00.0">
+		<product_name>Tesla P100-PCIE-12GB</product_name>
+		<product_brand>Tesla</product_brand>
+		<display_mode>Disabled</display_mode>
+		<display_active>Disabled</display_active>
+		<persistence_mode>Disabled</persistence_mode>
+		<accounting_mode>Disabled</accounting_mode>
+		<accounting_mode_buffer_size>1920</accounting_mode_buffer_size>
+		<driver_model>
+			<current_dm>N/A</current_dm>
+			<pending_dm>N/A</pending_dm>
+		</driver_model>
+		<serial>0320717031755</serial>
+		<uuid>GPU-46915a82-3fd2-8e11-ae26-a80b607c04f3</uuid>
+		<minor_number>1</minor_number>
+		<vbios_version>86.00.3A.00.02</vbios_version>
+		<multigpu_board>No</multigpu_board>
+		<board_id>0x8200</board_id>
+		<gpu_part_number>900-2H400-0110-030</gpu_part_number>
+		<inforom_version>
+			<img_version>H400.0202.00.01</img_version>
+			<oem_object>1.1</oem_object>
+			<ecc_object>4.1</ecc_object>
+			<pwr_object>N/A</pwr_object>
+		</inforom_version>
+		<gpu_operation_mode>
+			<current_gom>N/A</current_gom>
+			<pending_gom>N/A</pending_gom>
+		</gpu_operation_mode>
+		<gpu_virtualization_mode>
+			<virtualization_mode>None</virtualization_mode>
+		</gpu_virtualization_mode>
+		<pci>
+			<pci_bus>82</pci_bus>
+			<pci_device>00</pci_device>
+			<pci_domain>0000</pci_domain>
+			<pci_device_id>15F710DE</pci_device_id>
+			<pci_bus_id>0000:82:00.0</pci_bus_id>
+			<pci_sub_system_id>11DA10DE</pci_sub_system_id>
+			<pci_gpu_link_info>
+				<pcie_gen>
+					<max_link_gen>3</max_link_gen>
+					<current_link_gen>3</current_link_gen>
+				</pcie_gen>
+				<link_widths>
+					<max_link_width>16x</max_link_width>
+					<current_link_width>16x</current_link_width>
+				</link_widths>
+			</pci_gpu_link_info>
+			<pci_bridge_chip>
+				<bridge_chip_type>N/A</bridge_chip_type>
+				<bridge_chip_fw>N/A</bridge_chip_fw>
+			</pci_bridge_chip>
+			<replay_counter>0</replay_counter>
+			<tx_util>0 KB/s</tx_util>
+			<rx_util>0 KB/s</rx_util>
+		</pci>
+		<fan_speed>N/A</fan_speed>
+		<performance_state>P0</performance_state>
+		<clocks_throttle_reasons>
+			<clocks_throttle_reason_gpu_idle>Active</clocks_throttle_reason_gpu_idle>
+			<clocks_throttle_reason_applications_clocks_setting>Not Active</clocks_throttle_reason_applications_clocks_setting>
+			<clocks_throttle_reason_sw_power_cap>Not Active</clocks_throttle_reason_sw_power_cap>
+			<clocks_throttle_reason_hw_slowdown>Not Active</clocks_throttle_reason_hw_slowdown>
+			<clocks_throttle_reason_sync_boost>Not Active</clocks_throttle_reason_sync_boost>
+			<clocks_throttle_reason_unknown>Not Active</clocks_throttle_reason_unknown>
+		</clocks_throttle_reasons>
+		<fb_memory_usage>
+			<total>12193 MiB</total>
+			<used>0 MiB</used>
+			<free>12193 MiB</free>
+		</fb_memory_usage>
+		<bar1_memory_usage>
+			<total>16384 MiB</total>
+			<used>2 MiB</used>
+			<free>16382 MiB</free>
+		</bar1_memory_usage>
+		<compute_mode>Default</compute_mode>
+		<utilization>
+			<gpu_util>10.3 %</gpu_util>
+			<memory_util>0 %</memory_util>
+			<encoder_util>0 %</encoder_util>
+			<decoder_util>0 %</decoder_util>
+		</utilization>
+		<encoder_stats>
+			<session_count>0</session_count>
+			<average_fps>0</average_fps>
+			<average_latency>0 ms</average_latency>
+		</encoder_stats>
+		<ecc_mode>
+			<current_ecc>Enabled</current_ecc>
+			<pending_ecc>Enabled</pending_ecc>
+		</ecc_mode>
+		<ecc_errors>
+			<volatile>
+				<single_bit>
+					<device_memory>0</device_memory>
+					<register_file>0</register_file>
+					<l1_cache>N/A</l1_cache>
+					<l2_cache>0</l2_cache>
+					<texture_memory>0</texture_memory>
+					<texture_shm>0</texture_shm>
+					<total>0</total>
+				</single_bit>
+				<double_bit>
+					<device_memory>0</device_memory>
+					<register_file>0</register_file>
+					<l1_cache>N/A</l1_cache>
+					<l2_cache>0</l2_cache>
+					<texture_memory>0</texture_memory>
+					<texture_shm>0</texture_shm>
+					<total>0</total>
+				</double_bit>
+			</volatile>
+			<aggregate>
+				<single_bit>
+					<device_memory>0</device_memory>
+					<register_file>0</register_file>
+					<l1_cache>N/A</l1_cache>
+					<l2_cache>0</l2_cache>
+					<texture_memory>0</texture_memory>
+					<texture_shm>0</texture_shm>
+					<total>0</total>
+				</single_bit>
+				<double_bit>
+					<device_memory>0</device_memory>
+					<register_file>0</register_file>
+					<l1_cache>N/A</l1_cache>
+					<l2_cache>0</l2_cache>
+					<texture_memory>0</texture_memory>
+					<texture_shm>0</texture_shm>
+					<total>0</total>
+				</double_bit>
+			</aggregate>
+		</ecc_errors>
+		<retired_pages>
+			<multiple_single_bit_retirement>
+				<retired_count>0</retired_count>
+				<retired_page_addresses>
+				</retired_page_addresses>
+			</multiple_single_bit_retirement>
+			<double_bit_retirement>
+				<retired_count>0</retired_count>
+				<retired_page_addresses>
+				</retired_page_addresses>
+			</double_bit_retirement>
+			<pending_retirement>No</pending_retirement>
+		</retired_pages>
+		<temperature>
+			<gpu_temp>34 C</gpu_temp>
+			<gpu_temp_max_threshold>85 C</gpu_temp_max_threshold>
+			<gpu_temp_slow_threshold>82 C</gpu_temp_slow_threshold>
+		</temperature>
+		<power_readings>
+			<power_state>P0</power_state>
+			<power_management>Supported</power_management>
+			<power_draw>25.54 W</power_draw>
+			<power_limit>250.00 W</power_limit>
+			<default_power_limit>250.00 W</default_power_limit>
+			<enforced_power_limit>250.00 W</enforced_power_limit>
+			<min_power_limit>125.00 W</min_power_limit>
+			<max_power_limit>250.00 W</max_power_limit>
+		</power_readings>
+		<clocks>
+			<graphics_clock>405 MHz</graphics_clock>
+			<sm_clock>405 MHz</sm_clock>
+			<mem_clock>715 MHz</mem_clock>
+			<video_clock>835 MHz</video_clock>
+		</clocks>
+		<applications_clocks>
+			<graphics_clock>1189 MHz</graphics_clock>
+			<mem_clock>715 MHz</mem_clock>
+		</applications_clocks>
+		<default_applications_clocks>
+			<graphics_clock>1189 MHz</graphics_clock>
+			<mem_clock>715 MHz</mem_clock>
+		</default_applications_clocks>
+		<max_clocks>
+			<graphics_clock>1328 MHz</graphics_clock>
+			<sm_clock>1328 MHz</sm_clock>
+			<mem_clock>715 MHz</mem_clock>
+			<video_clock>1328 MHz</video_clock>
+		</max_clocks>
+		<clock_policy>
+			<auto_boost>N/A</auto_boost>
+			<auto_boost_default>N/A</auto_boost_default>
+		</clock_policy>
+		<supported_clocks>
+			<supported_mem_clock>
+				<value>715 MHz</value>
+				<supported_graphics_clock>1328 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1316 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1303 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1290 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1278 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1265 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1252 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1240 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1227 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1215 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1202 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1189 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1177 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1164 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1151 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1139 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1126 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1113 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1101 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1088 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1075 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1063 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1050 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1037 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1025 MHz</supported_graphics_clock>
+				<supported_graphics_clock>1012 MHz</supported_graphics_clock>
+				<supported_graphics_clock>999 MHz</supported_graphics_clock>
+				<supported_graphics_clock>987 MHz</supported_graphics_clock>
+				<supported_graphics_clock>974 MHz</supported_graphics_clock>
+				<supported_graphics_clock>961 MHz</supported_graphics_clock>
+				<supported_graphics_clock>949 MHz</supported_graphics_clock>
+				<supported_graphics_clock>936 MHz</supported_graphics_clock>
+				<supported_graphics_clock>923 MHz</supported_graphics_clock>
+				<supported_graphics_clock>911 MHz</supported_graphics_clock>
+				<supported_graphics_clock>898 MHz</supported_graphics_clock>
+				<supported_graphics_clock>885 MHz</supported_graphics_clock>
+				<supported_graphics_clock>873 MHz</supported_graphics_clock>
+				<supported_graphics_clock>860 MHz</supported_graphics_clock>
+				<supported_graphics_clock>847 MHz</supported_graphics_clock>
+				<supported_graphics_clock>835 MHz</supported_graphics_clock>
+				<supported_graphics_clock>822 MHz</supported_graphics_clock>
+				<supported_graphics_clock>810 MHz</supported_graphics_clock>
+				<supported_graphics_clock>797 MHz</supported_graphics_clock>
+				<supported_graphics_clock>784 MHz</supported_graphics_clock>
+				<supported_graphics_clock>772 MHz</supported_graphics_clock>
+				<supported_graphics_clock>759 MHz</supported_graphics_clock>
+				<supported_graphics_clock>746 MHz</supported_graphics_clock>
+				<supported_graphics_clock>734 MHz</supported_graphics_clock>
+				<supported_graphics_clock>721 MHz</supported_graphics_clock>
+				<supported_graphics_clock>708 MHz</supported_graphics_clock>
+				<supported_graphics_clock>696 MHz</supported_graphics_clock>
+				<supported_graphics_clock>683 MHz</supported_graphics_clock>
+				<supported_graphics_clock>670 MHz</supported_graphics_clock>
+				<supported_graphics_clock>658 MHz</supported_graphics_clock>
+				<supported_graphics_clock>645 MHz</supported_graphics_clock>
+				<supported_graphics_clock>632 MHz</supported_graphics_clock>
+				<supported_graphics_clock>620 MHz</supported_graphics_clock>
+				<supported_graphics_clock>607 MHz</supported_graphics_clock>
+				<supported_graphics_clock>594 MHz</supported_graphics_clock>
+				<supported_graphics_clock>582 MHz</supported_graphics_clock>
+				<supported_graphics_clock>569 MHz</supported_graphics_clock>
+				<supported_graphics_clock>556 MHz</supported_graphics_clock>
+				<supported_graphics_clock>544 MHz</supported_graphics_clock>
+			</supported_mem_clock>
+		</supported_clocks>
+		<processes>
+		</processes>
+		<accounted_processes>
+		</accounted_processes>
+	</gpu>
+
+</nvidia_smi_log>
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org