You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by xi...@apache.org on 2018/09/07 21:59:09 UTC

hadoop git commit: HDFS-13820. Add an ability to disable CacheReplicationMonitor. Contributed by Hrishikesh Gadre.

Repository: hadoop
Updated Branches:
  refs/heads/trunk 16333782c -> 335a8139f


HDFS-13820. Add an ability to disable CacheReplicationMonitor. Contributed by Hrishikesh Gadre.

Signed-off-by: Xiao Chen <xi...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/335a8139
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/335a8139
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/335a8139

Branch: refs/heads/trunk
Commit: 335a8139f5b9004414b2942eeac5a008283a6f75
Parents: 1633378
Author: Hrishikesh Gadre <hg...@apache.org>
Authored: Fri Sep 7 14:55:22 2018 -0700
Committer: Xiao Chen <xi...@apache.org>
Committed: Fri Sep 7 14:59:06 2018 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hdfs/DFSConfigKeys.java   |  3 ++
 .../hdfs/server/namenode/CacheManager.java      | 42 ++++++++++++++++-
 .../src/main/resources/hdfs-default.xml         | 11 +++++
 .../site/markdown/CentralizedCacheManagement.md |  5 ++
 .../server/namenode/TestCacheDirectives.java    | 49 ++++++++++++++++++++
 5 files changed, 108 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/335a8139/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index bd88341..a7e7b9b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -390,6 +390,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final String  DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS =
       "dfs.namenode.path.based.cache.refresh.interval.ms";
   public static final long    DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT = 30000L;
+  public static final String  DFS_NAMENODE_CACHING_ENABLED_KEY =
+      "dfs.namenode.caching.enabled";
+  public static final boolean DFS_NAMENODE_CACHING_ENABLED_DEFAULT = true;
 
   /** Pending period of block deletion since NameNode startup */
   public static final String  DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_SEC_KEY = "dfs.namenode.startup.delay.block.deletion.sec";

http://git-wip-us.apache.org/repos/asf/hadoop/blob/335a8139/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java
index ab026f0..8a29492 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java
@@ -25,6 +25,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_POOLS
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_DEFAULT;
 
 import java.io.DataInput;
 import java.io.DataOutputStream;
@@ -172,6 +174,21 @@ public class CacheManager {
   private final SerializerCompat serializerCompat = new SerializerCompat();
 
   /**
+   * Whether caching is enabled.
+   *
+   * If caching is disabled, we will not process cache reports or store
+   * information about what is cached where.  We also do not start the
+   * CacheReplicationMonitor thread.  This will save resources, but provide
+   * less functionality.
+   *
+   * Even when caching is disabled, we still store path-based cache
+   * information.  This information is stored in the edit log and fsimage.  We
+   * don't want to lose it just because a configuration setting was turned off.
+   * However, we will not act on this information if caching is disabled.
+   */
+  private final boolean enabled;
+
+  /**
    * The CacheReplicationMonitor.
    */
   private CacheReplicationMonitor monitor;
@@ -194,6 +211,8 @@ public class CacheManager {
     this.namesystem = namesystem;
     this.blockManager = blockManager;
     this.nextDirectiveId = 1;
+    this.enabled = conf.getBoolean(DFS_NAMENODE_CACHING_ENABLED_KEY,
+        DFS_NAMENODE_CACHING_ENABLED_DEFAULT);
     this.maxListCachePoolsResponses = conf.getInt(
         DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES,
         DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES_DEFAULT);
@@ -211,10 +230,13 @@ public class CacheManager {
         DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT);
       cachedBlocksPercent = MIN_CACHED_BLOCKS_PERCENT;
     }
-    this.cachedBlocks = new LightWeightGSet<CachedBlock, CachedBlock>(
+    this.cachedBlocks = enabled ? new LightWeightGSet<CachedBlock, CachedBlock>(
           LightWeightGSet.computeCapacity(cachedBlocksPercent,
-              "cachedBlocks"));
+              "cachedBlocks")) : new LightWeightGSet<>(0);
+  }
 
+  public boolean isEnabled() {
+    return enabled;
   }
 
   /**
@@ -229,6 +251,12 @@ public class CacheManager {
   }
 
   public void startMonitorThread() {
+    if (!isEnabled()) {
+      LOG.info("Not starting CacheReplicationMonitor as name-node caching" +
+              " is disabled.");
+      return;
+    }
+
     crmLock.lock();
     try {
       if (this.monitor == null) {
@@ -242,6 +270,10 @@ public class CacheManager {
   }
 
   public void stopMonitorThread() {
+    if (!isEnabled()) {
+      return;
+    }
+
     crmLock.lock();
     try {
       if (this.monitor != null) {
@@ -945,6 +977,12 @@ public class CacheManager {
 
   public final void processCacheReport(final DatanodeID datanodeID,
       final List<Long> blockIds) throws IOException {
+    if (!enabled) {
+      LOG.debug("Ignoring cache report from {} because {} = false. " +
+              "number of blocks: {}", datanodeID,
+              DFS_NAMENODE_CACHING_ENABLED_KEY, blockIds.size());
+      return;
+    }
     namesystem.writeLock();
     final long startTime = Time.monotonicNow();
     final long endTime;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/335a8139/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index 48ff329..5f115ec 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -2469,6 +2469,17 @@
 </property>
 
 <property>
+  <name>dfs.namenode.caching.enabled</name>
+  <value>true</value>
+  <description>
+    Set to true to enable block caching.  This flag enables the NameNode to
+    maintain a mapping of cached blocks to DataNodes via processing DataNode
+    cache reports.  Based on these reports and addition and removal of caching
+    directives, the NameNode will schedule caching and uncaching work.
+  </description>
+</property>
+
+<property>
   <name>dfs.namenode.path.based.cache.block.map.allocation.percent</name>
   <value>0.25</value>
   <description>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/335a8139/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/CentralizedCacheManagement.md
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/CentralizedCacheManagement.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/CentralizedCacheManagement.md
index f2de043..7568949 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/CentralizedCacheManagement.md
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/CentralizedCacheManagement.md
@@ -238,6 +238,11 @@ The following properties are not required, but may be specified for tuning:
 
     The percentage of the Java heap which we will allocate to the cached blocks map. The cached blocks map is a hash map which uses chained hashing. Smaller maps may be accessed more slowly if the number of cached blocks is large; larger maps will consume more memory. The default is 0.25 percent.
 
+*   dfs.namenode.caching.enabled
+
+    This parameter can be used to enable/disable the centralized caching in NameNode. When centralized caching is disabled, NameNode will not process cache reports or store information about block cache locations on the cluster. Note that NameNode will continue to store the path-based cache locations in the file-system metadata, even though it will not act on this information until caching is enabled. The default value for this parameter is true (i.e. centralized caching is enabled).
+
+
 ### OS Limits
 
 If you get the error "Cannot start datanode because the configured max locked memory size... is more than the datanode's available RLIMIT\_MEMLOCK ulimit," that means that the operating system is imposing a lower limit on the amount of memory that you can lock than what you have configured. To fix this, you must adjust the ulimit -l value that the DataNode runs with. Usually, this value is configured in `/etc/security/limits.conf`. However, it will vary depending on what operating system and distribution you are using.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/335a8139/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java
index fa1d3d4..b3e91e5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java
@@ -22,6 +22,7 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY;
 import static org.apache.hadoop.hdfs.protocol.CachePoolInfo.RELATIVE_EXPIRY_NEVER;
 import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains;
 import static org.junit.Assert.assertEquals;
@@ -1556,4 +1557,52 @@ public class TestCacheDirectives {
     cm.setCachedLocations(locations);
     Mockito.verifyZeroInteractions(locations);
   }
+
+  @Test(timeout=120000)
+  public void testAddingCacheDirectiveInfosWhenCachingIsDisabled()
+          throws Exception {
+    cluster.shutdown();
+    HdfsConfiguration config = createCachingConf();
+    config.setBoolean(DFS_NAMENODE_CACHING_ENABLED_KEY, false);
+    cluster = new MiniDFSCluster.Builder(config)
+            .numDataNodes(NUM_DATANODES).build();
+
+    cluster.waitActive();
+    dfs = cluster.getFileSystem();
+    namenode = cluster.getNameNode();
+    CacheManager cacheManager = namenode.getNamesystem().getCacheManager();
+    assertFalse(cacheManager.isEnabled());
+    assertNull(cacheManager.getCacheReplicationMonitor());
+    // Create the pool
+    String pool = "pool1";
+    namenode.getRpcServer().addCachePool(new CachePoolInfo(pool));
+    // Create some test files
+    final int numFiles = 2;
+    final int numBlocksPerFile = 2;
+    final List<String> paths = new ArrayList<String>(numFiles);
+    for (int i=0; i<numFiles; i++) {
+      Path p = new Path("/testCachePaths-" + i);
+      FileSystemTestHelper.createFile(dfs, p, numBlocksPerFile,
+              (int)BLOCK_SIZE);
+      paths.add(p.toUri().getPath());
+    }
+    // Check the initial statistics at the namenode
+    waitForCachedBlocks(namenode, 0, 0,
+            "testAddingCacheDirectiveInfosWhenCachingIsDisabled:0");
+    // Cache and check each path in sequence
+    int expected = 0;
+    for (int i=0; i<numFiles; i++) {
+      CacheDirectiveInfo directive =
+              new CacheDirectiveInfo.Builder().
+                      setPath(new Path(paths.get(i))).
+                      setPool(pool).
+                      build();
+      dfs.addCacheDirective(directive);
+      waitForCachedBlocks(namenode, expected, 0,
+              "testAddingCacheDirectiveInfosWhenCachingIsDisabled:1");
+    }
+    Thread.sleep(20000);
+    waitForCachedBlocks(namenode, expected, 0,
+            "testAddingCacheDirectiveInfosWhenCachingIsDisabled:2");
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org