You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ji...@apache.org on 2021/02/24 06:30:06 UTC

[hadoop] branch trunk updated: HDFS-15781. Add metrics for how blocks are moved in replaceBlock. (#2704)

This is an automated email from the ASF dual-hosted git repository.

jing9 pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 940c780  HDFS-15781. Add metrics for how blocks are moved in replaceBlock. (#2704)
940c780 is described below

commit 940c780feb293ba84c15b8b085dcc6de953fc93d
Author: LeonGao <li...@uber.com>
AuthorDate: Tue Feb 23 22:29:48 2021 -0800

    HDFS-15781. Add metrics for how blocks are moved in replaceBlock. (#2704)
---
 .../hadoop/hdfs/server/datanode/DataXceiver.java   |  2 +
 .../datanode/fsdataset/impl/FsDatasetImpl.java     |  4 ++
 .../server/datanode/metrics/DataNodeMetrics.java   | 22 +++++++
 .../apache/hadoop/hdfs/server/mover/TestMover.java | 75 ++++++++++++++++++----
 4 files changed, 91 insertions(+), 12 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java
index d371361..278ee09 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java
@@ -1250,6 +1250,8 @@ class DataXceiver extends Receiver implements Runnable {
         
         LOG.info("Moved {} from {}, delHint={}",
             block, peer.getRemoteAddressString(), delHint);
+
+        datanode.metrics.incrReplaceBlockOpToOtherHost();
       }
     } catch (IOException ioe) {
       opStatus = ERROR;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
index f8a97d7..edf2edc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
@@ -1121,6 +1121,10 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
     }
     try {
       moveBlock(block, replicaInfo, volumeRef, useVolumeOnSameMount);
+      datanode.getMetrics().incrReplaceBlockOpOnSameHost();
+      if (useVolumeOnSameMount) {
+        datanode.getMetrics().incrReplaceBlockOpOnSameMount();
+      }
     } finally {
       if (volumeRef != null) {
         volumeRef.close();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
index 9350d95..712d8f4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
@@ -188,6 +188,15 @@ public class DataNodeMetrics {
   @Metric MutableCounterLong packetsSlowWriteToDisk;
   @Metric MutableCounterLong packetsSlowWriteToOsCache;
 
+  @Metric("Number of replaceBlock ops between" +
+      " storage types on same host with local copy")
+  private MutableCounterLong replaceBlockOpOnSameHost;
+  @Metric("Number of replaceBlock ops between" +
+      " storage types on same disk mount with same disk tiering feature")
+  private MutableCounterLong replaceBlockOpOnSameMount;
+  @Metric("Number of replaceBlock ops to another node")
+  private MutableCounterLong replaceBlockOpToOtherHost;
+
   final MetricsRegistry registry = new MetricsRegistry("datanode");
   @Metric("Milliseconds spent on calling NN rpc")
   private MutableRatesWithAggregation
@@ -711,4 +720,17 @@ public class DataNodeMetrics {
   public void incrPacketsSlowWriteToOsCache() {
     packetsSlowWriteToOsCache.incr();
   }
+
+  public void incrReplaceBlockOpOnSameMount() {
+    replaceBlockOpOnSameMount.incr();
+  }
+
+  public void incrReplaceBlockOpOnSameHost() {
+    replaceBlockOpOnSameHost.incr();
+  }
+
+  public void incrReplaceBlockOpToOtherHost() {
+    replaceBlockOpToOtherHost.incr();
+  }
+
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java
index 481c7cf..f428b2c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java
@@ -34,6 +34,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_KEYTAB_FILE_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SERVER_HTTPS_KEYSTORE_RESOURCE_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_WEB_AUTHENTICATION_KERBEROS_PRINCIPAL_KEY;
 import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_DATA_TRANSFER_PROTECTION_KEY;
+import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
+import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
 
 import java.io.File;
 import java.io.IOException;
@@ -83,6 +85,7 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
 import org.apache.hadoop.hdfs.server.mover.Mover.MLocation;
 import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
 import org.apache.hadoop.http.HttpConfig;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
 import org.apache.hadoop.minikdc.MiniKdc;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -170,14 +173,26 @@ public class TestMover {
     }
   }
 
-  private void testWithinSameNode(Configuration conf) throws Exception {
-    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
-        .numDataNodes(3)
-        .storageTypes(
-            new StorageType[] {StorageType.DISK, StorageType.ARCHIVE})
-        .build();
+  private void testMovementWithLocalityOption(Configuration conf,
+      boolean sameNode) throws Exception {
+    final MiniDFSCluster cluster;
+    if (sameNode) {
+      cluster = new MiniDFSCluster.Builder(conf)
+          .numDataNodes(3)
+          .storageTypes(
+              new StorageType[] {StorageType.DISK, StorageType.ARCHIVE})
+          .build();
+    } else {
+      cluster = new MiniDFSCluster.Builder(conf)
+          .numDataNodes(2)
+          .storageTypes(
+              new StorageType[][] {{StorageType.DISK}, {StorageType.ARCHIVE}})
+          .build();
+    }
+
     try {
       cluster.waitActive();
+
       final DistributedFileSystem dfs = cluster.getFileSystem();
       final String file = "/testScheduleWithinSameNode/file";
       Path dir = new Path("/testScheduleWithinSameNode");
@@ -201,12 +216,37 @@ public class TestMover {
       Assert.assertEquals("Movement to ARCHIVE should be successful", 0, rc);
 
       // Wait till namenode notified about the block location details
-      waitForLocatedBlockWithArchiveStorageType(dfs, file, 3);
+      waitForLocatedBlockWithArchiveStorageType(dfs, file, sameNode ? 3 : 1);
+
+      MetricsRecordBuilder rb =
+          getMetrics(cluster.getDataNodes().get(1).getMetrics().name());
+
+      if (!sameNode) {
+        testReplaceBlockOpLocalityMetrics(0, 0, 1, rb);
+      } else if (conf.getBoolean(
+          DFSConfigKeys.DFS_DATANODE_ALLOW_SAME_DISK_TIERING, false)) {
+        testReplaceBlockOpLocalityMetrics(1, 1, 0, rb);
+      } else {
+        testReplaceBlockOpLocalityMetrics(1, 0, 0, rb);
+      }
     } finally {
       cluster.shutdown();
     }
   }
 
+  private void testReplaceBlockOpLocalityMetrics(
+      long sameHost,
+      long sameMount,
+      long otherHost,
+      MetricsRecordBuilder rb) {
+    assertCounter("ReplaceBlockOpOnSameHost",
+        sameHost, rb);
+    assertCounter("ReplaceBlockOpOnSameMount",
+        sameMount, rb);
+    assertCounter("ReplaceBlockOpToOtherHost",
+        otherHost, rb);
+  }
+
   private void setupStoragePoliciesAndPaths(DistributedFileSystem dfs1,
                                            DistributedFileSystem dfs2,
                                             Path dir, String file)
@@ -441,17 +481,27 @@ public class TestMover {
     }, 100, 3000);
   }
 
+  /**
+   * Test block movement with different block locality scenarios.
+   * 1) Block will be copied within the local host,
+   *    if the target storage type is available on the same datanode.
+   * 2) Block will be moved within the local mount using a hardlink,
+   *    if disk/archive share a mount and same-disk-tiering is enabled.
+   * 3) Block will be moved to another datanode,
+   *    if the target storage type is not available on the local datanode.
+   */
   @Test
-  public void testScheduleBlockWithinSameNode() throws Exception {
+  public void testScheduleBlockLocality() throws Exception {
     final Configuration conf = new HdfsConfiguration();
     initConf(conf);
-    testWithinSameNode(conf);
+    testMovementWithLocalityOption(conf, true);
     // Test movement with hardlink, when same disk tiering is enabled.
     conf.setBoolean(DFSConfigKeys.DFS_DATANODE_ALLOW_SAME_DISK_TIERING, true);
     conf.setDouble(DFSConfigKeys
         .DFS_DATANODE_RESERVE_FOR_ARCHIVE_DEFAULT_PERCENTAGE, 0.5);
-    testWithinSameNode(conf);
+    testMovementWithLocalityOption(conf, true);
     conf.setBoolean(DFSConfigKeys.DFS_DATANODE_ALLOW_SAME_DISK_TIERING, false);
+    testMovementWithLocalityOption(conf, false);
   }
 
   private void checkMovePaths(List<Path> actual, Path... expected) {
@@ -1006,7 +1056,8 @@ public class TestMover {
 
   /**
    * Test Mover runs fine when logging in with a keytab in kerberized env.
-   * Reusing testWithinSameNode here for basic functionality testing.
+   * Reusing testMovementWithLocalityOption
+   * here for basic functionality testing.
    */
   @Test(timeout = 300000)
   public void testMoverWithKeytabs() throws Exception {
@@ -1020,7 +1071,7 @@ public class TestMover {
         @Override
         public Void run() throws Exception {
           // verify that mover runs Ok.
-          testWithinSameNode(conf);
+          testMovementWithLocalityOption(conf, true);
           // verify that UGI was logged in using keytab.
           Assert.assertTrue(UserGroupInformation.isLoginKeytabBased());
           return null;


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org