Posted to common-commits@hadoop.apache.org by ay...@apache.org on 2020/02/09 18:09:13 UTC

[hadoop] branch trunk updated: HDFS-15158. The number of failed volumes mismatch with volumeFailures of Datanode metrics. Contributed by Yang Yun.

This is an automated email from the ASF dual-hosted git repository.

ayushsaxena pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 6191d4b  HDFS-15158. The number of failed volumes mismatch with volumeFailures of Datanode metrics. Contributed by Yang Yun.
6191d4b is described below

commit 6191d4b4a0919863fda78e549ab6c60022e3ebc2
Author: Ayush Saxena <ay...@apache.org>
AuthorDate: Sun Feb 9 23:19:40 2020 +0530

    HDFS-15158. The number of failed volumes mismatch with volumeFailures of Datanode metrics. Contributed by Yang Yun.
---
 .../hadoop/hdfs/server/datanode/DataNode.java      | 12 +++++-----
 .../server/datanode/metrics/DataNodeMetrics.java   |  6 ++---
 .../server/datanode/TestDataNodeVolumeFailure.java | 26 ++++++++++++++++++++++
 3 files changed, 35 insertions(+), 9 deletions(-)

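In short: before this patch, the volume-failure handling path shown in the second DataNode.java hunk called metrics.incrVolumeFailures() with no argument, so the VolumeFailures counter advanced by exactly one per disk-error event even when several volumes failed in the same check. The patch threads the actual count (unhealthyVolumes.size()) through handleDiskError() into incrVolumeFailures(int). The following standalone sketch (toy names, not Hadoop code) illustrates the mismatch the JIRA title describes:

    // Illustrative sketch only (hypothetical names, not Hadoop code): a toy counter
    // showing why one increment per disk-error event undercounts when several
    // volumes fail in the same check.
    import java.util.Arrays;
    import java.util.List;

    public class VolumeFailureCountSketch {

      // Stand-in for the VolumeFailures metric.
      private static long volumeFailures = 0;

      // Old behavior: one increment per event, regardless of how many volumes failed.
      private static void handleDiskErrorOld(List<String> failedVolumes) {
        volumeFailures += 1;
      }

      // New behavior: increment by the number of volumes that actually failed.
      private static void handleDiskErrorNew(List<String> failedVolumes, int failedNumber) {
        volumeFailures += failedNumber;
      }

      public static void main(String[] args) {
        List<String> failed = Arrays.asList("/data/disk1", "/data/disk2");

        volumeFailures = 0;
        handleDiskErrorOld(failed);
        System.out.println("old counting: " + volumeFailures);   // prints 1, but 2 volumes failed

        volumeFailures = 0;
        handleDiskErrorNew(failed, failed.size());
        System.out.println("new counting: " + volumeFailures);   // prints 2, matching the failure count
      }
    }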
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
index ebf3e28..d390c1e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
@@ -2215,7 +2215,7 @@ public class DataNode extends ReconfigurableBase
         });
   }
 
-  private void handleDiskError(String failedVolumes) {
+  private void handleDiskError(String failedVolumes, int failedNumber) {
     final boolean hasEnoughResources = data.hasEnoughResource();
     LOG.warn("DataNode.handleDiskError on: " +
         "[{}] Keep Running: {}", failedVolumes, hasEnoughResources);
@@ -2224,7 +2224,7 @@ public class DataNode extends ReconfigurableBase
     // shutdown the DN completely.
     int dpError = hasEnoughResources ? DatanodeProtocol.DISK_ERROR  
                                      : DatanodeProtocol.FATAL_DISK_ERROR;  
-    metrics.incrVolumeFailures();
+    metrics.incrVolumeFailures(failedNumber);
 
     //inform NameNodes
     for(BPOfferService bpos: blockPoolManager.getAllNamenodeThreads()) {
@@ -3452,8 +3452,8 @@ public class DataNode extends ReconfigurableBase
     }
 
     data.handleVolumeFailures(unhealthyVolumes);
-    Set<StorageLocation> unhealthyLocations = new HashSet<>(
-        unhealthyVolumes.size());
+    int failedNumber = unhealthyVolumes.size();
+    Set<StorageLocation> unhealthyLocations = new HashSet<>(failedNumber);
 
     StringBuilder sb = new StringBuilder("DataNode failed volumes:");
     for (FsVolumeSpi vol : unhealthyVolumes) {
@@ -3468,8 +3468,8 @@ public class DataNode extends ReconfigurableBase
       LOG.warn("Error occurred when removing unhealthy storage dirs", e);
     }
     LOG.debug("{}", sb);
-      // send blockreport regarding volume failure
-    handleDiskError(sb.toString());
+    // send blockreport regarding volume failure
+    handleDiskError(sb.toString(), failedNumber);
   }
 
   /**
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
index ffd0b7b..68eaf72 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
@@ -399,9 +399,9 @@ public class DataNodeMetrics {
       remoteBytesRead.incr(size);
     }
   }
-  
-  public void incrVolumeFailures() {
-    volumeFailures.incr();
+
+  public void incrVolumeFailures(int size) {
+    volumeFailures.incr(size);
   }
 
   public void incrDatanodeNetworkErrors() {
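For context on the one-line metrics change above: the VolumeFailures field in DataNodeMetrics is, going by the usual DataNodeMetrics declarations (the field itself is not visible in this hunk), a metrics2 MutableCounterLong, which offers both a no-argument incr() and an incr(long delta). A minimal, self-contained sketch of the two call paths, using the public metrics2 classes directly:

    // Hedged sketch: constructs a MutableCounterLong by hand to show the
    // incr() vs. incr(delta) semantics the patch switches between. In the real
    // DataNodeMetrics the counter is created via the @Metric annotation, not manually.
    import org.apache.hadoop.metrics2.lib.Interns;
    import org.apache.hadoop.metrics2.lib.MutableCounterLong;

    public class CounterDeltaSketch {
      public static void main(String[] args) {
        MutableCounterLong volumeFailures = new MutableCounterLong(
            Interns.info("VolumeFailures", "Number of failed volumes"), 0L);

        volumeFailures.incr();      // old call path: always +1 per disk-error event
        volumeFailures.incr(2);     // new call path: +N when N volumes fail together
        System.out.println(volumeFailures.value());   // 3
      }
    }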
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
index c452751..c6f4f01 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hdfs.server.datanode;
 
+import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
+import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
 import static org.apache.hadoop.test.PlatformAssumptions.assumeNotWindows;
 import static org.hamcrest.core.Is.is;
 import static org.junit.Assert.assertEquals;
@@ -77,6 +79,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.test.GenericTestUtils;
@@ -947,4 +950,27 @@ public class TestDataNodeVolumeFailure {
       }
     }, 10, 30 * 1000);
   }
+
+  /*
+   * Fail two volumes, and check the metrics of VolumeFailures
+   */
+  @Test
+  public void testVolumeFailureTwo() throws Exception {
+    // fail two volumes
+    data_fail = cluster.getInstanceStorageDir(1, 0);
+    failedDir = MiniDFSCluster.getFinalizedDir(data_fail,
+            cluster.getNamesystem().getBlockPoolId());
+    failedDir.setReadOnly();
+    data_fail = cluster.getInstanceStorageDir(1, 1);
+    failedDir = MiniDFSCluster.getFinalizedDir(data_fail,
+            cluster.getNamesystem().getBlockPoolId());
+    failedDir.setReadOnly();
+
+    final DataNode dn = cluster.getDataNodes().get(1);
+    dn.checkDiskError();
+
+    MetricsRecordBuilder rb = getMetrics(dn.getMetrics().name());
+    long volumeFailures = getLongCounter("VolumeFailures", rb);
+    assertEquals(2, volumeFailures);
+  }
 }
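As a usage note, the assertion pattern the new test introduces (getMetrics() on the DataNode's metrics record name plus getLongCounter("VolumeFailures", ...)) is reusable wherever a test needs to check this counter. A small helper sketched from exactly those imports; the class name, helper name, and expected value below are illustrative, not part of the patch, and the helper assumes it is placed where DataNode#getMetrics() is accessible, as it is in the test above:

    // Reuses only the helpers the test above already imports; everything else
    // (class name, helper name, expected value) is illustrative.
    import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
    import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
    import static org.junit.Assert.assertEquals;

    import org.apache.hadoop.hdfs.server.datanode.DataNode;
    import org.apache.hadoop.metrics2.MetricsRecordBuilder;

    public final class VolumeFailuresAssert {
      private VolumeFailuresAssert() {}

      // Fails the calling test unless the DataNode's VolumeFailures counter equals 'expected'.
      public static void assertVolumeFailures(DataNode dn, long expected) {
        MetricsRecordBuilder rb = getMetrics(dn.getMetrics().name());
        assertEquals(expected, getLongCounter("VolumeFailures", rb));
      }
    }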


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org