Posted to common-commits@hadoop.apache.org by ay...@apache.org on 2020/02/09 18:09:13 UTC
[hadoop] branch trunk updated: HDFS-15158. The number of failed volumes mismatch with volumeFailures of Datanode metrics. Contributed by Yang Yun.
This is an automated email from the ASF dual-hosted git repository.
ayushsaxena pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new 6191d4b HDFS-15158. The number of failed volumes mismatch with volumeFailures of Datanode metrics. Contributed by Yang Yun.
6191d4b is described below
commit 6191d4b4a0919863fda78e549ab6c60022e3ebc2
Author: Ayush Saxena <ay...@apache.org>
AuthorDate: Sun Feb 9 23:19:40 2020 +0530
HDFS-15158. The number of failed volumes mismatch with volumeFailures of Datanode metrics. Contributed by Yang Yun.
---
.../hadoop/hdfs/server/datanode/DataNode.java | 12 +++++-----
.../server/datanode/metrics/DataNodeMetrics.java | 6 ++---
.../server/datanode/TestDataNodeVolumeFailure.java | 26 ++++++++++++++++++++++
3 files changed, 35 insertions(+), 9 deletions(-)
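For context: before this change, a single disk-check pass that took several volumes offline still advanced the VolumeFailures counter by only one, so the metric could lag behind the real number of failed volumes. The patch threads the failed-volume count through handleDiskError() and into the metric. Below is a minimal, self-contained sketch of the corrected accounting (plain Java; FailedVolumeCounter and the volume paths are illustrative names, not DataNode code):

    import java.util.HashSet;
    import java.util.Set;

    // Illustrative stand-in for the DataNode's VolumeFailures accounting.
    // Only the counting pattern mirrors the patch; all names are made up.
    public class FailedVolumeCounter {
      private long volumeFailures = 0;

      // Old behavior: one disk-check pass bumped the counter by exactly 1,
      // no matter how many volumes it removed.
      void handleDiskErrorOld(Set<String> unhealthyVolumes) {
        volumeFailures += 1;
      }

      // New behavior: the counter advances by the number of failed volumes.
      void handleDiskError(Set<String> unhealthyVolumes) {
        volumeFailures += unhealthyVolumes.size();
      }

      public static void main(String[] args) {
        FailedVolumeCounter counter = new FailedVolumeCounter();
        Set<String> failed = new HashSet<>();
        failed.add("/data/1");
        failed.add("/data/2");
        counter.handleDiskError(failed);
        System.out.println(counter.volumeFailures);  // prints 2
      }
    }
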
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
index ebf3e28..d390c1e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
@@ -2215,7 +2215,7 @@ public class DataNode extends ReconfigurableBase
});
}
- private void handleDiskError(String failedVolumes) {
+ private void handleDiskError(String failedVolumes, int failedNumber) {
final boolean hasEnoughResources = data.hasEnoughResource();
LOG.warn("DataNode.handleDiskError on: " +
"[{}] Keep Running: {}", failedVolumes, hasEnoughResources);
@@ -2224,7 +2224,7 @@ public class DataNode extends ReconfigurableBase
// shutdown the DN completely.
int dpError = hasEnoughResources ? DatanodeProtocol.DISK_ERROR
: DatanodeProtocol.FATAL_DISK_ERROR;
- metrics.incrVolumeFailures();
+ metrics.incrVolumeFailures(failedNumber);
//inform NameNodes
for(BPOfferService bpos: blockPoolManager.getAllNamenodeThreads()) {
@@ -3452,8 +3452,8 @@ public class DataNode extends ReconfigurableBase
}
data.handleVolumeFailures(unhealthyVolumes);
- Set<StorageLocation> unhealthyLocations = new HashSet<>(
- unhealthyVolumes.size());
+ int failedNumber = unhealthyVolumes.size();
+ Set<StorageLocation> unhealthyLocations = new HashSet<>(failedNumber);
StringBuilder sb = new StringBuilder("DataNode failed volumes:");
for (FsVolumeSpi vol : unhealthyVolumes) {
@@ -3468,8 +3468,8 @@ public class DataNode extends ReconfigurableBase
LOG.warn("Error occurred when removing unhealthy storage dirs", e);
}
LOG.debug("{}", sb);
- // send blockreport regarding volume failure
- handleDiskError(sb.toString());
+ // send blockreport regarding volume failure
+ handleDiskError(sb.toString(), failedNumber);
}
/**
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
index ffd0b7b..68eaf72 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
@@ -399,9 +399,9 @@ public class DataNodeMetrics {
remoteBytesRead.incr(size);
}
}
-
- public void incrVolumeFailures() {
- volumeFailures.incr();
+
+ public void incrVolumeFailures(int size) {
+ volumeFailures.incr(size);
}
public void incrDatanodeNetworkErrors() {
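The new incrVolumeFailures(int) overload simply forwards the delta to the counter; the underlying volumeFailures field in DataNodeMetrics is a MutableCounterLong, whose incr(long delta) adds the given amount. The sketch below shows that behavior in isolation (it assumes hadoop-common's metrics2 library on the classpath; VolumeFailuresCounterDemo is an illustrative class, not part of the patch):

    import org.apache.hadoop.metrics2.lib.Interns;
    import org.apache.hadoop.metrics2.lib.MutableCounterLong;

    // Demonstrates that incr(delta) adds the delta, which is what
    // incrVolumeFailures(int size) now relies on when several volumes
    // fail in one disk-check pass.
    public class VolumeFailuresCounterDemo {
      public static void main(String[] args) {
        MutableCounterLong volumeFailures = new MutableCounterLong(
            Interns.info("VolumeFailures", "Number of failed volumes"), 0L);
        volumeFailures.incr(2);  // two volumes failed in one pass
        volumeFailures.incr();   // a later single-volume failure
        System.out.println(volumeFailures.value());  // prints 3
      }
    }
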
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
index c452751..c6f4f01 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hdfs.server.datanode;
+import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
+import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
import static org.apache.hadoop.test.PlatformAssumptions.assumeNotWindows;
import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertEquals;
@@ -77,6 +79,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.test.GenericTestUtils;
@@ -947,4 +950,27 @@ public class TestDataNodeVolumeFailure {
}
}, 10, 30 * 1000);
}
+
+ /*
+ * Fail two volumes, and check the metrics of VolumeFailures
+ */
+ @Test
+ public void testVolumeFailureTwo() throws Exception {
+ // fail two volumes
+ data_fail = cluster.getInstanceStorageDir(1, 0);
+ failedDir = MiniDFSCluster.getFinalizedDir(data_fail,
+ cluster.getNamesystem().getBlockPoolId());
+ failedDir.setReadOnly();
+ data_fail = cluster.getInstanceStorageDir(1, 1);
+ failedDir = MiniDFSCluster.getFinalizedDir(data_fail,
+ cluster.getNamesystem().getBlockPoolId());
+ failedDir.setReadOnly();
+
+ final DataNode dn = cluster.getDataNodes().get(1);
+ dn.checkDiskError();
+
+ MetricsRecordBuilder rb = getMetrics(dn.getMetrics().name());
+ long volumeFailures = getLongCounter("VolumeFailures", rb);
+ assertEquals(2, volumeFailures);
+ }
}
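
The new test fails two storage directories on the same DataNode, triggers checkDiskError(), and then asserts that the VolumeFailures counter reads 2 rather than 1. To run just this case, a standard Maven/Surefire invocation from the source tree should work (assuming the usual hadoop-hdfs build setup):

    mvn test -pl hadoop-hdfs-project/hadoop-hdfs -Dtest=TestDataNodeVolumeFailure#testVolumeFailureTwo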