You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ay...@apache.org on 2020/12/15 05:58:36 UTC
[hadoop] 02/02: HDFS-15200. Delete Corrupt Replica Immediately
Irrespective of Replicas On Stale Storage. Contributed by Ayush Saxena.
This is an automated email from the ASF dual-hosted git repository.
ayushsaxena pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hadoop.git
commit 9e50a1082b84b8c06c4abe33ca490f25111ea023
Author: Ayush Saxena <ay...@apache.org>
AuthorDate: Tue Dec 15 11:05:17 2020 +0530
HDFS-15200. Delete Corrupt Replica Immediately Irrespective of Replicas On Stale Storage. Contributed by Ayush Saxena.
---
.../java/org/apache/hadoop/hdfs/DFSConfigKeys.java | 7 ++++
.../hdfs/server/blockmanagement/BlockManager.java | 11 ++++++-
.../src/main/resources/hdfs-default.xml | 9 +++++
.../server/blockmanagement/TestBlockManager.java | 38 +++++++++++++++++++++-
.../TestCorruptionWithFailover.java | 4 +++
.../hadoop/hdfs/server/namenode/TestFsck.java | 3 ++
6 files changed, 70 insertions(+), 2 deletions(-)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index 208dff5..0f39a89 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -269,6 +269,13 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
= "dfs.namenode.blockreport.queue.size";
public static final int DFS_NAMENODE_BLOCKREPORT_QUEUE_SIZE_DEFAULT
= 1024;
+
+ public static final String
+ DFS_NAMENODE_CORRUPT_BLOCK_DELETE_IMMEDIATELY_ENABLED =
+ "dfs.namenode.corrupt.block.delete.immediately.enabled";
+ public static final boolean
+ DFS_NAMENODE_CORRUPT_BLOCK_DELETE_IMMEDIATELY_ENABLED_DEFAULT = true;
+
public static final String DFS_WEBHDFS_AUTHENTICATION_FILTER_KEY = "dfs.web.authentication.filter";
/* Phrased as below to avoid javac inlining as a constant, to match the behavior when
this was AuthFilter.class.getName(). Note that if you change the import for AuthFilter, you
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index c128666..5507ff9 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -447,6 +447,11 @@ public class BlockManager implements BlockStatsMXBean {
* from ENTERING_MAINTENANCE to IN_MAINTENANCE.
*/
private final short minReplicationToBeInMaintenance;
+ /**
+ * Whether to delete corrupt replica immediately irrespective of other
+ * replicas available on stale storages.
+ */
+ private final boolean deleteCorruptReplicaImmediately;
/** Storages accessible from multiple DNs. */
private final ProvidedStorageMap providedStorageMap;
@@ -597,6 +602,10 @@ public class BlockManager implements BlockStatsMXBean {
DFSConfigKeys.DFS_NAMENODE_BLOCKREPORT_QUEUE_SIZE_DEFAULT);
blockReportThread = new BlockReportProcessingThread(queueSize);
+ this.deleteCorruptReplicaImmediately =
+ conf.getBoolean(DFS_NAMENODE_CORRUPT_BLOCK_DELETE_IMMEDIATELY_ENABLED,
+ DFS_NAMENODE_CORRUPT_BLOCK_DELETE_IMMEDIATELY_ENABLED_DEFAULT);
+
LOG.info("defaultReplication = {}", defaultReplication);
LOG.info("maxReplication = {}", maxReplication);
LOG.info("minReplication = {}", minReplication);
@@ -1851,7 +1860,7 @@ public class BlockManager implements BlockStatsMXBean {
}
// Check how many copies we have of the block
- if (nr.replicasOnStaleNodes() > 0) {
+ if (nr.replicasOnStaleNodes() > 0 && !deleteCorruptReplicaImmediately) {
blockLog.debug("BLOCK* invalidateBlocks: postponing " +
"invalidation of {} on {} because {} replica(s) are located on " +
"nodes with potentially out-of-date block reports", b, dn,
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index ce2a70e..9935663 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -5247,6 +5247,15 @@
</property>
<property>
+ <name>dfs.namenode.corrupt.block.delete.immediately.enabled</name>
+ <value>true</value>
+ <description>
+ Whether the corrupt replicas should be deleted immediately, irrespective
+ of other replicas on stale storages.
+ </description>
+ </property>
+
+ <property>
<name>dfs.journalnode.edits.dir.perm</name>
<value>700</value>
<description>
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
index 6adbb95..db774bd 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
@@ -23,6 +23,7 @@ import com.google.common.collect.LinkedListMultimap;
import com.google.common.collect.Lists;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import org.apache.hadoop.hdfs.protocol.SystemErasureCodingPolicies;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CreateFlag;
@@ -50,6 +51,7 @@ import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.BlockTar
import org.apache.hadoop.hdfs.server.common.blockaliasmap.BlockAliasMap;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
+import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
import org.apache.hadoop.hdfs.server.datanode.FinalizedReplica;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.TestProvidedImpl;
import org.apache.hadoop.hdfs.server.datanode.InternalDataNodeTestUtils;
@@ -502,7 +504,41 @@ public class TestBlockManager {
}
}
}
-
+
+ @Test(timeout = 60000)
+ public void testDeleteCorruptReplicaWithStatleStorages() throws Exception {
+ Configuration conf = new HdfsConfiguration();
+ conf.setInt(HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.
+ MIN_REPLICATION, 2);
+ Path file = new Path("/test-file");
+ MiniDFSCluster cluster =
+ new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+ try {
+ cluster.waitActive();
+ BlockManager blockManager = cluster.getNamesystem().getBlockManager();
+ blockManager.getDatanodeManager().markAllDatanodesStale();
+ FileSystem fs = cluster.getFileSystem();
+ FSDataOutputStream out = fs.create(file);
+ for (int i = 0; i < 1024 * 1024 * 1; i++) {
+ out.write(i);
+ }
+ out.hflush();
+ MiniDFSCluster.DataNodeProperties datanode = cluster.stopDataNode(0);
+ for (int i = 0; i < 1024 * 1024 * 1; i++) {
+ out.write(i);
+ }
+ out.close();
+ cluster.restartDataNode(datanode);
+ cluster.triggerBlockReports();
+ DataNodeTestUtils.triggerBlockReport(datanode.getDatanode());
+ assertEquals(0, blockManager.getCorruptBlocks());
+ } finally {
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+ }
+
/**
* Tell the block manager that replication is completed for the given
* pipeline.
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptionWithFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptionWithFailover.java
index f5899c0..06e4f60 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptionWithFailover.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptionWithFailover.java
@@ -27,6 +27,8 @@ import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.Test;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CORRUPT_BLOCK_DELETE_IMMEDIATELY_ENABLED;
+
/**
* Tests corruption of replicas in case of failover.
*/
@@ -35,6 +37,8 @@ public class TestCorruptionWithFailover {
@Test
public void testCorruptReplicaAfterFailover() throws Exception {
Configuration conf = new Configuration();
+ conf.setBoolean(DFS_NAMENODE_CORRUPT_BLOCK_DELETE_IMMEDIATELY_ENABLED,
+ false);
// Enable data to be written, to less replicas in case of pipeline failure.
conf.setInt(HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.
MIN_REPLICATION, 2);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
index 9a03daf..fa654e9 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hdfs.server.namenode;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CORRUPT_BLOCK_DELETE_IMMEDIATELY_ENABLED;
import static org.apache.hadoop.hdfs.MiniDFSCluster.HDFS_MINIDFS_BASEDIR;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@@ -187,6 +188,8 @@ public class TestFsck {
@Before
public void setUp() throws Exception {
conf = new Configuration();
+ conf.setBoolean(DFS_NAMENODE_CORRUPT_BLOCK_DELETE_IMMEDIATELY_ENABLED,
+ false);
}
@After
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org