Posted to common-commits@hadoop.apache.org by so...@apache.org on 2020/06/05 10:33:33 UTC
[hadoop] branch branch-3.3 updated: HDFS-15386. ReplicaNotFoundException keeps happening in DN after removing multiple DN's data directories (#2052)
This is an automated email from the ASF dual-hosted git repository.
sodonnell pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new ec8f371 HDFS-15386. ReplicaNotFoundException keeps happening in DN after removing multiple DN's data directories (#2052)
ec8f371 is described below
commit ec8f3714e0eb3e088c0fd9bef35b7630faa2ca7a
Author: Toshihiro Suzuki <br...@gmail.com>
AuthorDate: Fri Jun 5 19:11:49 2020 +0900
HDFS-15386. ReplicaNotFoundException keeps happening in DN after removing multiple DN's data directories (#2052)
Contributed by Toshihiro Suzuki.
(cherry picked from commit 545a0a147c5256c44911ba57b4898e01d786d836)
---
.../datanode/fsdataset/impl/FsDatasetImpl.java | 5 +-
.../datanode/fsdataset/impl/TestFsDatasetImpl.java | 101 +++++++++++++++++++--
2 files changed, 95 insertions(+), 11 deletions(-)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
index 39152fc..5833cb1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
@@ -575,7 +575,8 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
// Unlike updating the volumeMap in addVolume(), this operation does
// not scan disks.
for (String bpid : volumeMap.getBlockPoolList()) {
- List<ReplicaInfo> blocks = new ArrayList<>();
+ List<ReplicaInfo> blocks = blkToInvalidate
+ .computeIfAbsent(bpid, (k) -> new ArrayList<>());
for (Iterator<ReplicaInfo> it =
volumeMap.replicas(bpid).iterator(); it.hasNext();) {
ReplicaInfo block = it.next();
@@ -588,9 +589,7 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
it.remove();
}
}
- blkToInvalidate.put(bpid, blocks);
}
-
storageToRemove.add(sd.getStorageUuid());
storageLocationsToRemove.remove(sdLocation);
}
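
For readers skimming the diff above: before this patch, removeVolumes() built a fresh list of blocks for each block pool on each removed volume and stored it with put(), so when several volumes were removed in one call the list from a later volume replaced the one built for an earlier volume. The replicas on the earlier volumes were dropped from the volumeMap but never reported to the NameNode for invalidation, which then kept directing clients to the DataNode and produced ReplicaNotFoundException. The following is a minimal, self-contained sketch (simplified types and a single hypothetical block pool id "bp1", not the actual FsDatasetImpl code) of the put() versus computeIfAbsent() behaviour the fix relies on:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class InvalidateListSketch {
  public static void main(String[] args) {
    // Blocks found on each of two removed volumes, all in block pool "bp1".
    List<List<Long>> blocksPerVolume = Arrays.asList(
        Arrays.asList(1L, 2L),
        Arrays.asList(3L, 4L));

    Map<String, List<Long>> oldBehaviour = new HashMap<>();
    Map<String, List<Long>> fixedBehaviour = new HashMap<>();

    for (List<Long> volBlocks : blocksPerVolume) {
      // Before the patch: a fresh list per volume stored with put(),
      // so the second volume's list replaces the first volume's entry.
      oldBehaviour.put("bp1", new ArrayList<>(volBlocks));

      // After the patch: computeIfAbsent() keeps one list per block pool,
      // so blocks from every removed volume accumulate.
      fixedBehaviour.computeIfAbsent("bp1", k -> new ArrayList<>())
          .addAll(volBlocks);
    }

    System.out.println(oldBehaviour.get("bp1"));   // [3, 4]
    System.out.println(fixedBehaviour.get("bp1")); // [1, 2, 3, 4]
  }
}

With the accumulated per-block-pool list, the subsequent invalidation pass covers replicas from every removed volume, which is exactly what the new testRemoveTwoVolumes test below asserts via the notification count.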
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
index 29e533c..273feb0 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
@@ -20,7 +20,6 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl;
import com.google.common.base.Supplier;
import com.google.common.collect.Lists;
-import java.io.FileInputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
@@ -106,6 +105,8 @@ import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import org.slf4j.Logger;
@@ -268,16 +269,24 @@ public class TestFsDatasetImpl {
}
@Test(timeout = 30000)
- public void testRemoveVolumes() throws IOException {
+ public void testRemoveOneVolume() throws IOException {
// Feed FsDataset with block metadata.
- final int NUM_BLOCKS = 100;
- for (int i = 0; i < NUM_BLOCKS; i++) {
- String bpid = BLOCK_POOL_IDS[NUM_BLOCKS % BLOCK_POOL_IDS.length];
+ final int numBlocks = 100;
+ for (int i = 0; i < numBlocks; i++) {
+ String bpid = BLOCK_POOL_IDS[numBlocks % BLOCK_POOL_IDS.length];
ExtendedBlock eb = new ExtendedBlock(bpid, i);
- try (ReplicaHandler replica =
- dataset.createRbw(StorageType.DEFAULT, null, eb, false)) {
+ ReplicaHandler replica = null;
+ try {
+ replica = dataset.createRbw(StorageType.DEFAULT, null, eb,
+ false);
+ } finally {
+ if (replica != null) {
+ replica.close();
+ }
}
}
+
+ // Remove one volume
final String[] dataDirs =
conf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY).split(",");
final String volumePathToRemove = dataDirs[0];
@@ -300,6 +309,11 @@ public class TestFsDatasetImpl {
assertEquals("The volume has been removed from the storageMap.",
expectedNumVolumes, dataset.storageMap.size());
+ // DataNode.notifyNamenodeDeletedBlock() should be called 50 times
+ // as we deleted one volume that has 50 blocks
+ verify(datanode, times(50))
+ .notifyNamenodeDeletedBlock(any(), any());
+
try {
dataset.asyncDiskService.execute(volumeToRemove,
new Runnable() {
@@ -317,10 +331,81 @@ public class TestFsDatasetImpl {
totalNumReplicas += dataset.volumeMap.size(bpid);
}
assertEquals("The replica infos on this volume has been removed from the "
- + "volumeMap.", NUM_BLOCKS / NUM_INIT_VOLUMES,
+ + "volumeMap.", numBlocks / NUM_INIT_VOLUMES,
totalNumReplicas);
}
+ @Test(timeout = 30000)
+ public void testRemoveTwoVolumes() throws IOException {
+ // Feed FsDataset with block metadata.
+ final int numBlocks = 100;
+ for (int i = 0; i < numBlocks; i++) {
+ String bpid = BLOCK_POOL_IDS[numBlocks % BLOCK_POOL_IDS.length];
+ ExtendedBlock eb = new ExtendedBlock(bpid, i);
+ ReplicaHandler replica = null;
+ try {
+ replica = dataset.createRbw(StorageType.DEFAULT, null, eb,
+ false);
+ } finally {
+ if (replica != null) {
+ replica.close();
+ }
+ }
+ }
+
+ // Remove two volumes
+ final String[] dataDirs =
+ conf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY).split(",");
+ Set<StorageLocation> volumesToRemove = new HashSet<>();
+ volumesToRemove.add(StorageLocation.parse(dataDirs[0]));
+ volumesToRemove.add(StorageLocation.parse(dataDirs[1]));
+
+ FsVolumeReferences volReferences = dataset.getFsVolumeReferences();
+ Set<FsVolumeImpl> volumes = new HashSet<>();
+ for (FsVolumeSpi vol: volReferences) {
+ for (StorageLocation volume : volumesToRemove) {
+ if (vol.getStorageLocation().equals(volume)) {
+ volumes.add((FsVolumeImpl) vol);
+ }
+ }
+ }
+ assertEquals(2, volumes.size());
+ volReferences.close();
+
+ dataset.removeVolumes(volumesToRemove, true);
+ int expectedNumVolumes = dataDirs.length - 2;
+ assertEquals("The volume has been removed from the volumeList.",
+ expectedNumVolumes, getNumVolumes());
+ assertEquals("The volume has been removed from the storageMap.",
+ expectedNumVolumes, dataset.storageMap.size());
+
+ // DataNode.notifyNamenodeDeletedBlock() should be called 100 times
+ // as we deleted 2 volumes that have 100 blocks totally
+ verify(datanode, times(100))
+ .notifyNamenodeDeletedBlock(any(), any());
+
+ for (FsVolumeImpl volume : volumes) {
+ try {
+ dataset.asyncDiskService.execute(volume,
+ new Runnable() {
+ @Override
+ public void run() {}
+ });
+ fail("Expect RuntimeException: the volume has been removed from the "
+ + "AsyncDiskService.");
+ } catch (RuntimeException e) {
+ GenericTestUtils.assertExceptionContains("Cannot find volume", e);
+ }
+ }
+
+ int totalNumReplicas = 0;
+ for (String bpid : dataset.volumeMap.getBlockPoolList()) {
+ totalNumReplicas += dataset.volumeMap.size(bpid);
+ }
+ assertEquals("The replica infos on this volume has been removed from the "
+ + "volumeMap.", 0, totalNumReplicas);
+ }
+
@Test(timeout = 5000)
public void testRemoveNewlyAddedVolume() throws IOException {
final int numExistingVolumes = getNumVolumes();
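
The new tests check the NameNode-notification count with Mockito's verify()/times(). As a generic illustration of that pattern only (a toy mock of java.util.List standing in for the mocked DataNode, not the Hadoop test fixture):

import static org.mockito.Mockito.anyString;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;

import java.util.List;

public class VerifyCountSketch {
  @SuppressWarnings("unchecked")
  public static void main(String[] args) {
    // Stand-in collaborator; in the tests above the mocked object is the DataNode.
    List<String> listener = (List<String>) mock(List.class);

    listener.add("blk_1");
    listener.add("blk_2");

    // Passes only if add(..) was invoked exactly twice, mirroring
    // verify(datanode, times(50)).notifyNamenodeDeletedBlock(any(), any()).
    verify(listener, times(2)).add(anyString());
  }
}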