Posted to common-commits@hadoop.apache.org by wa...@apache.org on 2014/11/19 07:19:51 UTC
[1/2] hadoop git commit: HDFS-7225. Remove stale block invalidation work when DN re-registers with different UUID. (Zhe Zhang and Andrew Wang)
Repository: hadoop
Updated Branches:
refs/heads/trunk 79301e80d -> 5bd048e83
HDFS-7225. Remove stale block invalidation work when DN re-registers with different UUID. (Zhe Zhang and Andrew Wang)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/406c09ad
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/406c09ad
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/406c09ad
Branch: refs/heads/trunk
Commit: 406c09ad1150c4971c2b7675fcb0263d40517fbf
Parents: 79301e8
Author: Andrew Wang <wa...@apache.org>
Authored: Tue Nov 18 22:14:04 2014 -0800
Committer: Andrew Wang <wa...@apache.org>
Committed: Tue Nov 18 22:14:04 2014 -0800
----------------------------------------------------------------------
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +
.../server/blockmanagement/BlockManager.java | 21 ++-
.../server/blockmanagement/DatanodeManager.java | 2 +
.../TestComputeInvalidateWork.java | 167 +++++++++++++++----
4 files changed, 156 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
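Background for the diffs that follow: the NameNode tracks blocks scheduled for deletion per datanode, keyed by the node's identity. When a DataNode is reformatted it re-registers under a fresh storage UUID, so work queued under the old UUID can never be matched to a live node again and would sit in the queue indefinitely. Below is a minimal, self-contained model of that failure mode and of the two-pronged fix: eager cleanup on re-registration plus a lazy guard at dispatch time. This is illustrative only; StaleQueueDemo is hypothetical and the real InvalidateBlocks class is structured differently.

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.UUID;

public class StaleQueueDemo {
  // datanode UUID -> block ids scheduled for deletion on that node
  private final Map<String, Set<Long>> pendingDeletions = new HashMap<>();
  private final Set<String> liveUuids = new HashSet<>();

  void addToInvalidates(String dnUuid, long blockId) {
    pendingDeletions.computeIfAbsent(dnUuid, k -> new HashSet<>()).add(blockId);
  }

  // Eager path (analogous to wipeDatanode): on re-registration with a new
  // UUID, forget everything queued under the old identity.
  void reRegister(String oldUuid, String newUuid) {
    liveUuids.remove(oldUuid);
    pendingDeletions.remove(oldUuid);   // the HDFS-7225 cleanup
    liveUuids.add(newUuid);
  }

  // Lazy backstop (analogous to the computeInvalidateWork guard): if the
  // node is unknown at dispatch time, drop its stale work instead of
  // rescheduling it forever.
  int dispatchWork(String dnUuid) {
    if (!liveUuids.contains(dnUuid)) {
      pendingDeletions.remove(dnUuid);
      return 0;
    }
    Set<Long> work = pendingDeletions.remove(dnUuid);
    return work == null ? 0 : work.size();
  }

  public static void main(String[] args) {
    StaleQueueDemo demo = new StaleQueueDemo();
    String oldUuid = UUID.randomUUID().toString();
    demo.liveUuids.add(oldUuid);
    demo.addToInvalidates(oldUuid, 42L);
    demo.reRegister(oldUuid, UUID.randomUUID().toString());
    // Run with -ea: the stale entry is gone rather than stuck forever.
    assert demo.dispatchWork(oldUuid) == 0;
    assert !demo.pendingDeletions.containsKey(oldUuid);
  }
}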
http://git-wip-us.apache.org/repos/asf/hadoop/blob/406c09ad/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index f34ca74..f444721 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -453,6 +453,9 @@ Release 2.7.0 - UNRELEASED
HDFS-7406. SimpleHttpProxyHandler puts incorrect "Connection: Close"
header. (wheat9)
+ HDFS-7225. Remove stale block invalidation work when DN re-registers with
+ different UUID. (Zhe Zhang and Andrew Wang)
+
Release 2.6.0 - 2014-11-18
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/406c09ad/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index b8dcd88..574abcc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -1112,6 +1112,18 @@ public class BlockManager {
}
/**
+ * Remove all block invalidation tasks under this datanode UUID;
+ * used when a datanode registers with a new UUID and the old one
+ * is wiped.
+ */
+ void removeFromInvalidates(final DatanodeInfo datanode) {
+ if (!namesystem.isPopulatingReplQueues()) {
+ return;
+ }
+ invalidateBlocks.remove(datanode);
+ }
+
+ /**
* Mark the block belonging to datanode as corrupt
* @param blk Block to be marked as corrupt
* @param dn Datanode which holds the corrupt replica
@@ -3382,7 +3394,14 @@ public class BlockManager {
return 0;
}
try {
- toInvalidate = invalidateBlocks.invalidateWork(datanodeManager.getDatanode(dn));
+ DatanodeDescriptor dnDescriptor = datanodeManager.getDatanode(dn);
+ if (dnDescriptor == null) {
+ LOG.warn("DataNode " + dn + " cannot be found with UUID " +
+ dn.getDatanodeUuid() + ", removing block invalidation work.");
+ invalidateBlocks.remove(dn);
+ return 0;
+ }
+ toInvalidate = invalidateBlocks.invalidateWork(dnDescriptor);
if (toInvalidate == null) {
return 0;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/406c09ad/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
index d19aad7..0780ba3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
@@ -600,6 +600,8 @@ public class DatanodeManager {
synchronized (datanodeMap) {
host2DatanodeMap.remove(datanodeMap.remove(key));
}
+ // Also remove all block invalidation tasks under this node
+ blockManager.removeFromInvalidates(new DatanodeInfo(node));
if (LOG.isDebugEnabled()) {
LOG.debug(getClass().getSimpleName() + ".wipeDatanode("
+ node + "): storage " + key
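Taken together with the BlockManager hunk above, the cleanup is two-layered: wipeDatanode() clears the queue eagerly when a node re-registers under a different storage ID, and the null-descriptor guard catches any other path where the descriptor has vanished between scheduling and dispatch. A condensed view of the two cooperating pieces, paraphrased from the diffs above (trimmed; not the literal Hadoop source):

// DatanodeManager.wipeDatanode(): eager cleanup on re-registration.
synchronized (datanodeMap) {
  host2DatanodeMap.remove(datanodeMap.remove(key));
}
blockManager.removeFromInvalidates(new DatanodeInfo(node));  // new in HDFS-7225

// BlockManager, when dispatching deletion work: lazy backstop.
DatanodeDescriptor dnDescriptor = datanodeManager.getDatanode(dn);
if (dnDescriptor == null) {
  invalidateBlocks.remove(dn);   // registration is gone; drop stale work
  return 0;
}
toInvalidate = invalidateBlocks.invalidateWork(dnDescriptor);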
http://git-wip-us.apache.org/repos/asf/hadoop/blob/406c09ad/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestComputeInvalidateWork.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestComputeInvalidateWork.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestComputeInvalidateWork.java
index d0edd48..fecca4e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestComputeInvalidateWork.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestComputeInvalidateWork.java
@@ -17,66 +17,161 @@
*/
package org.apache.hadoop.hdfs.server.blockmanagement;
+import java.util.UUID;
+
import static org.junit.Assert.assertEquals;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
+import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys;
import org.apache.hadoop.hdfs.server.common.GenerationStamp;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
+import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
+import org.apache.hadoop.util.VersionInfo;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
import org.junit.Test;
+import org.mockito.internal.util.reflection.Whitebox;
/**
* Test if FSNamesystem handles heartbeat right
*/
public class TestComputeInvalidateWork {
+
+ private Configuration conf;
+ private final int NUM_OF_DATANODES = 3;
+ private MiniDFSCluster cluster;
+ private FSNamesystem namesystem;
+ private BlockManager bm;
+ private DatanodeDescriptor[] nodes;
+
+ @Before
+ public void setup() throws Exception {
+ conf = new HdfsConfiguration();
+ cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_OF_DATANODES)
+ .build();
+ cluster.waitActive();
+ namesystem = cluster.getNamesystem();
+ bm = namesystem.getBlockManager();
+ nodes = bm.getDatanodeManager().getHeartbeatManager().getDatanodes();
+ assertEquals(nodes.length, NUM_OF_DATANODES);
+ }
+
+ @After
+ public void teardown() throws Exception {
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+
/**
- * Test if {@link FSNamesystem#computeInvalidateWork(int)}
+ * Test if {@link BlockManager#computeInvalidateWork(int)}
* can schedule invalidate work correctly
*/
- @Test
+ @Test(timeout=120000)
public void testCompInvalidate() throws Exception {
- final Configuration conf = new HdfsConfiguration();
- final int NUM_OF_DATANODES = 3;
- final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_OF_DATANODES).build();
+ final int blockInvalidateLimit = bm.getDatanodeManager()
+ .blockInvalidateLimit;
+ namesystem.writeLock();
try {
- cluster.waitActive();
- final FSNamesystem namesystem = cluster.getNamesystem();
- final BlockManager bm = namesystem.getBlockManager();
- final int blockInvalidateLimit = bm.getDatanodeManager().blockInvalidateLimit;
- final DatanodeDescriptor[] nodes = bm.getDatanodeManager(
- ).getHeartbeatManager().getDatanodes();
- assertEquals(nodes.length, NUM_OF_DATANODES);
+ for (int i=0; i<nodes.length; i++) {
+ for(int j=0; j<3*blockInvalidateLimit+1; j++) {
+ Block block = new Block(i*(blockInvalidateLimit+1)+j, 0,
+ GenerationStamp.LAST_RESERVED_STAMP);
+ bm.addToInvalidates(block, nodes[i]);
+ }
+ }
+ assertEquals(blockInvalidateLimit*NUM_OF_DATANODES,
+ bm.computeInvalidateWork(NUM_OF_DATANODES+1));
+ assertEquals(blockInvalidateLimit*NUM_OF_DATANODES,
+ bm.computeInvalidateWork(NUM_OF_DATANODES));
+ assertEquals(blockInvalidateLimit*(NUM_OF_DATANODES-1),
+ bm.computeInvalidateWork(NUM_OF_DATANODES-1));
+ int workCount = bm.computeInvalidateWork(1);
+ if (workCount == 1) {
+ assertEquals(blockInvalidateLimit+1, bm.computeInvalidateWork(2));
+ } else {
+ assertEquals(workCount, blockInvalidateLimit);
+ assertEquals(2, bm.computeInvalidateWork(2));
+ }
+ } finally {
+ namesystem.writeUnlock();
+ }
+ }
+
+ /**
+ * Reformatted DataNodes will replace the original UUID in the
+ * {@link DatanodeManager#datanodeMap}. This tests if block
+ * invalidation work on the original DataNode can be skipped.
+ */
+ @Test(timeout=120000)
+ public void testDatanodeReformat() throws Exception {
+ namesystem.writeLock();
+ try {
+ Block block = new Block(0, 0, GenerationStamp.LAST_RESERVED_STAMP);
+ bm.addToInvalidates(block, nodes[0]);
+ // Change the datanode UUID to emulate a reformat
+ nodes[0].setDatanodeUuidForTesting("fortesting");
+ // Since UUID has changed, the invalidation work should be skipped
+ assertEquals(0, bm.computeInvalidateWork(1));
+ assertEquals(0, bm.getPendingDeletionBlocksCount());
+ } finally {
+ namesystem.writeUnlock();
+ }
+ }
+
+ @Test(timeout=120000)
+ public void testDatanodeReRegistration() throws Exception {
+ // Create a test file
+ final DistributedFileSystem dfs = cluster.getFileSystem();
+ final Path path = new Path("/testRR");
+ // Create a file and shutdown the DNs, which populates InvalidateBlocks
+ DFSTestUtil.createFile(dfs, path, dfs.getDefaultBlockSize(),
+ (short) NUM_OF_DATANODES, 0xED0ED0);
+ for (DataNode dn : cluster.getDataNodes()) {
+ dn.shutdown();
+ }
+ dfs.delete(path, false);
+ namesystem.writeLock();
+ InvalidateBlocks invalidateBlocks;
+ int expected = NUM_OF_DATANODES;
+ try {
+ invalidateBlocks = (InvalidateBlocks) Whitebox
+ .getInternalState(cluster.getNamesystem().getBlockManager(),
+ "invalidateBlocks");
+ assertEquals("Expected invalidate blocks to be the number of DNs",
+ (long) expected, invalidateBlocks.numBlocks());
+ } finally {
+ namesystem.writeUnlock();
+ }
+ // Re-register each DN and see that it wipes the invalidation work
+ for (DataNode dn : cluster.getDataNodes()) {
+ DatanodeID did = dn.getDatanodeId();
+ did.setDatanodeUuidForTesting(UUID.randomUUID().toString());
+ DatanodeRegistration reg = new DatanodeRegistration(did,
+ new StorageInfo(HdfsServerConstants.NodeType.DATA_NODE),
+ new ExportedBlockKeys(),
+ VersionInfo.getVersion());
namesystem.writeLock();
try {
- for (int i=0; i<nodes.length; i++) {
- for(int j=0; j<3*blockInvalidateLimit+1; j++) {
- Block block = new Block(i*(blockInvalidateLimit+1)+j, 0,
- GenerationStamp.LAST_RESERVED_STAMP);
- bm.addToInvalidates(block, nodes[i]);
- }
- }
-
- assertEquals(blockInvalidateLimit*NUM_OF_DATANODES,
- bm.computeInvalidateWork(NUM_OF_DATANODES+1));
- assertEquals(blockInvalidateLimit*NUM_OF_DATANODES,
- bm.computeInvalidateWork(NUM_OF_DATANODES));
- assertEquals(blockInvalidateLimit*(NUM_OF_DATANODES-1),
- bm.computeInvalidateWork(NUM_OF_DATANODES-1));
- int workCount = bm.computeInvalidateWork(1);
- if (workCount == 1) {
- assertEquals(blockInvalidateLimit+1, bm.computeInvalidateWork(2));
- } else {
- assertEquals(workCount, blockInvalidateLimit);
- assertEquals(2, bm.computeInvalidateWork(2));
- }
+ bm.getDatanodeManager().registerDatanode(reg);
+ expected--;
+ assertEquals("Expected number of invalidate blocks to decrease",
+ (long) expected, invalidateBlocks.numBlocks());
} finally {
- namesystem.writeUnlock();
+ namesystem.writeUnlock();
}
- } finally {
- cluster.shutdown();
}
}
}
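A note on the test technique: testDatanodeReRegistration reads BlockManager's private invalidateBlocks field through Mockito's Whitebox reflection helper. A stand-alone illustration of that pattern follows; the Widget class here is made up, while Whitebox.getInternalState is the real (albeit internal) Mockito 1.x API, later removed in Mockito 2.

import org.mockito.internal.util.reflection.Whitebox;

public class WhiteboxDemo {
  // Hypothetical class whose private state a test wants to assert on.
  static class Widget {
    private int counter = 7;
  }

  public static void main(String[] args) {
    Widget w = new Widget();
    // Reads the private field reflectively; no getter needed.
    int counter = (Integer) Whitebox.getInternalState(w, "counter");
    assert counter == 7;   // run with -ea
  }
}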
[2/2] hadoop git commit: HDFS-7373. Allow decommissioning of dead DataNodes. Contributed by Zhe Zhang.
Posted by wa...@apache.org.
HDFS-7373. Allow decommissioning of dead DataNodes. Contributed by Zhe Zhang.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/5bd048e8
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/5bd048e8
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/5bd048e8
Branch: refs/heads/trunk
Commit: 5bd048e8378034b496bacc73b470a25d855aceb1
Parents: 406c09a
Author: Andrew Wang <wa...@apache.org>
Authored: Tue Nov 18 22:16:58 2014 -0800
Committer: Andrew Wang <wa...@apache.org>
Committed: Tue Nov 18 22:16:58 2014 -0800
----------------------------------------------------------------------
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 ++
.../server/blockmanagement/DatanodeManager.java | 28 +++++++++--------
.../org/apache/hadoop/hdfs/DFSTestUtil.java | 20 +++++++++++-
.../hdfs/server/namenode/TestDeadDatanode.java | 32 ++------------------
.../namenode/TestDecommissioningStatus.java | 32 ++++++++++++++++++++
5 files changed, 72 insertions(+), 42 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/5bd048e8/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index f444721..ca2b9f6 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -456,6 +456,8 @@ Release 2.7.0 - UNRELEASED
HDFS-7225. Remove stale block invalidation work when DN re-registers with
different UUID. (Zhe Zhang and Andrew Wang)
+ HDFS-7373. Allow decommissioning of dead DataNodes. (Zhe Zhang)
+
Release 2.6.0 - 2014-11-18
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/5bd048e8/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
index 0780ba3..356a4a3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
@@ -845,16 +845,21 @@ public class DatanodeManager {
@InterfaceAudience.Private
@VisibleForTesting
public void startDecommission(DatanodeDescriptor node) {
- if (!node.isDecommissionInProgress() && !node.isDecommissioned()) {
- for (DatanodeStorageInfo storage : node.getStorageInfos()) {
- LOG.info("Start Decommissioning " + node + " " + storage
- + " with " + storage.numBlocks() + " blocks");
+ if (!node.isDecommissionInProgress()) {
+ if (!node.isAlive) {
+ LOG.info("Dead node " + node + " is decommissioned immediately.");
+ node.setDecommissioned();
+ } else if (!node.isDecommissioned()) {
+ for (DatanodeStorageInfo storage : node.getStorageInfos()) {
+ LOG.info("Start Decommissioning " + node + " " + storage
+ + " with " + storage.numBlocks() + " blocks");
+ }
+ heartbeatManager.startDecommission(node);
+ node.decommissioningStatus.setStartTime(now());
+
+ // all the blocks that reside on this node have to be replicated.
+ checkDecommissionState(node);
}
- heartbeatManager.startDecommission(node);
- node.decommissioningStatus.setStartTime(now());
-
- // all the blocks that reside on this node have to be replicated.
- checkDecommissionState(node);
}
}
@@ -1009,14 +1014,13 @@ public class DatanodeManager {
// register new datanode
addDatanode(nodeDescr);
- checkDecommissioning(nodeDescr);
-
// also treat the registration message as a heartbeat
// no need to update its timestamp
// because its is done when the descriptor is created
heartbeatManager.addDatanode(nodeDescr);
- success = true;
incrementVersionCount(nodeReg.getSoftwareVersion());
+ checkDecommissioning(nodeDescr);
+ success = true;
} finally {
if (!success) {
removeDatanode(nodeDescr);
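Two behavioral changes sit in the hunks above. First, startDecommission() now short-circuits a dead node straight to DECOMMISSIONED: a node that is not alive can never report decommissioning progress, and its blocks are already handled by the regular dead-node re-replication path. Second, registerDatanode() defers checkDecommissioning() until after the heartbeat manager knows about the node, so liveness is up to date when the decommission state is evaluated. The new decision flow, condensed from the first hunk (a sketch, not the literal source):

public void startDecommission(DatanodeDescriptor node) {
  if (node.isDecommissionInProgress()) {
    return;                        // already underway, nothing to do
  }
  if (!node.isAlive) {
    node.setDecommissioned();      // dead node: mark done immediately
  } else if (!node.isDecommissioned()) {
    heartbeatManager.startDecommission(node);
    node.decommissioningStatus.setStartTime(now());
    checkDecommissionState(node);  // replicate every block it holds
  }
}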
http://git-wip-us.apache.org/repos/asf/hadoop/blob/5bd048e8/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
index cea2e82..4e85311 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
@@ -25,7 +25,6 @@ import com.google.common.base.Supplier;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
import org.apache.commons.io.FileUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -1612,4 +1611,23 @@ public class DFSTestUtil {
LayoutVersion.updateMap(DataNodeLayoutVersion.FEATURES,
new LayoutVersion.LayoutFeature[] { feature });
}
+
+ /**
+ * Wait for datanode to reach alive or dead state for waitTime given in
+ * milliseconds.
+ */
+ public static void waitForDatanodeState(
+ final MiniDFSCluster cluster, final String nodeID,
+ final boolean alive, int waitTime)
+ throws TimeoutException, InterruptedException {
+ GenericTestUtils.waitFor(new Supplier<Boolean>() {
+ @Override
+ public Boolean get() {
+ FSNamesystem namesystem = cluster.getNamesystem();
+ final DatanodeDescriptor dd = BlockManagerTestUtil.getDatanode(
+ namesystem, nodeID);
+ return (dd.isAlive == alive);
+ }
+ }, 100, waitTime);
+ }
}
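The new helper replaces a hand-rolled polling loop in TestDeadDatanode (deleted in the next file) with GenericTestUtils.waitFor. A typical call, assuming cluster and dnID come from a MiniDFSCluster test (this mirrors its use in TestDecommissioningStatus below):

// Block until the NameNode marks the datanode dead, failing after 30s.
DFSTestUtil.waitForDatanodeState(cluster, dnID.getDatanodeUuid(),
    false /* alive */, 30000);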
http://git-wip-us.apache.org/repos/asf/hadoop/blob/5bd048e8/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java
index 6c4bb16..71bf124 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java
@@ -21,17 +21,15 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import java.io.IOException;
-import java.util.concurrent.TimeoutException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.Block;
-import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
-import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
@@ -43,7 +41,6 @@ import org.apache.hadoop.hdfs.server.protocol.RegisterCommand;
import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
-import org.apache.hadoop.util.Time;
import org.junit.After;
import org.junit.Test;
@@ -61,29 +58,6 @@ public class TestDeadDatanode {
}
/**
- * wait for datanode to reach alive or dead state for waitTime given in
- * milliseconds.
- */
- private void waitForDatanodeState(String nodeID, boolean alive, int waitTime)
- throws TimeoutException, InterruptedException {
- long stopTime = Time.now() + waitTime;
- FSNamesystem namesystem = cluster.getNamesystem();
- String state = alive ? "alive" : "dead";
- while (Time.now() < stopTime) {
- final DatanodeDescriptor dd = BlockManagerTestUtil.getDatanode(
- namesystem, nodeID);
- if (dd.isAlive == alive) {
- LOG.info("datanode " + nodeID + " is " + state);
- return;
- }
- LOG.info("Waiting for datanode " + nodeID + " to become " + state);
- Thread.sleep(1000);
- }
- throw new TimeoutException("Timedout waiting for datanode reach state "
- + state);
- }
-
- /**
* Test to ensure namenode rejects request from dead datanode
* - Start a cluster
* - Shutdown the datanode and wait for it to be marked dead at the namenode
@@ -104,11 +78,11 @@ public class TestDeadDatanode {
DatanodeRegistration reg =
DataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(0), poolId);
- waitForDatanodeState(reg.getDatanodeUuid(), true, 20000);
+ DFSTestUtil.waitForDatanodeState(cluster, reg.getDatanodeUuid(), true, 20000);
// Shutdown and wait for datanode to be marked dead
dn.shutdown();
- waitForDatanodeState(reg.getDatanodeUuid(), false, 20000);
+ DFSTestUtil.waitForDatanodeState(cluster, reg.getDatanodeUuid(), false, 20000);
DatanodeProtocol dnp = cluster.getNameNodeRpc();
http://git-wip-us.apache.org/repos/asf/hadoop/blob/5bd048e8/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java
index 2ee251b..4b8556b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java
@@ -28,6 +28,7 @@ import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
+import java.util.concurrent.TimeoutException;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.hadoop.conf.Configuration;
@@ -360,4 +361,35 @@ public class TestDecommissioningStatus {
dm.refreshNodes(conf);
cleanupFile(fileSys, f);
}
+
+ /**
+ * Verify the support for decommissioning a datanode that is already dead.
+ * Under this scenario the datanode should immediately be marked as
+ * DECOMMISSIONED
+ */
+ @Test(timeout=120000)
+ public void testDecommissionDeadDN()
+ throws IOException, InterruptedException, TimeoutException {
+ DatanodeID dnID = cluster.getDataNodes().get(0).getDatanodeId();
+ String dnName = dnID.getXferAddr();
+ DataNodeProperties stoppedDN = cluster.stopDataNode(0);
+ DFSTestUtil.waitForDatanodeState(cluster, dnID.getDatanodeUuid(),
+ false, 30000);
+ FSNamesystem fsn = cluster.getNamesystem();
+ final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
+ DatanodeDescriptor dnDescriptor = dm.getDatanode(dnID);
+ decommissionNode(fsn, localFileSys, dnName);
+ dm.refreshNodes(conf);
+ BlockManagerTestUtil.checkDecommissionState(dm, dnDescriptor);
+ assertTrue(dnDescriptor.isDecommissioned());
+
+ // Add the node back
+ cluster.restartDataNode(stoppedDN, true);
+ cluster.waitActive();
+
+ // Call refreshNodes on FSNamesystem with empty exclude file to remove the
+ // datanode from decommissioning list and make it available again.
+ writeConfigFile(localFileSys, excludeFile, null);
+ dm.refreshNodes(conf);
+ }
}