Posted to common-commits@hadoop.apache.org by wa...@apache.org on 2014/11/19 07:19:51 UTC

[1/2] hadoop git commit: HDFS-7225. Remove stale block invalidation work when DN re-registers with different UUID. (Zhe Zhang and Andrew Wang)

Repository: hadoop
Updated Branches:
  refs/heads/trunk 79301e80d -> 5bd048e83


HDFS-7225. Remove stale block invalidation work when DN re-registers with different UUID. (Zhe Zhang and Andrew Wang)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/406c09ad
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/406c09ad
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/406c09ad

Branch: refs/heads/trunk
Commit: 406c09ad1150c4971c2b7675fcb0263d40517fbf
Parents: 79301e8
Author: Andrew Wang <wa...@apache.org>
Authored: Tue Nov 18 22:14:04 2014 -0800
Committer: Andrew Wang <wa...@apache.org>
Committed: Tue Nov 18 22:14:04 2014 -0800

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |   3 +
 .../server/blockmanagement/BlockManager.java    |  21 ++-
 .../server/blockmanagement/DatanodeManager.java |   2 +
 .../TestComputeInvalidateWork.java              | 167 +++++++++++++++----
 4 files changed, 156 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
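
Background: pending invalidation work is tracked per datanode UUID, so a DN
that re-registers with a fresh UUID (e.g. after a reformat) orphans any work
queued under its old UUID, leaving that work pending indefinitely. A minimal
standalone sketch of the fix follows; the class and method names are
simplified stand-ins, not the actual BlockManager/InvalidateBlocks API:

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    // Sketch only: invalidation work keyed by datanode UUID.
    class InvalidateWorkSketch {
      private final Map<String, List<Long>> pendingByUuid = new HashMap<>();

      void addToInvalidates(String dnUuid, long blockId) {
        pendingByUuid.computeIfAbsent(dnUuid, k -> new ArrayList<>())
            .add(blockId);
      }

      // Called when a stale registration is wiped: drop the old UUID's work.
      void removeFromInvalidates(String dnUuid) {
        pendingByUuid.remove(dnUuid);
      }

      // Mirrors the new guard in computeInvalidateWork(): if the UUID no
      // longer maps to a registered node, discard the work and report none.
      int computeInvalidateWork(String dnUuid, boolean uuidRegistered) {
        if (!uuidRegistered) {
          pendingByUuid.remove(dnUuid);
          return 0;
        }
        List<Long> work = pendingByUuid.remove(dnUuid);
        return work == null ? 0 : work.size();
      }
    }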


http://git-wip-us.apache.org/repos/asf/hadoop/blob/406c09ad/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index f34ca74..f444721 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -453,6 +453,9 @@ Release 2.7.0 - UNRELEASED
     HDFS-7406. SimpleHttpProxyHandler puts incorrect "Connection: Close"
     header. (wheat9)
 
+    HDFS-7225. Remove stale block invalidation work when DN re-registers with
+    different UUID. (Zhe Zhang and Andrew Wang)
+
 Release 2.6.0 - 2014-11-18
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/406c09ad/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index b8dcd88..574abcc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -1112,6 +1112,18 @@ public class BlockManager {
   }
 
   /**
+   * Remove all block invalidation tasks under this datanode UUID;
+   * used when a datanode registers with a new UUID and the old one
+   * is wiped.
+   */
+  void removeFromInvalidates(final DatanodeInfo datanode) {
+    if (!namesystem.isPopulatingReplQueues()) {
+      return;
+    }
+    invalidateBlocks.remove(datanode);
+  }
+
+  /**
    * Mark the block belonging to datanode as corrupt
    * @param blk Block to be marked as corrupt
    * @param dn Datanode which holds the corrupt replica
@@ -3382,7 +3394,14 @@ public class BlockManager {
         return 0;
       }
       try {
-        toInvalidate = invalidateBlocks.invalidateWork(datanodeManager.getDatanode(dn));
+        DatanodeDescriptor dnDescriptor = datanodeManager.getDatanode(dn);
+        if (dnDescriptor == null) {
+          LOG.warn("DataNode " + dn + " cannot be found with UUID " +
+              dn.getDatanodeUuid() + ", removing block invalidation work.");
+          invalidateBlocks.remove(dn);
+          return 0;
+        }
+        toInvalidate = invalidateBlocks.invalidateWork(dnDescriptor);
         
         if (toInvalidate == null) {
           return 0;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/406c09ad/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
index d19aad7..0780ba3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
@@ -600,6 +600,8 @@ public class DatanodeManager {
     synchronized (datanodeMap) {
       host2DatanodeMap.remove(datanodeMap.remove(key));
     }
+    // Also remove all block invalidation tasks under this node
+    blockManager.removeFromInvalidates(new DatanodeInfo(node));
     if (LOG.isDebugEnabled()) {
       LOG.debug(getClass().getSimpleName() + ".wipeDatanode("
           + node + "): storage " + key 

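The hunk above covers the eager path: wipeDatanode() runs when a
re-registration replaces an old UUID, and it now clears that node's queued
invalidation work as well. A simplified sketch of the hook (names other than
wipeDatanode/removeFromInvalidates are placeholders):

    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.Map;
    import java.util.Set;

    // Sketch only: wiping a stale registration must also clear that node's
    // pending invalidation work, or the entry lingers in the queue.
    class WipeDatanodeSketch {
      private final Map<String, Object> datanodeMap = new HashMap<>();
      private final Set<String> uuidsWithQueuedWork = new HashSet<>();

      void wipeDatanode(String uuid) {
        datanodeMap.remove(uuid);
        // Also remove all block invalidation tasks under this node
        uuidsWithQueuedWork.remove(uuid);
      }
    }
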
http://git-wip-us.apache.org/repos/asf/hadoop/blob/406c09ad/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestComputeInvalidateWork.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestComputeInvalidateWork.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestComputeInvalidateWork.java
index d0edd48..fecca4e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestComputeInvalidateWork.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestComputeInvalidateWork.java
@@ -17,66 +17,161 @@
  */
 package org.apache.hadoop.hdfs.server.blockmanagement;
 
+import java.util.UUID;
+
 import static org.junit.Assert.assertEquals;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
+import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys;
 import org.apache.hadoop.hdfs.server.common.GenerationStamp;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
+import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
+import org.apache.hadoop.util.VersionInfo;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
 import org.junit.Test;
+import org.mockito.internal.util.reflection.Whitebox;
 
 /**
  * Test that block invalidation work is computed and scheduled correctly
  */
 public class TestComputeInvalidateWork {
+
+  private Configuration conf;
+  private final int NUM_OF_DATANODES = 3;
+  private MiniDFSCluster cluster;
+  private FSNamesystem namesystem;
+  private BlockManager bm;
+  private DatanodeDescriptor[] nodes;
+
+  @Before
+  public void setup() throws Exception {
+    conf = new HdfsConfiguration();
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_OF_DATANODES)
+        .build();
+    cluster.waitActive();
+    namesystem = cluster.getNamesystem();
+    bm = namesystem.getBlockManager();
+    nodes = bm.getDatanodeManager().getHeartbeatManager().getDatanodes();
+    assertEquals(nodes.length, NUM_OF_DATANODES);
+  }
+
+  @After
+  public void teardown() throws Exception {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
   /**
-   * Test if {@link FSNamesystem#computeInvalidateWork(int)}
+   * Test if {@link BlockManager#computeInvalidateWork(int)}
    * can schedule invalidate work correctly 
    */
-  @Test
+  @Test(timeout=120000)
   public void testCompInvalidate() throws Exception {
-    final Configuration conf = new HdfsConfiguration();
-    final int NUM_OF_DATANODES = 3;
-    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_OF_DATANODES).build();
+    final int blockInvalidateLimit = bm.getDatanodeManager()
+        .blockInvalidateLimit;
+    namesystem.writeLock();
     try {
-      cluster.waitActive();
-      final FSNamesystem namesystem = cluster.getNamesystem();
-      final BlockManager bm = namesystem.getBlockManager();
-      final int blockInvalidateLimit = bm.getDatanodeManager().blockInvalidateLimit;
-      final DatanodeDescriptor[] nodes = bm.getDatanodeManager(
-          ).getHeartbeatManager().getDatanodes();
-      assertEquals(nodes.length, NUM_OF_DATANODES);
+      for (int i=0; i<nodes.length; i++) {
+        for(int j=0; j<3*blockInvalidateLimit+1; j++) {
+          Block block = new Block(i*(blockInvalidateLimit+1)+j, 0,
+              GenerationStamp.LAST_RESERVED_STAMP);
+          bm.addToInvalidates(block, nodes[i]);
+        }
+      }
       
+      assertEquals(blockInvalidateLimit*NUM_OF_DATANODES,
+          bm.computeInvalidateWork(NUM_OF_DATANODES+1));
+      assertEquals(blockInvalidateLimit*NUM_OF_DATANODES,
+          bm.computeInvalidateWork(NUM_OF_DATANODES));
+      assertEquals(blockInvalidateLimit*(NUM_OF_DATANODES-1),
+          bm.computeInvalidateWork(NUM_OF_DATANODES-1));
+      int workCount = bm.computeInvalidateWork(1);
+      if (workCount == 1) {
+        assertEquals(blockInvalidateLimit+1, bm.computeInvalidateWork(2));
+      } else {
+        assertEquals(workCount, blockInvalidateLimit);
+        assertEquals(2, bm.computeInvalidateWork(2));
+      }
+    } finally {
+      namesystem.writeUnlock();
+    }
+  }
+
+  /**
+   * A reformatted DataNode replaces its original UUID in the
+   * {@link DatanodeManager#datanodeMap}. This test verifies that block
+   * invalidation work queued under the original UUID is skipped.
+   */
+  @Test(timeout=120000)
+  public void testDatanodeReformat() throws Exception {
+    namesystem.writeLock();
+    try {
+      Block block = new Block(0, 0, GenerationStamp.LAST_RESERVED_STAMP);
+      bm.addToInvalidates(block, nodes[0]);
+      // Change the datanode UUID to emulate a reformat
+      nodes[0].setDatanodeUuidForTesting("fortesting");
+      // Since UUID has changed, the invalidation work should be skipped
+      assertEquals(0, bm.computeInvalidateWork(1));
+      assertEquals(0, bm.getPendingDeletionBlocksCount());
+    } finally {
+      namesystem.writeUnlock();
+    }
+  }
+
+  @Test(timeout=120000)
+  public void testDatanodeReRegistration() throws Exception {
+    // Get the filesystem and a path for the test file
+    final DistributedFileSystem dfs = cluster.getFileSystem();
+    final Path path = new Path("/testRR");
+    // Create a file and shutdown the DNs, which populates InvalidateBlocks
+    DFSTestUtil.createFile(dfs, path, dfs.getDefaultBlockSize(),
+        (short) NUM_OF_DATANODES, 0xED0ED0);
+    for (DataNode dn : cluster.getDataNodes()) {
+      dn.shutdown();
+    }
+    dfs.delete(path, false);
+    namesystem.writeLock();
+    InvalidateBlocks invalidateBlocks;
+    int expected = NUM_OF_DATANODES;
+    try {
+      invalidateBlocks = (InvalidateBlocks) Whitebox
+          .getInternalState(cluster.getNamesystem().getBlockManager(),
+              "invalidateBlocks");
+      assertEquals("Expected invalidate blocks to be the number of DNs",
+          (long) expected, invalidateBlocks.numBlocks());
+    } finally {
+      namesystem.writeUnlock();
+    }
+    // Re-register each DN and see that it wipes the invalidation work
+    for (DataNode dn : cluster.getDataNodes()) {
+      DatanodeID did = dn.getDatanodeId();
+      did.setDatanodeUuidForTesting(UUID.randomUUID().toString());
+      DatanodeRegistration reg = new DatanodeRegistration(did,
+          new StorageInfo(HdfsServerConstants.NodeType.DATA_NODE),
+          new ExportedBlockKeys(),
+          VersionInfo.getVersion());
       namesystem.writeLock();
       try {
-        for (int i=0; i<nodes.length; i++) {
-          for(int j=0; j<3*blockInvalidateLimit+1; j++) {
-            Block block = new Block(i*(blockInvalidateLimit+1)+j, 0, 
-                GenerationStamp.LAST_RESERVED_STAMP);
-            bm.addToInvalidates(block, nodes[i]);
-          }
-        }
-        
-        assertEquals(blockInvalidateLimit*NUM_OF_DATANODES, 
-            bm.computeInvalidateWork(NUM_OF_DATANODES+1));
-        assertEquals(blockInvalidateLimit*NUM_OF_DATANODES, 
-            bm.computeInvalidateWork(NUM_OF_DATANODES));
-        assertEquals(blockInvalidateLimit*(NUM_OF_DATANODES-1), 
-            bm.computeInvalidateWork(NUM_OF_DATANODES-1));
-        int workCount = bm.computeInvalidateWork(1);
-        if (workCount == 1) {
-          assertEquals(blockInvalidateLimit+1, bm.computeInvalidateWork(2));
-        } else {
-          assertEquals(workCount, blockInvalidateLimit);
-          assertEquals(2, bm.computeInvalidateWork(2));
-        }
+        bm.getDatanodeManager().registerDatanode(reg);
+        expected--;
+        assertEquals("Expected number of invalidate blocks to decrease",
+            (long) expected, invalidateBlocks.numBlocks());
       } finally {
-        namesystem.writeUnlock();
+        namesystem.writeUnlock();
       }
-    } finally {
-      cluster.shutdown();
     }
   }
 }


[2/2] hadoop git commit: HDFS-7374. Allow decommissioning of dead DataNodes. Contributed by Zhe Zhang.

Posted by wa...@apache.org.
HDFS-7374. Allow decommissioning of dead DataNodes. Contributed by Zhe Zhang.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/5bd048e8
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/5bd048e8
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/5bd048e8

Branch: refs/heads/trunk
Commit: 5bd048e8378034b496bacc73b470a25d855aceb1
Parents: 406c09a
Author: Andrew Wang <wa...@apache.org>
Authored: Tue Nov 18 22:16:58 2014 -0800
Committer: Andrew Wang <wa...@apache.org>
Committed: Tue Nov 18 22:16:58 2014 -0800

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  2 ++
 .../server/blockmanagement/DatanodeManager.java | 28 +++++++++--------
 .../org/apache/hadoop/hdfs/DFSTestUtil.java     | 20 +++++++++++-
 .../hdfs/server/namenode/TestDeadDatanode.java  | 32 ++------------------
 .../namenode/TestDecommissioningStatus.java     | 32 ++++++++++++++++++++
 5 files changed, 72 insertions(+), 42 deletions(-)
----------------------------------------------------------------------
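
The behavioral change is in startDecommission(): a dead node has no live
replicas to drain, so it can be marked DECOMMISSIONED immediately instead of
entering DECOMMISSION_INPROGRESS and waiting on re-replication that can never
complete. A hedged sketch of the new state handling (simplified; not the real
DatanodeDescriptor/DatanodeManager API):

    // Sketch only: simplified admin-state transitions.
    enum AdminState { NORMAL, DECOMMISSION_INPROGRESS, DECOMMISSIONED }

    class DecommissionSketch {
      AdminState state = AdminState.NORMAL;
      boolean isAlive;

      void startDecommission() {
        if (state == AdminState.DECOMMISSION_INPROGRESS) {
          return; // already draining replicas
        }
        if (!isAlive) {
          state = AdminState.DECOMMISSIONED;          // dead: nothing to drain
        } else if (state != AdminState.DECOMMISSIONED) {
          state = AdminState.DECOMMISSION_INPROGRESS; // live: re-replicate first
        }
      }
    }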


http://git-wip-us.apache.org/repos/asf/hadoop/blob/5bd048e8/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index f444721..ca2b9f6 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -456,6 +456,8 @@ Release 2.7.0 - UNRELEASED
     HDFS-7225. Remove stale block invalidation work when DN re-registers with
     different UUID. (Zhe Zhang and Andrew Wang)
 
+    HDFS-7374. Allow decommissioning of dead DataNodes. (Zhe Zhang)
+
 Release 2.6.0 - 2014-11-18
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5bd048e8/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
index 0780ba3..356a4a3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
@@ -845,16 +845,21 @@ public class DatanodeManager {
   @InterfaceAudience.Private
   @VisibleForTesting
   public void startDecommission(DatanodeDescriptor node) {
-    if (!node.isDecommissionInProgress() && !node.isDecommissioned()) {
-      for (DatanodeStorageInfo storage : node.getStorageInfos()) {
-        LOG.info("Start Decommissioning " + node + " " + storage
-            + " with " + storage.numBlocks() + " blocks");
+    if (!node.isDecommissionInProgress()) {
+      if (!node.isAlive) {
+        LOG.info("Dead node " + node + " is decommissioned immediately.");
+        node.setDecommissioned();
+      } else if (!node.isDecommissioned()) {
+        for (DatanodeStorageInfo storage : node.getStorageInfos()) {
+          LOG.info("Start Decommissioning " + node + " " + storage
+              + " with " + storage.numBlocks() + " blocks");
+        }
+        heartbeatManager.startDecommission(node);
+        node.decommissioningStatus.setStartTime(now());
+
+        // all the blocks that reside on this node have to be replicated.
+        checkDecommissionState(node);
       }
-      heartbeatManager.startDecommission(node);
-      node.decommissioningStatus.setStartTime(now());
-      
-      // all the blocks that reside on this node have to be replicated.
-      checkDecommissionState(node);
     }
   }
 
@@ -1009,14 +1014,13 @@ public class DatanodeManager {
   
         // register new datanode
         addDatanode(nodeDescr);
-        checkDecommissioning(nodeDescr);
-        
         // also treat the registration message as a heartbeat
         // no need to update its timestamp
         // because it is done when the descriptor is created
         heartbeatManager.addDatanode(nodeDescr);
-        success = true;
         incrementVersionCount(nodeReg.getSoftwareVersion());
+        checkDecommissioning(nodeDescr);
+        success = true;
       } finally {
         if (!success) {
           removeDatanode(nodeDescr);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5bd048e8/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
index cea2e82..4e85311 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
@@ -25,7 +25,6 @@ import com.google.common.base.Supplier;
 import com.google.common.collect.Lists;
 
 import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -1612,4 +1611,23 @@ public class DFSTestUtil {
     LayoutVersion.updateMap(DataNodeLayoutVersion.FEATURES,
                             new LayoutVersion.LayoutFeature[] { feature });
   }
+
+  /**
+   * Wait up to waitTime milliseconds for the given datanode to reach the
+   * expected alive or dead state.
+   */
+  public static void waitForDatanodeState(
+      final MiniDFSCluster cluster, final String nodeID,
+      final boolean alive, int waitTime)
+      throws TimeoutException, InterruptedException {
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        FSNamesystem namesystem = cluster.getNamesystem();
+        final DatanodeDescriptor dd = BlockManagerTestUtil.getDatanode(
+            namesystem, nodeID);
+        return (dd.isAlive == alive);
+      }
+    }, 100, waitTime);
+  }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5bd048e8/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java
index 6c4bb16..71bf124 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java
@@ -21,17 +21,15 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.fail;
 
 import java.io.IOException;
-import java.util.concurrent.TimeoutException;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.Block;
-import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
-import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
@@ -43,7 +41,6 @@ import org.apache.hadoop.hdfs.server.protocol.RegisterCommand;
 import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
 import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
 import org.apache.hadoop.hdfs.server.protocol.StorageReport;
-import org.apache.hadoop.util.Time;
 import org.junit.After;
 import org.junit.Test;
 
@@ -61,29 +58,6 @@ public class TestDeadDatanode {
   }
 
   /**
-   * wait for datanode to reach alive or dead state for waitTime given in
-   * milliseconds.
-   */
-  private void waitForDatanodeState(String nodeID, boolean alive, int waitTime)
-      throws TimeoutException, InterruptedException {
-    long stopTime = Time.now() + waitTime;
-    FSNamesystem namesystem = cluster.getNamesystem();
-    String state = alive ? "alive" : "dead";
-    while (Time.now() < stopTime) {
-      final DatanodeDescriptor dd = BlockManagerTestUtil.getDatanode(
-          namesystem, nodeID);
-      if (dd.isAlive == alive) {
-        LOG.info("datanode " + nodeID + " is " + state);
-        return;
-      }
-      LOG.info("Waiting for datanode " + nodeID + " to become " + state);
-      Thread.sleep(1000);
-    }
-    throw new TimeoutException("Timedout waiting for datanode reach state "
-        + state);
-  }
-
-  /**
    * Test to ensure namenode rejects request from dead datanode
    * - Start a cluster
    * - Shutdown the datanode and wait for it to be marked dead at the namenode
@@ -104,11 +78,11 @@ public class TestDeadDatanode {
     DatanodeRegistration reg = 
       DataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(0), poolId);
       
-    waitForDatanodeState(reg.getDatanodeUuid(), true, 20000);
+    DFSTestUtil.waitForDatanodeState(cluster, reg.getDatanodeUuid(), true, 20000);
 
     // Shutdown and wait for datanode to be marked dead
     dn.shutdown();
-    waitForDatanodeState(reg.getDatanodeUuid(), false, 20000);
+    DFSTestUtil.waitForDatanodeState(cluster, reg.getDatanodeUuid(), false, 20000);
 
     DatanodeProtocol dnp = cluster.getNameNodeRpc();
     

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5bd048e8/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java
index 2ee251b..4b8556b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java
@@ -28,6 +28,7 @@ import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Random;
+import java.util.concurrent.TimeoutException;
 
 import org.apache.commons.io.output.ByteArrayOutputStream;
 import org.apache.hadoop.conf.Configuration;
@@ -360,4 +361,35 @@ public class TestDecommissioningStatus {
     dm.refreshNodes(conf);
     cleanupFile(fileSys, f);
   }
+
+  /**
+   * Verify the support for decommissioning a datanode that is already dead.
+   * In this scenario the datanode should immediately be marked
+   * DECOMMISSIONED.
+   */
+  @Test(timeout=120000)
+  public void testDecommissionDeadDN()
+      throws IOException, InterruptedException, TimeoutException {
+    DatanodeID dnID = cluster.getDataNodes().get(0).getDatanodeId();
+    String dnName = dnID.getXferAddr();
+    DataNodeProperties stoppedDN = cluster.stopDataNode(0);
+    DFSTestUtil.waitForDatanodeState(cluster, dnID.getDatanodeUuid(),
+        false, 30000);
+    FSNamesystem fsn = cluster.getNamesystem();
+    final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
+    DatanodeDescriptor dnDescriptor = dm.getDatanode(dnID);
+    decommissionNode(fsn, localFileSys, dnName);
+    dm.refreshNodes(conf);
+    BlockManagerTestUtil.checkDecommissionState(dm, dnDescriptor);
+    assertTrue(dnDescriptor.isDecommissioned());
+
+    // Add the node back
+    cluster.restartDataNode(stoppedDN, true);
+    cluster.waitActive();
+
+    // Call refreshNodes on FSNamesystem with empty exclude file to remove the
+    // datanode from decommissioning list and make it available again.
+    writeConfigFile(localFileSys, excludeFile, null);
+    dm.refreshNodes(conf);
+  }
 }