Posted to common-commits@hadoop.apache.org by ji...@apache.org on 2015/05/21 01:49:00 UTC

[36/50] [abbrv] hadoop git commit: HDFS-8404. Pending block replication can get stuck using older genstamp. Contributed by Nathan Roberts.

HDFS-8404. Pending block replication can get stuck using older genstamp. Contributed by Nathan Roberts.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8860e352
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8860e352
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8860e352

Branch: refs/heads/HDFS-7240
Commit: 8860e352c394372e4eb3ebdf82ea899567f34e4e
Parents: 470c87d
Author: Kihwal Lee <ki...@apache.org>
Authored: Tue May 19 13:05:15 2015 -0500
Committer: Kihwal Lee <ki...@apache.org>
Committed: Tue May 19 13:05:15 2015 -0500

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 +
 .../server/blockmanagement/BlockManager.java    | 17 ++--
 .../blockmanagement/TestPendingReplication.java | 98 +++++++++++++++++++-
 3 files changed, 108 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
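
For context on the failure mode this patch addresses: PendingReplicationBlocks keeps its own copy of each Block, frozen with the generation stamp it had when replication was scheduled. If the genstamp advances while the item sits in the pending queue (for example through an append or pipeline recovery), the copy that eventually times out is stale, and re-queuing that stale copy into neededReplications produces an entry that never matches the live block, so replication stalls. Below is a minimal, self-contained sketch of the stale-snapshot-versus-fresh-lookup distinction; the class and member names (BlockMeta, blocksById, resolveTimedOut) are illustrative stand-ins, not Hadoop code.

import java.util.HashMap;
import java.util.Map;

// Minimal, self-contained illustration of the HDFS-8404 failure mode and
// its fix: when a pending-replication entry times out, re-resolve it
// against the authoritative block store instead of reusing the snapshot
// that was queued earlier. All names below are illustrative stand-ins,
// not Hadoop classes.
public class StaleGenstampSketch {

  static final class BlockMeta {
    final long id;
    long genStamp;
    BlockMeta(long id, long genStamp) {
      this.id = id;
      this.genStamp = genStamp;
    }
  }

  // Authoritative store, analogous to BlockManager's blocksMap.
  private final Map<Long, BlockMeta> blocksById = new HashMap<>();

  // Called when a pending-replication entry times out. 'timedOut' is the
  // copy taken when replication was scheduled, so it may carry an old
  // generation stamp.
  BlockMeta resolveTimedOut(BlockMeta timedOut) {
    // Fresh lookup by block id, mirroring blocksMap.getStoredBlock()
    // in the patch: work with the block as it exists now.
    return blocksById.get(timedOut.id);
  }

  public static void main(String[] args) {
    StaleGenstampSketch sketch = new StaleGenstampSketch();
    // Replication scheduled while the block was at genstamp 0 ...
    BlockMeta queuedCopy = new BlockMeta(1L, 0L);
    // ... but the genstamp advanced to 1 (e.g. append/recovery) meanwhile.
    sketch.blocksById.put(1L, new BlockMeta(1L, 1L));

    BlockMeta fresh = sketch.resolveTimedOut(queuedCopy);
    System.out.println("queued snapshot genstamp = " + queuedCopy.genStamp
        + ", current genstamp = " + fresh.genStamp);  // prints 0 vs. 1
  }
}

Before the patch, the equivalent of queuedCopy (the timed-out Block itself, with its old genstamp) went straight back into neededReplications.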


http://git-wip-us.apache.org/repos/asf/hadoop/blob/8860e352/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 76888a9..5bcaddd 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -878,6 +878,9 @@ Release 2.7.1 - UNRELEASED
 
     HDFS-8405. Fix a typo in NamenodeFsck.  (Takanobu Asanuma via szetszwo)
 
+    HDFS-8404. Pending block replication can get stuck using older genstamp
+    (Nathan Roberts via kihwal)
+
 Release 2.7.0 - 2015-04-20
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8860e352/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index 8012f71..54981fb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -1690,13 +1690,18 @@ public class BlockManager {
       namesystem.writeLock();
       try {
         for (int i = 0; i < timedOutItems.length; i++) {
+          /*
+           * Use the blockinfo from the blocksmap to be certain we're working
+           * with the most up-to-date block information (e.g. genstamp).
+           */
+          BlockInfoContiguous bi = blocksMap.getStoredBlock(timedOutItems[i]);
+          if (bi == null) {
+            continue;
+          }
           NumberReplicas num = countNodes(timedOutItems[i]);
-          if (isNeededReplication(timedOutItems[i], getReplication(timedOutItems[i]),
-                                 num.liveReplicas())) {
-            neededReplications.add(timedOutItems[i],
-                                   num.liveReplicas(),
-                                   num.decommissionedAndDecommissioning(),
-                                   getReplication(timedOutItems[i]));
+          if (isNeededReplication(bi, getReplication(bi), num.liveReplicas())) {
+            neededReplications.add(bi, num.liveReplicas(),
+                num.decommissionedAndDecommissioning(), getReplication(bi));
           }
         }
       } finally {
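
Two details of the fix above are worth noting: the lookup happens under the namesystem write lock, and a null result from blocksMap.getStoredBlock() (the block was deleted while its replication was pending) now causes the timed-out entry to be dropped instead of re-queued. Here is a rough sketch of that loop shape, using a ReentrantReadWriteLock and a plain map as stand-ins for the namesystem lock and the blocks map; nothing in it is a real Hadoop API.

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.locks.ReentrantReadWriteLock;

// Rough sketch of the corrected timeout scan. The lock and the map are
// stand-ins for namesystem.writeLock() and the blocks map; nothing here
// is a real Hadoop API.
public class TimeoutRequeueSketch {

  private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
  private final Map<Long, Long> genstampById = new HashMap<>();

  void processTimedOut(long[] timedOutIds) {
    lock.writeLock().lock();  // the patch does this scan under the write lock
    try {
      for (long id : timedOutIds) {
        // Fresh lookup, as in the patch; the queued snapshot is ignored.
        Long genstamp = genstampById.get(id);
        if (genstamp == null) {
          // Block was deleted while replication was pending: drop the
          // timed-out entry rather than re-queue it.
          continue;
        }
        // Re-queue using the current metadata (here just the genstamp).
        System.out.println("re-queue block " + id + " @ genstamp " + genstamp);
      }
    } finally {
      lock.writeLock().unlock();
    }
  }

  public static void main(String[] args) {
    TimeoutRequeueSketch s = new TimeoutRequeueSketch();
    s.genstampById.put(1L, 1L);              // block 1 still exists, at genstamp 1
    s.processTimedOut(new long[] {1L, 2L});  // block 2 was deleted meanwhile
  }
}

The new test in TestPendingReplication below exercises exactly these two paths: one pending block whose blocksmap entry carries a newer genstamp, and one with no blocksmap entry at all.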

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8860e352/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java
index c63badc..259404e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java
@@ -42,6 +42,7 @@ import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo;
 import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo.BlockStatus;
 import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
 import org.junit.Test;
+import org.mockito.Mockito;
 
 /**
  * This class tests the internals of PendingReplicationBlocks.java,
@@ -52,13 +53,11 @@ public class TestPendingReplication {
   private static final int DFS_REPLICATION_INTERVAL = 1;
   // Number of datanodes in the cluster
   private static final int DATANODE_COUNT = 5;
-
   @Test
   public void testPendingReplication() {
     PendingReplicationBlocks pendingReplications;
     pendingReplications = new PendingReplicationBlocks(TIMEOUT * 1000);
     pendingReplications.start();
-
     //
     // Add 10 blocks to pendingReplications.
     //
@@ -140,8 +139,7 @@ public class TestPendingReplication {
     //
     // Verify that everything has timed out.
     //
-    assertEquals("Size of pendingReplications ",
-                 0, pendingReplications.size());
+    assertEquals("Size of pendingReplications ", 0, pendingReplications.size());
     Block[] timedOut = pendingReplications.getTimedOutBlocks();
     assertTrue(timedOut != null && timedOut.length == 15);
     for (int i = 0; i < timedOut.length; i++) {
@@ -149,6 +147,98 @@ public class TestPendingReplication {
     }
     pendingReplications.stop();
   }
+
+  /*
+   * Test that processPendingReplications will use the most recent
+   * blockinfo from the blocksmap by placing a larger genstamp into
+   * the blocksmap.
+   */
+  @Test
+  public void testProcessPendingReplications() throws Exception {
+    final Configuration conf = new HdfsConfiguration();
+    conf.setLong(
+        DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, TIMEOUT);
+    MiniDFSCluster cluster = null;
+    Block block;
+    BlockInfoContiguous blockInfo;
+    try {
+      cluster =
+          new MiniDFSCluster.Builder(conf).numDataNodes(DATANODE_COUNT).build();
+      cluster.waitActive();
+
+      FSNamesystem fsn = cluster.getNamesystem();
+      BlockManager blkManager = fsn.getBlockManager();
+
+      PendingReplicationBlocks pendingReplications =
+          blkManager.pendingReplications;
+      UnderReplicatedBlocks neededReplications = blkManager.neededReplications;
+      BlocksMap blocksMap = blkManager.blocksMap;
+
+      //
+      // Add 1 block to pendingReplications with GenerationStamp = 0.
+      //
+
+      block = new Block(1, 1, 0);
+      blockInfo = new BlockInfoContiguous(block, (short) 3);
+
+      pendingReplications.increment(block,
+          DatanodeStorageInfo.toDatanodeDescriptors(
+              DFSTestUtil.createDatanodeStorageInfos(1)));
+      BlockCollection bc = Mockito.mock(BlockCollection.class);
+      Mockito.doReturn((short) 3).when(bc).getPreferredBlockReplication();
+      // Place into blocksmap with GenerationStamp = 1
+      blockInfo.setGenerationStamp(1);
+      blocksMap.addBlockCollection(blockInfo, bc);
+
+      assertEquals("Size of pendingReplications ", 1,
+          pendingReplications.size());
+
+      // Add a second block to pendingReplications that has no
+      // corresponding entry in blocksmap
+      block = new Block(2, 2, 0);
+      pendingReplications.increment(block,
+          DatanodeStorageInfo.toDatanodeDescriptors(
+              DFSTestUtil.createDatanodeStorageInfos(1)));
+
+      // verify 2 blocks in pendingReplications
+      assertEquals("Size of pendingReplications ", 2,
+          pendingReplications.size());
+
+      //
+      // Wait for everything to timeout.
+      //
+      while (pendingReplications.size() > 0) {
+        try {
+          Thread.sleep(100);
+        } catch (Exception e) {
+        }
+      }
+
+      //
+      // Verify that block moves to neededReplications
+      //
+      while (neededReplications.size() == 0) {
+        try {
+          Thread.sleep(100);
+        } catch (Exception e) {
+        }
+      }
+
+      // Verify that the generation stamp we will try to replicate
+      // is now 1
+      for (Block b: neededReplications) {
+        assertEquals("Generation stamp is 1 ", 1,
+            b.getGenerationStamp());
+      }
+
+      // Verify size of neededReplications is exactly 1.
+      assertEquals("size of neededReplications is 1 ", 1,
+          neededReplications.size());
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
   
   /**
    * Test if DatanodeProtocol#blockReceivedAndDeleted can correctly update the