Posted to common-commits@hadoop.apache.org by na...@apache.org on 2017/12/26 22:44:04 UTC

[08/50] [abbrv] hadoop git commit: HDFS-12905. [READ] Handle decommissioning and under-maintenance Datanodes with Provided storage.

HDFS-12905. [READ] Handle decommissioning and under-maintenance Datanodes with Provided storage.
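
In short: with PROVIDED storage the NameNode synthesizes block locations from the pool of registered datanodes, and before this change the random pick could return a decommissioning or under-maintenance node even when live nodes were available. The patch makes the selection prefer nodes whose admin state is NORMAL and fall back to the rest only when no live candidate remains. A hedged illustration of the distinction (AdminStates is the real HDFS enum; the wrapper class and predicate below are ours, not part of the patch):

    import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates;

    final class ProvidedLocationFilter {
      // Illustrative only: a datanode is a "preferred" location for a
      // PROVIDED replica when its admin state is NORMAL, i.e. it is
      // neither decommissioning nor in maintenance.
      static boolean isPreferredLocation(AdminStates state) {
        return state == AdminStates.NORMAL;
      }
    }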


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/0f6aa956
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/0f6aa956
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/0f6aa956

Branch: refs/heads/yarn-3409
Commit: 0f6aa9564cbe0812a8cab36d999e353269dd6bc9
Parents: 2298f2d
Author: Virajith Jalaparti <vi...@apache.org>
Authored: Fri Dec 8 10:07:40 2017 -0800
Committer: Chris Douglas <cd...@apache.org>
Committed: Fri Dec 15 17:51:41 2017 -0800

----------------------------------------------------------------------
 .../blockmanagement/ProvidedStorageMap.java     | 13 ++-
 .../TestNameNodeProvidedImplementation.java     | 95 ++++++++++++++++++++
 2 files changed, 107 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/0f6aa956/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java
index 7fbc71a..208ed3e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java
@@ -342,14 +342,25 @@ public class ProvidedStorageMap {
           return dn;
         }
       }
+      // prefer live nodes first.
+      DatanodeDescriptor dn = chooseRandomNode(excludedUUids, true);
+      if (dn == null) {
+        dn = chooseRandomNode(excludedUUids, false);
+      }
+      return dn;
+    }
 
+    private DatanodeDescriptor chooseRandomNode(Set<String> excludedUUids,
+        boolean preferLiveNodes) {
       Random r = new Random();
       for (int i = dnR.size() - 1; i >= 0; --i) {
         int pos = r.nextInt(i + 1);
         DatanodeDescriptor node = dnR.get(pos);
         String uuid = node.getDatanodeUuid();
         if (!excludedUUids.contains(uuid)) {
-          return node;
+          if (!preferLiveNodes || node.getAdminState() == AdminStates.NORMAL) {
+            return node;
+          }
         }
         Collections.swap(dnR, i, pos);
       }
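
For readers who want the selection logic outside the diff context, here is a minimal, self-contained sketch of the same strategy: a partial Fisher-Yates pass that visits candidates in random order, first accepting only live nodes and then, if that pass comes up empty, accepting any non-excluded node. The Node and AdminState types below are simplified stand-ins for the HDFS classes, not the actual API.

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;
    import java.util.Random;
    import java.util.Set;

    class ProvidedNodeChooser {
      enum AdminState { NORMAL, DECOMMISSION_INPROGRESS, IN_MAINTENANCE }

      static final class Node {
        final String uuid;
        final AdminState state;
        Node(String uuid, AdminState state) {
          this.uuid = uuid;
          this.state = state;
        }
      }

      private final List<Node> dnR;   // mutable candidate list
      private final Random r = new Random();

      ProvidedNodeChooser(List<Node> candidates) {
        this.dnR = new ArrayList<>(candidates);
      }

      /** Prefer live nodes; fall back to any non-excluded node. */
      Node choose(Set<String> excludedUuids) {
        Node dn = chooseRandomNode(excludedUuids, true);
        return (dn != null) ? dn : chooseRandomNode(excludedUuids, false);
      }

      // Partial Fisher-Yates shuffle: each candidate is examined exactly
      // once, in random order, without allocating a copy of the list.
      private Node chooseRandomNode(Set<String> excludedUuids,
          boolean preferLiveNodes) {
        for (int i = dnR.size() - 1; i >= 0; --i) {
          int pos = r.nextInt(i + 1);
          Node node = dnR.get(pos);
          if (!excludedUuids.contains(node.uuid)
              && (!preferLiveNodes || node.state == AdminState.NORMAL)) {
            return node;
          }
          Collections.swap(dnR, i, pos); // move rejected node out of range
        }
        return null; // no eligible candidate in this pass
      }
    }

Note that the two passes reshuffle independently, so the fallback pass re-examines nodes the first pass rejected; that is what keeps a decommissioning or under-maintenance node usable as a location of last resort.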

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0f6aa956/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java
index d057247..394e8d8 100644
--- a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java
+++ b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java
@@ -56,6 +56,7 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStatistics;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
 import org.apache.hadoop.hdfs.server.blockmanagement.ProvidedStorageMap;
@@ -795,4 +796,98 @@ public class TestNameNodeProvidedImplementation {
     FileUtils.deleteDirectory(tempDirectory);
   }
 
+  private DatanodeDescriptor getDatanodeDescriptor(DatanodeManager dnm,
+      int dnIndex) throws Exception {
+    return dnm.getDatanode(cluster.getDataNodes().get(dnIndex).getDatanodeId());
+  }
+
+  private void startDecommission(FSNamesystem namesystem, DatanodeManager dnm,
+      int dnIndex) throws Exception {
+    namesystem.writeLock();
+    DatanodeDescriptor dnDesc = getDatanodeDescriptor(dnm, dnIndex);
+    dnm.getDatanodeAdminManager().startDecommission(dnDesc);
+    namesystem.writeUnlock();
+  }
+
+  private void startMaintenance(FSNamesystem namesystem, DatanodeManager dnm,
+      int dnIndex) throws Exception {
+    namesystem.writeLock();
+    DatanodeDescriptor dnDesc = getDatanodeDescriptor(dnm, dnIndex);
+    dnm.getDatanodeAdminManager().startMaintenance(dnDesc, Long.MAX_VALUE);
+    namesystem.writeUnlock();
+  }
+
+  private void stopMaintenance(FSNamesystem namesystem, DatanodeManager dnm,
+      int dnIndex) throws Exception {
+    namesystem.writeLock();
+    DatanodeDescriptor dnDesc = getDatanodeDescriptor(dnm, dnIndex);
+    dnm.getDatanodeAdminManager().stopMaintenance(dnDesc);
+    namesystem.writeUnlock();
+  }
+
+  @Test
+  public void testDatanodeLifeCycle() throws Exception {
+    createImage(new FSTreeWalk(NAMEPATH, conf), NNDIRPATH,
+        FixedBlockResolver.class);
+    startCluster(NNDIRPATH, 3,
+        new StorageType[] {StorageType.PROVIDED, StorageType.DISK},
+        null, false);
+
+    int fileIndex = numFiles - 1;
+
+    final BlockManager blockManager = cluster.getNamesystem().getBlockManager();
+    final DatanodeManager dnm = blockManager.getDatanodeManager();
+
+    // to start, all 3 DNs are live in ProvidedDatanodeDescriptor.
+    verifyFileLocation(fileIndex, 3);
+
+    // decommission the first DN; still get 3 replicas.
+    startDecommission(cluster.getNamesystem(), dnm, 0);
+    verifyFileLocation(fileIndex, 3);
+
+    // remains the same even after heartbeats.
+    cluster.triggerHeartbeats();
+    verifyFileLocation(fileIndex, 3);
+
+    // start maintenance for 2nd DN; still get 3 replicas.
+    startMaintenance(cluster.getNamesystem(), dnm, 1);
+    verifyFileLocation(fileIndex, 3);
+
+    DataNode dn1 = cluster.getDataNodes().get(0);
+    DataNode dn2 = cluster.getDataNodes().get(1);
+
+    // stop the 1st DN while it is being decommissioned.
+    MiniDFSCluster.DataNodeProperties dn1Properties = cluster.stopDataNode(0);
+    BlockManagerTestUtil.noticeDeadDatanode(cluster.getNameNode(),
+        dn1.getDatanodeId().getXferAddr());
+
+    // get 2 locations
+    verifyFileLocation(fileIndex, 2);
+
+    // stop dn2 while in maintenance.
+    MiniDFSCluster.DataNodeProperties dn2Properties = cluster.stopDataNode(1);
+    BlockManagerTestUtil.noticeDeadDatanode(cluster.getNameNode(),
+        dn2.getDatanodeId().getXferAddr());
+
+    // 2 valid locations will be found as blocks on nodes that die during
+    // maintenance are not marked for removal.
+    verifyFileLocation(fileIndex, 2);
+
+    // stop the maintenance; only 1 replica remains.
+    stopMaintenance(cluster.getNamesystem(), dnm, 0);
+    verifyFileLocation(fileIndex, 1);
+
+    // restart the first stopped DN.
+    cluster.restartDataNode(dn1Properties, true);
+    cluster.waitActive();
+
+    // one DN is still down, so only 2 replicas are reported.
+    verifyFileLocation(fileIndex, 2);
+
+    cluster.restartDataNode(dn2Properties, true);
+    cluster.waitActive();
+
+    // reports all 3 replicas
+    verifyFileLocation(fileIndex, 3);
+  }
 }
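
One hardening note on the lock-management helpers above: they release the namesystem write lock outside a finally block, so an exception from getDatanodeDescriptor or the admin manager would leave the NameNode write-locked for the rest of the test run. A safer shape for the same helper, a sketch using only the calls already present in the diff:

    private void startDecommission(FSNamesystem namesystem, DatanodeManager dnm,
        int dnIndex) throws Exception {
      namesystem.writeLock();
      try {
        DatanodeDescriptor dnDesc = getDatanodeDescriptor(dnm, dnIndex);
        dnm.getDatanodeAdminManager().startDecommission(dnDesc);
      } finally {
        // always release the write lock, even if the lookup or the
        // admin-state transition throws
        namesystem.writeUnlock();
      }
    }

The same pattern applies to startMaintenance and stopMaintenance.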

