You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by na...@apache.org on 2017/12/26 22:44:04 UTC
[08/50] [abbrv] hadoop git commit: HDFS-12905. [READ] Handle
decommissioning and under-maintenance Datanodes with Provided storage.
HDFS-12905. [READ] Handle decommissioning and under-maintenance Datanodes with Provided storage.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/0f6aa956
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/0f6aa956
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/0f6aa956
Branch: refs/heads/yarn-3409
Commit: 0f6aa9564cbe0812a8cab36d999e353269dd6bc9
Parents: 2298f2d
Author: Virajith Jalaparti <vi...@apache.org>
Authored: Fri Dec 8 10:07:40 2017 -0800
Committer: Chris Douglas <cd...@apache.org>
Committed: Fri Dec 15 17:51:41 2017 -0800
----------------------------------------------------------------------
.../blockmanagement/ProvidedStorageMap.java | 13 ++-
.../TestNameNodeProvidedImplementation.java | 95 ++++++++++++++++++++
2 files changed, 107 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/0f6aa956/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java
index 7fbc71a..208ed3e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java
@@ -342,14 +342,25 @@ public class ProvidedStorageMap {
return dn;
}
}
+ // prefer live nodes first.
+ DatanodeDescriptor dn = chooseRandomNode(excludedUUids, true);
+ if (dn == null) {
+ dn = chooseRandomNode(excludedUUids, false);
+ }
+ return dn;
+ }
+ private DatanodeDescriptor chooseRandomNode(Set<String> excludedUUids,
+ boolean preferLiveNodes) {
Random r = new Random();
for (int i = dnR.size() - 1; i >= 0; --i) {
int pos = r.nextInt(i + 1);
DatanodeDescriptor node = dnR.get(pos);
String uuid = node.getDatanodeUuid();
if (!excludedUUids.contains(uuid)) {
- return node;
+ if (!preferLiveNodes || node.getAdminState() == AdminStates.NORMAL) {
+ return node;
+ }
}
Collections.swap(dnR, i, pos);
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/0f6aa956/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java
index d057247..394e8d8 100644
--- a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java
+++ b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java
@@ -56,6 +56,7 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStatistics;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.ProvidedStorageMap;
@@ -795,4 +796,98 @@ public class TestNameNodeProvidedImplementation {
FileUtils.deleteDirectory(tempDirectory);
}
+ private DatanodeDescriptor getDatanodeDescriptor(DatanodeManager dnm,
+ int dnIndex) throws Exception {
+ return dnm.getDatanode(cluster.getDataNodes().get(dnIndex).getDatanodeId());
+ }
+
+ private void startDecommission(FSNamesystem namesystem, DatanodeManager dnm,
+ int dnIndex) throws Exception {
+ namesystem.writeLock();
+ DatanodeDescriptor dnDesc = getDatanodeDescriptor(dnm, dnIndex);
+ dnm.getDatanodeAdminManager().startDecommission(dnDesc);
+ namesystem.writeUnlock();
+ }
+
+ private void startMaintenance(FSNamesystem namesystem, DatanodeManager dnm,
+ int dnIndex) throws Exception {
+ namesystem.writeLock();
+ DatanodeDescriptor dnDesc = getDatanodeDescriptor(dnm, dnIndex);
+ dnm.getDatanodeAdminManager().startMaintenance(dnDesc, Long.MAX_VALUE);
+ namesystem.writeUnlock();
+ }
+
+ private void stopMaintenance(FSNamesystem namesystem, DatanodeManager dnm,
+ int dnIndex) throws Exception {
+ namesystem.writeLock();
+ DatanodeDescriptor dnDesc = getDatanodeDescriptor(dnm, dnIndex);
+ dnm.getDatanodeAdminManager().stopMaintenance(dnDesc);
+ namesystem.writeUnlock();
+ }
+
+ @Test
+ public void testDatanodeLifeCycle() throws Exception {
+ createImage(new FSTreeWalk(NAMEPATH, conf), NNDIRPATH,
+ FixedBlockResolver.class);
+ startCluster(NNDIRPATH, 3,
+ new StorageType[] {StorageType.PROVIDED, StorageType.DISK},
+ null, false);
+
+ int fileIndex = numFiles -1;
+
+ final BlockManager blockManager = cluster.getNamesystem().getBlockManager();
+ final DatanodeManager dnm = blockManager.getDatanodeManager();
+
+ // to start, all 3 DNs are live in ProvidedDatanodeDescriptor.
+ verifyFileLocation(fileIndex, 3);
+
+ // decommission first DN; still get 3 replicas.
+ startDecommission(cluster.getNamesystem(), dnm, 0);
+ verifyFileLocation(fileIndex, 3);
+
+ // remains the same even after heartbeats.
+ cluster.triggerHeartbeats();
+ verifyFileLocation(fileIndex, 3);
+
+ // start maintenance for 2nd DN; still get 3 replicas.
+ startMaintenance(cluster.getNamesystem(), dnm, 1);
+ verifyFileLocation(fileIndex, 3);
+
+ DataNode dn1 = cluster.getDataNodes().get(0);
+ DataNode dn2 = cluster.getDataNodes().get(1);
+
+ // stop the 1st DN while being decommissioned.
+ MiniDFSCluster.DataNodeProperties dn1Properties = cluster.stopDataNode(0);
+ BlockManagerTestUtil.noticeDeadDatanode(cluster.getNameNode(),
+ dn1.getDatanodeId().getXferAddr());
+
+ // get 2 locations
+ verifyFileLocation(fileIndex, 2);
+
+ // stop dn2 while in maintenance.
+ MiniDFSCluster.DataNodeProperties dn2Properties = cluster.stopDataNode(1);
+ BlockManagerTestUtil.noticeDeadDatanode(cluster.getNameNode(),
+ dn2.getDatanodeId().getXferAddr());
+
+ // 2 valid locations will be found as blocks on nodes that die during
+ // maintenance are not marked for removal.
+ verifyFileLocation(fileIndex, 2);
+
+ // stop the maintenance; get only 1 replica
+ stopMaintenance(cluster.getNamesystem(), dnm, 0);
+ verifyFileLocation(fileIndex, 1);
+
+ // restart the stopped DN.
+ cluster.restartDataNode(dn1Properties, true);
+ cluster.waitActive();
+
+ // reports 2 replicas (dn2 is still stopped at this point)
+ verifyFileLocation(fileIndex, 2);
+
+ cluster.restartDataNode(dn2Properties, true);
+ cluster.waitActive();
+
+ // reports all 3 replicas
+ verifyFileLocation(fileIndex, 3);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org