You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by as...@apache.org on 2016/12/12 16:09:20 UTC
[44/50] [abbrv] hadoop git commit: HDFS-11224. Lifeline message
should be ignored for dead nodes (Contributed by Vinayakumar B)
HDFS-11224. Lifeline message should be ignored for dead nodes (Contributed by Vinayakumar B)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/d1d4aba7
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/d1d4aba7
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/d1d4aba7
Branch: refs/heads/YARN-5085
Commit: d1d4aba71b21871140b162583a4b94ce118e1fb3
Parents: 7d8e440
Author: Vinayakumar B <vi...@apache.org>
Authored: Fri Dec 9 14:53:50 2016 +0530
Committer: Vinayakumar B <vi...@apache.org>
Committed: Fri Dec 9 14:53:50 2016 +0530
----------------------------------------------------------------------
.../blockmanagement/DatanodeDescriptor.java | 5 +++
.../server/blockmanagement/DatanodeManager.java | 8 ++---
.../hdfs/server/datanode/BPServiceActor.java | 7 ++++-
.../blockmanagement/BlockManagerTestUtil.java | 4 ++-
.../server/datanode/TestDataNodeLifeline.java | 33 ++++++++++++++++++--
5 files changed, 49 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d1d4aba7/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
index f7da52a..320c680 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
@@ -281,6 +281,11 @@ public class DatanodeDescriptor extends DatanodeInfo {
}
@VisibleForTesting
+ public boolean isHeartbeatedSinceRegistration() {
+ return heartbeatedSinceRegistration;
+ }
+
+ @VisibleForTesting
public DatanodeStorageInfo getStorageInfo(String storageID) {
synchronized (storageMap) {
return storageMap.get(storageID);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d1d4aba7/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
index 6477d5c..cc64a04 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
@@ -1661,10 +1661,10 @@ public class DatanodeManager {
LOG.debug("Received handleLifeline from nodeReg = " + nodeReg);
}
DatanodeDescriptor nodeinfo = getDatanode(nodeReg);
- if (nodeinfo == null) {
- // This is null if the DataNode has not yet registered. We expect this
- // will never happen, because the DataNode has logic to prevent sending
- // lifeline messages until after initial registration is successful.
+ if (nodeinfo == null || !nodeinfo.isRegistered()) {
+ // This can happen if the lifeline message comes when DataNode is either
+ // not registered at all or its marked dead at NameNode and expectes
+ // re-registration. Ignore lifeline messages without registration.
// Lifeline message handling can't send commands back to the DataNode to
// tell it to register, so simply exit.
return;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d1d4aba7/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
index f3247fc..bb2b792 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
@@ -504,7 +504,12 @@ class BPServiceActor implements Runnable {
volumeFailureSummary,
requestBlockReportLease);
}
-
+
+ @VisibleForTesting
+ void sendLifelineForTests() throws IOException {
+ lifelineSender.sendLifeline();
+ }
+
//This must be called only by BPOfferService
void start() {
if ((bpThread != null) && (bpThread.isAlive())) {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d1d4aba7/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
index 1c7442a..9dd1447 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
@@ -214,7 +214,9 @@ public class BlockManagerTestUtil {
* @param bm the BlockManager to manipulate
*/
public static void checkHeartbeat(BlockManager bm) {
- bm.getDatanodeManager().getHeartbeatManager().heartbeatCheck();
+ HeartbeatManager hbm = bm.getDatanodeManager().getHeartbeatManager();
+ hbm.restartHeartbeatStopWatch();
+ hbm.heartbeatCheck();
}
/**
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d1d4aba7/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeLifeline.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeLifeline.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeLifeline.java
index fd66115..df2fe5a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeLifeline.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeLifeline.java
@@ -45,6 +45,7 @@ import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocolPB.DatanodeLifelineProtocolClientSideTranslatorPB;
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
@@ -90,6 +91,8 @@ public class TestDataNodeLifeline {
private DataNodeMetrics metrics;
private DatanodeProtocolClientSideTranslatorPB namenode;
private FSNamesystem namesystem;
+ private DataNode dn;
+ private BPServiceActor bpsa;
@Before
public void setup() throws Exception {
@@ -106,7 +109,7 @@ public class TestDataNodeLifeline {
namesystem = cluster.getNameNode().getNamesystem();
// Set up spies on RPC proxies so that we can inject failures.
- DataNode dn = cluster.getDataNodes().get(0);
+ dn = cluster.getDataNodes().get(0);
metrics = dn.getMetrics();
assertNotNull(metrics);
List<BPOfferService> allBpos = dn.getAllBpOs();
@@ -118,7 +121,7 @@ public class TestDataNodeLifeline {
assertNotNull(allBpsa);
assertEquals(1, allBpsa.size());
- final BPServiceActor bpsa = allBpsa.get(0);
+ bpsa = allBpsa.get(0);
assertNotNull(bpsa);
// Lifeline RPC proxy gets created on separate thread, so poll until found.
@@ -257,6 +260,32 @@ public class TestDataNodeLifeline {
getLongCounter("LifelinesNumOps", getMetrics(metrics.name())));
}
+ @Test
+ public void testLifelineForDeadNode() throws Exception {
+ long initialCapacity = cluster.getNamesystem(0).getCapacityTotal();
+ assertTrue(initialCapacity > 0);
+ dn.setHeartbeatsDisabledForTests(true);
+ cluster.setDataNodesDead();
+ assertEquals("Capacity should be 0 after all DNs dead", 0, cluster
+ .getNamesystem(0).getCapacityTotal());
+ bpsa.sendLifelineForTests();
+ assertEquals("Lifeline should be ignored for dead node", 0, cluster
+ .getNamesystem(0).getCapacityTotal());
+ // Wait for re-registration and heartbeat
+ dn.setHeartbeatsDisabledForTests(false);
+ final DatanodeDescriptor dnDesc = cluster.getNamesystem(0).getBlockManager()
+ .getDatanodeManager().getDatanodes().iterator().next();
+ GenericTestUtils.waitFor(new Supplier<Boolean>() {
+
+ @Override
+ public Boolean get() {
+ return dnDesc.isAlive() && dnDesc.isHeartbeatedSinceRegistration();
+ }
+ }, 100, 5000);
+ assertEquals("Capacity should include only live capacity", initialCapacity,
+ cluster.getNamesystem(0).getCapacityTotal());
+ }
+
/**
* Waits on a {@link CountDownLatch} before calling through to the method.
*/
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org