You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by wa...@apache.org on 2015/04/09 00:52:40 UTC
hadoop git commit: HDFS-7725. Incorrect 'nodes in service' metrics
caused all writes to fail. Contributed by Ming Ma.
Repository: hadoop
Updated Branches:
refs/heads/trunk a42bb1cd9 -> 6af0d74a7
HDFS-7725. Incorrect 'nodes in service' metrics caused all writes to fail. Contributed by Ming Ma.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/6af0d74a
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/6af0d74a
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/6af0d74a
Branch: refs/heads/trunk
Commit: 6af0d74a75f0f58d5e92e2e91e87735b9a62bb12
Parents: a42bb1c
Author: Andrew Wang <wa...@apache.org>
Authored: Wed Apr 8 15:52:06 2015 -0700
Committer: Andrew Wang <wa...@apache.org>
Committed: Wed Apr 8 15:52:06 2015 -0700
----------------------------------------------------------------------
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++
.../blockmanagement/DecommissionManager.java | 28 +++++++++----------
.../blockmanagement/HeartbeatManager.java | 29 ++++++++++++++------
.../namenode/TestNamenodeCapacityReport.java | 5 ++++
4 files changed, 41 insertions(+), 24 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/6af0d74a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 852006d..95c6912 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -441,6 +441,9 @@ Release 2.8.0 - UNRELEASED
HDFS-5215. dfs.datanode.du.reserved is not considered while computing
available space ( Brahma Reddy Battula via Yongjun Zhang)
+ HDFS-7725. Incorrect "nodes in service" metrics caused all writes to fail.
+ (Ming Ma via wang)
+
Release 2.7.0 - UNRELEASED
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/6af0d74a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java
index 9355329..7f3d778 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java
@@ -197,23 +197,21 @@ public class DecommissionManager {
*/
@VisibleForTesting
public void startDecommission(DatanodeDescriptor node) {
- if (!node.isDecommissionInProgress()) {
- if (!node.isAlive) {
- LOG.info("Dead node {} is decommissioned immediately.", node);
- node.setDecommissioned();
- } else if (!node.isDecommissioned()) {
+ if (!node.isDecommissionInProgress() && !node.isDecommissioned()) {
+ // Update DN stats maintained by HeartbeatManager
+ hbManager.startDecommission(node);
+ // hbManager.startDecommission will set dead node to decommissioned.
+ if (node.isDecommissionInProgress()) {
for (DatanodeStorageInfo storage : node.getStorageInfos()) {
- LOG.info("Starting decommission of {} {} with {} blocks",
+ LOG.info("Starting decommission of {} {} with {} blocks",
node, storage, storage.numBlocks());
}
- // Update DN stats maintained by HeartbeatManager
- hbManager.startDecommission(node);
node.decommissioningStatus.setStartTime(monotonicNow());
pendingNodes.add(node);
}
} else {
- LOG.trace("startDecommission: Node {} is already decommission in "
- + "progress, nothing to do.", node);
+ LOG.trace("startDecommission: Node {} in {}, nothing to do.",
+ node, node.getAdminState());
}
}
@@ -221,12 +219,12 @@ public class DecommissionManager {
* Stop decommissioning the specified datanode.
* @param node
*/
- void stopDecommission(DatanodeDescriptor node) {
+ @VisibleForTesting
+ public void stopDecommission(DatanodeDescriptor node) {
if (node.isDecommissionInProgress() || node.isDecommissioned()) {
- LOG.info("Stopping decommissioning of node {}", node);
// Update DN stats maintained by HeartbeatManager
hbManager.stopDecommission(node);
- // Over-replicated blocks will be detected and processed when
+ // Over-replicated blocks will be detected and processed when
// the dead node comes back and send in its full block report.
if (node.isAlive) {
blockManager.processOverReplicatedBlocksOnReCommission(node);
@@ -235,8 +233,8 @@ public class DecommissionManager {
pendingNodes.remove(node);
decomNodeBlocks.remove(node);
} else {
- LOG.trace("stopDecommission: Node {} is not decommission in progress " +
- "or decommissioned, nothing to do.", node);
+ LOG.trace("stopDecommission: Node {} in {}, nothing to do.",
+ node, node.getAdminState());
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/6af0d74a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
index d2905a2..b0ab315 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
@@ -20,8 +20,6 @@ package org.apache.hadoop.hdfs.server.blockmanagement;
import java.util.ArrayList;
import java.util.List;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
@@ -31,6 +29,8 @@ import org.apache.hadoop.hdfs.server.protocol.StorageReport;
import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import org.apache.hadoop.util.Daemon;
import org.apache.hadoop.util.Time;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Manage the heartbeats received from datanodes.
@@ -38,7 +38,7 @@ import org.apache.hadoop.util.Time;
* by the heartbeat manager lock.
*/
class HeartbeatManager implements DatanodeStatistics {
- static final Log LOG = LogFactory.getLog(HeartbeatManager.class);
+ static final Logger LOG = LoggerFactory.getLogger(HeartbeatManager.class);
/**
* Stores a subset of the datanodeMap in DatanodeManager,
@@ -227,15 +227,26 @@ class HeartbeatManager implements DatanodeStatistics {
}
synchronized void startDecommission(final DatanodeDescriptor node) {
- stats.subtract(node);
- node.startDecommission();
- stats.add(node);
+ if (!node.isAlive) {
+ LOG.info("Dead node {} is decommissioned immediately.", node);
+ node.setDecommissioned();
+ } else {
+ stats.subtract(node);
+ node.startDecommission();
+ stats.add(node);
+ }
}
synchronized void stopDecommission(final DatanodeDescriptor node) {
- stats.subtract(node);
- node.stopDecommission();
- stats.add(node);
+ LOG.info("Stopping decommissioning of {} node {}",
+ node.isAlive ? "live" : "dead", node);
+ if (!node.isAlive) {
+ node.stopDecommission();
+ } else {
+ stats.subtract(node);
+ node.stopDecommission();
+ stats.add(node);
+ }
}
/**
http://git-wip-us.apache.org/repos/asf/hadoop/blob/6af0d74a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java
index fd611ce..6f54722 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java
@@ -202,9 +202,14 @@ public class TestNamenodeCapacityReport {
dn.shutdown();
DFSTestUtil.setDatanodeDead(dnd);
BlockManagerTestUtil.checkHeartbeat(namesystem.getBlockManager());
+ //Verify decommission of dead node won't impact nodesInService metrics.
+ dnm.getDecomManager().startDecommission(dnd);
expectedInServiceNodes--;
assertEquals(expectedInServiceNodes, namesystem.getNumLiveDataNodes());
assertEquals(expectedInServiceNodes, getNumDNInService(namesystem));
+ //Verify recommission of dead node won't impact nodesInService metrics.
+ dnm.getDecomManager().stopDecommission(dnd);
+ assertEquals(expectedInServiceNodes, getNumDNInService(namesystem));
}
// restart the nodes to verify that counts are correct after