Posted to common-commits@hadoop.apache.org by cn...@apache.org on 2015/02/17 00:04:54 UTC
[4/4] hadoop git commit: HDFS-7604. Track and display failed DataNode storage locations in NameNode. Contributed by Chris Nauroth.
(cherry picked from commit 9729b244de50322c2cc889c97c2ffb2b4675cf77)
Conflicts:
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/441dfa48
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/441dfa48
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/441dfa48
Branch: refs/heads/branch-2
Commit: 441dfa4867d832e1f9839aecc027c89fea519f75
Parents: 1d91daa
Author: cnauroth <cn...@apache.org>
Authored: Mon Feb 16 14:43:02 2015 -0800
Committer: cnauroth <cn...@apache.org>
Committed: Mon Feb 16 14:44:49 2015 -0800
----------------------------------------------------------------------
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +
.../DatanodeProtocolClientSideTranslatorPB.java | 9 +-
.../DatanodeProtocolServerSideTranslatorPB.java | 7 +-
.../apache/hadoop/hdfs/protocolPB/PBHelper.java | 25 ++
.../blockmanagement/DatanodeDescriptor.java | 39 +-
.../server/blockmanagement/DatanodeManager.java | 7 +-
.../blockmanagement/HeartbeatManager.java | 8 +-
.../hdfs/server/datanode/BPServiceActor.java | 10 +-
.../server/datanode/fsdataset/FsDatasetSpi.java | 8 +
.../datanode/fsdataset/impl/FsDatasetImpl.java | 108 ++++-
.../datanode/fsdataset/impl/FsVolumeList.java | 41 +-
.../fsdataset/impl/VolumeFailureInfo.java | 82 ++++
.../server/datanode/metrics/FSDatasetMBean.java | 19 +
.../hdfs/server/namenode/FSNamesystem.java | 52 ++-
.../hdfs/server/namenode/NameNodeRpcServer.java | 6 +-
.../namenode/metrics/FSNamesystemMBean.java | 13 +
.../hdfs/server/protocol/DatanodeProtocol.java | 5 +-
.../server/protocol/VolumeFailureSummary.java | 72 ++++
.../src/main/proto/DatanodeProtocol.proto | 17 +-
.../src/main/webapps/hdfs/dfshealth.html | 33 ++
.../src/main/webapps/hdfs/dfshealth.js | 43 ++
.../blockmanagement/TestBlockManager.java | 3 +-
.../TestNameNodePrunesMissingStorages.java | 4 +-
.../TestOverReplicatedBlocks.java | 2 +-
.../blockmanagement/TestReplicationPolicy.java | 2 +-
.../TestReplicationPolicyConsiderLoad.java | 8 +-
.../TestReplicationPolicyWithNodeGroup.java | 2 +-
.../server/datanode/SimulatedFSDataset.java | 22 +
.../server/datanode/TestBPOfferService.java | 4 +-
.../hdfs/server/datanode/TestBlockRecovery.java | 4 +-
.../TestDataNodeVolumeFailureReporting.java | 421 +++++++++++++++++--
.../server/datanode/TestFsDatasetCache.java | 3 +-
.../hdfs/server/datanode/TestStorageReport.java | 4 +-
.../extdataset/ExternalDatasetImpl.java | 21 +
.../fsdataset/impl/TestFsDatasetImpl.java | 6 +-
.../fsdataset/impl/TestFsVolumeList.java | 7 +-
.../server/namenode/NNThroughputBenchmark.java | 4 +-
.../hdfs/server/namenode/NameNodeAdapter.java | 2 +-
.../hdfs/server/namenode/TestDeadDatanode.java | 4 +-
39 files changed, 1023 insertions(+), 107 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index a3d6b8e..6391b34 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -338,6 +338,9 @@ Release 2.7.0 - UNRELEASED
HDFS-7430. Refactor the BlockScanner to use O(1) memory and use multiple
threads (cmccabe)
+ HDFS-7604. Track and display failed DataNode storage locations in NameNode.
+ (cnauroth)
+
OPTIMIZATIONS
HDFS-7454. Reduce memory footprint for AclEntries in NameNode.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java
index 46023ec..192916f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java
@@ -55,6 +55,7 @@ import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo;
import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
+import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import org.apache.hadoop.ipc.ProtobufHelper;
import org.apache.hadoop.ipc.ProtobufRpcEngine;
import org.apache.hadoop.ipc.ProtocolMetaInterface;
@@ -121,8 +122,8 @@ public class DatanodeProtocolClientSideTranslatorPB implements
@Override
public HeartbeatResponse sendHeartbeat(DatanodeRegistration registration,
StorageReport[] reports, long cacheCapacity, long cacheUsed,
- int xmitsInProgress, int xceiverCount, int failedVolumes)
- throws IOException {
+ int xmitsInProgress, int xceiverCount, int failedVolumes,
+ VolumeFailureSummary volumeFailureSummary) throws IOException {
HeartbeatRequestProto.Builder builder = HeartbeatRequestProto.newBuilder()
.setRegistration(PBHelper.convert(registration))
.setXmitsInProgress(xmitsInProgress).setXceiverCount(xceiverCount)
@@ -134,6 +135,10 @@ public class DatanodeProtocolClientSideTranslatorPB implements
if (cacheUsed != 0) {
builder.setCacheUsed(cacheUsed);
}
+ if (volumeFailureSummary != null) {
+ builder.setVolumeFailureSummary(PBHelper.convertVolumeFailureSummary(
+ volumeFailureSummary));
+ }
HeartbeatResponseProto resp;
try {
resp = rpcProxy.sendHeartbeat(NULL_CONTROLLER, builder.build());
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java
index d016735..1a89090 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java
@@ -56,6 +56,7 @@ import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo;
import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
+import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import com.google.protobuf.RpcController;
import com.google.protobuf.ServiceException;
@@ -104,10 +105,14 @@ public class DatanodeProtocolServerSideTranslatorPB implements
try {
final StorageReport[] report = PBHelper.convertStorageReports(
request.getReportsList());
+ VolumeFailureSummary volumeFailureSummary =
+ request.hasVolumeFailureSummary() ? PBHelper.convertVolumeFailureSummary(
+ request.getVolumeFailureSummary()) : null;
response = impl.sendHeartbeat(PBHelper.convert(request.getRegistration()),
report, request.getCacheCapacity(), request.getCacheUsed(),
request.getXmitsInProgress(),
- request.getXceiverCount(), request.getFailedVolumes());
+ request.getXceiverCount(), request.getFailedVolumes(),
+ volumeFailureSummary);
} catch (IOException e) {
throw new ServiceException(e);
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
index ccee06a..fa90040 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
@@ -122,6 +122,7 @@ import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.KeyUpdateCom
import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.NNHAStatusHeartbeatProto;
import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.ReceivedDeletedBlockInfoProto;
import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.RegisterCommandProto;
+import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.VolumeFailureSummaryProto;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockKeyProto;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto;
@@ -216,6 +217,7 @@ import org.apache.hadoop.hdfs.server.protocol.RegisterCommand;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
+import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.ShmId;
import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.SlotId;
import org.apache.hadoop.hdfs.util.ExactSizeInputStream;
@@ -1902,6 +1904,29 @@ public class PBHelper {
return protos;
}
+ public static VolumeFailureSummary convertVolumeFailureSummary(
+ VolumeFailureSummaryProto proto) {
+ List<String> failedStorageLocations = proto.getFailedStorageLocationsList();
+ return new VolumeFailureSummary(
+ failedStorageLocations.toArray(new String[failedStorageLocations.size()]),
+ proto.getLastVolumeFailureDate(), proto.getEstimatedCapacityLostTotal());
+ }
+
+ public static VolumeFailureSummaryProto convertVolumeFailureSummary(
+ VolumeFailureSummary volumeFailureSummary) {
+ VolumeFailureSummaryProto.Builder builder =
+ VolumeFailureSummaryProto.newBuilder();
+ for (String failedStorageLocation:
+ volumeFailureSummary.getFailedStorageLocations()) {
+ builder.addFailedStorageLocations(failedStorageLocation);
+ }
+ builder.setLastVolumeFailureDate(
+ volumeFailureSummary.getLastVolumeFailureDate());
+ builder.setEstimatedCapacityLostTotal(
+ volumeFailureSummary.getEstimatedCapacityLostTotal());
+ return builder.build();
+ }
+
public static JournalInfo convert(JournalInfoProto info) {
int lv = info.hasLayoutVersion() ? info.getLayoutVersion() : 0;
int nsID = info.hasNamespaceID() ? info.getNamespaceID() : 0;
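These two converters are inverses. Below is a minimal round-trip sketch, assuming the protoc-generated DatanodeProtocolProtos classes and the VolumeFailureSummary class added later in this patch are on the classpath; the /data paths and sizes are hypothetical:

import java.util.Arrays;

import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.VolumeFailureSummaryProto;
import org.apache.hadoop.hdfs.protocolPB.PBHelper;
import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;

public class VolumeFailureSummaryRoundTrip {
  public static void main(String[] args) {
    // The plain Java summary a DataNode would report in a heartbeat.
    VolumeFailureSummary summary = new VolumeFailureSummary(
        new String[] { "/data/1", "/data/2" },  // hypothetical failed locations
        System.currentTimeMillis(),             // last failure date, ms since epoch
        2L * 1024 * 1024 * 1024);               // estimated capacity lost: 2 GB

    // To the wire form and back, exactly as the PB translators above do.
    VolumeFailureSummaryProto proto =
        PBHelper.convertVolumeFailureSummary(summary);
    VolumeFailureSummary back = PBHelper.convertVolumeFailureSummary(proto);

    // The round trip preserves every field.
    System.out.println(Arrays.equals(summary.getFailedStorageLocations(),
        back.getFailedStorageLocations()));                          // true
    System.out.println(summary.getLastVolumeFailureDate()
        == back.getLastVolumeFailureDate());                         // true
    System.out.println(summary.getEstimatedCapacityLostTotal()
        == back.getEstimatedCapacityLostTotal());                    // true
  }
}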
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
index c854915..d282f17 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
@@ -42,6 +42,7 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.server.namenode.CachedBlock;
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
+import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import org.apache.hadoop.hdfs.util.EnumCounters;
import org.apache.hadoop.hdfs.util.LightWeightHashSet;
import org.apache.hadoop.util.IntrusiveCollection;
@@ -216,6 +217,7 @@ public class DatanodeDescriptor extends DatanodeInfo {
private long lastBlocksScheduledRollTime = 0;
private static final int BLOCKS_SCHEDULED_ROLL_INTERVAL = 600*1000; //10min
private int volumeFailures = 0;
+ private VolumeFailureSummary volumeFailureSummary = null;
/**
* When set to true, the node is not in include list and is not allowed
@@ -235,7 +237,7 @@ public class DatanodeDescriptor extends DatanodeInfo {
*/
public DatanodeDescriptor(DatanodeID nodeID) {
super(nodeID);
- updateHeartbeatState(StorageReport.EMPTY_ARRAY, 0L, 0L, 0, 0);
+ updateHeartbeatState(StorageReport.EMPTY_ARRAY, 0L, 0L, 0, 0, null);
}
/**
@@ -246,7 +248,7 @@ public class DatanodeDescriptor extends DatanodeInfo {
public DatanodeDescriptor(DatanodeID nodeID,
String networkLocation) {
super(nodeID, networkLocation);
- updateHeartbeatState(StorageReport.EMPTY_ARRAY, 0L, 0L, 0, 0);
+ updateHeartbeatState(StorageReport.EMPTY_ARRAY, 0L, 0L, 0, 0, null);
}
@VisibleForTesting
@@ -347,9 +349,10 @@ public class DatanodeDescriptor extends DatanodeInfo {
* Updates stats from datanode heartbeat.
*/
public void updateHeartbeat(StorageReport[] reports, long cacheCapacity,
- long cacheUsed, int xceiverCount, int volFailures) {
+ long cacheUsed, int xceiverCount, int volFailures,
+ VolumeFailureSummary volumeFailureSummary) {
updateHeartbeatState(reports, cacheCapacity, cacheUsed, xceiverCount,
- volFailures);
+ volFailures, volumeFailureSummary);
heartbeatedSinceRegistration = true;
}
@@ -357,7 +360,8 @@ public class DatanodeDescriptor extends DatanodeInfo {
* process datanode heartbeat or stats initialization.
*/
public void updateHeartbeatState(StorageReport[] reports, long cacheCapacity,
- long cacheUsed, int xceiverCount, int volFailures) {
+ long cacheUsed, int xceiverCount, int volFailures,
+ VolumeFailureSummary volumeFailureSummary) {
long totalCapacity = 0;
long totalRemaining = 0;
long totalBlockPoolUsed = 0;
@@ -372,7 +376,10 @@ public class DatanodeDescriptor extends DatanodeInfo {
// during the current DN registration session.
// When volumeFailures == this.volumeFailures, it implies there is no
// state change. No need to check for failed storage. This is an
- // optimization.
+ // optimization. Recent versions of the DataNode report a
+ // VolumeFailureSummary containing the date/time of the last volume
+ // failure. If that's available, then we check that instead for greater
+ // accuracy.
// 2. After DN restarts, volFailures might not increase and it is possible
// we still have new failed storage. For example, admins reduce
// available storages in configuration. Another corner case
@@ -381,8 +388,14 @@ public class DatanodeDescriptor extends DatanodeInfo {
// one element in storageReports and that is A. b) A failed. c) Before
// DN sends HB to NN to indicate A has failed, DN restarts. d) After DN
// restarts, storageReports has one element which is B.
- boolean checkFailedStorages = (volFailures > this.volumeFailures) ||
- !heartbeatedSinceRegistration;
+ final boolean checkFailedStorages;
+ if (volumeFailureSummary != null && this.volumeFailureSummary != null) {
+ checkFailedStorages = volumeFailureSummary.getLastVolumeFailureDate() >
+ this.volumeFailureSummary.getLastVolumeFailureDate();
+ } else {
+ checkFailedStorages = (volFailures > this.volumeFailures) ||
+ !heartbeatedSinceRegistration;
+ }
if (checkFailedStorages) {
LOG.info("Number of failed storage changes from "
@@ -396,6 +409,7 @@ public class DatanodeDescriptor extends DatanodeInfo {
setXceiverCount(xceiverCount);
setLastUpdate(Time.now());
this.volumeFailures = volFailures;
+ this.volumeFailureSummary = volumeFailureSummary;
for (StorageReport report : reports) {
DatanodeStorageInfo storage = updateStorage(report.getStorage());
if (checkFailedStorages) {
@@ -731,6 +745,15 @@ public class DatanodeDescriptor extends DatanodeInfo {
}
/**
+ * Returns info about volume failures.
+ *
+ * @return info about volume failures, possibly null
+ */
+ public VolumeFailureSummary getVolumeFailureSummary() {
+ return volumeFailureSummary;
+ }
+
+ /**
* @param nodeReg DatanodeID to update registration for.
*/
@Override
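The branching in updateHeartbeatState above is the heart of the change: when both the previous and the incoming heartbeat carry a VolumeFailureSummary, a newer last-failure timestamp is a more precise trigger than a raw failure count. A standalone restatement of that decision, for illustration only (not the committed code):

static boolean shouldCheckFailedStorages(
    VolumeFailureSummary incoming, VolumeFailureSummary previous,
    int incomingVolFailures, int previousVolFailures,
    boolean heartbeatedSinceRegistration) {
  if (incoming != null && previous != null) {
    // Both heartbeats carried a summary: re-check only when a strictly
    // newer failure has been reported.
    return incoming.getLastVolumeFailureDate()
        > previous.getLastVolumeFailureDate();
  }
  // Fallback for older DataNodes: a rising failure count, or the first
  // heartbeat after (re-)registration, forces a re-check.
  return incomingVolFailures > previousVolFailures
      || !heartbeatedSinceRegistration;
}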
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
index 329fec3..ec55e4b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
@@ -1386,8 +1386,8 @@ public class DatanodeManager {
public DatanodeCommand[] handleHeartbeat(DatanodeRegistration nodeReg,
StorageReport[] reports, final String blockPoolId,
long cacheCapacity, long cacheUsed, int xceiverCount,
- int maxTransfers, int failedVolumes
- ) throws IOException {
+ int maxTransfers, int failedVolumes,
+ VolumeFailureSummary volumeFailureSummary) throws IOException {
synchronized (heartbeatManager) {
synchronized (datanodeMap) {
DatanodeDescriptor nodeinfo = null;
@@ -1409,7 +1409,8 @@ public class DatanodeManager {
heartbeatManager.updateHeartbeat(nodeinfo, reports,
cacheCapacity, cacheUsed,
- xceiverCount, failedVolumes);
+ xceiverCount, failedVolumes,
+ volumeFailureSummary);
// If we are in safemode, do not send back any recovery / replication
// requests. Don't even drain the existing queue of work.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
index aa4f2f8..d60a39b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.server.namenode.Namesystem;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
+import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import org.apache.hadoop.util.Daemon;
import org.apache.hadoop.util.Time;
@@ -192,7 +193,7 @@ class HeartbeatManager implements DatanodeStatistics {
addDatanode(d);
//update its timestamp
- d.updateHeartbeatState(StorageReport.EMPTY_ARRAY, 0L, 0L, 0, 0);
+ d.updateHeartbeatState(StorageReport.EMPTY_ARRAY, 0L, 0L, 0, 0, null);
}
}
@@ -217,10 +218,11 @@ class HeartbeatManager implements DatanodeStatistics {
synchronized void updateHeartbeat(final DatanodeDescriptor node,
StorageReport[] reports, long cacheCapacity, long cacheUsed,
- int xceiverCount, int failedVolumes) {
+ int xceiverCount, int failedVolumes,
+ VolumeFailureSummary volumeFailureSummary) {
stats.subtract(node);
node.updateHeartbeat(reports, cacheCapacity, cacheUsed,
- xceiverCount, failedVolumes);
+ xceiverCount, failedVolumes, volumeFailureSummary);
stats.add(node);
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
index 3703b5c..ff1ad78 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
@@ -54,6 +54,7 @@ import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo;
import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
+import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.util.Time;
@@ -579,14 +580,19 @@ class BPServiceActor implements Runnable {
LOG.debug("Sending heartbeat with " + reports.length +
" storage reports from service actor: " + this);
}
-
+
+ VolumeFailureSummary volumeFailureSummary = dn.getFSDataset()
+ .getVolumeFailureSummary();
+ int numFailedVolumes = volumeFailureSummary != null ?
+ volumeFailureSummary.getFailedStorageLocations().length : 0;
return bpNamenode.sendHeartbeat(bpRegistration,
reports,
dn.getFSDataset().getCacheCapacity(),
dn.getFSDataset().getCacheUsed(),
dn.getXmitsInProgress(),
dn.getXceiverCount(),
- dn.getFSDataset().getNumFailedVolumes());
+ numFailedVolumes,
+ volumeFailureSummary);
}
//This must be called only by BPOfferService
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java
index c554bc39..ca79091 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java
@@ -56,6 +56,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
+import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
import org.apache.hadoop.util.ReflectionUtils;
@@ -129,6 +130,13 @@ public interface FsDatasetSpi<V extends FsVolumeSpi> extends FSDatasetMBean {
/** @return a volume information map (name => info). */
public Map<String, Object> getVolumeInfoMap();
+ /**
+ * Returns info about volume failures.
+ *
+ * @return info about volume failures, possibly null
+ */
+ VolumeFailureSummary getVolumeFailureSummary();
+
/** @return a list of finalized blocks for the given block pool. */
public List<FinalizedReplica> getFinalizedBlocks(String bpid);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
index 082fb9a..e8c6873 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
@@ -101,6 +101,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
+import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.MultipleIOException;
import org.apache.hadoop.io.nativeio.NativeIO;
@@ -114,6 +115,7 @@ import org.apache.hadoop.util.Time;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
/**************************************************
* FSDataset manages a set of data blocks. Each block
@@ -266,9 +268,11 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
String[] dataDirs = conf.getTrimmedStrings(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);
Collection<StorageLocation> dataLocations = DataNode.getStorageLocations(conf);
+ List<VolumeFailureInfo> volumeFailureInfos = getInitialVolumeFailureInfos(
+ dataLocations, storage);
int volsConfigured = (dataDirs == null) ? 0 : dataDirs.length;
- int volsFailed = volsConfigured - storage.getNumStorageDirs();
+ int volsFailed = volumeFailureInfos.size();
this.validVolsRequired = volsConfigured - volFailuresTolerated;
if (volFailuresTolerated < 0 || volFailuresTolerated >= volsConfigured) {
@@ -293,7 +297,7 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
DFSConfigKeys.DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_POLICY_KEY,
RoundRobinVolumeChoosingPolicy.class,
VolumeChoosingPolicy.class), conf);
- volumes = new FsVolumeList(volsFailed, datanode.getBlockScanner(),
+ volumes = new FsVolumeList(volumeFailureInfos, datanode.getBlockScanner(),
blockChooserImpl);
asyncDiskService = new FsDatasetAsyncDiskService(datanode);
asyncLazyPersistService = new RamDiskAsyncLazyPersistService(datanode);
@@ -315,6 +319,36 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
DFSConfigKeys.DFS_DATANODE_BLOCK_PINNING_ENABLED_DEFAULT);
}
+ /**
+ * Gets initial volume failure information for all volumes that failed
+ * immediately at startup. The method works by determining the set difference
+ * between all configured storage locations and the actual storage locations in
+ * use after attempting to put all of them into service.
+ *
+ * @return each storage location that has failed
+ */
+ private static List<VolumeFailureInfo> getInitialVolumeFailureInfos(
+ Collection<StorageLocation> dataLocations, DataStorage storage) {
+ Set<String> failedLocationSet = Sets.newHashSetWithExpectedSize(
+ dataLocations.size());
+ for (StorageLocation sl: dataLocations) {
+ failedLocationSet.add(sl.getFile().getAbsolutePath());
+ }
+ for (Iterator<Storage.StorageDirectory> it = storage.dirIterator();
+ it.hasNext(); ) {
+ Storage.StorageDirectory sd = it.next();
+ failedLocationSet.remove(sd.getRoot().getAbsolutePath());
+ }
+ List<VolumeFailureInfo> volumeFailureInfos = Lists.newArrayListWithCapacity(
+ failedLocationSet.size());
+ long failureDate = Time.now();
+ for (String failedStorageLocation: failedLocationSet) {
+ volumeFailureInfos.add(new VolumeFailureInfo(failedStorageLocation,
+ failureDate));
+ }
+ return volumeFailureInfos;
+ }
+
private void addVolume(Collection<StorageLocation> dataLocations,
Storage.StorageDirectory sd) throws IOException {
final File dir = sd.getCurrentDir();
@@ -350,8 +384,14 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
final File dir = location.getFile();
// Prepare volume in DataStorage
- DataStorage.VolumeBuilder builder =
- dataStorage.prepareVolume(datanode, location.getFile(), nsInfos);
+ final DataStorage.VolumeBuilder builder;
+ try {
+ builder = dataStorage.prepareVolume(datanode, location.getFile(), nsInfos);
+ } catch (IOException e) {
+ volumes.addVolumeFailureInfo(new VolumeFailureInfo(
+ location.getFile().getAbsolutePath(), Time.now()));
+ throw e;
+ }
final Storage.StorageDirectory sd = builder.getStorageDirectory();
@@ -500,9 +540,65 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
/**
* Return the number of failed volumes in the FSDataset.
*/
- @Override
+ @Override // FSDatasetMBean
public int getNumFailedVolumes() {
- return volumes.numberOfFailedVolumes();
+ return volumes.getVolumeFailureInfos().length;
+ }
+
+ @Override // FSDatasetMBean
+ public String[] getFailedStorageLocations() {
+ VolumeFailureInfo[] infos = volumes.getVolumeFailureInfos();
+ List<String> failedStorageLocations = Lists.newArrayListWithCapacity(
+ infos.length);
+ for (VolumeFailureInfo info: infos) {
+ failedStorageLocations.add(info.getFailedStorageLocation());
+ }
+ return failedStorageLocations.toArray(
+ new String[failedStorageLocations.size()]);
+ }
+
+ @Override // FSDatasetMBean
+ public long getLastVolumeFailureDate() {
+ long lastVolumeFailureDate = 0;
+ for (VolumeFailureInfo info: volumes.getVolumeFailureInfos()) {
+ long failureDate = info.getFailureDate();
+ if (failureDate > lastVolumeFailureDate) {
+ lastVolumeFailureDate = failureDate;
+ }
+ }
+ return lastVolumeFailureDate;
+ }
+
+ @Override // FSDatasetMBean
+ public long getEstimatedCapacityLostTotal() {
+ long estimatedCapacityLostTotal = 0;
+ for (VolumeFailureInfo info: volumes.getVolumeFailureInfos()) {
+ estimatedCapacityLostTotal += info.getEstimatedCapacityLost();
+ }
+ return estimatedCapacityLostTotal;
+ }
+
+ @Override // FsDatasetSpi
+ public VolumeFailureSummary getVolumeFailureSummary() {
+ VolumeFailureInfo[] infos = volumes.getVolumeFailureInfos();
+ if (infos.length == 0) {
+ return null;
+ }
+ List<String> failedStorageLocations = Lists.newArrayListWithCapacity(
+ infos.length);
+ long lastVolumeFailureDate = 0;
+ long estimatedCapacityLostTotal = 0;
+ for (VolumeFailureInfo info: infos) {
+ failedStorageLocations.add(info.getFailedStorageLocation());
+ long failureDate = info.getFailureDate();
+ if (failureDate > lastVolumeFailureDate) {
+ lastVolumeFailureDate = failureDate;
+ }
+ estimatedCapacityLostTotal += info.getEstimatedCapacityLost();
+ }
+ return new VolumeFailureSummary(
+ failedStorageLocations.toArray(new String[failedStorageLocations.size()]),
+ lastVolumeFailureDate, estimatedCapacityLostTotal);
}
@Override // FSDatasetMBean
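getInitialVolumeFailureInfos above is a plain set difference: everything configured, minus everything the storage layer actually brought into service. A self-contained sketch of the idea, with plain path strings standing in for StorageLocation and Storage.StorageDirectory:

import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

public class InitialFailureDetection {
  /** Configured locations that never came into service failed at startup. */
  static Set<String> failedAtStartup(List<String> configured,
      List<String> inService) {
    Set<String> failed = new TreeSet<String>(configured);
    failed.removeAll(inService);
    return failed;
  }

  public static void main(String[] args) {
    List<String> configured = Arrays.asList("/data/1", "/data/2", "/data/3");
    List<String> inService = Arrays.asList("/data/1", "/data/3");
    // Prints [/data/2]: the one volume that never made it into service.
    System.out.println(failedAtStartup(configured, inService));
  }
}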
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java
index ae2f5b4..4573172 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java
@@ -22,9 +22,12 @@ import java.io.IOException;
import java.nio.channels.ClosedChannelException;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicReference;
import com.google.common.collect.Lists;
@@ -40,21 +43,23 @@ import org.apache.hadoop.util.Time;
class FsVolumeList {
private final AtomicReference<FsVolumeImpl[]> volumes =
new AtomicReference<>(new FsVolumeImpl[0]);
+ // Tracks volume failures, sorted by volume path.
+ private final Map<String, VolumeFailureInfo> volumeFailureInfos =
+ Collections.synchronizedMap(new TreeMap<String, VolumeFailureInfo>());
private Object checkDirsMutex = new Object();
private final VolumeChoosingPolicy<FsVolumeImpl> blockChooser;
private final BlockScanner blockScanner;
- private volatile int numFailedVolumes;
- FsVolumeList(int failedVols, BlockScanner blockScanner,
+ FsVolumeList(List<VolumeFailureInfo> initialVolumeFailureInfos,
+ BlockScanner blockScanner,
VolumeChoosingPolicy<FsVolumeImpl> blockChooser) {
this.blockChooser = blockChooser;
this.blockScanner = blockScanner;
- this.numFailedVolumes = failedVols;
- }
-
- int numberOfFailedVolumes() {
- return numFailedVolumes;
+ for (VolumeFailureInfo volumeFailureInfo: initialVolumeFailureInfos) {
+ volumeFailureInfos.put(volumeFailureInfo.getFailedStorageLocation(),
+ volumeFailureInfo);
+ }
}
/**
@@ -238,7 +243,7 @@ class FsVolumeList {
}
removedVols.add(fsv);
removeVolume(fsv);
- numFailedVolumes++;
+ addVolumeFailureInfo(fsv);
} catch (ClosedChannelException e) {
FsDatasetImpl.LOG.debug("Caught exception when obtaining " +
"reference count on closed volume", e);
@@ -347,6 +352,26 @@ class FsVolumeList {
removeVolume(fsVolume);
}
}
+ removeVolumeFailureInfo(volume);
+ }
+
+ VolumeFailureInfo[] getVolumeFailureInfos() {
+ Collection<VolumeFailureInfo> infos = volumeFailureInfos.values();
+ return infos.toArray(new VolumeFailureInfo[infos.size()]);
+ }
+
+ void addVolumeFailureInfo(VolumeFailureInfo volumeFailureInfo) {
+ volumeFailureInfos.put(volumeFailureInfo.getFailedStorageLocation(),
+ volumeFailureInfo);
+ }
+
+ private void addVolumeFailureInfo(FsVolumeImpl vol) {
+ addVolumeFailureInfo(new VolumeFailureInfo(vol.getBasePath(), Time.now(),
+ vol.getCapacity()));
+ }
+
+ private void removeVolumeFailureInfo(File vol) {
+ volumeFailureInfos.remove(vol.getAbsolutePath());
}
void addBlockPool(final String bpid, final Configuration conf) throws IOException {
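Replacing the volatile counter with a synchronized TreeMap keyed on volume path is what lets getVolumeFailureInfos return per-volume detail in a stable, sorted order. A minimal JDK-only sketch of that pattern:

import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.TreeMap;

class VolumeFailureTracker {
  // TreeMap keys on the volume path, so snapshots come back sorted;
  // synchronizedMap makes individual put/remove calls thread-safe.
  private final Map<String, Long> failureDateByPath =
      Collections.synchronizedMap(new TreeMap<String, Long>());

  void recordFailure(String path, long failureDateMillis) {
    failureDateByPath.put(path, failureDateMillis);
  }

  void clearFailure(String path) {   // e.g. when a volume is removed
    failureDateByPath.remove(path);
  }

  String[] snapshotFailedPaths() {
    // Iterating a synchronized map still needs an explicit lock.
    synchronized (failureDateByPath) {
      Collection<String> paths = failureDateByPath.keySet();
      return paths.toArray(new String[paths.size()]);
    }
  }
}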
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/VolumeFailureInfo.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/VolumeFailureInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/VolumeFailureInfo.java
new file mode 100644
index 0000000..c3ce2a4
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/VolumeFailureInfo.java
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl;
+
+/**
+ * Tracks information about failure of a data volume.
+ */
+final class VolumeFailureInfo {
+ private final String failedStorageLocation;
+ private final long failureDate;
+ private final long estimatedCapacityLost;
+
+ /**
+ * Creates a new VolumeFailureInfo, when the capacity lost from this volume
+ * failure is unknown. Typically, this means the volume failed immediately at
+ * startup, so there was never a chance to query its capacity.
+ *
+ * @param failedStorageLocation storage location that has failed
+ * @param failureDate date/time of failure in milliseconds since epoch
+ */
+ public VolumeFailureInfo(String failedStorageLocation, long failureDate) {
+ this(failedStorageLocation, failureDate, 0);
+ }
+
+ /**
+ * Creates a new VolumeFailureInfo.
+ *
+ * @param failedStorageLocation storage location that has failed
+ * @param failureDate date/time of failure in milliseconds since epoch
+ * @param estimatedCapacityLost estimate of capacity lost in bytes
+ */
+ public VolumeFailureInfo(String failedStorageLocation, long failureDate,
+ long estimatedCapacityLost) {
+ this.failedStorageLocation = failedStorageLocation;
+ this.failureDate = failureDate;
+ this.estimatedCapacityLost = estimatedCapacityLost;
+ }
+
+ /**
+ * Returns the storage location that has failed.
+ *
+ * @return storage location that has failed
+ */
+ public String getFailedStorageLocation() {
+ return this.failedStorageLocation;
+ }
+
+ /**
+ * Returns date/time of failure
+ *
+ * @return date/time of failure in milliseconds since epoch
+ */
+ public long getFailureDate() {
+ return this.failureDate;
+ }
+
+ /**
+ * Returns estimate of capacity lost. This is said to be an estimate, because
+ * in some cases it's impossible to know the capacity of the volume, such as if
+ * we never had a chance to query its capacity before the failure occurred.
+ *
+ * @return estimate of capacity lost in bytes
+ */
+ public long getEstimatedCapacityLost() {
+ return this.estimatedCapacityLost;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/FSDatasetMBean.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/FSDatasetMBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/FSDatasetMBean.java
index ca8ebad..60779e8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/FSDatasetMBean.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/FSDatasetMBean.java
@@ -78,6 +78,25 @@ public interface FSDatasetMBean {
public int getNumFailedVolumes();
/**
+ * Returns each storage location that has failed, sorted.
+ * @return each storage location that has failed, sorted
+ */
+ String[] getFailedStorageLocations();
+
+ /**
+ * Returns the date/time of the last volume failure in milliseconds since
+ * epoch.
+ * @return date/time of last volume failure in milliseconds since epoch
+ */
+ long getLastVolumeFailureDate();
+
+ /**
+ * Returns an estimate of total capacity lost due to volume failures in bytes.
+ * @return estimate of total capacity lost in bytes
+ */
+ long getEstimatedCapacityLostTotal();
+
+ /**
* Returns the amount of cache used by the datanode (in bytes).
*/
public long getCacheUsed();
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 45072aa..15dec21 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -254,6 +254,7 @@ import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
+import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import org.apache.hadoop.hdfs.StorageType;
import org.apache.hadoop.io.EnumSetWritable;
import org.apache.hadoop.io.IOUtils;
@@ -4411,8 +4412,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
*/
HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg,
StorageReport[] reports, long cacheCapacity, long cacheUsed,
- int xceiverCount, int xmitsInProgress, int failedVolumes)
- throws IOException {
+ int xceiverCount, int xmitsInProgress, int failedVolumes,
+ VolumeFailureSummary volumeFailureSummary) throws IOException {
readLock();
try {
//get datanode commands
@@ -4420,7 +4421,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
- xmitsInProgress;
DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat(
nodeReg, reports, blockPoolId, cacheCapacity, cacheUsed,
- xceiverCount, maxTransfer, failedVolumes);
+ xceiverCount, maxTransfer, failedVolumes, volumeFailureSummary);
//create ha status
final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat(
@@ -5942,6 +5943,32 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
}
@Override // FSNamesystemMBean
+ public int getVolumeFailuresTotal() {
+ List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
+ getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true);
+ int volumeFailuresTotal = 0;
+ for (DatanodeDescriptor node: live) {
+ volumeFailuresTotal += node.getVolumeFailures();
+ }
+ return volumeFailuresTotal;
+ }
+
+ @Override // FSNamesystemMBean
+ public long getEstimatedCapacityLostTotal() {
+ List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
+ getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true);
+ long estimatedCapacityLostTotal = 0;
+ for (DatanodeDescriptor node: live) {
+ VolumeFailureSummary volumeFailureSummary = node.getVolumeFailureSummary();
+ if (volumeFailureSummary != null) {
+ estimatedCapacityLostTotal +=
+ volumeFailureSummary.getEstimatedCapacityLostTotal();
+ }
+ }
+ return estimatedCapacityLostTotal;
+ }
+
+ @Override // FSNamesystemMBean
public int getNumDecommissioningDataNodes() {
return getBlockManager().getDatanodeManager().getDecommissioningNodes()
.size();
@@ -6784,7 +6811,9 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
blockManager.getDatanodeManager().fetchDatanodes(live, null, true);
for (DatanodeDescriptor node : live) {
- Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
+ ImmutableMap.Builder<String, Object> innerinfo =
+ ImmutableMap.<String,Object>builder();
+ innerinfo
.put("infoAddr", node.getInfoAddr())
.put("infoSecureAddr", node.getInfoSecureAddr())
.put("xferaddr", node.getXferAddr())
@@ -6800,9 +6829,18 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
.put("blockScheduled", node.getBlocksScheduled())
.put("blockPoolUsed", node.getBlockPoolUsed())
.put("blockPoolUsedPercent", node.getBlockPoolUsedPercent())
- .put("volfails", node.getVolumeFailures())
- .build();
- info.put(node.getHostName() + ":" + node.getXferPort(), innerinfo);
+ .put("volfails", node.getVolumeFailures());
+ VolumeFailureSummary volumeFailureSummary = node.getVolumeFailureSummary();
+ if (volumeFailureSummary != null) {
+ innerinfo
+ .put("failedStorageLocations",
+ volumeFailureSummary.getFailedStorageLocations())
+ .put("lastVolumeFailureDate",
+ volumeFailureSummary.getLastVolumeFailureDate())
+ .put("estimatedCapacityLostTotal",
+ volumeFailureSummary.getEstimatedCapacityLostTotal());
+ }
+ info.put(node.getHostName() + ":" + node.getXferPort(), innerinfo.build());
}
return JSON.toString(info);
}
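Both new FSNamesystemMBean getters share one shape: iterate the live DatanodeDescriptors and sum a per-node value, treating a missing summary as zero. A condensed, self-contained illustration (the node values here are made up):

import java.util.Arrays;
import java.util.List;

public class ClusterVolumeFailureStats {
  // Minimal stand-in for the per-node state the NameNode tracks.
  static class Node {
    final int volumeFailures;
    final Long estimatedCapacityLost;   // null when no summary was reported
    Node(int volumeFailures, Long estimatedCapacityLost) {
      this.volumeFailures = volumeFailures;
      this.estimatedCapacityLost = estimatedCapacityLost;
    }
  }

  public static void main(String[] args) {
    List<Node> live = Arrays.asList(
        new Node(0, null),                       // healthy node, no summary
        new Node(1, 512L * 1024 * 1024),         // one failed volume, ~512 MB lost
        new Node(2, 3L * 1024 * 1024 * 1024));   // two failed volumes, ~3 GB lost

    int volumeFailuresTotal = 0;
    long estimatedCapacityLostTotal = 0;
    for (Node n : live) {
      volumeFailuresTotal += n.volumeFailures;
      if (n.estimatedCapacityLost != null) {  // mirror the null check on the summary
        estimatedCapacityLostTotal += n.estimatedCapacityLost;
      }
    }
    System.out.println(volumeFailuresTotal + " failed volumes, "
        + estimatedCapacityLostTotal + " bytes lost (estimated)");
  }
}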
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
index f56d30e..ca22aef 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
@@ -136,6 +136,7 @@ import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
+import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import org.apache.hadoop.hdfs.StorageType;
import org.apache.hadoop.io.EnumSetWritable;
import org.apache.hadoop.io.Text;
@@ -1275,12 +1276,13 @@ class NameNodeRpcServer implements NamenodeProtocols {
public HeartbeatResponse sendHeartbeat(DatanodeRegistration nodeReg,
StorageReport[] report, long dnCacheCapacity, long dnCacheUsed,
int xmitsInProgress, int xceiverCount,
- int failedVolumes) throws IOException {
+ int failedVolumes, VolumeFailureSummary volumeFailureSummary)
+ throws IOException {
checkNNStartup();
verifyRequest(nodeReg);
return namesystem.handleHeartbeat(nodeReg, report,
dnCacheCapacity, dnCacheUsed, xceiverCount, xmitsInProgress,
- failedVolumes);
+ failedVolumes, volumeFailureSummary);
}
@Override // DatanodeProtocol
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java
index 86f4bd6..b31b7b6 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java
@@ -132,6 +132,19 @@ public interface FSNamesystemMBean {
public int getNumDecomDeadDataNodes();
/**
+ * Number of failed data volumes across all live data nodes.
+ * @return number of failed data volumes across all live data nodes
+ */
+ int getVolumeFailuresTotal();
+
+ /**
+ * Returns an estimate of total capacity lost due to volume failures in bytes
+ * across all live data nodes.
+ * @return estimate of total capacity lost in bytes
+ */
+ long getEstimatedCapacityLostTotal();
+
+ /**
* Number of data nodes that are in the decommissioning state
*/
public int getNumDecommissioningDataNodes();
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java
index c3b0f68..047de56 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java
@@ -102,6 +102,7 @@ public interface DatanodeProtocol {
* @param xmitsInProgress number of transfers from this datanode to others
* @param xceiverCount number of active transceiver threads
* @param failedVolumes number of failed volumes
+ * @param volumeFailureSummary info about volume failures
* @throws IOException on error
*/
@Idempotent
@@ -111,7 +112,9 @@ public interface DatanodeProtocol {
long dnCacheUsed,
int xmitsInProgress,
int xceiverCount,
- int failedVolumes) throws IOException;
+ int failedVolumes,
+ VolumeFailureSummary volumeFailureSummary)
+ throws IOException;
/**
* blockReport() tells the NameNode about all the locally-stored blocks.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/VolumeFailureSummary.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/VolumeFailureSummary.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/VolumeFailureSummary.java
new file mode 100644
index 0000000..1722dd0
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/VolumeFailureSummary.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.protocol;
+
+/**
+ * Summarizes information about data volume failures on a DataNode.
+ */
+public class VolumeFailureSummary {
+ private final String[] failedStorageLocations;
+ private final long lastVolumeFailureDate;
+ private final long estimatedCapacityLostTotal;
+
+ /**
+ * Creates a new VolumeFailureSummary.
+ *
+ * @param failedStorageLocations storage locations that have failed
+ * @param lastVolumeFailureDate date/time of last volume failure in
+ * milliseconds since epoch
+ * @param estimatedCapacityLostTotal estimate of capacity lost in bytes
+ */
+ public VolumeFailureSummary(String[] failedStorageLocations,
+ long lastVolumeFailureDate, long estimatedCapacityLostTotal) {
+ this.failedStorageLocations = failedStorageLocations;
+ this.lastVolumeFailureDate = lastVolumeFailureDate;
+ this.estimatedCapacityLostTotal = estimatedCapacityLostTotal;
+ }
+
+ /**
+ * Returns each storage location that has failed, sorted.
+ *
+ * @return each storage location that has failed, sorted
+ */
+ public String[] getFailedStorageLocations() {
+ return this.failedStorageLocations;
+ }
+
+ /**
+ * Returns the date/time of the last volume failure in milliseconds since
+ * epoch.
+ *
+ * @return date/time of last volume failure in milliseconds since epoch
+ */
+ public long getLastVolumeFailureDate() {
+ return this.lastVolumeFailureDate;
+ }
+
+ /**
+ * Returns estimate of capacity lost. This is said to be an estimate, because
+ * in some cases it's impossible to know the capacity of the volume, such as if
+ * we never had a chance to query its capacity before the failure occurred.
+ *
+ * @return estimate of capacity lost in bytes
+ */
+ public long getEstimatedCapacityLostTotal() {
+ return this.estimatedCapacityLostTotal;
+ }
+}
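The class is a plain immutable value object. A short usage sketch, assuming only this new class; the location and figures are made up:

// Assumes the VolumeFailureSummary class added by this patch.
VolumeFailureSummary summary = new VolumeFailureSummary(
    new String[] { "/data/1" },         // hypothetical failed location
    1424131200000L,                     // last failure, ms since epoch
    750L * 1024 * 1024 * 1024);         // ~750 GB estimated lost

for (String location : summary.getFailedStorageLocations()) {
  System.out.println("failed: " + location);
}
System.out.println("last failure: "
    + new java.util.Date(summary.getLastVolumeFailureDate()));
System.out.println("capacity lost (bytes): "
    + summary.getEstimatedCapacityLostTotal());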
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto
index 53ff52e..a466da7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto
@@ -160,6 +160,17 @@ message RegisterDatanodeResponseProto {
}
/**
+ * failedStorageLocations - storage locations that have failed
+ * lastVolumeFailureDate - date/time of last volume failure
+ * estimatedCapacityLost - estimate of total capacity lost due to volume failures
+ */
+message VolumeFailureSummaryProto {
+ repeated string failedStorageLocations = 1;
+ required uint64 lastVolumeFailureDate = 2;
+ required uint64 estimatedCapacityLostTotal = 3;
+}
+
+/**
* registration - datanode registration information
* capacity - total storage capacity available at the datanode
* dfsUsed - storage used by HDFS
@@ -167,9 +178,12 @@ message RegisterDatanodeResponseProto {
* blockPoolUsed - storage used by the block pool
* xmitsInProgress - number of transfers from this datanode to others
* xceiverCount - number of active transceiver threads
- * failedVolumes - number of failed volumes
+ * failedVolumes - number of failed volumes. This is redundant with the
+ * information included in volumeFailureSummary, but the field is retained
+ * for backwards compatibility.
* cacheCapacity - total cache capacity available at the datanode
* cacheUsed - amount of cache used
+ * volumeFailureSummary - info about volume failures
*/
message HeartbeatRequestProto {
required DatanodeRegistrationProto registration = 1; // Datanode info
@@ -179,6 +193,7 @@ message HeartbeatRequestProto {
optional uint32 failedVolumes = 5 [ default = 0 ];
optional uint64 cacheCapacity = 6 [ default = 0 ];
optional uint64 cacheUsed = 7 [default = 0 ];
+ optional VolumeFailureSummaryProto volumeFailureSummary = 8;
}
/**
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html
index 9c83f3a..391ca79 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html
@@ -34,6 +34,7 @@
<ul class="nav navbar-nav" id="ui-tabs">
<li><a href="#tab-overview">Overview</a></li>
<li><a href="#tab-datanode">Datanodes</a></li>
+ <li><a href="#tab-datanode-volume-failures">Datanode Volume Failures</a></li>
<li><a href="#tab-snapshot">Snapshot</a></li>
<li><a href="#tab-startup-progress">Startup Progress</a></li>
<li class="dropdown">
@@ -59,6 +60,7 @@
<div class="tab-content">
<div class="tab-pane" id="tab-overview"></div>
<div class="tab-pane" id="tab-datanode"></div>
+ <div class="tab-pane" id="tab-datanode-volume-failures"></div>
<div class="tab-pane" id="tab-snapshot"></div>
<div class="tab-pane" id="tab-startup-progress"></div>
</div>
@@ -165,6 +167,7 @@
<tr><th><a href="#tab-datanode">Live Nodes</a></th><td>{NumLiveDataNodes} (Decommissioned: {NumDecomLiveDataNodes})</td></tr>
<tr><th><a href="#tab-datanode">Dead Nodes</a></th><td>{NumDeadDataNodes} (Decommissioned: {NumDecomDeadDataNodes})</td></tr>
<tr><th><a href="#tab-datanode">Decommissioning Nodes</a></th><td>{NumDecommissioningDataNodes}</td></tr>
+ <tr><th><a href="#tab-datanode-volume-failures">Total Datanode Volume Failures</a></th><td>{VolumeFailuresTotal} ({EstimatedCapacityLostTotal|fmt_bytes})</td></tr>
<tr><th title="Excludes missing blocks.">Number of Under-Replicated Blocks</th><td>{UnderReplicatedBlocks}</td></tr>
<tr><th>Number of Blocks Pending Deletion</th><td>{PendingDeletionBlocks}</td></tr>
<tr><th>Block Deletion Start Time</th><td>{BlockDeletionStartTime|date_tostring}</td></tr>
@@ -324,6 +327,36 @@
</small>
</script>
+<script type="text/x-dust-template" id="tmpl-datanode-volume-failures">
+<div class="page-header"><h1>Datanode Volume Failures</h1></div>
+<small>
+{?LiveNodes}
+<table class="table">
+ <thead>
+ <tr>
+ <th>Node</th>
+ <th>Last Failure Date</th>
+ <th>Failed Volumes</th>
+ <th>Estimated Capacity Lost</th>
+ <th>Failed Storage Locations</th>
+ </tr>
+ </thead>
+ {#LiveNodes}
+ <tr>
+ <td>{name} ({xferaddr})</td>
+ <td>{#helper_date_tostring value="{lastVolumeFailureDate}"/}</td>
+ <td>{volfails}</td>
+ <td>{estimatedCapacityLostTotal|fmt_bytes}</td>
+ <td>{#failedStorageLocations}{.}{@sep}<br />{/sep}{/failedStorageLocations}</td>
+ </tr>
+ {/LiveNodes}
+</table>
+{:else}
+There are no reported volume failures.
+{/LiveNodes}
+</small>
+</script>
+
<script type="text/x-dust-template" id="tmpl-startup-progress">
<div class="page-header"><h1>Startup Progress</h1></div>
<p>Elapsed Time: {elapsedTime|fmt_time}, Percent Complete: {percentComplete|fmt_percentage}</p>
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js
index f6dc627..a045e42 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js
@@ -21,6 +21,7 @@
dust.loadSource(dust.compile($('#tmpl-dfshealth').html(), 'dfshealth'));
dust.loadSource(dust.compile($('#tmpl-startup-progress').html(), 'startup-progress'));
dust.loadSource(dust.compile($('#tmpl-datanode').html(), 'datanode-info'));
+ dust.loadSource(dust.compile($('#tmpl-datanode-volume-failures').html(), 'datanode-volume-failures'));
dust.loadSource(dust.compile($('#tmpl-snapshot').html(), 'snapshot-info'));
function load_overview() {
@@ -193,6 +194,45 @@
})).error(ajax_error_handler);
}
+ function load_datanode_volume_failures() {
+
+ var HELPERS = {
+ 'helper_date_tostring' : function (chunk, ctx, bodies, params) {
+ var value = dust.helpers.tap(params.value, chunk, ctx);
+ return chunk.write('' + new Date(Number(value)).toLocaleString());
+ }
+ };
+
+ function workaround(r) {
+ function node_map_to_array(nodes) {
+ var res = [];
+ for (var n in nodes) {
+ var p = nodes[n];
+ // Filter the display to show only datanodes with volume failures.
+ if (p.volfails > 0) {
+ p.name = n;
+ res.push(p);
+ }
+ }
+ return res;
+ }
+
+ r.LiveNodes = node_map_to_array(JSON.parse(r.LiveNodes));
+ return r;
+ }
+
+ $.get(
+ '/jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo',
+ guard_with_startup_progress(function (resp) {
+ var data = workaround(resp.beans[0]);
+ var base = dust.makeBase(HELPERS);
+ dust.render('datanode-volume-failures', base.push(data), function(err, out) {
+ $('#tab-datanode-volume-failures').html(out);
+ $('#ui-tabs a[href="#tab-datanode-volume-failures"]').tab('show');
+ });
+ })).error(ajax_error_handler);
+ }
+
function load_snapshot_info() {
$.get(
'/jmx?qry=Hadoop:service=NameNode,name=SnapshotInfo',
@@ -210,6 +250,9 @@
case "#tab-datanode":
load_datanode_info();
break;
+ case "#tab-datanode-volume-failures":
+ load_datanode_volume_failures();
+ break;
case "#tab-snapshot":
load_snapshot_info();
break;
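Both the Dust template and this loader consume the NameNodeInfo JMX bean: LiveNodes arrives as a JSON string keyed by node name, which node_map_to_array() converts to an array and filters on volfails > 0 before rendering. A hypothetical entry that would yield one table row (all values invented for illustration) looks like:

  {
    "host1.example.com:50010": {
      "xferaddr": "192.168.1.101:50010",
      "volfails": 1,
      "lastVolumeFailureDate": 1424130000000,
      "estimatedCapacityLostTotal": 536870912,
      "failedStorageLocations": [ "/data/1/dfs/dn" ]
    }
  }

The real bean carries additional per-node fields; only the ones the new template references are shown here.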
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
index c7dfcf9..698f9f4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
@@ -114,7 +114,8 @@ public class TestBlockManager {
2 * HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L,
2 * HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L);
dn.updateHeartbeat(
- BlockManagerTestUtil.getStorageReportsForDatanode(dn), 0L, 0L, 0, 0);
+ BlockManagerTestUtil.getStorageReportsForDatanode(dn), 0L, 0L, 0, 0,
+ null);
bm.getDatanodeManager().checkIfClusterIsNowMultiRack(dn);
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNameNodePrunesMissingStorages.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNameNodePrunesMissingStorages.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNameNodePrunesMissingStorages.java
index 88300de..b67ae7a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNameNodePrunesMissingStorages.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNameNodePrunesMissingStorages.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.hdfs.server.blockmanagement;
-import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.math3.stat.inference.TestUtils;
@@ -87,7 +86,8 @@ public class TestNameNodePrunesMissingStorages {
// Stop the DataNode and send fake heartbeat with missing storage.
cluster.stopDataNode(0);
- cluster.getNameNodeRpc().sendHeartbeat(dnReg, prunedReports, 0L, 0L, 0, 0, 0);
+ cluster.getNameNodeRpc().sendHeartbeat(dnReg, prunedReports, 0L, 0L, 0, 0,
+ 0, null);
// Check that the missing storage was pruned.
assertThat(dnDescriptor.getStorageInfos().length, is(expectedStoragesAfterTest));
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestOverReplicatedBlocks.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestOverReplicatedBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestOverReplicatedBlocks.java
index 2942d0f..6bbb0c3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestOverReplicatedBlocks.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestOverReplicatedBlocks.java
@@ -107,7 +107,7 @@ public class TestOverReplicatedBlocks {
datanode.getStorageInfos()[0].setUtilizationForTesting(100L, 100L, 0, 100L);
datanode.updateHeartbeat(
BlockManagerTestUtil.getStorageReportsForDatanode(datanode),
- 0L, 0L, 0, 0);
+ 0L, 0L, 0, 0, null);
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java
index b292743..b080185 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java
@@ -97,7 +97,7 @@ public class TestReplicationPolicy {
capacity, dfsUsed, remaining, blockPoolUsed);
dn.updateHeartbeat(
BlockManagerTestUtil.getStorageReportsForDatanode(dn),
- dnCacheCapacity, dnCacheUsed, xceiverCount, volFailures);
+ dnCacheCapacity, dnCacheUsed, xceiverCount, volFailures, null);
}
@BeforeClass
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyConsiderLoad.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyConsiderLoad.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyConsiderLoad.java
index 4194520..1f1241f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyConsiderLoad.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyConsiderLoad.java
@@ -97,7 +97,7 @@ public class TestReplicationPolicyConsiderLoad {
2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*blockSize, 0L);
dataNodes[i].updateHeartbeat(
BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[i]),
- 0L, 0L, 0, 0);
+ 0L, 0L, 0, 0, null);
}
}
@@ -115,17 +115,17 @@ public class TestReplicationPolicyConsiderLoad {
BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[3]),
blockPoolId, dataNodes[3].getCacheCapacity(),
dataNodes[3].getCacheRemaining(),
- 2, 0, 0);
+ 2, 0, 0, null);
dnManager.handleHeartbeat(dnrList.get(4),
BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[4]),
blockPoolId, dataNodes[4].getCacheCapacity(),
dataNodes[4].getCacheRemaining(),
- 4, 0, 0);
+ 4, 0, 0, null);
dnManager.handleHeartbeat(dnrList.get(5),
BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[5]),
blockPoolId, dataNodes[5].getCacheCapacity(),
dataNodes[5].getCacheRemaining(),
- 4, 0, 0);
+ 4, 0, 0, null);
// value in the above heartbeats
final int load = 2 + 4 + 4;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java
index 526c490..a1ea73c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java
@@ -185,7 +185,7 @@ public class TestReplicationPolicyWithNodeGroup {
capacity, dfsUsed, remaining, blockPoolUsed);
dn.updateHeartbeat(
BlockManagerTestUtil.getStorageReportsForDatanode(dn),
- dnCacheCapacity, dnCacheUsed, xceiverCount, volFailures);
+ dnCacheCapacity, dnCacheUsed, xceiverCount, volFailures, null);
}
private static void setupDataNodeCapacity() {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java
index 36b62e5..991aa9e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java
@@ -35,6 +35,7 @@ import javax.management.NotCompliantMBeanException;
import javax.management.ObjectName;
import javax.management.StandardMBean;
+import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.StorageType;
@@ -57,6 +58,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
+import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.metrics2.util.MBeans;
import org.apache.hadoop.util.DataChecksum;
@@ -628,6 +630,26 @@ public class SimulatedFSDataset implements FsDatasetSpi<FsVolumeSpi> {
}
@Override // FSDatasetMBean
+ public String[] getFailedStorageLocations() {
+ return null;
+ }
+
+ @Override // FSDatasetMBean
+ public long getLastVolumeFailureDate() {
+ return 0;
+ }
+
+ @Override // FSDatasetMBean
+ public long getEstimatedCapacityLostTotal() {
+ return 0;
+ }
+
+ @Override // FsDatasetSpi
+ public VolumeFailureSummary getVolumeFailureSummary() {
+ return new VolumeFailureSummary(ArrayUtils.EMPTY_STRING_ARRAY, 0, 0);
+ }
+
+ @Override // FSDatasetMBean
public long getCacheUsed() {
return 0l;
}
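The three new FSDatasetMBean getters expose the same failure data over the DataNode's JMX interface. Below is a speculative in-process probe using the platform MBean server; the object-name pattern is an assumption about how the dataset MBean is registered, and the attribute names simply follow the standard getter-to-attribute naming convention.

  import java.lang.management.ManagementFactory;
  import java.util.Arrays;
  import javax.management.MBeanServer;
  import javax.management.ObjectName;

  public class VolumeFailureMetricsProbe {
    public static void main(String[] args) throws Exception {
      MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
      // Assumed registration pattern for the dataset state MBean.
      ObjectName pattern =
          new ObjectName("Hadoop:service=DataNode,name=FSDatasetState*");
      for (ObjectName name : mbs.queryNames(pattern, null)) {
        // Attribute names derived from the FSDatasetMBean getters above.
        String[] locs =
            (String[]) mbs.getAttribute(name, "FailedStorageLocations");
        long lastFailure =
            (Long) mbs.getAttribute(name, "LastVolumeFailureDate");
        long lostBytes =
            (Long) mbs.getAttribute(name, "EstimatedCapacityLostTotal");
        // SimulatedFSDataset returns null for the locations, so guard it.
        System.out.println(name + ": failed="
            + (locs == null ? "[]" : Arrays.toString(locs))
            + ", lastFailureDate=" + lastFailure
            + ", estimatedCapacityLost=" + lostBytes + " bytes");
      }
    }
  }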
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java
index e21ce38..bc49793 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java
@@ -53,6 +53,7 @@ import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo;
import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
+import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.PathUtils;
import org.apache.hadoop.util.Time;
@@ -137,7 +138,8 @@ public class TestBPOfferService {
Mockito.anyLong(),
Mockito.anyInt(),
Mockito.anyInt(),
- Mockito.anyInt());
+ Mockito.anyInt(),
+ Mockito.any(VolumeFailureSummary.class));
mockHaStatuses[nnIdx] = new NNHAStatusHeartbeat(HAServiceState.STANDBY, 0);
return mock;
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/441dfa48/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java
index 855cee4..454d1e4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java
@@ -77,6 +77,7 @@ import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
+import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.Daemon;
import org.apache.hadoop.util.DataChecksum;
@@ -158,7 +159,8 @@ public class TestBlockRecovery {
Mockito.anyLong(),
Mockito.anyInt(),
Mockito.anyInt(),
- Mockito.anyInt()))
+ Mockito.anyInt(),
+ Mockito.any(VolumeFailureSummary.class)))
.thenReturn(new HeartbeatResponse(
new DatanodeCommand[0],
new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 1),