Posted to common-commits@hadoop.apache.org by vi...@apache.org on 2015/04/21 11:56:11 UTC

hadoop git commit: HDFS-7993. Provide each Replica details in fsck (Contributed by J.Andreina)

Repository: hadoop
Updated Branches:
  refs/heads/trunk d52de6154 -> 8ddbb8dd4


HDFS-7993. Provide each Replica details in fsck (Contributed by J.Andreina)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8ddbb8dd
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8ddbb8dd
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8ddbb8dd

Branch: refs/heads/trunk
Commit: 8ddbb8dd433862509bd9b222dddafe2c3a74778a
Parents: d52de61
Author: Vinayakumar B <vi...@apache.org>
Authored: Tue Apr 21 15:24:49 2015 +0530
Committer: Vinayakumar B <vi...@apache.org>
Committed: Tue Apr 21 15:24:49 2015 +0530

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  2 +
 .../blockmanagement/DatanodeStorageInfo.java    |  6 +-
 .../hdfs/server/namenode/NamenodeFsck.java      | 61 ++++++++++++++---
 .../org/apache/hadoop/hdfs/tools/DFSck.java     |  6 +-
 .../hadoop/hdfs/server/namenode/TestFsck.java   | 71 ++++++++++++++++++++
 5 files changed, 132 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
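
In short, this commit adds a -replicaDetails option to fsck: when set, every replica in a block's location list is annotated with its state, one of DECOMMISSIONED, DECOMMISSIONING, CORRUPT, EXCESS, STALE_NODE, STALE_BLOCK_CONTENT, or LIVE. As exercised by the new test below, the option combines with -files and -blocks:

    hdfs fsck /testfile -files -blocks -replicaDetails

An illustrative (not verbatim) fragment of the resulting per-block line, with the DatanodeInfoWithStorage rendering elided, assuming one healthy replica:

    ... repl=1 [<datanode,storageID,storageType>(LIVE)]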


http://git-wip-us.apache.org/repos/asf/hadoop/blob/8ddbb8dd/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 1aa9ce4..6951a08 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -526,6 +526,8 @@ Release 2.8.0 - UNRELEASED
     HDFS-8173. NPE thrown at DataNode shutdown when HTTP server was not able to
     create (surendra singh lilhore via vinayakumarb)
 
+    HDFS-7993. Provide each Replica details in fsck (J.Andreina via vinayakumarb)
+
 Release 2.7.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8ddbb8dd/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStorageInfo.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStorageInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStorageInfo.java
index 8c752ac..c6c9001 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStorageInfo.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStorageInfo.java
@@ -155,7 +155,7 @@ public class DatanodeStorageInfo {
     this.blockReportCount = blockReportCount;
   }
 
-  boolean areBlockContentsStale() {
+  public boolean areBlockContentsStale() {
     return blockContentsStale;
   }
 
@@ -205,11 +205,11 @@ public class DatanodeStorageInfo {
     return getState() == State.FAILED && numBlocks != 0;
   }
 
-  String getStorageID() {
+  public String getStorageID() {
     return storageID;
   }
 
-  StorageType getStorageType() {
+  public StorageType getStorageType() {
     return storageType;
   }
 

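The three accessors above are widened from package-private to public because NamenodeFsck lives in org.apache.hadoop.hdfs.server.namenode, outside the blockmanagement package. A minimal sketch of the consuming side, using only names that appear in the NamenodeFsck hunk below (this is not additional code from the commit):

    // bm is the namenode's BlockManager; block is the LocatedBlock's ExtendedBlock.
    for (DatanodeStorageInfo storage : bm.getStorages(block.getLocalBlock())) {
      String id = storage.getStorageID();                      // now public
      StorageType type = storage.getStorageType();             // now public
      boolean staleContents = storage.areBlockContentsStale(); // now public
    }
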
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8ddbb8dd/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
index a8586dd..afaec87 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
@@ -52,6 +52,7 @@ import org.apache.hadoop.hdfs.net.TcpPeerServer;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.DatanodeID;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfoWithStorage;
 import org.apache.hadoop.hdfs.protocol.DirectoryListing;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
@@ -68,9 +69,11 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementStatus;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
 import org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
 import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
+import org.apache.hadoop.hdfs.util.LightWeightLinkedSet;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.net.NetworkTopology;
 import org.apache.hadoop.net.NodeBase;
@@ -133,6 +136,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
   private boolean showprogress = false;
   private boolean showCorruptFileBlocks = false;
 
+  private boolean showReplicaDetails = false;
+  private long staleInterval;
   /**
    * True if we encountered an internal error during FSCK, such as not being
    * able to delete a corrupt file.
@@ -194,6 +199,9 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
         networktopology,
         namenode.getNamesystem().getBlockManager().getDatanodeManager()
         .getHost2DatanodeMap());
+    this.staleInterval =
+        conf.getLong(DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY,
+          DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_DEFAULT);
     
     for (Iterator<String> it = pmap.keySet().iterator(); it.hasNext();) {
       String key = it.next();
@@ -204,6 +212,9 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
       else if (key.equals("blocks")) { this.showBlocks = true; }
       else if (key.equals("locations")) { this.showLocations = true; }
       else if (key.equals("racks")) { this.showRacks = true; }
+      else if (key.equals("replicadetails")) {
+        this.showReplicaDetails = true;
+      }
       else if (key.equals("storagepolicies")) { this.showStoragePolcies = true; }
       else if (key.equals("showprogress")) { this.showprogress = true; }
       else if (key.equals("openforwrite")) {this.showOpenFiles = true; }
@@ -507,9 +518,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
       ExtendedBlock block = lBlk.getBlock();
       boolean isCorrupt = lBlk.isCorrupt();
       String blkName = block.toString();
-      DatanodeInfo[] locs = lBlk.getLocations();
-      NumberReplicas numberReplicas =
-          namenode.getNamesystem().getBlockManager().countNodes(block.getLocalBlock());
+      BlockManager bm = namenode.getNamesystem().getBlockManager();
+      NumberReplicas numberReplicas = bm.countNodes(block.getLocalBlock());
       int liveReplicas = numberReplicas.liveReplicas();
      int decommissionedReplicas = numberReplicas.decommissioned();
       int decommissioningReplicas = numberReplicas.decommissioning();
@@ -518,6 +528,10 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
       int totalReplicas = liveReplicas + decommissionedReplicas +
           decommissioningReplicas;
       res.totalReplicas += totalReplicas;
+      Collection<DatanodeDescriptor> corruptReplicas = null;
+      if (showReplicaDetails) {
+        corruptReplicas = bm.getCorruptReplicas(block.getLocalBlock());
+      }
       short targetFileReplication = file.getReplication();
       res.numExpectedReplicas += targetFileReplication;
       if(totalReplicas < minReplication){
@@ -578,14 +592,41 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
         missize += block.getNumBytes();
       } else {
         report.append(" repl=" + liveReplicas);
-        if (showLocations || showRacks) {
+        if (showLocations || showRacks || showReplicaDetails) {
           StringBuilder sb = new StringBuilder("[");
-          for (int j = 0; j < locs.length; j++) {
-            if (j > 0) { sb.append(", "); }
-            if (showRacks)
-              sb.append(NodeBase.getPath(locs[j]));
-            else
-              sb.append(locs[j]);
+          Iterable<DatanodeStorageInfo> storages = bm.getStorages(block.getLocalBlock());
+          for (Iterator<DatanodeStorageInfo> iterator = storages.iterator(); iterator.hasNext();) {
+            DatanodeStorageInfo storage = iterator.next();
+            DatanodeDescriptor dnDesc = storage.getDatanodeDescriptor();
+            if (showRacks) {
+              sb.append(NodeBase.getPath(dnDesc));
+            } else {
+              sb.append(new DatanodeInfoWithStorage(dnDesc, storage.getStorageID(), storage
+                  .getStorageType()));
+            }
+            if (showReplicaDetails) {
+              LightWeightLinkedSet<Block> blocksExcess =
+                  bm.excessReplicateMap.get(dnDesc.getDatanodeUuid());
+              sb.append("(");
+              if (dnDesc.isDecommissioned()) {
+                sb.append("DECOMMISSIONED)");
+              } else if (dnDesc.isDecommissionInProgress()) {
+                sb.append("DECOMMISSIONING)");
+              } else if (corruptReplicas != null && corruptReplicas.contains(dnDesc)) {
+                sb.append("CORRUPT)");
+              } else if (blocksExcess != null && blocksExcess.contains(block.getLocalBlock())) {
+                sb.append("EXCESS)");
+              } else if (dnDesc.isStale(this.staleInterval)) {
+                sb.append("STALE_NODE)");
+              } else if (storage.areBlockContentsStale()) {
+                sb.append("STALE_BLOCK_CONTENT)");
+              } else {
+                sb.append("LIVE)");
+              }
+            }
+            if (iterator.hasNext()) {
+              sb.append(", ");
+            }
           }
           sb.append(']');
           report.append(" " + sb.toString());

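The heart of the change is the if/else chain above: each replica gets exactly one label, and the order of the checks acts as a precedence, so a corrupt replica on a decommissioned node reports DECOMMISSIONED rather than CORRUPT. A distilled restatement as a standalone helper (hypothetical name and signature; in the commit the logic is inline in NamenodeFsck):

    // Hypothetical helper equivalent to the inline chain above.
    // isCorrupt: dnDesc is in the block's corrupt-replica set;
    // isExcess:  the block is in excessReplicateMap for this datanode.
    static String replicaState(DatanodeDescriptor dn, DatanodeStorageInfo storage,
        boolean isCorrupt, boolean isExcess, long staleInterval) {
      if (dn.isDecommissioned())           return "DECOMMISSIONED";
      if (dn.isDecommissionInProgress())   return "DECOMMISSIONING";
      if (isCorrupt)                       return "CORRUPT";
      if (isExcess)                        return "EXCESS";
      if (dn.isStale(staleInterval))       return "STALE_NODE";
      if (storage.areBlockContentsStale()) return "STALE_BLOCK_CONTENT";
      return "LIVE";
    }

staleInterval is read once in the constructor hunk above, from DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY (dfs.namenode.stale.datanode.interval).
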
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8ddbb8dd/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java
index dc6d9d4..9926628 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java
@@ -97,7 +97,8 @@ public class DFSck extends Configured implements Tool {
       + "\t-showprogress\tshow progress in output. Default is OFF (no progress)\n"
       + "\t-blockId\tprint out which file this blockId belongs to, locations"
       + " (nodes, racks) of this block, and other diagnostics info"
-      + " (under replicated, corrupted or not, etc)\n\n"
+      + " (under replicated, corrupted or not, etc)\n"
+      + "\t-replicaDetails\tprint out each replica details \n\n"
       + "Please Note:\n"
       + "\t1. By default fsck ignores files opened for write, "
       + "use -openforwrite to report such files. They are usually "
@@ -268,6 +269,9 @@ public class DFSck extends Configured implements Tool {
       else if (args[idx].equals("-blocks")) { url.append("&blocks=1"); }
       else if (args[idx].equals("-locations")) { url.append("&locations=1"); }
       else if (args[idx].equals("-racks")) { url.append("&racks=1"); }
+      else if (args[idx].equals("-replicaDetails")) {
+        url.append("&replicadetails=1");
+      }
       else if (args[idx].equals("-storagepolicies")) { url.append("&storagepolicies=1"); }
       else if (args[idx].equals("-showprogress")) { url.append("&showprogress=1"); }
       else if (args[idx].equals("-list-corruptfileblocks")) {

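On the client side the new option is pure plumbing, with a deliberate case difference between the camel-cased CLI flag and the all-lowercase query parameter:

    // DFSck:        arg "-replicaDetails"  ->  url.append("&replicadetails=1");
    // NamenodeFsck: key "replicadetails"   ->  this.showReplicaDetails = true;

The NamenodeFsck constructor shown earlier picks the replicadetails key out of pmap when the fsck servlet is invoked with this URL.
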
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8ddbb8dd/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
index 68b7e38..4c3fa9c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
@@ -779,6 +779,77 @@ public class TestFsck {
     }
   }
 
+  @Test(timeout = 60000)
+  public void testFsckReplicaDetails() throws Exception {
+
+    final short REPL_FACTOR = 1;
+    short NUM_DN = 1;
+    final long blockSize = 512;
+    final long fileSize = 1024;
+    boolean checkDecommissionInProgress = false;
+    String[] racks = { "/rack1" };
+    String[] hosts = { "host1" };
+
+    Configuration conf = new Configuration();
+    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
+    conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);
+
+    MiniDFSCluster cluster;
+    DistributedFileSystem dfs;
+    cluster =
+        new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DN).hosts(hosts).racks(racks).build();
+    cluster.waitClusterUp();
+    dfs = cluster.getFileSystem();
+
+    // create files
+    final String testFile = "/testfile";
+    final Path path = new Path(testFile);
+    DFSTestUtil.createFile(dfs, path, fileSize, REPL_FACTOR, 1000L);
+    DFSTestUtil.waitReplication(dfs, path, REPL_FACTOR);
+    try {
+      // make sure datanode that has replica is fine before decommission
+      String fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks", "-replicaDetails");
+      assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
+      assertTrue(fsckOut.contains("(LIVE)"));
+
+      // decommission datanode
+      ExtendedBlock eb = DFSTestUtil.getFirstBlock(dfs, path);
+      DatanodeDescriptor dn =
+          cluster.getNameNode().getNamesystem().getBlockManager()
+              .getBlockCollection(eb.getLocalBlock()).getBlocks()[0].getDatanode(0);
+      cluster.getNameNode().getNamesystem().getBlockManager().getDatanodeManager()
+          .getDecomManager().startDecommission(dn);
+      String dnName = dn.getXferAddr();
+
+      // check the replica status while decommissioning
+      fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks", "-replicaDetails");
+      assertTrue(fsckOut.contains("(DECOMMISSIONING)"));
+
+      // Start 2nd Datanode and wait for decommission to start
+      cluster.startDataNodes(conf, 1, true, null, null, null);
+      DatanodeInfo datanodeInfo = null;
+      do {
+        Thread.sleep(2000);
+        for (DatanodeInfo info : dfs.getDataNodeStats()) {
+          if (dnName.equals(info.getXferAddr())) {
+            datanodeInfo = info;
+          }
+        }
+        if (!checkDecommissionInProgress && datanodeInfo != null
+            && datanodeInfo.isDecommissionInProgress()) {
+          checkDecommissionInProgress = true;
+        }
+      } while (datanodeInfo != null && !datanodeInfo.isDecommissioned());
+
+      // check the replica status after decommission is done
+      fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks", "-replicaDetails");
+      assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
 
   /** Test if fsck can return -1 in case of failure
    *