Posted to common-commits@hadoop.apache.org by ma...@apache.org on 2017/08/26 00:22:04 UTC

hadoop git commit: HDFS-11986. Dfsadmin should report erasure-coding-related information separately.

Repository: hadoop
Updated Branches:
  refs/heads/trunk 36bada303 -> b89ffcff3


HDFS-11986. Dfsadmin should report erasure-coding-related information separately.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/b89ffcff
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/b89ffcff
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/b89ffcff

Branch: refs/heads/trunk
Commit: b89ffcff362a872013f5d96c1fb76e0731402db4
Parents: 36bada3
Author: Manoj Govindassamy <ma...@apache.org>
Authored: Fri Aug 25 17:21:56 2017 -0700
Committer: Manoj Govindassamy <ma...@apache.org>
Committed: Fri Aug 25 17:21:56 2017 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hdfs/tools/DFSAdmin.java  |  36 +++-
 .../apache/hadoop/hdfs/tools/TestDFSAdmin.java  | 182 ++++++++++++++-----
 2 files changed, 160 insertions(+), 58 deletions(-)
----------------------------------------------------------------------
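
With this change the "hdfs dfsadmin -report" summary splits block health
into a replicated-blocks section and an erasure-coded-block-groups
section. A sketch of the new layout, reconstructed from the println calls
in the DFSAdmin.java hunk below (the zero counts are illustrative
placeholders, not captured output):

    Replicated Blocks:
        Under replicated blocks: 0
        Blocks with corrupt replicas: 0
        Missing blocks: 0
        Missing blocks (with replication factor 1): 0
        Pending deletion blocks: 0
    Erasure Coded Block Groups:
        Low redundancy block groups: 0
        Block groups with corrupt internal blocks: 0
        Missing block groups: 0
        Pending deletion block groups: 0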


http://git-wip-us.apache.org/repos/asf/hadoop/blob/b89ffcff/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
index 1fb1d5f..88aafe2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
@@ -66,11 +66,13 @@ import org.apache.hadoop.hdfs.HAUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.NameNodeProxies;
 import org.apache.hadoop.hdfs.NameNodeProxiesClient.ProxyAndInfo;
+import org.apache.hadoop.hdfs.protocol.BlocksStats;
 import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
 import org.apache.hadoop.hdfs.protocol.ClientProtocol;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.DatanodeLocalInfo;
 import org.apache.hadoop.hdfs.protocol.DatanodeVolumeInfo;
+import org.apache.hadoop.hdfs.protocol.ECBlockGroupsStats;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.RollingUpgradeAction;
@@ -532,16 +534,30 @@ public class DFSAdmin extends FsShell {
     * minutes. Use "-metaSave" to list all such blocks and get accurate
     * counts.
      */
-    System.out.println("Under replicated blocks: " + 
-                       dfs.getLowRedundancyBlocksCount());
-    System.out.println("Blocks with corrupt replicas: " + 
-                       dfs.getCorruptBlocksCount());
-    System.out.println("Missing blocks: " + 
-                       dfs.getMissingBlocksCount());
-    System.out.println("Missing blocks (with replication factor 1): " +
-                      dfs.getMissingReplOneBlocksCount());
-    System.out.println("Pending deletion blocks: " +
-        dfs.getPendingDeletionBlocksCount());
+    BlocksStats blocksStats = dfs.getClient().getNamenode().getBlocksStats();
+    System.out.println("Replicated Blocks:");
+    System.out.println("\tUnder replicated blocks: " +
+        blocksStats.getLowRedundancyBlocksStat());
+    System.out.println("\tBlocks with corrupt replicas: " +
+        blocksStats.getCorruptBlocksStat());
+    System.out.println("\tMissing blocks: " +
+        blocksStats.getMissingReplicaBlocksStat());
+    System.out.println("\tMissing blocks (with replication factor 1): " +
+        blocksStats.getMissingReplicationOneBlocksStat());
+    System.out.println("\tPending deletion blocks: " +
+        blocksStats.getPendingDeletionBlocksStat());
+
+    ECBlockGroupsStats ecBlockGroupsStats =
+        dfs.getClient().getNamenode().getECBlockGroupsStats();
+    System.out.println("Erasure Coded Block Groups:");
+    System.out.println("\tLow redundancy block groups: " +
+        ecBlockGroupsStats.getLowRedundancyBlockGroupsStat());
+    System.out.println("\tBlock groups with corrupt internal blocks: " +
+        ecBlockGroupsStats.getCorruptBlockGroupsStat());
+    System.out.println("\tMissing block groups: " +
+        ecBlockGroupsStats.getMissingBlockGroupsStat());
+    System.out.println("\tPending deletion block groups: " +
+        ecBlockGroupsStats.getPendingDeletionBlockGroupsStat());
 
     System.out.println();
 
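For reference, a minimal standalone sketch (not part of this patch) of how
the same per-type statistics could be read programmatically. It uses only
the calls visible in the hunk above; the class name and the assumption
that fs.defaultFS points at an HDFS namenode are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.BlocksStats;
import org.apache.hadoop.hdfs.protocol.ECBlockGroupsStats;

public class BlockStatsSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Assumes fs.defaultFS resolves to a DistributedFileSystem.
    try (DistributedFileSystem dfs =
        (DistributedFileSystem) FileSystem.get(conf)) {
      // One RPC per block type, mirroring the patched report() method.
      BlocksStats replicated =
          dfs.getClient().getNamenode().getBlocksStats();
      ECBlockGroupsStats ecBlockGroups =
          dfs.getClient().getNamenode().getECBlockGroupsStats();
      System.out.println("Corrupt replicated blocks: "
          + replicated.getCorruptBlocksStat());
      System.out.println("Corrupt EC block groups: "
          + ecBlockGroups.getCorruptBlockGroupsStat());
    }
  }
}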

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b89ffcff/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java
index ed43dc0..2d38f2f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java
@@ -38,11 +38,18 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
+import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
+import org.apache.hadoop.hdfs.protocol.LocatedStripedBlock;
+import org.apache.hadoop.hdfs.protocol.SystemErasureCodingPolicies;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
 import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
@@ -498,24 +505,47 @@ public class TestDFSAdmin {
     return sb.toString();
   }
 
-  @Test(timeout = 120000)
+  // Wait until the first block of the given file is reported corrupt
+  private void waitForCorruptBlock(MiniDFSCluster miniCluster,
+      DFSClient client, Path file)
+      throws TimeoutException, InterruptedException {
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        LocatedBlocks blocks = null;
+        try {
+          miniCluster.triggerBlockReports();
+          blocks = client.getNamenode().getBlockLocations(file.toString(), 0,
+              Long.MAX_VALUE);
+        } catch (IOException e) {
+          return false;
+        }
+        return blocks != null && blocks.get(0).isCorrupt();
+      }
+    }, 1000, 60000);
+  }
+
+  @Test(timeout = 180000)
   public void testReportCommand() throws Exception {
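+    // Tear down the default cluster from setUp() before building a custom one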
+    tearDown();
     redirectStream();
 
-    /* init conf */
+    // init conf
     final Configuration dfsConf = new HdfsConfiguration();
+    ErasureCodingPolicy ecPolicy = SystemErasureCodingPolicies.getByID(
+        SystemErasureCodingPolicies.XOR_2_1_POLICY_ID);
+    dfsConf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY,
+        ecPolicy.getName());
     dfsConf.setInt(
-        DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY,
-        500); // 0.5s
+        DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 500);
     dfsConf.setLong(DFS_HEARTBEAT_INTERVAL_KEY, 1);
     final Path baseDir = new Path(
         PathUtils.getTestDir(getClass()).getAbsolutePath(),
         GenericTestUtils.getMethodName());
     dfsConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.toString());
+    final int numDn =
+        ecPolicy.getNumDataUnits() + ecPolicy.getNumParityUnits();
 
-    final int numDn = 3;
-
-    /* init cluster */
     try(MiniDFSCluster miniCluster = new MiniDFSCluster
         .Builder(dfsConf)
         .numDataNodes(numDn).build()) {
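
A note on the arithmetic above: with the XOR-2-1 policy, numDn resolves to
2 data units + 1 parity unit = 3 datanodes, the same count the test
previously hard-coded.
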
@@ -523,34 +553,71 @@ public class TestDFSAdmin {
       miniCluster.waitActive();
       assertEquals(numDn, miniCluster.getDataNodes().size());
 
-      /* local vars */
       final DFSAdmin dfsAdmin = new DFSAdmin(dfsConf);
       final DFSClient client = miniCluster.getFileSystem().getClient();
 
-      /* run and verify report command */
+      // Verify report command for all counts to be zero
       resetStream();
       assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"}));
-      verifyNodesAndCorruptBlocks(numDn, numDn, 0, client);
+      verifyNodesAndCorruptBlocks(numDn, numDn, 0, 0, client);
 
-      /* shut down one DN */
-      final List<DataNode> datanodes = miniCluster.getDataNodes();
-      final DataNode last = datanodes.get(datanodes.size() - 1);
-      last.shutdown();
-      miniCluster.setDataNodeDead(last.getDatanodeId());
-
-      /* run and verify report command */
-      assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"}));
-      verifyNodesAndCorruptBlocks(numDn, numDn - 1, 0, client);
-
-      /* corrupt one block */
       final short replFactor = 1;
       final long fileLength = 512L;
-      final FileSystem fs = miniCluster.getFileSystem();
+      final DistributedFileSystem fs = miniCluster.getFileSystem();
       final Path file = new Path(baseDir, "/corrupted");
       DFSTestUtil.createFile(fs, file, fileLength, replFactor, 12345L);
       DFSTestUtil.waitReplication(fs, file, replFactor);
-
       final ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, file);
+      LocatedBlocks lbs = miniCluster.getFileSystem().getClient().
+          getNamenode().getBlockLocations(
+          file.toString(), 0, fileLength);
+      assertTrue("Unexpected block type: " + lbs.get(0),
+          lbs.get(0) instanceof LocatedBlock);
+      LocatedBlock locatedBlock = lbs.get(0);
+      DatanodeInfo locatedDataNode = locatedBlock.getLocations()[0];
+      LOG.info("Replica block located on: " + locatedDataNode);
+
+      Path ecDir = new Path(baseDir, "ec");
+      fs.mkdirs(ecDir);
+      fs.getClient().setErasureCodingPolicy(ecDir.toString(),
+          ecPolicy.getName());
+      Path ecFile = new Path(ecDir, "ec-file");
+      int stripesPerBlock = 2;
+      int cellSize = ecPolicy.getCellSize();
+      int blockSize = stripesPerBlock * cellSize;
+      int blockGroupSize = ecPolicy.getNumDataUnits() * blockSize;
+      int totalBlockGroups = 1;
+      DFSTestUtil.createStripedFile(miniCluster, ecFile, ecDir,
+          totalBlockGroups, stripesPerBlock, false, ecPolicy);
+
+      // Verify report command for all counts to be zero
+      resetStream();
+      assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"}));
+      verifyNodesAndCorruptBlocks(numDn, numDn, 0, 0, client);
+
+      // Choose a DataNode to shut down
+      final List<DataNode> datanodes = miniCluster.getDataNodes();
+      DataNode dataNodeToShutdown = null;
+      for (DataNode dn : datanodes) {
+        if (!dn.getDatanodeId().getDatanodeUuid().equals(
+            locatedDataNode.getDatanodeUuid())) {
+          dataNodeToShutdown = dn;
+          break;
+        }
+      }
+      assertTrue("Unable to choose a DataNode to shut down!",
+          dataNodeToShutdown != null);
+
+      // Shut down the DataNode not hosting the replicated block
+      LOG.info("Shutting down: " + dataNodeToShutdown);
+      dataNodeToShutdown.shutdown();
+      miniCluster.setDataNodeDead(dataNodeToShutdown.getDatanodeId());
+
+      // Verify report command to show dead DataNode
+      assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"}));
+      verifyNodesAndCorruptBlocks(numDn, numDn - 1, 0, 0, client);
+
+      // Corrupt the replicated block
       final int blockFilesCorrupted = miniCluster
           .corruptBlockOnDataNodes(block);
       assertEquals("Fail to corrupt all replicas for block " + block,
@@ -564,35 +631,44 @@ public class TestDFSAdmin {
         // expected exception reading corrupt blocks
       }
 
-      /*
-       * Increase replication factor, this should invoke transfer request.
-       * Receiving datanode fails on checksum and reports it to namenode
-       */
+      // Increase the replication factor; this should trigger a transfer
+      // request. The receiving datanode fails the checksum check and
+      // reports the corrupt replica to the namenode.
       fs.setReplication(file, (short) (replFactor + 1));
 
-      /* get block details and check if the block is corrupt */
-      GenericTestUtils.waitFor(new Supplier<Boolean>() {
-        @Override
-        public Boolean get() {
-          LocatedBlocks blocks = null;
-          try {
-            miniCluster.triggerBlockReports();
-            blocks = client.getNamenode().getBlockLocations(file.toString(), 0,
-                Long.MAX_VALUE);
-          } catch (IOException e) {
-            return false;
-          }
-          return blocks != null && blocks.get(0).isCorrupt();
-        }
-      }, 1000, 60000);
-
+      // Refresh block state and wait for the block to be reported corrupt
       BlockManagerTestUtil.updateState(
           miniCluster.getNameNode().getNamesystem().getBlockManager());
+      waitForCorruptBlock(miniCluster, client, file);
+
+      // verify report command for corrupt replicated block
+      resetStream();
+      assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"}));
+      verifyNodesAndCorruptBlocks(numDn, numDn - 1, 1, 0, client);
+
+      lbs = miniCluster.getFileSystem().getClient().
+          getNamenode().getBlockLocations(
+          ecFile.toString(), 0, blockGroupSize);
+      assertTrue("Unexpected block type: " + lbs.get(0),
+          lbs.get(0) instanceof LocatedStripedBlock);
+      LocatedStripedBlock bg =
+          (LocatedStripedBlock)(lbs.get(0));
+
+      miniCluster.getNamesystem().writeLock();
+      try {
+        BlockManager bm = miniCluster.getNamesystem().getBlockManager();
+        bm.findAndMarkBlockAsCorrupt(bg.getBlock(), bg.getLocations()[0],
+            "STORAGE_ID", "TEST");
+        BlockManagerTestUtil.updateState(bm);
+      } finally {
+        miniCluster.getNamesystem().writeUnlock();
+      }
+      waitForCorruptBlock(miniCluster, client, file);
 
-      /* run and verify report command */
+      // verify report command for corrupt replicated block
+      // and EC block group
       resetStream();
       assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"}));
-      verifyNodesAndCorruptBlocks(numDn, numDn - 1, 1, client);
+      verifyNodesAndCorruptBlocks(numDn, numDn - 1, 1, 1, client);
     }
   }
 
@@ -669,6 +745,7 @@ public class TestDFSAdmin {
       final int numDn,
       final int numLiveDn,
       final int numCorruptBlocks,
+      final int numCorruptECBlockGroups,
       final DFSClient client) throws IOException {
 
     /* init vars */
@@ -679,11 +756,15 @@ public class TestDFSAdmin {
     final String expectedCorruptedBlocksStr = String.format(
         "Blocks with corrupt replicas: %d",
         numCorruptBlocks);
+    final String expectedCorruptedECBlockGroupsStr = String.format(
+        "Block groups with corrupt internal blocks: %d",
+        numCorruptECBlockGroups);
 
-    /* verify nodes and corrupt blocks */
+    // verify nodes and corrupt blocks
     assertThat(outStr, is(allOf(
         containsString(expectedLiveNodesStr),
-        containsString(expectedCorruptedBlocksStr))));
+        containsString(expectedCorruptedBlocksStr),
+        containsString(expectedCorruptedECBlockGroupsStr))));
 
     assertEquals(
         numDn,
@@ -694,7 +775,12 @@ public class TestDFSAdmin {
     assertEquals(
         numDn - numLiveDn,
         client.getDatanodeStorageReport(DatanodeReportType.DEAD).length);
-    assertEquals(numCorruptBlocks, client.getCorruptBlocksCount());
+    assertEquals(numCorruptBlocks + numCorruptECBlockGroups,
+        client.getCorruptBlocksCount());
+    assertEquals(numCorruptBlocks, client.getNamenode()
+        .getBlocksStats().getCorruptBlocksStat());
+    assertEquals(numCorruptECBlockGroups, client.getNamenode()
+        .getECBlockGroupsStats().getCorruptBlockGroupsStat());
   }
 
   @Test

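One detail worth calling out in the final hunk: the legacy aggregate
counter stays equal to the sum of the two new per-type counters. Restated
as a standalone assertion (a paraphrase of the checks above, assuming the
same client object the test uses):

      // Invariant asserted by verifyNodesAndCorruptBlocks():
      assertEquals(
          client.getNamenode().getBlocksStats().getCorruptBlocksStat()
              + client.getNamenode().getECBlockGroupsStats()
                  .getCorruptBlockGroupsStat(),
          client.getCorruptBlocksCount());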
