Posted to common-commits@hadoop.apache.org by ma...@apache.org on 2017/08/26 00:22:04 UTC
hadoop git commit: HDFS-11986. Dfsadmin should report erasure coding related information separately.
Repository: hadoop
Updated Branches:
refs/heads/trunk 36bada303 -> b89ffcff3
HDFS-11986. Dfsadmin should report erasure coding related information separately.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/b89ffcff
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/b89ffcff
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/b89ffcff
Branch: refs/heads/trunk
Commit: b89ffcff362a872013f5d96c1fb76e0731402db4
Parents: 36bada3
Author: Manoj Govindassamy <ma...@apache.org>
Authored: Fri Aug 25 17:21:56 2017 -0700
Committer: Manoj Govindassamy <ma...@apache.org>
Committed: Fri Aug 25 17:21:56 2017 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hdfs/tools/DFSAdmin.java | 36 +++-
.../apache/hadoop/hdfs/tools/TestDFSAdmin.java | 182 ++++++++++++++-----
2 files changed, 160 insertions(+), 58 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b89ffcff/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
index 1fb1d5f..88aafe2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
@@ -66,11 +66,13 @@ import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.NameNodeProxies;
import org.apache.hadoop.hdfs.NameNodeProxiesClient.ProxyAndInfo;
+import org.apache.hadoop.hdfs.protocol.BlocksStats;
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.DatanodeLocalInfo;
import org.apache.hadoop.hdfs.protocol.DatanodeVolumeInfo;
+import org.apache.hadoop.hdfs.protocol.ECBlockGroupsStats;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.RollingUpgradeAction;
@@ -532,16 +534,30 @@ public class DFSAdmin extends FsShell {
* minutes. Use "-metaSave" to list all such blocks and get accurate
* counts.
*/
- System.out.println("Under replicated blocks: " +
- dfs.getLowRedundancyBlocksCount());
- System.out.println("Blocks with corrupt replicas: " +
- dfs.getCorruptBlocksCount());
- System.out.println("Missing blocks: " +
- dfs.getMissingBlocksCount());
- System.out.println("Missing blocks (with replication factor 1): " +
- dfs.getMissingReplOneBlocksCount());
- System.out.println("Pending deletion blocks: " +
- dfs.getPendingDeletionBlocksCount());
+ BlocksStats blocksStats = dfs.getClient().getNamenode().getBlocksStats();
+ System.out.println("Replicated Blocks:");
+ System.out.println("\tUnder replicated blocks: " +
+ blocksStats.getLowRedundancyBlocksStat());
+ System.out.println("\tBlocks with corrupt replicas: " +
+ blocksStats.getCorruptBlocksStat());
+ System.out.println("\tMissing blocks: " +
+ blocksStats.getMissingReplicaBlocksStat());
+ System.out.println("\tMissing blocks (with replication factor 1): " +
+ blocksStats.getMissingReplicationOneBlocksStat());
+ System.out.println("\tPending deletion blocks: " +
+ blocksStats.getPendingDeletionBlocksStat());
+
+ ECBlockGroupsStats ecBlockGroupsStats =
+ dfs.getClient().getNamenode().getECBlockGroupsStats();
+ System.out.println("Erasure Coded Block Groups: ");
+ System.out.println("\tLow redundancy block groups: " +
+ ecBlockGroupsStats.getLowRedundancyBlockGroupsStat());
+ System.out.println("\tBlock groups with corrupt internal blocks: " +
+ ecBlockGroupsStats.getCorruptBlockGroupsStat());
+ System.out.println("\tMissing block groups: " +
+ ecBlockGroupsStats.getMissingBlockGroupsStat());
+ System.out.println("\tPending deletion block groups: " +
+ ecBlockGroupsStats.getPendingDeletionBlockGroupsStat());
System.out.println();
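----------------------------------------------------------------------
For reference, the summary section that `hdfs dfsadmin -report` prints after this change is grouped as below. This is an illustrative sketch assembled from the println calls above; the counts are made-up example values, not output from a real cluster:

Replicated Blocks:
        Under replicated blocks: 1
        Blocks with corrupt replicas: 1
        Missing blocks: 0
        Missing blocks (with replication factor 1): 0
        Pending deletion blocks: 0
Erasure Coded Block Groups:
        Low redundancy block groups: 0
        Block groups with corrupt internal blocks: 1
        Missing block groups: 0
        Pending deletion block groups: 0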
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b89ffcff/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java
index ed43dc0..2d38f2f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java
@@ -38,11 +38,18 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
+import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
+import org.apache.hadoop.hdfs.protocol.LocatedStripedBlock;
+import org.apache.hadoop.hdfs.protocol.SystemErasureCodingPolicies;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
@@ -498,24 +505,47 @@ public class TestDFSAdmin {
return sb.toString();
}
- @Test(timeout = 120000)
+ // get block details and check if the block is corrupt
+ private void waitForCorruptBlock(MiniDFSCluster miniCluster,
+ DFSClient client, Path file)
+ throws TimeoutException, InterruptedException {
+ GenericTestUtils.waitFor(new Supplier<Boolean>() {
+ @Override
+ public Boolean get() {
+ LocatedBlocks blocks = null;
+ try {
+ miniCluster.triggerBlockReports();
+ blocks = client.getNamenode().getBlockLocations(file.toString(), 0,
+ Long.MAX_VALUE);
+ } catch (IOException e) {
+ return false;
+ }
+ return blocks != null && blocks.get(0).isCorrupt();
+ }
+ }, 1000, 60000);
+ }
+
+ @Test(timeout = 180000)
public void testReportCommand() throws Exception {
+ tearDown();
redirectStream();
- /* init conf */
+ // init conf
final Configuration dfsConf = new HdfsConfiguration();
+ ErasureCodingPolicy ecPolicy = SystemErasureCodingPolicies.getByID(
+ SystemErasureCodingPolicies.XOR_2_1_POLICY_ID);
+ dfsConf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY,
+ ecPolicy.getName());
dfsConf.setInt(
- DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY,
- 500); // 0.5s
+ DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 500);
dfsConf.setLong(DFS_HEARTBEAT_INTERVAL_KEY, 1);
final Path baseDir = new Path(
PathUtils.getTestDir(getClass()).getAbsolutePath(),
GenericTestUtils.getMethodName());
dfsConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.toString());
+ final int numDn =
+ ecPolicy.getNumDataUnits() + ecPolicy.getNumParityUnits();
- final int numDn = 3;
-
- /* init cluster */
try(MiniDFSCluster miniCluster = new MiniDFSCluster
.Builder(dfsConf)
.numDataNodes(numDn).build()) {
@@ -523,34 +553,71 @@ public class TestDFSAdmin {
miniCluster.waitActive();
assertEquals(numDn, miniCluster.getDataNodes().size());
- /* local vars */
final DFSAdmin dfsAdmin = new DFSAdmin(dfsConf);
final DFSClient client = miniCluster.getFileSystem().getClient();
- /* run and verify report command */
+ // Verify report command for all counts to be zero
resetStream();
assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"}));
- verifyNodesAndCorruptBlocks(numDn, numDn, 0, client);
+ verifyNodesAndCorruptBlocks(numDn, numDn, 0, 0, client);
- /* shut down one DN */
- final List<DataNode> datanodes = miniCluster.getDataNodes();
- final DataNode last = datanodes.get(datanodes.size() - 1);
- last.shutdown();
- miniCluster.setDataNodeDead(last.getDatanodeId());
-
- /* run and verify report command */
- assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"}));
- verifyNodesAndCorruptBlocks(numDn, numDn - 1, 0, client);
-
- /* corrupt one block */
final short replFactor = 1;
final long fileLength = 512L;
- final FileSystem fs = miniCluster.getFileSystem();
+ final DistributedFileSystem fs = miniCluster.getFileSystem();
final Path file = new Path(baseDir, "/corrupted");
DFSTestUtil.createFile(fs, file, fileLength, replFactor, 12345L);
DFSTestUtil.waitReplication(fs, file, replFactor);
-
final ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, file);
+ LocatedBlocks lbs = miniCluster.getFileSystem().getClient().
+ getNamenode().getBlockLocations(
+ file.toString(), 0, fileLength);
+ assertTrue("Unexpected block type: " + lbs.get(0),
+ lbs.get(0) instanceof LocatedBlock);
+ LocatedBlock locatedBlock = lbs.get(0);
+ DatanodeInfo locatedDataNode = locatedBlock.getLocations()[0];
+ LOG.info("Replica block located on: " + locatedDataNode);
+
+ Path ecDir = new Path(baseDir, "ec");
+ fs.mkdirs(ecDir);
+ fs.getClient().setErasureCodingPolicy(ecDir.toString(),
+ ecPolicy.getName());
+ Path ecFile = new Path(ecDir, "ec-file");
+ int stripesPerBlock = 2;
+ int cellSize = ecPolicy.getCellSize();
+ int blockSize = stripesPerBlock * cellSize;
+ int blockGroupSize = ecPolicy.getNumDataUnits() * blockSize;
+ int totalBlockGroups = 1;
+ DFSTestUtil.createStripedFile(miniCluster, ecFile, ecDir,
+ totalBlockGroups, stripesPerBlock, false, ecPolicy);
+
+ // Verify report command for all counts to be zero
+ resetStream();
+ assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"}));
+ verifyNodesAndCorruptBlocks(numDn, numDn, 0, 0, client);
+
+ // Choose a DataNode to shutdown
+ final List<DataNode> datanodes = miniCluster.getDataNodes();
+ DataNode dataNodeToShutdown = null;
+ for (DataNode dn : datanodes) {
+ if (!dn.getDatanodeId().getDatanodeUuid().equals(
+ locatedDataNode.getDatanodeUuid())) {
+ dataNodeToShutdown = dn;
+ break;
+ }
+ }
+ assertTrue("Unable to choose a DataNode to shutdown!",
+ dataNodeToShutdown != null);
+
+ // Shut down the DataNode not hosting the replicated block
+ LOG.info("Shutting down: " + dataNodeToShutdown);
+ dataNodeToShutdown.shutdown();
+ miniCluster.setDataNodeDead(dataNodeToShutdown.getDatanodeId());
+
+ // Verify report command to show dead DataNode
+ assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"}));
+ verifyNodesAndCorruptBlocks(numDn, numDn - 1, 0, 0, client);
+
+ // Corrupt the replicated block
final int blockFilesCorrupted = miniCluster
.corruptBlockOnDataNodes(block);
assertEquals("Fail to corrupt all replicas for block " + block,
@@ -564,35 +631,44 @@ public class TestDFSAdmin {
// expected exception reading corrupt blocks
}
- /*
- * Increase replication factor, this should invoke transfer request.
- * Receiving datanode fails on checksum and reports it to namenode
- */
+ // Increase replication factor, this should invoke transfer request.
+ // Receiving datanode fails on checksum and reports it to namenode
fs.setReplication(file, (short) (replFactor + 1));
- /* get block details and check if the block is corrupt */
- GenericTestUtils.waitFor(new Supplier<Boolean>() {
- @Override
- public Boolean get() {
- LocatedBlocks blocks = null;
- try {
- miniCluster.triggerBlockReports();
- blocks = client.getNamenode().getBlockLocations(file.toString(), 0,
- Long.MAX_VALUE);
- } catch (IOException e) {
- return false;
- }
- return blocks != null && blocks.get(0).isCorrupt();
- }
- }, 1000, 60000);
-
+ // get block details and check if the block is corrupt
BlockManagerTestUtil.updateState(
miniCluster.getNameNode().getNamesystem().getBlockManager());
+ waitForCorruptBlock(miniCluster, client, file);
+
+ // verify report command for corrupt replicated block
+ resetStream();
+ assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"}));
+ verifyNodesAndCorruptBlocks(numDn, numDn - 1, 1, 0, client);
+
+ lbs = miniCluster.getFileSystem().getClient().
+ getNamenode().getBlockLocations(
+ ecFile.toString(), 0, blockGroupSize);
+ assertTrue("Unexpected block type: " + lbs.get(0),
+ lbs.get(0) instanceof LocatedStripedBlock);
+ LocatedStripedBlock bg =
+ (LocatedStripedBlock)(lbs.get(0));
+
+ miniCluster.getNamesystem().writeLock();
+ try {
+ BlockManager bm = miniCluster.getNamesystem().getBlockManager();
+ bm.findAndMarkBlockAsCorrupt(bg.getBlock(), bg.getLocations()[0],
+ "STORAGE_ID", "TEST");
+ BlockManagerTestUtil.updateState(bm);
+ } finally {
+ miniCluster.getNamesystem().writeUnlock();
+ }
+ waitForCorruptBlock(miniCluster, client, file);
- /* run and verify report command */
+ // verify report command for corrupt replicated block
+ // and EC block group
resetStream();
assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"}));
- verifyNodesAndCorruptBlocks(numDn, numDn - 1, 1, client);
+ verifyNodesAndCorruptBlocks(numDn, numDn - 1, 1, 1, client);
}
}
@@ -669,6 +745,7 @@ public class TestDFSAdmin {
final int numDn,
final int numLiveDn,
final int numCorruptBlocks,
+ final int numCorruptECBlockGroups,
final DFSClient client) throws IOException {
/* init vars */
@@ -679,11 +756,15 @@ public class TestDFSAdmin {
final String expectedCorruptedBlocksStr = String.format(
"Blocks with corrupt replicas: %d",
numCorruptBlocks);
+ final String expectedCorruptedECBlockGroupsStr = String.format(
+ "Block groups with corrupt internal blocks: %d",
+ numCorruptECBlockGroups);
- /* verify nodes and corrupt blocks */
+ // verify nodes and corrupt blocks
assertThat(outStr, is(allOf(
containsString(expectedLiveNodesStr),
- containsString(expectedCorruptedBlocksStr))));
+ containsString(expectedCorruptedBlocksStr),
+ containsString(expectedCorruptedECBlockGroupsStr))));
assertEquals(
numDn,
@@ -694,7 +775,12 @@ public class TestDFSAdmin {
assertEquals(
numDn - numLiveDn,
client.getDatanodeStorageReport(DatanodeReportType.DEAD).length);
- assertEquals(numCorruptBlocks, client.getCorruptBlocksCount());
+ assertEquals(numCorruptBlocks + numCorruptECBlockGroups,
+ client.getCorruptBlocksCount());
+ assertEquals(numCorruptBlocks, client.getNamenode()
+ .getBlocksStats().getCorruptBlocksStat());
+ assertEquals(numCorruptECBlockGroups, client.getNamenode()
+ .getECBlockGroupsStats().getCorruptBlockGroupsStat());
}
@Test
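----------------------------------------------------------------------
For context, here is a minimal sketch of reading the two new stats objects directly over the client protocol, using the same RPCs the -report handler above now calls. It is illustrative only: PrintBlockStats is a hypothetical class that is not part of this commit, it assumes fs.defaultFS points at a running HDFS NameNode, and it reaches the NameNode through DistributedFileSystem#getClient() the same way the test does:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.BlocksStats;
import org.apache.hadoop.hdfs.protocol.ECBlockGroupsStats;

// Hypothetical example, not part of this commit.
public class PrintBlockStats {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Assumes fs.defaultFS resolves to a DistributedFileSystem.
    try (FileSystem fs = FileSystem.get(conf)) {
      DistributedFileSystem dfs = (DistributedFileSystem) fs;
      // Counters for replicated blocks ("Replicated Blocks:" section).
      BlocksStats replicated =
          dfs.getClient().getNamenode().getBlocksStats();
      System.out.println("Corrupt replicated blocks: "
          + replicated.getCorruptBlocksStat());
      // Counters for striped blocks ("Erasure Coded Block Groups:" section).
      ECBlockGroupsStats ecGroups =
          dfs.getClient().getNamenode().getECBlockGroupsStats();
      System.out.println("Corrupt EC block groups: "
          + ecGroups.getCorruptBlockGroupsStat());
    }
  }
}

Two notes on the test above. First, the EC file geometry: with the XOR-2-1 policy (2 data units, 1 parity unit) and, assuming its default 1 MiB cell size, stripesPerBlock = 2 gives blockSize = 2 * 1 MiB = 2 MiB and blockGroupSize = 2 data units * 2 MiB = 4 MiB; hosting one full block group therefore needs numDataUnits + numParityUnits = 3 DataNodes, which is why numDn is derived from the policy instead of hard-coded. Second, the aggregate counter: client.getCorruptBlocksCount() still returns the combined total, so verifyNodesAndCorruptBlocks asserts it equals numCorruptBlocks + numCorruptECBlockGroups while the two new per-type getters are checked separately.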