You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ar...@apache.org on 2017/06/30 00:15:37 UTC

hadoop git commit: HDFS-12043. Add counters for block re-replication. Contributed by Chen Liang.

Repository: hadoop
Updated Branches:
  refs/heads/trunk 72993b33b -> 900221f95


HDFS-12043. Add counters for block re-replication. Contributed by Chen Liang.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/900221f9
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/900221f9
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/900221f9

Branch: refs/heads/trunk
Commit: 900221f95ea9fe1936b4d5f277e6047ee8734eca
Parents: 72993b3
Author: Arpit Agarwal <ar...@apache.org>
Authored: Thu Jun 29 17:15:13 2017 -0700
Committer: Arpit Agarwal <ar...@apache.org>
Committed: Thu Jun 29 17:15:13 2017 -0700

----------------------------------------------------------------------
 .../server/blockmanagement/BlockManager.java    | 13 ++-
 .../PendingReconstructionBlocks.java            |  8 +-
 .../namenode/metrics/NameNodeMetrics.java       | 18 ++++
 .../TestPendingReconstruction.java              | 86 +++++++++++++++++++-
 4 files changed, 118 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/900221f9/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index a0c4698..a5ee30b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -1851,7 +1851,7 @@ public class BlockManager implements BlockStatsMXBean {
         (pendingReplicaNum > 0 || isPlacementPolicySatisfied(block));
   }
 
-  private BlockReconstructionWork scheduleReconstruction(BlockInfo block,
+  BlockReconstructionWork scheduleReconstruction(BlockInfo block,
       int priority) {
     // skip abandoned block or block reopened for append
     if (block.isDeleted() || !block.isCompleteOrCommitted()) {
@@ -1873,6 +1873,7 @@ public class BlockManager implements BlockStatsMXBean {
     if(srcNodes == null || srcNodes.length == 0) {
       // block can not be reconstructed from any node
       LOG.debug("Block {} cannot be reconstructed from any node", block);
+      NameNode.getNameNodeMetrics().incNumTimesReReplicationNotScheduled();
       return null;
     }
 
@@ -1885,6 +1886,7 @@ public class BlockManager implements BlockStatsMXBean {
       neededReconstruction.remove(block, priority);
       blockLog.debug("BLOCK* Removing {} from neededReconstruction as" +
           " it has enough replicas", block);
+      NameNode.getNameNodeMetrics().incNumTimesReReplicationNotScheduled();
       return null;
     }
 
@@ -1900,6 +1902,7 @@ public class BlockManager implements BlockStatsMXBean {
     if (block.isStriped()) {
       if (pendingNum > 0) {
         // Wait the previous reconstruction to finish.
+        NameNode.getNameNodeMetrics().incNumTimesReReplicationNotScheduled();
         return null;
       }
 
@@ -3727,8 +3730,8 @@ public class BlockManager implements BlockStatsMXBean {
    * The given node is reporting that it received a certain block.
    */
   @VisibleForTesting
-  void addBlock(DatanodeStorageInfo storageInfo, Block block, String delHint)
-      throws IOException {
+  public void addBlock(DatanodeStorageInfo storageInfo, Block block,
+      String delHint) throws IOException {
     DatanodeDescriptor node = storageInfo.getDatanodeDescriptor();
     // Decrement number of blocks scheduled to this datanode.
     // for a retry request (of DatanodeProtocol#blockReceivedAndDeleted with 
@@ -3751,7 +3754,9 @@ public class BlockManager implements BlockStatsMXBean {
     BlockInfo storedBlock = getStoredBlock(block);
     if (storedBlock != null &&
         block.getGenerationStamp() == storedBlock.getGenerationStamp()) {
-      pendingReconstruction.decrement(storedBlock, node);
+      if (pendingReconstruction.decrement(storedBlock, node)) {
+        NameNode.getNameNodeMetrics().incSuccessfulReReplications();
+      }
     }
     processAndHandleReportedBlock(storageInfo, block, ReplicaState.FINALIZED,
         delHintNode);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/900221f9/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReconstructionBlocks.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReconstructionBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReconstructionBlocks.java
index 2221d1d..0f20daa 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReconstructionBlocks.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReconstructionBlocks.java
@@ -30,6 +30,7 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.util.Daemon;
 import org.slf4j.Logger;
 
@@ -97,8 +98,10 @@ class PendingReconstructionBlocks {
    * for this block.
    *
    * @param dn The DataNode that finishes the reconstruction
+   * @return true if the block is decremented to 0 and got removed.
    */
-  void decrement(BlockInfo block, DatanodeDescriptor dn) {
+  boolean decrement(BlockInfo block, DatanodeDescriptor dn) {
+    boolean removed = false;
     synchronized (pendingReconstructions) {
       PendingBlockInfo found = pendingReconstructions.get(block);
       if (found != null) {
@@ -106,9 +109,11 @@ class PendingReconstructionBlocks {
         found.decrementReplicas(dn);
         if (found.getNumReplicas() <= 0) {
           pendingReconstructions.remove(block);
+          removed = true;
         }
       }
     }
+    return removed;
   }
 
   /**
@@ -263,6 +268,7 @@ class PendingReconstructionBlocks {
               timedOutItems.add(block);
             }
             LOG.warn("PendingReconstructionMonitor timed out " + block);
+            NameNode.getNameNodeMetrics().incTimeoutReReplications();
             iter.remove();
           }
         }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/900221f9/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java
index cb81f5a..f2534e4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java
@@ -58,6 +58,12 @@ public class NameNodeMetrics {
   @Metric MutableCounterLong createSymlinkOps;
   @Metric MutableCounterLong getLinkTargetOps;
   @Metric MutableCounterLong filesInGetListingOps;
+  @Metric ("Number of successful re-replications")
+  MutableCounterLong successfulReReplications;
+  @Metric ("Number of times we failed to schedule a block re-replication.")
+  MutableCounterLong numTimesReReplicationNotScheduled;
+  @Metric("Number of timed out block re-replications")
+  MutableCounterLong timeoutReReplications;
   @Metric("Number of allowSnapshot operations")
   MutableCounterLong allowSnapshotOps;
   @Metric("Number of disallowSnapshot operations")
@@ -300,6 +306,18 @@ public class NameNodeMetrics {
     transactionsBatchedInSync.incr(count);
   }
 
+  public void incSuccessfulReReplications() {
+    successfulReReplications.incr();
+  }
+
+  public void incNumTimesReReplicationNotScheduled() {
+    numTimesReReplicationNotScheduled.incr();
+  }
+
+  public void incTimeoutReReplications() {
+    timeoutReReplications.incr();
+  }
+
   public void addSync(long elapsed) {
     syncs.add(elapsed);
     for (MutableQuantiles q : syncsQuantiles) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/900221f9/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReconstruction.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReconstruction.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReconstruction.java
index 7679f9d..042eae7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReconstruction.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReconstruction.java
@@ -17,6 +17,10 @@
  */
 package org.apache.hadoop.hdfs.server.blockmanagement;
 
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RECONSTRUCTION_PENDING_TIMEOUT_SEC_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY;
+import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
+import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
@@ -44,6 +48,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
 import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo;
 import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo.BlockStatus;
 import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
 import org.junit.Test;
 import org.mockito.Mockito;
 
@@ -178,7 +183,7 @@ public class TestPendingReconstruction {
   public void testProcessPendingReconstructions() throws Exception {
     final Configuration conf = new HdfsConfiguration();
     conf.setLong(
-        DFSConfigKeys.DFS_NAMENODE_RECONSTRUCTION_PENDING_TIMEOUT_SEC_KEY, TIMEOUT);
+        DFS_NAMENODE_RECONSTRUCTION_PENDING_TIMEOUT_SEC_KEY, TIMEOUT);
     MiniDFSCluster cluster = null;
     Block block;
     BlockInfo blockInfo;
@@ -418,7 +423,7 @@ public class TestPendingReconstruction {
     CONF.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
     CONF.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY,
         DFS_REPLICATION_INTERVAL);
-    CONF.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY,
+    CONF.setInt(DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY,
         DFS_REPLICATION_INTERVAL);
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(CONF).numDataNodes(
         DATANODE_COUNT).build();
@@ -471,4 +476,81 @@ public class TestPendingReconstruction {
       cluster.shutdown();
     }
   }
+
+  @Test
+  public void testReplicationCounter() throws Exception {
+    HdfsConfiguration conf = new HdfsConfiguration();
+    conf.setInt(DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1);
+    conf.setInt(DFS_NAMENODE_RECONSTRUCTION_PENDING_TIMEOUT_SEC_KEY, 2);
+    MiniDFSCluster tmpCluster = new MiniDFSCluster.Builder(conf).numDataNodes(
+        DATANODE_COUNT).build();
+    tmpCluster.waitActive();
+    FSNamesystem fsn = tmpCluster.getNamesystem(0);
+    fsn.writeLock();
+
+    try {
+      BlockManager bm = fsn.getBlockManager();
+      BlocksMap blocksMap = bm.blocksMap;
+
+      // create three blockInfo below, blockInfo0 will success, blockInfo1 will
+      // time out, blockInfo2 will fail the replication.
+      BlockCollection bc0 = Mockito.mock(BlockCollection.class);
+      BlockInfo blockInfo0 = new BlockInfoContiguous((short) 3);
+      blockInfo0.setBlockId(0);
+
+      BlockCollection bc1 = Mockito.mock(BlockCollection.class);
+      BlockInfo blockInfo1 = new BlockInfoContiguous((short) 3);
+      blockInfo1.setBlockId(1);
+
+      BlockCollection bc2 = Mockito.mock(BlockCollection.class);
+      Mockito.when(bc2.getId()).thenReturn((2L));
+      BlockInfo blockInfo2 = new BlockInfoContiguous((short) 3);
+      blockInfo2.setBlockId(2);
+
+      blocksMap.addBlockCollection(blockInfo0, bc0);
+      blocksMap.addBlockCollection(blockInfo1, bc1);
+      blocksMap.addBlockCollection(blockInfo2, bc2);
+
+      PendingReconstructionBlocks pending = bm.pendingReconstruction;
+
+      MetricsRecordBuilder rb = getMetrics("NameNodeActivity");
+      assertCounter("SuccessfulReReplications", 0L, rb);
+      assertCounter("NumTimesReReplicationNotScheduled", 0L, rb);
+      assertCounter("TimeoutReReplications", 0L, rb);
+
+      // add block0 and block1 to pending queue.
+      pending.increment(blockInfo0);
+      pending.increment(blockInfo1);
+
+      Thread.sleep(2000);
+
+      rb = getMetrics("NameNodeActivity");
+      assertCounter("SuccessfulReReplications", 0L, rb);
+      assertCounter("NumTimesReReplicationNotScheduled", 0L, rb);
+      assertCounter("TimeoutReReplications", 0L, rb);
+
+      // call addBlock on block0 will make it successfully replicated.
+      // not callign addBlock on block1 will make it timeout later.
+      DatanodeStorageInfo[] storageInfos =
+          DFSTestUtil.createDatanodeStorageInfos(1);
+      bm.addBlock(storageInfos[0], blockInfo0, null);
+
+      // call schedule replication on blockInfo2 will fail the re-replication.
+      // because there is no source data to replicate from.
+      bm.scheduleReconstruction(blockInfo2, 0);
+
+      Thread.sleep(2000);
+
+      rb = getMetrics("NameNodeActivity");
+      assertCounter("SuccessfulReReplications", 1L, rb);
+      assertCounter("NumTimesReReplicationNotScheduled", 1L, rb);
+      assertCounter("TimeoutReReplications", 1L, rb);
+
+    } finally {
+      tmpCluster.shutdown();
+      fsn.writeUnlock();
+    }
+  }
+
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org