You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ji...@apache.org on 2016/06/07 20:26:05 UTC

[13/50] [abbrv] hadoop git commit: HDFS-10341. Add a metric to expose the timeout number of pending replication blocks. (Contributed by Akira Ajisaka)

HDFS-10341. Add a metric to expose the timeout number of pending replication blocks. (Contributed by Akira Ajisaka)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/97e24494
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/97e24494
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/97e24494

Branch: refs/heads/YARN-4757
Commit: 97e244947719d483c3f80521a00fec8e13dcb637
Parents: 1df6f57
Author: Arpit Agarwal <ar...@apache.org>
Authored: Thu Jun 2 13:14:45 2016 -0700
Committer: Arpit Agarwal <ar...@apache.org>
Committed: Thu Jun 2 13:14:45 2016 -0700

----------------------------------------------------------------------
 .../hadoop-common/src/site/markdown/Metrics.md  |  1 +
 .../server/blockmanagement/BlockManager.java    |  4 ++++
 .../PendingReconstructionBlocks.java            | 16 +++++++++++++++-
 .../hdfs/server/namenode/FSNamesystem.java      |  5 +++++
 .../TestPendingReconstruction.java              | 20 ++++++++++++++------
 5 files changed, 39 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/97e24494/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md
index 699316f..e4e2443 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md
@@ -218,6 +218,7 @@ Each metrics record contains tags such as HAState and Hostname as additional inf
 | `TotalSyncCount` | Total number of sync operations performed by edit log |
 | `TotalSyncTimes` | Total number of milliseconds spent by various edit logs in sync operation|
 | `NameDirSize` | NameNode name directories size in bytes |
+| `NumTimedOutPendingReconstructions` | The number of timed-out reconstructions. Note that this is not the number of unique blocks that timed out. |
 
 JournalNode
 -----------

http://git-wip-us.apache.org/repos/asf/hadoop/blob/97e24494/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index ed57a86..1a76e09 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -198,6 +198,10 @@ public class BlockManager implements BlockStatsMXBean {
   public int getPendingDataNodeMessageCount() {
     return pendingDNMessages.count();
   }
+  /** Used by metrics. */
+  public long getNumTimedOutPendingReconstructions() {
+    return pendingReconstruction.getNumTimedOuts();
+  }
 
   /**replicationRecheckInterval is how often namenode checks for new replication work*/
   private final long replicationRecheckInterval;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/97e24494/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReconstructionBlocks.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReconstructionBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReconstructionBlocks.java
index 528199c..956e94f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReconstructionBlocks.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReconstructionBlocks.java
@@ -50,6 +50,7 @@ class PendingReconstructionBlocks {
   private final ArrayList<BlockInfo> timedOutItems;
   Daemon timerThread = null;
   private volatile boolean fsRunning = true;
+  private long timedOutCount = 0L;
 
   //
   // It might take anywhere between 5 to 10 minutes before
@@ -125,6 +126,7 @@ class PendingReconstructionBlocks {
     synchronized (pendingReconstructions) {
       pendingReconstructions.clear();
       timedOutItems.clear();
+      timedOutCount = 0L;
     }
   }
 
@@ -149,6 +151,16 @@ class PendingReconstructionBlocks {
   }
 
   /**
+   * Used for metrics.
+   * @return The number of timeouts
+   */
+  long getNumTimedOuts() {
+    synchronized (timedOutItems) {
+      return timedOutCount + timedOutItems.size();
+    }
+  }
+
+  /**
    * Returns a list of blocks that have timed out their
    * reconstruction requests. Returns null if no blocks have
    * timed out.
@@ -158,9 +170,11 @@ class PendingReconstructionBlocks {
       if (timedOutItems.size() <= 0) {
         return null;
       }
+      int size = timedOutItems.size();
       BlockInfo[] blockList = timedOutItems.toArray(
-          new BlockInfo[timedOutItems.size()]);
+          new BlockInfo[size]);
       timedOutItems.clear();
+      timedOutCount += size;
       return blockList;
     }
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/97e24494/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index a2df6d2..c9f2487 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -4507,6 +4507,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
   public long getExcessBlocks() {
     return blockManager.getExcessBlocksCount();
   }
+
+  @Metric
+  public long getNumTimedOutPendingReconstructions() {
+    return blockManager.getNumTimedOutPendingReconstructions();
+  }
   
   // HA-only metric
   @Metric

http://git-wip-us.apache.org/repos/asf/hadoop/blob/97e24494/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReconstruction.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReconstruction.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReconstruction.java
index d07c657..c30f630 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReconstruction.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReconstruction.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hdfs.server.blockmanagement;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 
 import java.util.ArrayList;
@@ -117,14 +119,15 @@ public class TestPendingReconstruction {
     //
     // verify that nothing has timed out so far
     //
-    assertTrue(pendingReconstructions.getTimedOutBlocks() == null);
+    assertNull(pendingReconstructions.getTimedOutBlocks());
+    assertEquals(0L, pendingReconstructions.getNumTimedOuts());
 
     //
     // Wait for one second and then insert some more items.
     //
     try {
       Thread.sleep(1000);
-    } catch (Exception e) {
+    } catch (Exception ignored) {
     }
 
     for (int i = 10; i < 15; i++) {
@@ -133,7 +136,8 @@ public class TestPendingReconstruction {
           DatanodeStorageInfo.toDatanodeDescriptors(
               DFSTestUtil.createDatanodeStorageInfos(i)));
     }
-    assertTrue(pendingReconstructions.size() == 15);
+    assertEquals(15, pendingReconstructions.size());
+    assertEquals(0L, pendingReconstructions.getNumTimedOuts());
 
     //
     // Wait for everything to timeout.
@@ -153,10 +157,14 @@ public class TestPendingReconstruction {
     // Verify that everything has timed out.
     //
     assertEquals("Size of pendingReconstructions ", 0, pendingReconstructions.size());
+    assertEquals(15L, pendingReconstructions.getNumTimedOuts());
     Block[] timedOut = pendingReconstructions.getTimedOutBlocks();
-    assertTrue(timedOut != null && timedOut.length == 15);
-    for (int i = 0; i < timedOut.length; i++) {
-      assertTrue(timedOut[i].getBlockId() < 15);
+    assertNotNull(timedOut);
+    assertEquals(15, timedOut.length);
+    // Verify the number is not reset
+    assertEquals(15L, pendingReconstructions.getNumTimedOuts());
+    for (Block block : timedOut) {
+      assertTrue(block.getBlockId() < 15);
     }
     pendingReconstructions.stop();
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org