You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:22:28 UTC

svn commit: r1181581 - in /hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master: HMaster.java ProcessServerShutdown.java RegionManager.java ServerManager.java metrics/MasterMetrics.java

Author: nspiegelberg
Date: Tue Oct 11 02:22:28 2011
New Revision: 1181581

URL: http://svn.apache.org/viewvc?rev=1181581&view=rev
Log:
expose more stats from HBase master node

Summary:
Added the following new stats to HMaster:
"hadoop.master_avgrequestcount":
"hadoop.master_maxrequestcount":
"hadoop.master_minrequestcount":
"hadoop.master_numlogssplit":
"hadoop.master_numregionservers":
"hadoop.master_numregionsopened":
"hadoop.master_numrsexpired":
"hadoop.master_sizeoflogssplit":

=========
Sample output
=========
{
"hadoop.master_avgrequestcount": 384,
"hadoop.master_cluster_requests": 456.75,
"hadoop.master_maxrequestcount": 1134,
"hadoop.master_minrequestcount": 0,
"hadoop.master_numlogssplit": 4,
"hadoop.master_numregionservers": 2,
"hadoop.master_numregionsopened": 33,
"hadoop.master_numrsexpired": 2,
"hadoop.master_rpc_getclusterstatus_avg_time": 0,
"hadoop.master_rpc_getclusterstatus_num_ops": 2,
"hadoop.master_rpc_getprotocolversion_avg_time": 0,
"hadoop.master_rpc_getprotocolversion_num_ops": 11,
"hadoop.master_rpc_ismasterrunning_avg_time": 0,
"hadoop.master_rpc_ismasterrunning_num_ops": 3,
"hadoop.master_rpc_regionserverreport_avg_time": 0,
"hadoop.master_rpc_regionserverreport_num_ops": 4908,
"hadoop.master_rpc_regionserverstartup_avg_time": 0,
"hadoop.master_rpc_regionserverstartup_num_ops": 5,
"hadoop.master_rpc_rpcprocessingtime_avg_time": 0,
"hadoop.master_rpc_rpcprocessingtime_num_ops": 0,
"hadoop.master_rpc_rpcqueuetime_avg_time": 0,
"hadoop.master_rpc_rpcqueuetime_num_ops": 0,
"hadoop.master_sizeoflogssplit": 2335562724,
"hadoop.master_splitsizeavgtime": 56676160,
"hadoop.master_splitsizemaxtime": 510085452,
"hadoop.master_splitsizemintime": 0,
"hadoop.master_splitsizenumops": 0,
"hadoop.master_splittimeavgtime": 1934,
"hadoop.master_splittimemaxtime": 16394,
"hadoop.master_splittimemintime": 0,
"hadoop.master_splittimenumops": 0
}

Test Plan:
tested on devcluster
1. Tested by killing region servers to check the RS_Expired count.
2. Checked the number and total size of the logs that were split by disabling
the delete operation after the split.
3. Tested that the number of regions opened updates correctly when a particular
regionserver is shut down.

Reviewed By: kannan
Reviewers: kannan, nspiegelberg
Commenters: nspiegelberg
CC: alasla, kannan, nspiegelberg
Differential Revision: 268851

Modified:
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/metrics/MasterMetrics.java

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1181581&r1=1181580&r2=1181581&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Tue Oct 11 02:22:28 2011
@@ -36,7 +36,8 @@ import java.util.concurrent.atomic.Atomi
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
-
+import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.GnuParser;
 import org.apache.commons.cli.Options;
@@ -275,7 +276,7 @@ public class HMaster extends Thread impl
     regionManager = new RegionManager(this);
 
     setName(MASTER);
-    this.metrics = new MasterMetrics(MASTER);
+    this.metrics = new MasterMetrics(MASTER, this.serverManager);
     // We're almost open for business
     this.closed.set(false);
     LOG.info("HMaster w/ hbck initialized on " + this.address.toString());
@@ -716,7 +717,7 @@ public class HMaster extends Thread impl
       if(this.serverManager.getServerInfo(serverName) == null) {
         LOG.info("Log folder doesn't belong " +
           "to a known region server, splitting");
-        long splitTime = 0, splitSize = 0;
+        long splitTime = 0, splitSize = 0, splitCount = 0;
 
         this.splitLogLock.lock();
         try {
@@ -731,15 +732,17 @@ public class HMaster extends Thread impl
             logDir = splitDir;
             LOG.debug("Renamed region directory: " + splitDir);
           }
+          ContentSummary contentSummary = fs.getContentSummary(logDir);
+          splitCount = contentSummary.getFileCount();
+          splitSize = contentSummary.getSpaceConsumed();
           HLog.splitLog(this.rootdir, logDir, oldLogDir, this.fs, getConfiguration());
           splitTime = HLog.lastSplitTime;
-          splitSize = HLog.lastSplitSize;
+          this.metrics.addSplit(splitTime, splitCount, splitSize );
         } catch (IOException e) {
           LOG.error("Failed splitting " + logDir.toString(), e);
         } finally {
           this.splitLogLock.unlock();
         }
-        this.metrics.addSplit(splitTime, splitSize);
       } else {
         LOG.info("Log folder belongs to an existing region server");
       }

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java?rev=1181581&r1=1181580&r2=1181581&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java Tue Oct 11 02:22:28 2011
@@ -19,6 +19,8 @@
  */
 package org.apache.hadoop.hbase.master;
 
+import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HConstants;
@@ -292,7 +294,7 @@ class ProcessServerShutdown extends Regi
       ", onlineMetaRegions.size(): " +
       master.getRegionManager().numOnlineMetaRegions());
     if (!isSplitFinished) {
-      long splitTime = 0, splitSize = 0;
+      long splitTime = 0, splitSize = 0, splitCount = 0;
       FileSystem fs = this.master.getFileSystem();
       // we rename during split, so check both names
       Path rsSplitDir = new Path(rsLogDir.getParent(),
@@ -315,18 +317,19 @@ class ProcessServerShutdown extends Regi
             }
             LOG.debug("Renamed region directory: " + rsSplitDir);
           }
-
+          ContentSummary contentSum = fs.getContentSummary(rsSplitDir);
+          splitCount = contentSum.getFileCount();
+          splitSize = contentSum.getSpaceConsumed();
           // Process the old log files
           HLog.splitLog(master.getRootDir(), rsSplitDir,
             this.master.getOldLogDir(), this.master.getFileSystem(),
             this.master.getConfiguration());
           splitTime = HLog.lastSplitTime;
-          splitSize = HLog.lastSplitSize;
+          this.master.getMetrics().addSplit(splitTime, splitCount, splitSize);
         } finally {
           master.splitLogLock.unlock();
         }
 
-        this.master.getMetrics().addSplit(splitTime, splitSize);
       }
       isSplitFinished = true;
     }

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=1181581&r1=1181580&r2=1181581&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java Tue Oct 11 02:22:28 2011
@@ -1106,8 +1106,10 @@ public class RegionManager {
       RegionState s = regionsInTransition.get(regionName);
       if (s != null) {
         s.setOpen();
+        this.master.getMetrics().incRegionsOpened();
       }
     }
+
   }
 
   /**

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=1181581&r1=1181580&r2=1181581&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java Tue Oct 11 02:22:28 2011
@@ -872,6 +872,7 @@ public class ServerManager {
     this.deadServers.add(serverName);
     this.master.getRegionServerOperationQueue().
       put(new ProcessServerShutdown(master, info));
+    this.master.getMetrics().incRegionServerExpired();
   }
 
   /**

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/metrics/MasterMetrics.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/metrics/MasterMetrics.java?rev=1181581&r1=1181580&r2=1181581&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/metrics/MasterMetrics.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/metrics/MasterMetrics.java Tue Oct 11 02:22:28 2011
@@ -18,9 +18,9 @@
 package org.apache.hadoop.hbase.master.metrics;
 
 import java.io.IOException;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.master.ServerManager;
 import org.apache.hadoop.hbase.metrics.HBaseInfo;
 import org.apache.hadoop.hbase.metrics.MetricsRate;
 import org.apache.hadoop.hbase.metrics.PersistentMetricsTimeVaryingRate;
@@ -30,6 +30,7 @@ import org.apache.hadoop.metrics.Metrics
 import org.apache.hadoop.metrics.MetricsUtil;
 import org.apache.hadoop.metrics.Updater;
 import org.apache.hadoop.metrics.jvm.JvmMetrics;
+import org.apache.hadoop.metrics.util.MetricsIntValue;
 import org.apache.hadoop.metrics.util.MetricsLongValue;
 import org.apache.hadoop.metrics.util.MetricsRegistry;
 
@@ -50,19 +51,46 @@ public class MasterMetrics implements Up
   private long lastUpdate = System.currentTimeMillis();
   private long lastExtUpdate = System.currentTimeMillis();
   private long extendedPeriod = 0;
-/*
+  /*
    * Count of requests to the cluster since last call to metrics update
    */
   private final MetricsRate cluster_requests =
     new MetricsRate("cluster_requests", registry);
 
   /** Time it takes to finish HLog.splitLog() */
-  final PersistentMetricsTimeVaryingRate splitTime =
-    new PersistentMetricsTimeVaryingRate("splitTime", registry);
+  final MetricsLongValue  splitTime =
+    new MetricsLongValue("splitTime", registry);
+
+
+  /*  Number of active region servers. This number is updated
+   *  every time a regionserver joins or leaves.
+   */
+  public MetricsIntValue numRegionServers =
+	new MetricsIntValue("numRegionServers", registry);
+
+  /*  This is the number of dead region servers.
+   *  This is cumulative across all intervals from startup time.
+   */
+  public MetricsIntValue numRSExpired =
+	new MetricsIntValue("numRSExpired", registry);
+
+  /** Metrics to keep track of the number and size of logs split.
+   *  This is cumulative across all intervals from startup time.
+   */
+  public MetricsLongValue numLogsSplit =
+	  new MetricsLongValue("numLogsSplit", registry);
+
+  private MetricsLongValue sizeOfLogsSplit =
+	  new MetricsLongValue("sizeOfLogsSplit", registry);
+
+  /** Track the number of regions opened. Useful for identifying
+   *  open/close of regions due to load balancing.
+   *  This is a cumulative metric.
+   */
+  private MetricsIntValue numRegionsOpened =
+	  new MetricsIntValue("numRegionsOpened", registry);
 
-  /** Size of HLog files being split */
-  final PersistentMetricsTimeVaryingRate splitSize =
-    new PersistentMetricsTimeVaryingRate("splitSize", registry);
+  private ServerManager serverManager;
 
   public MasterMetrics(final String name) {
     MetricsContext context = MetricsUtil.getContext("hbase");
@@ -71,7 +99,6 @@ public class MasterMetrics implements Up
     context.registerUpdater(this);
     JvmMetrics.init("Master", name);
     HBaseInfo.init();
-
     // expose the MBean for metrics
     masterStatistics = new MasterStatistics(this.registry);
 
@@ -88,6 +115,10 @@ public class MasterMetrics implements Up
 
     LOG.info("Initialized");
   }
+  public MasterMetrics(final String name, ServerManager serverMgr) {
+	  this(name);
+	  serverManager = serverMgr;
+  }
 
   public void shutdown() {
     if (masterStatistics != null)
@@ -100,21 +131,24 @@ public class MasterMetrics implements Up
    * @param unused
    */
   public void doUpdates(MetricsContext unused) {
+
     synchronized (this) {
       this.lastUpdate = System.currentTimeMillis();
-
+      this.numRegionServers.set(this.serverManager.numServers());
       // has the extended period for long-living stats elapsed?
       if (this.extendedPeriod > 0 &&
           this.lastUpdate - this.lastExtUpdate >= this.extendedPeriod) {
         this.lastExtUpdate = this.lastUpdate;
-        this.splitTime.resetMinMaxAvg();
-        this.splitSize.resetMinMaxAvg();
         this.resetAllMinMax();
       }
 
       this.cluster_requests.pushMetric(metricsRecord);
       this.splitTime.pushMetric(metricsRecord);
-      this.splitSize.pushMetric(metricsRecord);
+      this.numRegionServers.pushMetric(metricsRecord);
+      this.numRSExpired.pushMetric(metricsRecord);
+      this.numLogsSplit.pushMetric(metricsRecord);
+      this.sizeOfLogsSplit.pushMetric(metricsRecord);
+      this.numRegionsOpened.pushMetric(metricsRecord);
     }
     this.metricsRecord.update();
   }
@@ -128,9 +162,10 @@ public class MasterMetrics implements Up
    * @param time time that the split took
    * @param size length of original HLogs that were split
    */
-  public synchronized void addSplit(long time, long size) {
-    splitTime.inc(time);
-    splitSize.inc(size);
+  public synchronized void addSplit(long time, long splitCount, long splitSize) {
+	  splitTime.set(splitTime.get() + time);
+	  numLogsSplit.set(numLogsSplit.get() + splitCount);
+	  sizeOfLogsSplit.set(sizeOfLogsSplit.get() + splitSize);
   }
 
   /**
@@ -146,4 +181,13 @@ public class MasterMetrics implements Up
   public void incrementRequests(final int inc) {
     this.cluster_requests.inc(inc);
   }
-}
\ No newline at end of file
+
+  public synchronized void incRegionsOpened() {
+	  numRegionsOpened.set(numRegionsOpened.get() + 1);
+  }
+
+  public synchronized void incRegionServerExpired() {
+	  numRSExpired.set(numRSExpired.get() + 1);
+  }
+
+}