You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:22:28 UTC
svn commit: r1181581 - in
/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master:
HMaster.java ProcessServerShutdown.java RegionManager.java
ServerManager.java metrics/MasterMetrics.java
Author: nspiegelberg
Date: Tue Oct 11 02:22:28 2011
New Revision: 1181581
URL: http://svn.apache.org/viewvc?rev=1181581&view=rev
Log:
expose more stats from HBase master node
Summary:
Added the following new stats to HMaster:
"hadoop.master_avgrequestcount":
"hadoop.master_maxrequestcount":
"hadoop.master_minrequestcount":
"hadoop.master_numlogssplit":
"hadoop.master_numregionservers":
"hadoop.master_numregionsopened":
"hadoop.master_numrsexpired":
"hadoop.master_sizeoflogssplit":
=========
Sample output
=========
{
"hadoop.master_avgrequestcount": 384,
"hadoop.master_cluster_requests": 456.75,
"hadoop.master_maxrequestcount": 1134,
"hadoop.master_minrequestcount": 0,
"hadoop.master_numlogssplit": 4,
"hadoop.master_numregionservers": 2,
"hadoop.master_numregionsopened": 33,
"hadoop.master_numrsexpired": 2,
"hadoop.master_rpc_getclusterstatus_avg_time": 0,
"hadoop.master_rpc_getclusterstatus_num_ops": 2,
"hadoop.master_rpc_getprotocolversion_avg_time": 0,
"hadoop.master_rpc_getprotocolversion_num_ops": 11,
"hadoop.master_rpc_ismasterrunning_avg_time": 0,
"hadoop.master_rpc_ismasterrunning_num_ops": 3,
"hadoop.master_rpc_regionserverreport_avg_time": 0,
"hadoop.master_rpc_regionserverreport_num_ops": 4908,
"hadoop.master_rpc_regionserverstartup_avg_time": 0,
"hadoop.master_rpc_regionserverstartup_num_ops": 5,
"hadoop.master_rpc_rpcprocessingtime_avg_time": 0,
"hadoop.master_rpc_rpcprocessingtime_num_ops": 0,
"hadoop.master_rpc_rpcqueuetime_avg_time": 0,
"hadoop.master_rpc_rpcqueuetime_num_ops": 0,
"hadoop.master_sizeoflogssplit": 2335562724,
"hadoop.master_splitsizeavgtime": 56676160,
"hadoop.master_splitsizemaxtime": 510085452,
"hadoop.master_splitsizemintime": 0,
"hadoop.master_splitsizenumops": 0,
"hadoop.master_splittimeavgtime": 1934,
"hadoop.master_splittimemaxtime": 16394,
"hadoop.master_splittimemintime": 0,
"hadoop.master_splittimenumops": 0
}
Test Plan:
tested on devcluster
1. Tested by killing region servers to check the RS_Expired count.
2. Checked the number of logs that were split and the size by disabling the
delete operation after the split.
3. Tested that the number of regions opened updates correctly when a particular
regionserver is shut down.
Reviewed By: kannan
Reviewers: kannan, nspiegelberg
Commenters: nspiegelberg
CC: alasla, , kannan, nspiegelberg
Differential Revision: 268851
Modified:
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/metrics/MasterMetrics.java
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1181581&r1=1181580&r2=1181581&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Tue Oct 11 02:22:28 2011
@@ -36,7 +36,8 @@ import java.util.concurrent.atomic.Atomi
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
-
+import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.Options;
@@ -275,7 +276,7 @@ public class HMaster extends Thread impl
regionManager = new RegionManager(this);
setName(MASTER);
- this.metrics = new MasterMetrics(MASTER);
+ this.metrics = new MasterMetrics(MASTER, this.serverManager);
// We're almost open for business
this.closed.set(false);
LOG.info("HMaster w/ hbck initialized on " + this.address.toString());
@@ -716,7 +717,7 @@ public class HMaster extends Thread impl
if(this.serverManager.getServerInfo(serverName) == null) {
LOG.info("Log folder doesn't belong " +
"to a known region server, splitting");
- long splitTime = 0, splitSize = 0;
+ long splitTime = 0, splitSize = 0, splitCount = 0;
this.splitLogLock.lock();
try {
@@ -731,15 +732,17 @@ public class HMaster extends Thread impl
logDir = splitDir;
LOG.debug("Renamed region directory: " + splitDir);
}
+ ContentSummary contentSummary = fs.getContentSummary(logDir);
+ splitCount = contentSummary.getFileCount();
+ splitSize = contentSummary.getSpaceConsumed();
HLog.splitLog(this.rootdir, logDir, oldLogDir, this.fs, getConfiguration());
splitTime = HLog.lastSplitTime;
- splitSize = HLog.lastSplitSize;
+ this.metrics.addSplit(splitTime, splitCount, splitSize );
} catch (IOException e) {
LOG.error("Failed splitting " + logDir.toString(), e);
} finally {
this.splitLogLock.unlock();
}
- this.metrics.addSplit(splitTime, splitSize);
} else {
LOG.info("Log folder belongs to an existing region server");
}
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java?rev=1181581&r1=1181580&r2=1181581&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java Tue Oct 11 02:22:28 2011
@@ -19,6 +19,8 @@
*/
package org.apache.hadoop.hbase.master;
+import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
@@ -292,7 +294,7 @@ class ProcessServerShutdown extends Regi
", onlineMetaRegions.size(): " +
master.getRegionManager().numOnlineMetaRegions());
if (!isSplitFinished) {
- long splitTime = 0, splitSize = 0;
+ long splitTime = 0, splitSize = 0, splitCount = 0;
FileSystem fs = this.master.getFileSystem();
// we rename during split, so check both names
Path rsSplitDir = new Path(rsLogDir.getParent(),
@@ -315,18 +317,19 @@ class ProcessServerShutdown extends Regi
}
LOG.debug("Renamed region directory: " + rsSplitDir);
}
-
+ ContentSummary contentSum = fs.getContentSummary(rsSplitDir);
+ splitCount = contentSum.getFileCount();
+ splitSize = contentSum.getSpaceConsumed();
// Process the old log files
HLog.splitLog(master.getRootDir(), rsSplitDir,
this.master.getOldLogDir(), this.master.getFileSystem(),
this.master.getConfiguration());
splitTime = HLog.lastSplitTime;
- splitSize = HLog.lastSplitSize;
+ this.master.getMetrics().addSplit(splitTime, splitCount, splitSize);
} finally {
master.splitLogLock.unlock();
}
- this.master.getMetrics().addSplit(splitTime, splitSize);
}
isSplitFinished = true;
}
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=1181581&r1=1181580&r2=1181581&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java Tue Oct 11 02:22:28 2011
@@ -1106,8 +1106,10 @@ public class RegionManager {
RegionState s = regionsInTransition.get(regionName);
if (s != null) {
s.setOpen();
+ this.master.getMetrics().incRegionsOpened();
}
}
+
}
/**
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=1181581&r1=1181580&r2=1181581&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java Tue Oct 11 02:22:28 2011
@@ -872,6 +872,7 @@ public class ServerManager {
this.deadServers.add(serverName);
this.master.getRegionServerOperationQueue().
put(new ProcessServerShutdown(master, info));
+ this.master.getMetrics().incRegionServerExpired();
}
/**
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/metrics/MasterMetrics.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/metrics/MasterMetrics.java?rev=1181581&r1=1181580&r2=1181581&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/metrics/MasterMetrics.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/metrics/MasterMetrics.java Tue Oct 11 02:22:28 2011
@@ -18,9 +18,9 @@
package org.apache.hadoop.hbase.master.metrics;
import java.io.IOException;
-
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.metrics.HBaseInfo;
import org.apache.hadoop.hbase.metrics.MetricsRate;
import org.apache.hadoop.hbase.metrics.PersistentMetricsTimeVaryingRate;
@@ -30,6 +30,7 @@ import org.apache.hadoop.metrics.Metrics
import org.apache.hadoop.metrics.MetricsUtil;
import org.apache.hadoop.metrics.Updater;
import org.apache.hadoop.metrics.jvm.JvmMetrics;
+import org.apache.hadoop.metrics.util.MetricsIntValue;
import org.apache.hadoop.metrics.util.MetricsLongValue;
import org.apache.hadoop.metrics.util.MetricsRegistry;
@@ -50,19 +51,46 @@ public class MasterMetrics implements Up
private long lastUpdate = System.currentTimeMillis();
private long lastExtUpdate = System.currentTimeMillis();
private long extendedPeriod = 0;
-/*
+ /*
* Count of requests to the cluster since last call to metrics update
*/
private final MetricsRate cluster_requests =
new MetricsRate("cluster_requests", registry);
/** Time it takes to finish HLog.splitLog() */
- final PersistentMetricsTimeVaryingRate splitTime =
- new PersistentMetricsTimeVaryingRate("splitTime", registry);
+ final MetricsLongValue splitTime =
+ new MetricsLongValue("splitTime", registry);
+
+
+ /* Number of active region servers. This number is updated
+ * every time a regionserver joins or leaves.
+ */
+ public MetricsIntValue numRegionServers =
+ new MetricsIntValue("numRegionServers", registry);
+
+ /* This is the number of dead region servers.
+ * This is cumulative across all intervals from startup time.
+ */
+ public MetricsIntValue numRSExpired =
+ new MetricsIntValue("numRSExpired", registry);
+
+ /** Metrics to keep track of the number and size of logs split.
+ * This is cumulative across all intervals from startup time.
+ */
+ public MetricsLongValue numLogsSplit =
+ new MetricsLongValue("numLogsSplit", registry);
+
+ private MetricsLongValue sizeOfLogsSplit =
+ new MetricsLongValue("sizeOfLogsSplit", registry);
+
+ /** Track the number of regions opened. Useful for identifying
+ * open/close of regions due to load balancing.
+ * This is a cumulative metric.
+ */
+ private MetricsIntValue numRegionsOpened =
+ new MetricsIntValue("numRegionsOpened", registry);
- /** Size of HLog files being split */
- final PersistentMetricsTimeVaryingRate splitSize =
- new PersistentMetricsTimeVaryingRate("splitSize", registry);
+ private ServerManager serverManager;
public MasterMetrics(final String name) {
MetricsContext context = MetricsUtil.getContext("hbase");
@@ -71,7 +99,6 @@ public class MasterMetrics implements Up
context.registerUpdater(this);
JvmMetrics.init("Master", name);
HBaseInfo.init();
-
// expose the MBean for metrics
masterStatistics = new MasterStatistics(this.registry);
@@ -88,6 +115,10 @@ public class MasterMetrics implements Up
LOG.info("Initialized");
}
+ public MasterMetrics(final String name, ServerManager serverMgr) {
+ this(name);
+ serverManager = serverMgr;
+ }
public void shutdown() {
if (masterStatistics != null)
@@ -100,21 +131,24 @@ public class MasterMetrics implements Up
* @param unused
*/
public void doUpdates(MetricsContext unused) {
+
synchronized (this) {
this.lastUpdate = System.currentTimeMillis();
-
+ this.numRegionServers.set(this.serverManager.numServers());
// has the extended period for long-living stats elapsed?
if (this.extendedPeriod > 0 &&
this.lastUpdate - this.lastExtUpdate >= this.extendedPeriod) {
this.lastExtUpdate = this.lastUpdate;
- this.splitTime.resetMinMaxAvg();
- this.splitSize.resetMinMaxAvg();
this.resetAllMinMax();
}
this.cluster_requests.pushMetric(metricsRecord);
this.splitTime.pushMetric(metricsRecord);
- this.splitSize.pushMetric(metricsRecord);
+ this.numRegionServers.pushMetric(metricsRecord);
+ this.numRSExpired.pushMetric(metricsRecord);
+ this.numLogsSplit.pushMetric(metricsRecord);
+ this.sizeOfLogsSplit.pushMetric(metricsRecord);
+ this.numRegionsOpened.pushMetric(metricsRecord);
}
this.metricsRecord.update();
}
@@ -128,9 +162,10 @@ public class MasterMetrics implements Up
* @param time time that the split took
* @param size length of original HLogs that were split
*/
- public synchronized void addSplit(long time, long size) {
- splitTime.inc(time);
- splitSize.inc(size);
+ public synchronized void addSplit(long time, long splitCount, long splitSize) {
+ splitTime.set(splitTime.get() + time);
+ numLogsSplit.set(numLogsSplit.get() + splitCount);
+ sizeOfLogsSplit.set(sizeOfLogsSplit.get() + splitSize);
}
/**
@@ -146,4 +181,13 @@ public class MasterMetrics implements Up
public void incrementRequests(final int inc) {
this.cluster_requests.inc(inc);
}
-}
\ No newline at end of file
+
+ public synchronized void incRegionsOpened() {
+ numRegionsOpened.set(numRegionsOpened.get() + 1);
+ }
+
+ public synchronized void incRegionServerExpired() {
+ numRSExpired.set(numRSExpired.get() + 1);
+ }
+
+}