You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2011/12/14 20:39:24 UTC

svn commit: r1214419 - in /hbase/trunk/src: main/java/org/apache/hadoop/hbase/master/ test/java/org/apache/hadoop/hbase/ test/java/org/apache/hadoop/hbase/master/ test/resources/

Author: stack
Date: Wed Dec 14 19:39:23 2011
New Revision: 1214419

URL: http://svn.apache.org/viewvc?rev=1214419&view=rev
Log:
HBASE-4993 Performance regression in minicluster creation

Modified:
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
    hbase/trunk/src/test/resources/hbase-site.xml

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=1214419&r1=1214418&r2=1214419&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java Wed Dec 14 19:39:23 2011
@@ -516,48 +516,73 @@ public class ServerManager {
   }
 
   /**
-   * Waits for the regionservers to report in.
+   * Wait for the region servers to report in.
+   * We will wait until one of these conditions is met:
+   *  - the master is stopped
+   *  - the 'hbase.master.wait.on.regionservers.timeout' is reached
+   *  - the 'hbase.master.wait.on.regionservers.maxtostart' number of
+   *    region servers is reached
+   *  - the 'hbase.master.wait.on.regionservers.mintostart' is reached AND
+   *    no new region server has checked in for
+   *    'hbase.master.wait.on.regionservers.interval' time
+   * @param status task whose status message is updated while waiting
    * @throws InterruptedException
    */
   public void waitForRegionServers(MonitoredTask status)
   throws InterruptedException {
-    long interval = this.master.getConfiguration().
+    final long interval = this.master.getConfiguration().
       getLong("hbase.master.wait.on.regionservers.interval", 1500);
-    long timeout = this.master.getConfiguration().
+    final long timeout = this.master.getConfiguration().
     getLong("hbase.master.wait.on.regionservers.timeout", 4500);
-    int minToStart = this.master.getConfiguration().
+    final int minToStart = this.master.getConfiguration().
     getInt("hbase.master.wait.on.regionservers.mintostart", 1);
-    int maxToStart = this.master.getConfiguration().
-    getInt("hbase.master.wait.on.regionservers.maxtostart", Integer.MAX_VALUE);    
-    // So, number of regionservers > 0 and its been n since last check in, break,
-    // else just stall here
-    int count = 0;
-    long slept = 0;
-    for (int oldcount = countOfRegionServers(); !this.master.isStopped();) {
-      Thread.sleep(interval);
-      slept += interval;
-      count = countOfRegionServers();
+    final int maxToStart = this.master.getConfiguration().
+    getInt("hbase.master.wait.on.regionservers.maxtostart", Integer.MAX_VALUE);
 
-      String msg;
-      if (count == oldcount && count >= minToStart && slept >= timeout) {
-        LOG.info("Finished waiting for regionserver count to settle; " +
-            "count=" + count + ", sleptFor=" + slept);
-        break;
-      }
-      if (count >= maxToStart) {
-        LOG.info("At least the max configured number of regionserver(s) have " +
-            "checked in: " + count);
-        break;
+    long now =  System.currentTimeMillis();
+    final long startTime = now;
+    long slept = 0;
+    long lastLogTime = 0;
+    long lastCountChange = startTime;
+    int count = countOfRegionServers();
+    int oldCount = 0;
+    // Keep waiting until stopped, timed out, maxToStart reached, or the
+    // count has been >= minToStart and unchanged for a full interval.
+    while (!this.master.isStopped() &&
+        slept < timeout &&
+        count < maxToStart &&
+        (count < minToStart || lastCountChange + interval > now)) {
+
+      // Log some info at every interval time or if there is a change
+      if (oldCount != count || lastLogTime+interval < now){
+        lastLogTime = now;
+        String msg =
+          "Waiting for region servers count to settle; currently"+
+            " checked in " + count + ", slept for " + slept + " ms," +
+            " expecting minimum of " + minToStart + ", maximum of "+ maxToStart+
+            ", timeout of "+timeout+" ms, interval of "+interval+" ms.";
+        LOG.info(msg);
+        status.setStatus(msg);
       }
-      if (count == 0) {
-        msg = "Waiting on regionserver(s) to checkin";
-      } else {
-        msg = "Waiting on regionserver(s) count to settle; currently=" + count;
+
+      // We sleep for some time
+      final long sleepTime = 50;
+      Thread.sleep(sleepTime);
+      now =  System.currentTimeMillis();
+      slept = now - startTime;
+
+      oldCount = count;
+      count = countOfRegionServers();
+      if (count != oldCount) {
+        lastCountChange = now;
       }
-      LOG.info(msg);
-      status.setStatus(msg);
-      oldcount = count;
     }
+
+    LOG.info("Finished waiting for region servers count to settle;" +
+      " checked in " + count + ", slept for " + slept + " ms," +
+      " expecting minimum of " + minToStart + ", maximum of "+ maxToStart+","+
+      " master is "+ (this.master.isStopped() ? "stopped.": "running.")
+    );
   }
 
   /**

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java?rev=1214419&r1=1214418&r2=1214419&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java Wed Dec 14 19:39:23 2011
@@ -561,6 +561,12 @@ public class HBaseTestingUtility {
   throws IOException, InterruptedException {
     // Now do the mini hbase cluster.  Set the hbase.rootdir in config.
     createRootDir();
+
+    // These settings make the master wait until exactly this number of
+    // region servers have checked in.
+    conf.setInt("hbase.master.wait.on.regionservers.mintostart", numSlaves);
+    conf.setInt("hbase.master.wait.on.regionservers.maxtostart", numSlaves);
+
     Configuration c = new Configuration(this.conf);
     this.hbaseCluster = new MiniHBaseCluster(c, numMasters, numSlaves);
     // Don't leave here till we've done a successful scan of the .META.

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java?rev=1214419&r1=1214418&r2=1214419&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java Wed Dec 14 19:39:23 2011
@@ -71,8 +71,6 @@ public class TestMasterFailover {
 
     // Create config to use for this cluster
     Configuration conf = HBaseConfiguration.create();
-    conf.setInt("hbase.master.wait.on.regionservers.mintostart", 3);
-    conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 3);
 
     // Start the cluster
     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);

Modified: hbase/trunk/src/test/resources/hbase-site.xml
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/resources/hbase-site.xml?rev=1214419&r1=1214418&r2=1214419&view=diff
==============================================================================
--- hbase/trunk/src/test/resources/hbase-site.xml (original)
+++ hbase/trunk/src/test/resources/hbase-site.xml Wed Dec 14 19:39:23 2011
@@ -23,12 +23,6 @@
 -->
 <configuration>
   <property>
-    <name>hbase.master.wait.on.regionservers.interval</name>
-    <value>100</value>
-    <description>How long we wait on regionservers to check in
-    </description>
-  </property>
-  <property>
     <name>hbase.regionserver.msginterval</name>
     <value>1000</value>
     <description>Interval between messages from the RegionServer to HMaster