You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:14:27 UTC

svn commit: r1181504 - in /hbase/branches/0.89/src: main/java/org/apache/hadoop/hbase/master/RegionManager.java test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java

Author: nspiegelberg
Date: Tue Oct 11 02:14:26 2011
New Revision: 1181504

URL: http://svn.apache.org/viewvc?rev=1181504&view=rev
Log:
Tweak Load Balancer for Single Server Restart Case Summary: Task 505791; HBase-3663 To avoid starvation problem for new servers Trac Bug: #

Blame Rev:

Reviewed By:
nspiegelberg, kannan
Test Plan:
Passing unit test
Revert Plan:

Database Impact:

Memcache Impact:

Other Notes:

EImportant:

- begin *PUBLIC* platform impact section -
Bugzilla: #
- end platform impact -

Modified:
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
    hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=1181504&r1=1181503&r2=1181504&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java Tue Oct 11 02:14:26 2011
@@ -1381,7 +1381,7 @@ public class RegionManager {
       double avg = master.getAverageLoad();
 
       // nothing to balance if server load not more then average load
-      if(servLoad.getLoad() <= Math.ceil(avg) || avg <= 2.0) {
+      if(servLoad.getLoad() <= Math.floor(avg) || avg <= 2.0) {
         return;
       }
 
@@ -1447,12 +1447,12 @@ public class RegionManager {
         return 0; // there is no low loaded servers
 
       int lowSrvCount = loadToServers.get(loadToServers.firstKey()).size();
-      int numRegionsToClose = 0;
-
       int numSrvRegs = srvLoad.getNumberOfRegions();
       int numMoveToLowLoaded = (avgLoadMinusSlop - lowestLoad) * lowSrvCount;
-      numRegionsToClose = numSrvRegs - (int)Math.ceil(avgLoad);
+
+      int numRegionsToClose = numSrvRegs - (int)Math.floor(avgLoad);
       numRegionsToClose = Math.min(numRegionsToClose, numMoveToLowLoaded);
+
       if (LOG.isDebugEnabled()) {
         LOG.debug("Server(s) are carrying only " + lowestLoad + " regions. " +
           "Server " + srvName + " is most loaded (" + numSrvRegs +

Modified: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java?rev=1181504&r1=1181503&r2=1181504&view=diff
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java (original)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java Tue Oct 11 02:14:26 2011
@@ -3,7 +3,7 @@
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
+ * distributed with this work for additional infomation
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
@@ -36,9 +36,12 @@ import org.apache.hadoop.hbase.util.JVMC
 
 /**
  * Test whether region rebalancing works. (HBASE-71)
+ * Also tests HBASE-3663: whether region rebalancing works after a new server boots,
+ * especially when no server has more regions than the ceiling of the average load.
  */
 public class TestRegionRebalancing extends HBaseClusterTestCase {
   final Log LOG = LogFactory.getLog(this.getClass().getName());
+
   HTable table;
 
   HTableDescriptor desc;
@@ -67,14 +70,14 @@ public class TestRegionRebalancing exten
     // create a 20-region table by writing directly to disk
     List<byte []> startKeys = new ArrayList<byte []>();
     startKeys.add(null);
-    for (int i = 10; i < 29; i++) {
+    for (int i = 10; i < 70; i++) {
       startKeys.add(Bytes.toBytes("row_" + i));
     }
     startKeys.add(null);
-    LOG.info(startKeys.size() + " start keys generated");
+    LOG.debug(startKeys.size() + " start keys generated");
 
     List<HRegion> regions = new ArrayList<HRegion>();
-    for (int i = 0; i < 20; i++) {
+    for (int i = 0; i < 60; i++) {
       regions.add(createAregion(startKeys.get(i), startKeys.get(i+1)));
     }
 
@@ -89,46 +92,37 @@ public class TestRegionRebalancing exten
   }
 
   /**
-   * For HBASE-71. Try a few different configurations of starting and stopping
-   * region servers to see if the assignment or regions is pretty balanced.
-   * @throws IOException
+   * Starts 16 additional region servers, for a total of 17 servers and 62 regions.
+   * When one of the servers shuts down, the average load is 62 / 16 = 3.875.
+   * When that server comes back, the average load is 62 / 17, about 3.65.
+   * Set the slop near 0 so that no server's load will be larger than 4.
+   * The load balancing algorithm should handle this case properly.
    */
   public void testRebalancing() throws IOException {
-    table = new HTable(conf, "test");
-    assertEquals("Test table should have 20 regions",
-      20, table.getStartKeys().length);
-
-    // verify that the region assignments are balanced to start out
-    assertRegionsAreBalanced();
-
-    LOG.debug("Adding 2nd region server.");
-    // add a region server - total of 2
-    cluster.startRegionServer();
-    assertRegionsAreBalanced();
 
-    // add a region server - total of 3
-    LOG.debug("Adding 3rd region server.");
-    cluster.startRegionServer();
-    assertRegionsAreBalanced();
+    for (int i = 1; i <= 16; i++){
+      LOG.debug("Adding region server #"+i);
+      cluster.startRegionServer();
+      checkingServerStatus();
+    }
 
-    // kill a region server - total of 2
-    LOG.debug("Killing the 3rd region server.");
+    LOG.debug("Restart: killing 1 region server.");
     cluster.stopRegionServer(2, false);
     cluster.waitOnRegionServer(2);
     assertRegionsAreBalanced();
 
-    // start two more region servers - total of 4
-    LOG.debug("Adding 3rd region server");
-    cluster.startRegionServer();
-    LOG.debug("Adding 4th region server");
+    LOG.debug("Restart: adding that region server back");
     cluster.startRegionServer();
     assertRegionsAreBalanced();
+  }
 
-    for (int i = 0; i < 6; i++){
-      LOG.debug("Adding " + (i + 5) + "th region server");
-      cluster.startRegionServer();
+  private void checkingServerStatus() {
+    List<HRegionServer> servers = getOnlineRegionServers();
+    double avg = cluster.getMaster().getAverageLoad();
+    for (HRegionServer server : servers) {
+      int serverLoad = server.getOnlineRegions().size();
+      LOG.debug(server.hashCode() + " Avg: " + avg + " actual: " + serverLoad);
     }
-    assertRegionsAreBalanced();
   }
 
   /** figure out how many regions are currently being served. */
@@ -160,6 +154,7 @@ public class TestRegionRebalancing exten
       double avg = cluster.getMaster().getAverageLoad();
       int avgLoadPlusSlop = (int)Math.ceil(avg * (1 + slop));
       int avgLoadMinusSlop = (int)Math.floor(avg * (1 - slop)) - 1;
+
       LOG.debug("There are " + servers.size() + " servers and " + regionCount
         + " regions. Load Average: " + avg + " low border: " + avgLoadMinusSlop
         + ", up border: " + avgLoadPlusSlop + "; attempt: " + i);
@@ -207,9 +202,8 @@ public class TestRegionRebalancing exten
    * Wait until all the regions are assigned.
    */
   private void waitForAllRegionsAssigned() {
-    while (getRegionCount() < 22) {
-    // while (!cluster.getMaster().allRegionsAssigned()) {
-      LOG.debug("Waiting for there to be 22 regions, but there are " + getRegionCount() + " right now.");
+    while (getRegionCount() < 62) {
+      LOG.debug("Waiting for there to be 62 regions, but there are " + getRegionCount() + " right now.");
       try {
         Thread.sleep(1000);
       } catch (InterruptedException e) {}
@@ -223,7 +217,7 @@ public class TestRegionRebalancing exten
   private HRegion createAregion(byte [] startKey, byte [] endKey)
   throws IOException {
     HRegion region = createNewHRegion(desc, startKey, endKey);
-    byte [] keyToWrite = startKey == null ? Bytes.toBytes("row_000") : startKey;
+    byte [] keyToWrite = startKey == null ? Bytes.toBytes("row_0000") : startKey;
     Put put = new Put(keyToWrite);
     put.add(FAMILY_NAME, null, Bytes.toBytes("test"));
     region.put(put);
@@ -231,4 +225,4 @@ public class TestRegionRebalancing exten
     region.getLog().closeAndDelete();
     return region;
   }
-}
\ No newline at end of file
+}