You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by dd...@apache.org on 2014/03/07 00:34:44 UTC

svn commit: r1575097 - in /hbase/branches/hbase-10070/hbase-server/src: main/java/org/apache/hadoop/hbase/master/ main/java/org/apache/hadoop/hbase/master/balancer/ test/java/org/apache/hadoop/hbase/ test/java/org/apache/hadoop/hbase/master/balancer/

Author: ddas
Date: Thu Mar  6 23:34:44 2014
New Revision: 1575097

URL: http://svn.apache.org/r1575097
Log:
HBASE-10620. LoadBalancer.needsBalance() should check for co-located region replicas as well

Modified:
    hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
    hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
    hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java
    hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
    hbase/branches/hbase-10070/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java
    hbase/branches/hbase-10070/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java

Modified: hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1575097&r1=1575096&r2=1575097&view=diff
==============================================================================
--- hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Thu Mar  6 23:34:44 2014
@@ -1479,7 +1479,7 @@ MasterServices, Server {
       long cutoffTime = System.currentTimeMillis() + maximumBalanceTime;
       int rpCount = 0;  // number of RegionPlans balanced so far
       long totalRegPlanExecTime = 0;
-      balancerRan = plans != null;
+      balancerRan = plans.size() != 0;
       if (plans != null && !plans.isEmpty()) {
         for (RegionPlan plan: plans) {
           LOG.info("balance " + plan);

Modified: hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java?rev=1575097&r1=1575096&r2=1575097&view=diff
==============================================================================
--- hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java (original)
+++ hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java Thu Mar  6 23:34:44 2014
@@ -131,6 +131,7 @@ public abstract class BaseLoadBalancer i
 
     int numMovedRegions = 0; //num moved regions from the initial configuration
     int numMovedMetaRegions = 0;       //num of moved regions that are META
+    Map<ServerName, List<HRegionInfo>> clusterState;
 
     protected final RackManager rackManager;
 
@@ -163,6 +164,7 @@ public abstract class BaseLoadBalancer i
 
       List<List<Integer>> serversPerHostList = new ArrayList<List<Integer>>();
       List<List<Integer>> serversPerRackList = new ArrayList<List<Integer>>();
+      this.clusterState = clusterState;
 
       // Use servername and port as there can be dead servers in this list. We want everything with
       // a matching hostname and port to have the same index.
@@ -821,7 +823,8 @@ public abstract class BaseLoadBalancer i
     this.rackManager = rackManager;
   }
 
-  protected boolean needsBalance(ClusterLoadState cs) {
+  protected boolean needsBalance(Cluster c) {
+    ClusterLoadState cs = new ClusterLoadState(c.clusterState);
     if (cs.getNumServers() < MIN_SERVER_BALANCE) {
       if (LOG.isDebugEnabled()) {
         LOG.debug("Not running balancer because only " + cs.getNumServers()
@@ -829,8 +832,7 @@ public abstract class BaseLoadBalancer i
       }
       return false;
     }
-    // TODO: check for co-located region replicas as well
-
+    if(areSomeRegionReplicasColocated(c)) return true;
     // Check if we even need to do any load balancing
     // HBASE-3681 check sloppiness first
     float average = cs.getLoadAverage(); // for logging
@@ -852,6 +854,17 @@ public abstract class BaseLoadBalancer i
   }
 
   /**
+   * Subclasses should implement this to return true if the cluster has nodes that hosts
+   * multiple replicas for the same region, or, if there are multiple racks and the same
+   * rack hosts replicas of the same region
+   * @param c
+   * @return
+   */
+  protected boolean areSomeRegionReplicasColocated(Cluster c) {
+    return false;
+  }
+
+  /**
    * Generates a bulk assignment plan to be used on cluster startup using a
    * simple round-robin assignment.
    * <p>

Modified: hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java?rev=1575097&r1=1575096&r2=1575097&view=diff
==============================================================================
--- hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java (original)
+++ hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java Thu Mar  6 23:34:44 2014
@@ -184,8 +184,10 @@ public class SimpleLoadBalancer extends 
     long startTime = System.currentTimeMillis();
 
     ClusterLoadState cs = new ClusterLoadState(clusterMap);
-
-    if (!this.needsBalance(cs)) return null;
+    // construct a Cluster object with clusterMap and rest of the 
+    // argument as defaults
+    Cluster c = new Cluster(clusterMap, null, this.regionFinder, this.rackManager);
+    if (!this.needsBalance(c)) return null;
     
     int numServers = cs.getNumServers();
     NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();

Modified: hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java?rev=1575097&r1=1575096&r2=1575097&view=diff
==============================================================================
--- hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java (original)
+++ hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java Thu Mar  6 23:34:44 2014
@@ -123,6 +123,8 @@ public class StochasticLoadBalancer exte
   // when new services are offered
   private LocalityBasedCandidateGenerator localityCandidateGenerator;
   private LocalityCostFunction localityCost;
+  private RegionReplicaHostCostFunction regionReplicaHostCostFunction;
+  private RegionReplicaRackCostFunction regionReplicaRackCostFunction;
 
   @Override
   public void setConf(Configuration conf) {
@@ -152,13 +154,16 @@ public class StochasticLoadBalancer exte
       new StoreFileCostFunction(conf)
     };
 
+    regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf);
+    regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);
+
     costFunctions = new CostFunction[]{
       new RegionCountSkewCostFunction(conf),
       new MoveCostFunction(conf),
       localityCost,
       new TableSkewCostFunction(conf),
-      new RegionReplicaHostCostFunction(conf),
-      new RegionReplicaRackCostFunction(conf),
+      regionReplicaHostCostFunction,
+      regionReplicaRackCostFunction,
       regionLoadFunctions[0],
       regionLoadFunctions[1],
       regionLoadFunctions[2],
@@ -189,6 +194,15 @@ public class StochasticLoadBalancer exte
 
   }
 
+  @Override
+  protected boolean areSomeRegionReplicasColocated(Cluster c) {
+    regionReplicaHostCostFunction.init(c);
+    if (regionReplicaHostCostFunction.cost() > 0) return true;
+    regionReplicaRackCostFunction.init(c);
+    if (regionReplicaRackCostFunction.cost() > 0) return true;
+    return false;
+  }
+
   /**
    * Given the cluster state this will try and approach an optimal balance. This
    * should always approach the optimal state given enough steps.
@@ -197,14 +211,14 @@ public class StochasticLoadBalancer exte
   public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState) {
     //The clusterState that is given to this method contains the state
     //of all the regions in the table(s) (that's true today)
-    if (!needsBalance(new ClusterLoadState(clusterState))) {
+    // Keep track of servers to iterate through them.
+    Cluster cluster = new Cluster(clusterState, loads, regionFinder, rackManager);
+    if (!needsBalance(cluster)) {
       return null;
     }
 
     long startTime = EnvironmentEdgeManager.currentTimeMillis();
 
-    // Keep track of servers to iterate through them.
-    Cluster cluster = new Cluster(clusterState, loads, regionFinder, rackManager);
     initCosts(cluster);
     double currentCost = computeCost(cluster, Double.MAX_VALUE);
 

Modified: hbase/branches/hbase-10070/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-10070/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java?rev=1575097&r1=1575096&r2=1575097&view=diff
==============================================================================
--- hbase/branches/hbase-10070/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java (original)
+++ hbase/branches/hbase-10070/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java Thu Mar  6 23:34:44 2014
@@ -118,10 +118,14 @@ public class TestRegionRebalancing {
     UTIL.getHBaseCluster().getMaster().balance();
     assertRegionsAreBalanced();
 
+    // On a balanced cluster, calling balance() should return false
+    assert(UTIL.getHBaseCluster().getMaster().balance() == false);
+
+    // However if we add a server, then the balance() call should return true 
     // add a region server - total of 3
     LOG.info("Started third server=" +
         UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
-    UTIL.getHBaseCluster().getMaster().balance();
+    assert(UTIL.getHBaseCluster().getMaster().balance() == true);
     assertRegionsAreBalanced();
 
     // kill a region server - total of 2
@@ -135,14 +139,14 @@ public class TestRegionRebalancing {
         UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
     LOG.info("Added fourth server=" +
         UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
-    UTIL.getHBaseCluster().getMaster().balance();
+    assert(UTIL.getHBaseCluster().getMaster().balance() == true);
     assertRegionsAreBalanced();
 
     for (int i = 0; i < 6; i++){
       LOG.info("Adding " + (i + 5) + "th region server");
       UTIL.getHBaseCluster().startRegionServer();
     }
-    UTIL.getHBaseCluster().getMaster().balance();
+    assert(UTIL.getHBaseCluster().getMaster().balance() == true);
     assertRegionsAreBalanced();
     table.close();
   }

Modified: hbase/branches/hbase-10070/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-10070/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java?rev=1575097&r1=1575096&r2=1575097&view=diff
==============================================================================
--- hbase/branches/hbase-10070/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java (original)
+++ hbase/branches/hbase-10070/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java Thu Mar  6 23:34:44 2014
@@ -26,6 +26,7 @@ import static org.mockito.Mockito.when;
 
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -46,6 +47,7 @@ import org.apache.hadoop.hbase.ServerNam
 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
 import org.apache.hadoop.hbase.master.RackManager;
 import org.apache.hadoop.hbase.master.RegionPlan;
+import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.net.DNSToSwitchMapping;
 import org.apache.hadoop.net.NetworkTopology;
@@ -382,6 +384,34 @@ public class TestStochasticLoadBalancer 
     assertTrue(costWith2ReplicasOnTwoServers < costWith3ReplicasSameServer);
   }
 
+  @Test
+  public void testNeedsBalanceForColocatedReplicas() {
+    // check for the case where there are two hosts and with one rack, and where
+    // both the replicas are hosted on the same server
+    List<HRegionInfo> regions = randomRegions(1);
+    ServerName s1 = ServerName.valueOf("host1", 1000, 11111);
+    ServerName s2 = ServerName.valueOf("host11", 1000, 11111);
+    Map<ServerName, List<HRegionInfo>> map = new HashMap<ServerName, List<HRegionInfo>>();
+    map.put(s1, regions);
+    regions.add(RegionReplicaUtil.getRegionInfoForReplica(regions.get(0), 1));
+    // until the step above s1 holds two replicas of a region
+    regions = randomRegions(1);
+    map.put(s2, regions);
+    assertTrue(loadBalancer.needsBalance(new Cluster(map, null, null, null)));
+    // check for the case where there are two hosts on the same rack and there are two racks
+    // and both the replicas are on the same rack
+    map.clear();
+    regions = randomRegions(1);
+    List<HRegionInfo> regionsOnS2 = new ArrayList<HRegionInfo>(1);
+    regionsOnS2.add(RegionReplicaUtil.getRegionInfoForReplica(regions.get(0), 1));
+    map.put(s1, regions);
+    map.put(s2, regionsOnS2);
+    // add another server so that the cluster has some host on another rack
+    map.put(ServerName.valueOf("host2", 1000, 11111), randomRegions(1));
+    assertTrue(loadBalancer.needsBalance(new Cluster(map, null, null,
+        new ForTestRackManagerOne())));
+  }
+
   @Test (timeout = 60000)
   public void testSmallCluster() {
     int numNodes = 10;
@@ -546,6 +576,13 @@ public class TestStochasticLoadBalancer 
     }
   }
 
+  private static class ForTestRackManagerOne extends RackManager {
+  @Override
+    public String getRack(ServerName server) {
+      return server.getHostname().endsWith("1") ? "rack1" : "rack2";
+    }
+  }
+
   @Test (timeout = 120000)
   public void testRegionReplicationOnMidClusterWithRacks() {
     conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 4000000L);