You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by dd...@apache.org on 2014/03/07 00:34:44 UTC
svn commit: r1575097 - in /hbase/branches/hbase-10070/hbase-server/src:
main/java/org/apache/hadoop/hbase/master/
main/java/org/apache/hadoop/hbase/master/balancer/
test/java/org/apache/hadoop/hbase/
test/java/org/apache/hadoop/hbase/master/balancer/
Author: ddas
Date: Thu Mar 6 23:34:44 2014
New Revision: 1575097
URL: http://svn.apache.org/r1575097
Log:
HBASE-10620. LoadBalancer.needsBalance() should check for co-located region replicas as well
Modified:
hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java
hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
hbase/branches/hbase-10070/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java
hbase/branches/hbase-10070/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java
Modified: hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1575097&r1=1575096&r2=1575097&view=diff
==============================================================================
--- hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Thu Mar 6 23:34:44 2014
@@ -1479,7 +1479,7 @@ MasterServices, Server {
long cutoffTime = System.currentTimeMillis() + maximumBalanceTime;
int rpCount = 0; // number of RegionPlans balanced so far
long totalRegPlanExecTime = 0;
- balancerRan = plans != null;
+ balancerRan = plans.size() != 0;
if (plans != null && !plans.isEmpty()) {
for (RegionPlan plan: plans) {
LOG.info("balance " + plan);
Modified: hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java?rev=1575097&r1=1575096&r2=1575097&view=diff
==============================================================================
--- hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java (original)
+++ hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java Thu Mar 6 23:34:44 2014
@@ -131,6 +131,7 @@ public abstract class BaseLoadBalancer i
int numMovedRegions = 0; //num moved regions from the initial configuration
int numMovedMetaRegions = 0; //num of moved regions that are META
+ Map<ServerName, List<HRegionInfo>> clusterState;
protected final RackManager rackManager;
@@ -163,6 +164,7 @@ public abstract class BaseLoadBalancer i
List<List<Integer>> serversPerHostList = new ArrayList<List<Integer>>();
List<List<Integer>> serversPerRackList = new ArrayList<List<Integer>>();
+ this.clusterState = clusterState;
// Use servername and port as there can be dead servers in this list. We want everything with
// a matching hostname and port to have the same index.
@@ -821,7 +823,8 @@ public abstract class BaseLoadBalancer i
this.rackManager = rackManager;
}
- protected boolean needsBalance(ClusterLoadState cs) {
+ protected boolean needsBalance(Cluster c) {
+ ClusterLoadState cs = new ClusterLoadState(c.clusterState);
if (cs.getNumServers() < MIN_SERVER_BALANCE) {
if (LOG.isDebugEnabled()) {
LOG.debug("Not running balancer because only " + cs.getNumServers()
@@ -829,8 +832,7 @@ public abstract class BaseLoadBalancer i
}
return false;
}
- // TODO: check for co-located region replicas as well
-
+ if(areSomeRegionReplicasColocated(c)) return true;
// Check if we even need to do any load balancing
// HBASE-3681 check sloppiness first
float average = cs.getLoadAverage(); // for logging
@@ -852,6 +854,17 @@ public abstract class BaseLoadBalancer i
}
/**
+ * Subclasses should implement this to return true if the cluster has nodes that hosts
+ * multiple replicas for the same region, or, if there are multiple racks and the same
+ * rack hosts replicas of the same region
+ * @param c
+ * @return
+ */
+ protected boolean areSomeRegionReplicasColocated(Cluster c) {
+ return false;
+ }
+
+ /**
* Generates a bulk assignment plan to be used on cluster startup using a
* simple round-robin assignment.
* <p>
Modified: hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java?rev=1575097&r1=1575096&r2=1575097&view=diff
==============================================================================
--- hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java (original)
+++ hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java Thu Mar 6 23:34:44 2014
@@ -184,8 +184,10 @@ public class SimpleLoadBalancer extends
long startTime = System.currentTimeMillis();
ClusterLoadState cs = new ClusterLoadState(clusterMap);
-
- if (!this.needsBalance(cs)) return null;
+ // construct a Cluster object with clusterMap and rest of the
+ // argument as defaults
+ Cluster c = new Cluster(clusterMap, null, this.regionFinder, this.rackManager);
+ if (!this.needsBalance(c)) return null;
int numServers = cs.getNumServers();
NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
Modified: hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java?rev=1575097&r1=1575096&r2=1575097&view=diff
==============================================================================
--- hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java (original)
+++ hbase/branches/hbase-10070/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java Thu Mar 6 23:34:44 2014
@@ -123,6 +123,8 @@ public class StochasticLoadBalancer exte
// when new services are offered
private LocalityBasedCandidateGenerator localityCandidateGenerator;
private LocalityCostFunction localityCost;
+ private RegionReplicaHostCostFunction regionReplicaHostCostFunction;
+ private RegionReplicaRackCostFunction regionReplicaRackCostFunction;
@Override
public void setConf(Configuration conf) {
@@ -152,13 +154,16 @@ public class StochasticLoadBalancer exte
new StoreFileCostFunction(conf)
};
+ regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf);
+ regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);
+
costFunctions = new CostFunction[]{
new RegionCountSkewCostFunction(conf),
new MoveCostFunction(conf),
localityCost,
new TableSkewCostFunction(conf),
- new RegionReplicaHostCostFunction(conf),
- new RegionReplicaRackCostFunction(conf),
+ regionReplicaHostCostFunction,
+ regionReplicaRackCostFunction,
regionLoadFunctions[0],
regionLoadFunctions[1],
regionLoadFunctions[2],
@@ -189,6 +194,15 @@ public class StochasticLoadBalancer exte
}
+ @Override
+ protected boolean areSomeRegionReplicasColocated(Cluster c) {
+ regionReplicaHostCostFunction.init(c);
+ if (regionReplicaHostCostFunction.cost() > 0) return true;
+ regionReplicaRackCostFunction.init(c);
+ if (regionReplicaRackCostFunction.cost() > 0) return true;
+ return false;
+ }
+
/**
* Given the cluster state this will try and approach an optimal balance. This
* should always approach the optimal state given enough steps.
@@ -197,14 +211,14 @@ public class StochasticLoadBalancer exte
public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState) {
//The clusterState that is given to this method contains the state
//of all the regions in the table(s) (that's true today)
- if (!needsBalance(new ClusterLoadState(clusterState))) {
+ // Keep track of servers to iterate through them.
+ Cluster cluster = new Cluster(clusterState, loads, regionFinder, rackManager);
+ if (!needsBalance(cluster)) {
return null;
}
long startTime = EnvironmentEdgeManager.currentTimeMillis();
- // Keep track of servers to iterate through them.
- Cluster cluster = new Cluster(clusterState, loads, regionFinder, rackManager);
initCosts(cluster);
double currentCost = computeCost(cluster, Double.MAX_VALUE);
Modified: hbase/branches/hbase-10070/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-10070/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java?rev=1575097&r1=1575096&r2=1575097&view=diff
==============================================================================
--- hbase/branches/hbase-10070/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java (original)
+++ hbase/branches/hbase-10070/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java Thu Mar 6 23:34:44 2014
@@ -118,10 +118,14 @@ public class TestRegionRebalancing {
UTIL.getHBaseCluster().getMaster().balance();
assertRegionsAreBalanced();
+ // On a balanced cluster, calling balance() should return false
+ assert(UTIL.getHBaseCluster().getMaster().balance() == false);
+
+ // However if we add a server, then the balance() call should return true
// add a region server - total of 3
LOG.info("Started third server=" +
UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
- UTIL.getHBaseCluster().getMaster().balance();
+ assert(UTIL.getHBaseCluster().getMaster().balance() == true);
assertRegionsAreBalanced();
// kill a region server - total of 2
@@ -135,14 +139,14 @@ public class TestRegionRebalancing {
UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
LOG.info("Added fourth server=" +
UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
- UTIL.getHBaseCluster().getMaster().balance();
+ assert(UTIL.getHBaseCluster().getMaster().balance() == true);
assertRegionsAreBalanced();
for (int i = 0; i < 6; i++){
LOG.info("Adding " + (i + 5) + "th region server");
UTIL.getHBaseCluster().startRegionServer();
}
- UTIL.getHBaseCluster().getMaster().balance();
+ assert(UTIL.getHBaseCluster().getMaster().balance() == true);
assertRegionsAreBalanced();
table.close();
}
Modified: hbase/branches/hbase-10070/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-10070/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java?rev=1575097&r1=1575096&r2=1575097&view=diff
==============================================================================
--- hbase/branches/hbase-10070/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java (original)
+++ hbase/branches/hbase-10070/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java Thu Mar 6 23:34:44 2014
@@ -26,6 +26,7 @@ import static org.mockito.Mockito.when;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -46,6 +47,7 @@ import org.apache.hadoop.hbase.ServerNam
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.master.RackManager;
import org.apache.hadoop.hbase.master.RegionPlan;
+import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.net.DNSToSwitchMapping;
import org.apache.hadoop.net.NetworkTopology;
@@ -382,6 +384,34 @@ public class TestStochasticLoadBalancer
assertTrue(costWith2ReplicasOnTwoServers < costWith3ReplicasSameServer);
}
+ @Test
+ public void testNeedsBalanceForColocatedReplicas() {
+ // check for the case where there are two hosts and with one rack, and where
+ // both the replicas are hosted on the same server
+ List<HRegionInfo> regions = randomRegions(1);
+ ServerName s1 = ServerName.valueOf("host1", 1000, 11111);
+ ServerName s2 = ServerName.valueOf("host11", 1000, 11111);
+ Map<ServerName, List<HRegionInfo>> map = new HashMap<ServerName, List<HRegionInfo>>();
+ map.put(s1, regions);
+ regions.add(RegionReplicaUtil.getRegionInfoForReplica(regions.get(0), 1));
+ // until the step above s1 holds two replicas of a region
+ regions = randomRegions(1);
+ map.put(s2, regions);
+ assertTrue(loadBalancer.needsBalance(new Cluster(map, null, null, null)));
+ // check for the case where there are two hosts on the same rack and there are two racks
+ // and both the replicas are on the same rack
+ map.clear();
+ regions = randomRegions(1);
+ List<HRegionInfo> regionsOnS2 = new ArrayList<HRegionInfo>(1);
+ regionsOnS2.add(RegionReplicaUtil.getRegionInfoForReplica(regions.get(0), 1));
+ map.put(s1, regions);
+ map.put(s2, regionsOnS2);
+ // add another server so that the cluster has some host on another rack
+ map.put(ServerName.valueOf("host2", 1000, 11111), randomRegions(1));
+ assertTrue(loadBalancer.needsBalance(new Cluster(map, null, null,
+ new ForTestRackManagerOne())));
+ }
+
@Test (timeout = 60000)
public void testSmallCluster() {
int numNodes = 10;
@@ -546,6 +576,13 @@ public class TestStochasticLoadBalancer
}
}
+ private static class ForTestRackManagerOne extends RackManager {
+ @Override
+ public String getRack(ServerName server) {
+ return server.getHostname().endsWith("1") ? "rack1" : "rack2";
+ }
+ }
+
@Test (timeout = 120000)
public void testRegionReplicationOnMidClusterWithRacks() {
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 4000000L);