You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2009/05/21 07:10:21 UTC
svn commit: r776952 - in /hadoop/hbase/trunk_on_hadoop-0.18.3: ./
src/java/org/apache/hadoop/hbase/master/
src/java/org/apache/hadoop/hbase/regionserver/
src/test/org/apache/hadoop/hbase/
Author: apurtell
Date: Thu May 21 05:10:21 2009
New Revision: 776952
URL: http://svn.apache.org/viewvc?rev=776952&view=rev
Log:
HBASE-1162, HBASE-1010, HBASE-1415, HBASE-1017
Modified:
hadoop/hbase/trunk_on_hadoop-0.18.3/CHANGES.txt
hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/RegionManager.java
hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/ServerManager.java
hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/regionserver/MemcacheFlusher.java
hadoop/hbase/trunk_on_hadoop-0.18.3/src/test/org/apache/hadoop/hbase/TestRegionRebalancing.java
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/CHANGES.txt?rev=776952&r1=776951&r2=776952&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/CHANGES.txt (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/CHANGES.txt Thu May 21 05:10:21 2009
@@ -144,6 +144,9 @@
HBASE-1440 master won't go down because joined on a rootscanner that is
waiting for ever
HBASE-1441 NPE in ProcessRegionStatusChange#getMetaRegion
+ HBASE-1162 CME in Master in RegionManager.applyActions
+ HBASE-1010 IOE on regionserver shutdown because hadn't opened an HLog
+ HBASE-1415 Stuck on memcache flush
IMPROVEMENTS
HBASE-1089 Add count of regions on filesystem to master UI; add percentage
@@ -272,6 +275,8 @@
HBASE-1420 add ability to add and remove (table) indexes on existing
tables (Clint Morgan via Stack)
HBASE-1430 Read the logs in batches during log splitting to avoid OOME
+ HBASE-1017 Region balancing does not bring newly added node within
+ acceptable range (Evgeny Ryabitskiy via Stack)
OPTIMIZATIONS
HBASE-1412 Change values for delete column and column family in KeyValue
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=776952&r1=776951&r2=776952&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/RegionManager.java Thu May 21 05:10:21 2009
@@ -102,9 +102,9 @@
// How many regions to assign a server at a time.
private final int maxAssignInOneGo;
- private final HMaster master;
+ final HMaster master;
private final RegionHistorian historian;
- private final float slop;
+ private final LoadBalancer loadBalancer;
/** Set of regions to split. */
private final SortedMap<byte[], Pair<HRegionInfo,HServerAddress>>
@@ -137,7 +137,7 @@
this.master = master;
this.historian = RegionHistorian.getInstance();
this.maxAssignInOneGo = conf.getInt("hbase.regions.percheckin", 10);
- this.slop = conf.getFloat("hbase.regions.slop", (float)0.1);
+ this.loadBalancer = new LoadBalancer(conf);
// The root region
rootScannerThread = new RootScanner(master);
@@ -199,20 +199,7 @@
if (!inSafeMode()) {
// We only do load balancing once all regions are assigned.
// This prevents churn while the cluster is starting up.
- double avgLoad = master.serverManager.getAverageLoad();
- double avgLoadWithSlop = avgLoad +
- ((this.slop != 0)? avgLoad * this.slop: avgLoad);
- if (avgLoad > 2.0 &&
- thisServersLoad.getNumberOfRegions() > avgLoadWithSlop) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Server " + info.getServerName() +
- " is overloaded. Server load: " +
- thisServersLoad.getNumberOfRegions() + " avg: " + avgLoad +
- ", slop: " + this.slop);
- }
- unassignSomeRegions(info, thisServersLoad,
- avgLoad, mostLoadedRegions, returnMsgs);
- }
+ loadBalancer.loadBalancing(info, mostLoadedRegions, returnMsgs);
}
} else {
// if there's only one server, just give it all the regions
@@ -432,10 +419,9 @@
* Note that no synchronization is needed because the only caller
* (assignRegions) whose caller owns the monitor for RegionManager
*/
- private void unassignSomeRegions(final HServerInfo info,
- final HServerLoad load, final double avgLoad,
- final HRegionInfo[] mostLoadedRegions, ArrayList<HMsg> returnMsgs) {
- int numRegionsToClose = load.getNumberOfRegions() - (int)Math.ceil(avgLoad);
+ void unassignSomeRegions(final HServerInfo info,
+ int numRegionsToClose, final HRegionInfo[] mostLoadedRegions,
+ ArrayList<HMsg> returnMsgs) {
LOG.debug("Choosing to reassign " + numRegionsToClose
+ " regions. mostLoadedRegions has " + mostLoadedRegions.length
+ " regions in it.");
@@ -1136,11 +1122,11 @@
private void applyActions(final HServerInfo serverInfo,
final ArrayList<HMsg> returnMsgs,
- SortedMap<byte[], Pair<HRegionInfo,HServerAddress>> map,
+ final SortedMap<byte[], Pair<HRegionInfo,HServerAddress>> map,
final HMsg.Type msg) {
HServerAddress addr = serverInfo.getServerAddress();
- Iterator<Pair<HRegionInfo, HServerAddress>> i = map.values().iterator();
synchronized (map) {
+ Iterator<Pair<HRegionInfo, HServerAddress>> i = map.values().iterator();
while (i.hasNext()) {
Pair<HRegionInfo,HServerAddress> pair = i.next();
if (addr.equals(pair.getSecond())) {
@@ -1154,6 +1140,115 @@
}
}
+  /**
+   * Class to balance region servers load.
+   * It keeps Region Servers load in slop range by unassigning Regions
+   * from most loaded servers.
+   *
+   * Equilibrium is reached when the load of every server is in the slop
+   * range [avgLoadMinusSlop, avgLoadPlusSlop], where
+   * avgLoadPlusSlop = Math.ceil(avgLoad * (1 + this.slop)), and
+   * avgLoadMinusSlop = Math.floor(avgLoad * (1 - this.slop)) - 1.
+   */
+  private class LoadBalancer {
+    private final float slop;                // hbase.regions.slop
+    private final int maxRegToClose;         // hbase.regions.close.max
+
+    LoadBalancer(HBaseConfiguration conf) {
+      float configuredSlop = conf.getFloat("hbase.regions.slop", (float)0.1);
+      this.slop = (configuredSlop <= 0) ? 1 : configuredSlop;
+      //maxRegToClose to constrain balance closing per one iteration
+      // -1 to turn off
+      // TODO: change default in HBASE-862, need a suggestion
+      this.maxRegToClose = conf.getInt("hbase.regions.close.max", -1);
+    }
+
+    /**
+     * Balance server load by unassigning some regions.
+     *
+     * @param info - server info
+     * @param mostLoadedRegions - array of most loaded regions
+     * @param returnMsgs - array of return messages
+     */
+    void loadBalancing(HServerInfo info, HRegionInfo[] mostLoadedRegions,
+        ArrayList<HMsg> returnMsgs) {
+      HServerLoad servLoad = info.getLoad();
+      double avg = master.serverManager.getAverageLoad();
+
+      // nothing to balance if server load not more than average load
+      if (servLoad.getLoad() <= Math.ceil(avg) || avg <= 2.0) return;
+
+      // check if server is overloaded
+      int numRegionsToClose = balanceFromOverloaded(servLoad, avg);
+
+      // not overloaded: see if low loaded servers can take regions from it.
+      // The result must be kept, otherwise this path never closes anything.
+      if (numRegionsToClose <= 0) {
+        numRegionsToClose =
+          balanceToLowloaded(info.getServerName(), servLoad, avg);
+      }
+      if (maxRegToClose > 0)
+        numRegionsToClose = Math.min(numRegionsToClose, maxRegToClose);
+
+      if (numRegionsToClose > 0){
+        unassignSomeRegions(info, numRegionsToClose, mostLoadedRegions,
+          returnMsgs);
+      }
+    }
+
+    /*
+     * Check if server is overloaded (with load > avgLoadPlusSlop).
+     * @return number of regions to unassign.
+     */
+    private int balanceFromOverloaded(HServerLoad srvLoad, double avgLoad) {
+      int avgLoadPlusSlop = (int)Math.ceil(avgLoad * (1 + this.slop));
+      int numSrvRegs = srvLoad.getNumberOfRegions();
+      if (numSrvRegs > avgLoadPlusSlop) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Server is overloaded. Server load: " + numSrvRegs +
+            " avg: " + avgLoad + ", slop: " + this.slop);
+        }
+        return numSrvRegs - (int)Math.ceil(avgLoad);
+      }
+      return 0;
+    }
+
+    /*
+     * Check if server is most loaded and can be unloaded to
+     * low loaded servers (with load < avgLoadMinusSlop).
+     * @return number of regions to unassign.
+     */
+    private int balanceToLowloaded(String srvName, HServerLoad srvLoad,
+        double avgLoad) {
+      SortedMap<HServerLoad, Set<String>> loadToServers =
+        master.serverManager.getLoadToServers();
+      // lastKey()/firstKey() throw on an empty map; nothing to balance then
+      if (loadToServers.isEmpty()) return 0;
+      // check if server most loaded
+      if (!loadToServers.get(loadToServers.lastKey()).contains(srvName))
+        return 0;
+
+      // this server is most loaded, we will try to unload it by lowest
+      // loaded servers
+      int avgLoadMinusSlop = (int)Math.floor(avgLoad * (1 - this.slop)) - 1;
+      int lowestLoad = loadToServers.firstKey().getNumberOfRegions();
+      if (lowestLoad >= avgLoadMinusSlop)
+        return 0; // there are no low loaded servers
+
+      int lowSrvCount = loadToServers.get(loadToServers.firstKey()).size();
+      int numSrvRegs = srvLoad.getNumberOfRegions();
+      int numMoveToLowLoaded = (avgLoadMinusSlop - lowestLoad) * lowSrvCount;
+      int numRegionsToClose = Math.min(
+        numSrvRegs - (int)Math.ceil(avgLoad), numMoveToLowLoaded);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Server " + srvName + " will be unloaded for " +
+          "balance. Server load: " + numSrvRegs + " avg: " +
+          avgLoad + ", regions can be moved: " + numMoveToLowLoaded);
+      }
+      return numRegionsToClose;
+    }
+  }
+
/*
* State of a Region as it transitions from closed to open, etc. See
* note on regionsInTransition data member above for listing of state
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=776952&r1=776951&r2=776952&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/ServerManager.java Thu May 21 05:10:21 2009
@@ -174,7 +174,10 @@
Set<String> servers = loadToServers.get(load);
if (servers != null) {
servers.remove(serverName);
- loadToServers.put(load, servers);
+ if (servers.size() > 0)
+ loadToServers.put(load, servers);
+ else
+ loadToServers.remove(load);
}
}
}
@@ -374,7 +377,10 @@
// Note that servers should never be null because loadToServers
// and serversToLoad are manipulated in pairs
servers.remove(serverInfo.getServerName());
- loadToServers.put(load, servers);
+ if (servers.size() > 0)
+ loadToServers.put(load, servers);
+ else
+ loadToServers.remove(load);
}
}
}
@@ -641,7 +647,10 @@
Set<String> servers = loadToServers.get(load);
if (servers != null) {
servers.remove(serverName);
- loadToServers.put(load, servers);
+ if(servers.size() > 0)
+ loadToServers.put(load, servers);
+ else
+ loadToServers.remove(load);
}
}
}
@@ -664,7 +673,7 @@
for (HServerLoad load : serversToLoad.values()) {
totalLoad += load.getNumberOfRegions();
}
- averageLoad = Math.ceil((double)totalLoad / (double)numServers);
+ averageLoad = (double)totalLoad / (double)numServers;
}
return averageLoad;
}
@@ -708,6 +717,15 @@
}
/**
+ * @return Read-only live view (not a snapshot) of the load to servers map.
+ */
+ SortedMap<HServerLoad, Set<String>> getLoadToServers() {
+ synchronized (loadToServers) {
+ return Collections.unmodifiableSortedMap(loadToServers);
+ }
+ }
+
+ /**
* Wakes up threads waiting on serversToServerInfo
*/
public void notifyServers() {
@@ -775,7 +793,10 @@
Set<String> servers = loadToServers.get(load);
if (servers != null) {
servers.remove(serverName);
- loadToServers.put(load, servers);
+ if(servers.size() > 0)
+ loadToServers.put(load, servers);
+ else
+ loadToServers.remove(load);
}
}
}
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=776952&r1=776951&r2=776952&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Thu May 21 05:10:21 2009
@@ -644,7 +644,9 @@
} else {
ArrayList<HRegion> closedRegions = closeAllRegions();
try {
- hlog.closeAndDelete();
+ if (this.hlog != null) {
+ hlog.closeAndDelete();
+ }
} catch (Throwable e) {
LOG.error("Close and delete failed",
RemoteExceptionHandler.checkThrowable(e));
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/regionserver/MemcacheFlusher.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/regionserver/MemcacheFlusher.java?rev=776952&r1=776951&r2=776952&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/regionserver/MemcacheFlusher.java (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/regionserver/MemcacheFlusher.java Thu May 21 05:10:21 2009
@@ -222,27 +222,34 @@
* not flushed.
*/
private boolean flushRegion(HRegion region, boolean removeFromQueue) {
- // Wait until it is safe to flush.
- boolean toomany;
- do {
- toomany = false;
+ // Wait until it is safe to flush
+ int count = 0;
+ boolean triggered = false;
+ while (count++ < (blockingWaitTime / 500)) {
for (Store hstore: region.stores.values()) {
- int files = hstore.getStorefilesCount();
- if (files > this.blockingStoreFilesNumber) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Too many store files in store " + hstore + ": " +
- files + ", waiting");
- }
- toomany = true;
+ if (hstore.getStorefilesCount() > this.blockingStoreFilesNumber) {
+ // always request a compaction
server.compactSplitThread.compactionRequested(region, getName());
+ // only log once
+ if (!triggered) {
+ LOG.info("Too many store files for region " + region + ": " +
+ hstore.getStorefilesCount() + ", waiting");
+ triggered = true;
+ }
try {
- Thread.sleep(blockingWaitTime);
+ Thread.sleep(500);
} catch (InterruptedException e) {
// ignore
}
+ continue;
}
}
- } while (toomany);
+ if (triggered) {
+ LOG.info("Compaction completed on region " + region +
+ ", proceeding");
+ }
+ break;
+ }
synchronized (regionsInQueue) {
// See comment above for removeFromQueue on why we do not
// take the region out of the set. If removeFromQueue is true, remove it
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/src/test/org/apache/hadoop/hbase/TestRegionRebalancing.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/test/org/apache/hadoop/hbase/TestRegionRebalancing.java?rev=776952&r1=776951&r2=776952&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/test/org/apache/hadoop/hbase/TestRegionRebalancing.java (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/test/org/apache/hadoop/hbase/TestRegionRebalancing.java Thu May 21 05:10:21 2009
@@ -122,6 +122,12 @@
LOG.debug("Adding 4th region server");
cluster.startRegionServer();
assertRegionsAreBalanced();
+
+ for (int i = 0; i < 6; i++){
+ LOG.debug("Adding " + (i + 5) + "th region server");
+ cluster.startRegionServer();
+ }
+ assertRegionsAreBalanced();
}
/** figure out how many regions are currently being served. */
@@ -140,6 +146,8 @@
*/
private void assertRegionsAreBalanced() {
boolean success = false;
+ float slop = conf.getFloat("hbase.regions.slop", (float)0.1);
+ if (slop <= 0) slop = 1;
for (int i = 0; i < 5; i++) {
success = true;
@@ -148,14 +156,20 @@
int regionCount = getRegionCount();
List<HRegionServer> servers = getOnlineRegionServers();
- double avg = Math.ceil((double)regionCount / (double)servers.size());
+ double avg = cluster.getMaster().getAverageLoad();
+ int avgLoadPlusSlop = (int)Math.ceil(avg * (1 + slop));
+ int avgLoadMinusSlop = (int)Math.floor(avg * (1 - slop)) - 1;
LOG.debug("There are " + servers.size() + " servers and " + regionCount
- + " regions. Load Average: " + avg);
+ + " regions. Load Average: " + avg + " low border: " + avgLoadMinusSlop
+ + ", up border: " + avgLoadPlusSlop + "; attempt: " + i);
for (HRegionServer server : servers) {
int serverLoad = server.getOnlineRegions().size();
LOG.debug(server.hashCode() + " Avg: " + avg + " actual: " + serverLoad);
- if (!(serverLoad <= avg + 2 && serverLoad >= avg - 2)) {
+ if (!(avg > 2.0 && serverLoad <= avgLoadPlusSlop
+ && serverLoad >= avgLoadMinusSlop)) {
+ LOG.debug(server.hashCode() + " Isn't balanced!!! Avg: " + avg +
+ " actual: " + serverLoad + " slop: " + slop);
success = false;
}
}
@@ -216,4 +230,4 @@
region.getLog().closeAndDelete();
return region;
}
-}
\ No newline at end of file
+}