You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2008/10/15 07:18:32 UTC

svn commit: r704781 - in /hadoop/hbase: branches/0.18/ branches/0.18/src/java/org/apache/hadoop/hbase/master/ trunk/ trunk/conf/ trunk/src/java/org/apache/hadoop/hbase/master/ trunk/src/java/org/apache/hadoop/hbase/regionserver/

Author: stack
Date: Tue Oct 14 22:18:31 2008
New Revision: 704781

URL: http://svn.apache.org/viewvc?rev=704781&view=rev
Log:
HBASE-920 Make region balancing sloppier

Modified:
    hadoop/hbase/branches/0.18/CHANGES.txt
    hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/master/RegionManager.java
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/conf/hbase-default.xml
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java

Modified: hadoop/hbase/branches/0.18/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.18/CHANGES.txt?rev=704781&r1=704780&r2=704781&view=diff
==============================================================================
--- hadoop/hbase/branches/0.18/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.18/CHANGES.txt Tue Oct 14 22:18:31 2008
@@ -14,6 +14,9 @@
    HBASE-576   Investigate IPC performance; partial.
    HBASE-924   Update hadoop in lib on 0.18 hbase branch to 0.18.1
 
+  IMPROVEMENTS
+   HBASE-920   Make region balancing sloppier
+
 Release 0.18.0 - September 21st, 2008
 
   INCOMPATIBLE CHANGES

Modified: hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=704781&r1=704780&r2=704781&view=diff
==============================================================================
--- hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/master/RegionManager.java Tue Oct 14 22:18:31 2008
@@ -118,15 +118,17 @@
   private final int maxAssignInOneGo;
 
   private final HMaster master;
-  
   private final RegionHistorian historian;
+  private final float slop;
   
   RegionManager(HMaster master) {
     this.master = master;
     this.historian = RegionHistorian.getInstance();
     this.maxAssignInOneGo = this.master.getConfiguration().
       getInt("hbase.regions.percheckin", 10);
-    
+    this.slop = this.master.getConfiguration().getFloat("hbase.regions.slop",
+      (float)0.1);
+
     // The root region
     rootScannerThread = new RootScanner(master, this);
 
@@ -183,13 +185,18 @@
           // We only do load balancing once all regions are assigned.
           // This prevents churn while the cluster is starting up.
           double avgLoad = master.serverManager.getAverageLoad();
-          if (avgLoad > 2.0 && thisServersLoad.getNumberOfRegions() > avgLoad) {
+          double avgLoadWithSlop = avgLoad +
+            ((this.slop != 0)? avgLoad * this.slop: avgLoad);
+          if (avgLoad > 2.0 &&
+              thisServersLoad.getNumberOfRegions() > avgLoadWithSlop) {
             if (LOG.isDebugEnabled()) {
-              LOG.debug("Server " + serverName + " is overloaded. Server load: " + 
-                  thisServersLoad.getNumberOfRegions() + " avg: " + avgLoad);
+              LOG.debug("Server " + serverName +
+                " is overloaded. Server load: " + 
+                thisServersLoad.getNumberOfRegions() + " avg: " + avgLoad +
+                ", slop: " + this.slop);
             }
-            unassignSomeRegions(thisServersLoad, avgLoad, mostLoadedRegions, 
-                returnMsgs);
+            unassignSomeRegions(thisServersLoad, avgLoad, mostLoadedRegions,
+              returnMsgs);
           }
         }
       } else {

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=704781&r1=704780&r2=704781&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Tue Oct 14 22:18:31 2008
@@ -40,6 +40,7 @@
    HBASE-908   Add approximate counting to CountingBloomFilter
                (Andrzej Bialecki via Stack)
    HBASE-576   Investigate IPC performance
+   HBASE-920   Make region balancing sloppier
 
   NEW FEATURES
    HBASE-875   Use MurmurHash instead of JenkinsHash [in bloomfilters]

Modified: hadoop/hbase/trunk/conf/hbase-default.xml
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/conf/hbase-default.xml?rev=704781&r1=704780&r2=704781&view=diff
==============================================================================
--- hadoop/hbase/trunk/conf/hbase-default.xml (original)
+++ hadoop/hbase/trunk/conf/hbase-default.xml Tue Oct 14 22:18:31 2008
@@ -23,14 +23,6 @@
 -->
 <configuration>
   <property>
-    <name>hbase.master</name>
-    <value>local</value>
-    <description>The host and port that the HBase master runs at.
-    A value of 'local' runs the master and a regionserver in
-    a single process.
-    </description>
-  </property>
-  <property>
     <name>hbase.rootdir</name>
     <value>file:///tmp/hbase-${user.name}/hbase</value>
     <description>The directory shared by region servers.
@@ -39,6 +31,14 @@
     </description>
   </property>
   <property>
+    <name>hbase.master</name>
+    <value>local</value>
+    <description>The host and port that the HBase master runs at.
+    A value of 'local' runs the master and a regionserver in
+    a single process.
+    </description>
+  </property>
+  <property>
     <name>hbase.master.info.port</name>
     <value>60010</value>
     <description>The port for the hbase master web UI
@@ -52,6 +52,21 @@
     </description>
   </property>
   <property>
+    <name>hbase.master.meta.thread.rescanfrequency</name>
+    <value>60000</value>
+    <description>How long the HMaster sleeps (in milliseconds) between scans of
+    the root and meta tables.
+    </description>
+  </property>
+  <property>
+    <name>hbase.master.lease.period</name>
+    <value>120000</value>
+    <description>HMaster server lease period in milliseconds. Default is
+    120 seconds.  Region servers must report in within this period else
+    they are considered dead.  On a loaded cluster, you may need to increase
+    this period.</description>
+  </property>
+  <property>
     <name>hbase.regionserver</name>
     <value>0.0.0.0:60020</value>
     <description>The host and port a HBase region server runs at.
@@ -111,28 +126,6 @@
     </description>
   </property>
   <property>
-    <name>hbase.master.meta.thread.rescanfrequency</name>
-    <value>60000</value>
-    <description>How long the HMaster sleeps (in milliseconds) between scans of
-    the root and meta tables.
-    </description>
-  </property>
-  <property>
-    <name>hbase.master.lease.period</name>
-    <value>120000</value>
-    <description>HMaster server lease period in milliseconds. Default is
-    120 seconds.  Region servers must report in within this period else
-    they are considered dead.  On loaded cluster, may need to up this
-    period.</description>
-  </property>
-  <property>
-    <name>hbase.hbasemaster.maxregionopen</name>
-    <value>120000</value>
-    <description>Period to wait for a region open.  If regionserver
-    takes longer than this interval, assign to a new regionserver.
-    </description>
-  </property>
-  <property>
     <name>hbase.regionserver.lease.period</name>
     <value>60000</value>
     <description>HRegion server lease period in milliseconds. Default is
@@ -140,13 +133,6 @@
     considered dead.</description>
   </property>
   <property>
-    <name>hbase.server.thread.wakefrequency</name>
-    <value>10000</value>
-    <description>Time to sleep in between searches for work (in milliseconds).
-    Used as sleep interval by service threads such as META scanner and log roller.
-    </description>
-  </property>
-  <property>
     <name>hbase.regionserver.handler.count</name>
     <value>10</value>
     <description>Count of RPC Server instances spun up on RegionServers
@@ -190,6 +176,50 @@
     </description>
   </property>
   <property>
+    <name>hbase.regionserver.thread.splitcompactcheckfrequency</name>
+    <value>20000</value>
+    <description>How often a region server runs the split/compaction check.
+    </description>
+  </property>
+  <property>
+    <name>hbase.regionserver.nbreservationblocks</name>
+    <value>4</value>
+    <description>The number of reservation blocks which are used to prevent
+    unstable region servers caused by an OOME.
+    </description>
+  </property>
+  <property>
+    <name>hbase.regionserver.globalMemcacheLimit</name>
+    <value>536870912</value>
+    <description>Maximum size of all memcaches in a region server before new 
+      updates are blocked and flushes are forced. Defaults to 512MB.
+    </description>
+  </property>
+  <property>
+    <name>hbase.regionserver.globalMemcacheLimitlowMark</name>
+    <value>256435456</value>
+    <description>When memcaches are being forced to flush to make room in
+      memory, keep flushing until we hit this mark. Defaults to 256MB. Setting
+      this value equal to hbase.regionserver.globalMemcacheLimit causes the 
+      minimum possible flushing to occur when updates are blocked due to 
+      memcache limiting.
+    </description>
+  </property>  
+  <property>
+    <name>hbase.hbasemaster.maxregionopen</name>
+    <value>120000</value>
+    <description>Period to wait for a region open.  If regionserver
+    takes longer than this interval, assign to a new regionserver.
+    </description>
+  </property>
+  <property>
+    <name>hbase.server.thread.wakefrequency</name>
+    <value>10000</value>
+    <description>Time to sleep in between searches for work (in milliseconds).
+    Used as sleep interval by service threads such as META scanner and log roller.
+    </description>
+  </property>
+  <property>
     <name>hbase.hregion.memcache.flush.size</name>
     <value>67108864</value>
     <description>
@@ -235,12 +265,6 @@
     </description>
   </property>
   <property>
-    <name>hbase.regionserver.thread.splitcompactcheckfrequency</name>
-    <value>20000</value>
-    <description>How often a region server runs the split/compaction check.
-    </description>
-  </property>
-  <property>
     <name>hbase.hstore.compaction.max</name>
     <value>10</value>
     <description>Max number of HStoreFiles to compact per 'minor' compaction.
@@ -254,10 +278,10 @@
     </description>
   </property>
   <property>
-    <name>hbase.regionserver.nbreservationblocks</name>
-    <value>4</value>
-    <description>The number of reservation blocks which are used to prevent
-    unstable region servers caused by an OOME.
+    <name>hbase.regions.slop</name>
+    <value>0.1</value>
+    <description>Rebalance if a regionserver has more than average + (average * slop)
+    regions. Default is 10% slop.
     </description>
   </property>
   <property>
@@ -284,23 +308,6 @@
     </description>
   </property>
   <property>
-    <name>hbase.regionserver.globalMemcacheLimit</name>
-    <value>536870912</value>
-    <description>Maximum size of all memcaches in a region server before new 
-      updates are blocked and flushes are forced. Defaults to 512MB.
-    </description>
-  </property>
-  <property>
-    <name>hbase.regionserver.globalMemcacheLimitlowMark</name>
-    <value>256435456</value>
-    <description>When memcaches are being forced to flush to make room in
-      memory, keep flushing until we hit this mark. Defaults to 256MB. Setting
-      this value equal to hbase.regionserver.globalmemcachelimit causes the 
-      minimum possible flushing to occur when updates are blocked due to 
-      memcache limiting.
-    </description>
-  </property>  
-  <property>
     <name>hbase.hash.type</name>
     <value>murmur</value>
     <description>The hashing algorithm for use in HashFunction. Two values are

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=704781&r1=704780&r2=704781&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java Tue Oct 14 22:18:31 2008
@@ -118,15 +118,17 @@
   private final int maxAssignInOneGo;
 
   private final HMaster master;
-  
   private final RegionHistorian historian;
+  private final float slop;
   
   RegionManager(HMaster master) {
     this.master = master;
     this.historian = RegionHistorian.getInstance();
     this.maxAssignInOneGo = this.master.getConfiguration().
       getInt("hbase.regions.percheckin", 10);
-    
+    this.slop = this.master.getConfiguration().getFloat("hbase.regions.slop",
+      (float)0.1);
+
     // The root region
     rootScannerThread = new RootScanner(master, this);
 
@@ -183,13 +185,18 @@
           // We only do load balancing once all regions are assigned.
           // This prevents churn while the cluster is starting up.
           double avgLoad = master.serverManager.getAverageLoad();
-          if (avgLoad > 2.0 && thisServersLoad.getNumberOfRegions() > avgLoad) {
+          double avgLoadWithSlop = avgLoad +
+            ((this.slop != 0)? avgLoad * this.slop: avgLoad);
+          if (avgLoad > 2.0 &&
+              thisServersLoad.getNumberOfRegions() > avgLoadWithSlop) {
             if (LOG.isDebugEnabled()) {
-              LOG.debug("Server " + serverName + " is overloaded. Server load: " + 
-                  thisServersLoad.getNumberOfRegions() + " avg: " + avgLoad);
+              LOG.debug("Server " + serverName +
+                " is overloaded. Server load: " + 
+                thisServersLoad.getNumberOfRegions() + " avg: " + avgLoad +
+                ", slop: " + this.slop);
             }
-            unassignSomeRegions(thisServersLoad, avgLoad, mostLoadedRegions, 
-                returnMsgs);
+            unassignSomeRegions(thisServersLoad, avgLoad, mostLoadedRegions,
+              returnMsgs);
           }
         }
       } else {

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java?rev=704781&r1=704780&r2=704781&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java Tue Oct 14 22:18:31 2008
@@ -36,6 +36,10 @@
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HStoreKey;
 import org.apache.hadoop.hbase.io.BlockFSInputStream;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -46,14 +50,8 @@
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.onelab.filter.BloomFilter;
-import org.onelab.filter.HashFunction;
 import org.onelab.filter.Key;
 
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HStoreKey;
-
 /**
  * A HStore data file.  HStores usually have one or more of these files.  They
  * are produced by flushing the memcache to disk.