You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by la...@apache.org on 2014/02/18 20:41:52 UTC

svn commit: r1569504 - in /hbase/branches/0.94/src: main/java/org/apache/hadoop/hbase/regionserver/IncreasingToUpperBoundRegionSplitPolicy.java test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java

Author: larsh
Date: Tue Feb 18 19:41:51 2014
New Revision: 1569504

URL: http://svn.apache.org/r1569504
Log:
HBASE-10501 Improve IncreasingToUpperBoundRegionSplitPolicy to avoid too many regions.

Modified:
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/IncreasingToUpperBoundRegionSplitPolicy.java
    hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/IncreasingToUpperBoundRegionSplitPolicy.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/IncreasingToUpperBoundRegionSplitPolicy.java?rev=1569504&r1=1569503&r2=1569504&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/IncreasingToUpperBoundRegionSplitPolicy.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/IncreasingToUpperBoundRegionSplitPolicy.java Tue Feb 18 19:41:51 2014
@@ -29,30 +29,34 @@ import org.apache.hadoop.hbase.util.Byte
 
 /**
  * Split size is the number of regions that are on this server that all are
- * of the same table, squared, times the region flush size OR the maximum
+ * of the same table, cubed, times 2x the region flush size OR the maximum
  * region split size, whichever is smaller.  For example, if the flush size
- * is 128M, then on first flush we will split which will make two regions
- * that will split when their size is 2 * 2 * 128M = 512M.  If one of these
+ * is 128M, then after two flushes (256MB) we will split which will make two regions
+ * that will split when their size is 2^3 * 128M*2 = 2048M.  If one of these
  * regions splits, then there are three regions and now the split size is
- * 3 * 3 * 128M =  1152M, and so on until we reach the configured
+ * 3^3 * 128M*2 =  6912M, and so on until we reach the configured
  * maximum filesize and then from there on out, we'll use that.
  */
 public class IncreasingToUpperBoundRegionSplitPolicy
 extends ConstantSizeRegionSplitPolicy {
   static final Log LOG =
     LogFactory.getLog(IncreasingToUpperBoundRegionSplitPolicy.class);
-  private long flushSize;
+  private long initialSize;
 
   @Override
   protected void configureForRegion(HRegion region) {
     super.configureForRegion(region);
     Configuration conf = getConf();
+    this.initialSize = conf.getLong("hbase.increasing.policy.initial.size", -1);
+    if (this.initialSize > 0) {
+      return;
+    }
     HTableDescriptor desc = region.getTableDesc();
     if (desc != null) {
-      this.flushSize = desc.getMemStoreFlushSize();
+      this.initialSize = 2*desc.getMemStoreFlushSize();
     }
-    if (this.flushSize <= 0) {
-      this.flushSize = conf.getLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE,
+    if (this.initialSize <= 0) {
+      this.initialSize = 2*conf.getLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE,
         HTableDescriptor.DEFAULT_MEMSTORE_FLUSH_SIZE);
     }
   }
@@ -90,10 +94,11 @@ extends ConstantSizeRegionSplitPolicy {
    * @return Region max size or <code>count of regions cubed * initial size</code>, whichever
    * is smaller; the max size is also used when there are zero or more than 100 regions.
    */
-  long getSizeToCheck(final int tableRegionsCount) {
-    return tableRegionsCount == 0? getDesiredMaxFileSize():
+  protected long getSizeToCheck(final int tableRegionsCount) {
+    // safety check for 100 to avoid numerical overflow in extreme cases
+    return tableRegionsCount == 0 || tableRegionsCount > 100 ? getDesiredMaxFileSize():
       Math.min(getDesiredMaxFileSize(),
-        this.flushSize * (tableRegionsCount * tableRegionsCount));
+        this.initialSize * tableRegionsCount * tableRegionsCount * tableRegionsCount);
   }
 
   /**

Modified: hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java?rev=1569504&r1=1569503&r2=1569504&view=diff
==============================================================================
--- hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java (original)
+++ hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java Tue Feb 18 19:41:51 2014
@@ -75,9 +75,9 @@ public class TestRegionSplitPolicy {
     // Set max size for this 'table'.
     long maxSplitSize = 1024L;
     htd.setMaxFileSize(maxSplitSize);
-    // Set flush size to 1/4.  IncreasingToUpperBoundRegionSplitPolicy
-    // grows by the square of the number of regions times flushsize each time.
-    long flushSize = maxSplitSize/4;
+    // Set flush size to 1/8.  IncreasingToUpperBoundRegionSplitPolicy
+    // grows by the cube of the number of regions times flushsize each time.
+    long flushSize = maxSplitSize/8;
     conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, flushSize);
     htd.setMemStoreFlushSize(flushSize);
     // If RegionServerService with no regions in it -- 'online regions' == 0 --
@@ -100,18 +100,18 @@ public class TestRegionSplitPolicy {
     // Now test that we increase our split size as the number of online regions
     // for a table grows. With one region, split size should be 2 * flushsize.
     regions.add(mockRegion);
-    Mockito.doReturn(flushSize/2).when(mockStore).getSize();
-    // Should not split since store is 1/2 flush size.
+    Mockito.doReturn(flushSize).when(mockStore).getSize();
+    // Should not split since store is flush size.
     assertFalse(policy.shouldSplit());
-    // Set size of store to be > flush size and we should split
-    Mockito.doReturn(flushSize + 1).when(mockStore).getSize();
+    // Set size of store to be > 2*flush size and we should split
+    Mockito.doReturn(flushSize*2 + 1).when(mockStore).getSize();
     assertTrue(policy.shouldSplit());
     // Add another region to the 'online regions' on this server and we should
     // now no longer be splittable since split size has gone up.
     regions.add(mockRegion);
     assertFalse(policy.shouldSplit());
     // Set the store size to just over 8x (2 cubed) the flush size; verify it'll split
-    Mockito.doReturn((flushSize * 2 * 2) + 1).when(mockStore).getSize();
+    Mockito.doReturn((flushSize * 2 * 2 * 2) + 1).when(mockStore).getSize();
     assertTrue(policy.shouldSplit());
 
     // Finally assert that even if loads of regions, we'll split at max size