You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by wc...@apache.org on 2020/07/17 09:47:14 UTC

[hbase] branch branch-2 updated: HBASE-24664 Some changing of split region by overall region size rather than only one store size (#2054)

This is an automated email from the ASF dual-hosted git repository.

wchevreuil pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new ec3beaf  HBASE-24664 Some changing of split region by overall region size rather than only one store size (#2054)
ec3beaf is described below

commit ec3beaf5a28aef1e40fac7af41ef27451b2297c6
Author: bsglz <18...@qq.com>
AuthorDate: Fri Jul 17 17:47:01 2020 +0800

    HBASE-24664 Some changing of split region by overall region size rather than only one store size (#2054)
    
    Signed-off-by: Wellington Chevreuil <wc...@apache.org>
---
 .../java/org/apache/hadoop/hbase/HConstants.java   |  7 +++
 hbase-common/src/main/resources/hbase-default.xml  |  5 ++
 .../ConstantSizeRegionSplitPolicy.java             | 55 ++++++++++++++++------
 .../IncreasingToUpperBoundRegionSplitPolicy.java   | 29 ++++--------
 .../hbase/regionserver/RegionSplitPolicy.java      |  4 +-
 .../hbase/regionserver/TestRegionSplitPolicy.java  | 39 +++++++++++++++
 src/main/asciidoc/_chapters/hbase-default.adoc     | 11 +++++
 7 files changed, 112 insertions(+), 38 deletions(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index 8196e51..475989b 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -424,6 +424,13 @@ public final class HConstants {
   /** Default maximum file size */
   public static final long DEFAULT_MAX_FILE_SIZE = 10 * 1024 * 1024 * 1024L;
 
+  /** Conf key for whether the split check sums the size of all stores in the region */
+  public static final String OVERALL_HREGION_FILES =
+    "hbase.hregion.split.overallfiles";
+
+  /** Default for {@link #OVERALL_HREGION_FILES}: each store's size is checked on its own */
+  public static final boolean DEFAULT_OVERALL_HREGION_FILES = false;
+
   /**
    * Max size of single row for Get's or Scan's without in-row scanning flag set.
    */
diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml
index 6f84eba..61d3a31 100644
--- a/hbase-common/src/main/resources/hbase-default.xml
+++ b/hbase-common/src/main/resources/hbase-default.xml
@@ -755,6 +755,11 @@ possible configurations would overwhelm and obscure the important.
     value, the region is split in two.</description>
   </property>
   <property>
+    <name>hbase.hregion.split.overallfiles</name>
+    <value>false</value>
+    <description>If true, the split check compares the total size of all stores in a region against the split threshold, instead of each store's size individually.</description>
+  </property>
+  <property>
     <name>hbase.hregion.majorcompaction</name>
     <value>604800000</value>
     <description>Time between major compactions, expressed in milliseconds. Set to 0 to disable
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ConstantSizeRegionSplitPolicy.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ConstantSizeRegionSplitPolicy.java
index 8ad8126..5ffd960 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ConstantSizeRegionSplitPolicy.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ConstantSizeRegionSplitPolicy.java
@@ -26,6 +26,9 @@ import org.apache.hadoop.hbase.HBaseInterfaceAudience;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.procedure2.util.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * A {@link RegionSplitPolicy} implementation which splits a region
@@ -38,10 +41,13 @@ import org.apache.hadoop.hbase.client.TableDescriptor;
  */
 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
 public class ConstantSizeRegionSplitPolicy extends RegionSplitPolicy {
+  private static final Logger LOG =
+    LoggerFactory.getLogger(ConstantSizeRegionSplitPolicy.class);
   private static final Random RANDOM = new Random();
 
   private long desiredMaxFileSize;
   private double jitterRate;
+  protected boolean overallHRegionFiles;
 
   @Override
   protected void configureForRegion(HRegion region) {
@@ -55,6 +61,8 @@ public class ConstantSizeRegionSplitPolicy extends RegionSplitPolicy {
       this.desiredMaxFileSize = conf.getLong(HConstants.HREGION_MAX_FILESIZE,
         HConstants.DEFAULT_MAX_FILE_SIZE);
     }
+    this.overallHRegionFiles = conf.getBoolean(HConstants.OVERALL_HREGION_FILES,
+      HConstants.DEFAULT_OVERALL_HREGION_FILES);
     double jitter = conf.getDouble("hbase.hregion.max.filesize.jitter", 0.25D);
     this.jitterRate = (RANDOM.nextFloat() - 0.5D) * jitter;
     long jitterValue = (long) (this.desiredMaxFileSize * this.jitterRate);
@@ -68,22 +76,10 @@ public class ConstantSizeRegionSplitPolicy extends RegionSplitPolicy {
 
   @Override
   protected boolean shouldSplit() {
-    boolean foundABigStore = false;
-
-    for (HStore store : region.getStores()) {
-      // If any of the stores are unable to split (eg they contain reference files)
-      // then don't split
-      if ((!store.canSplit())) {
-        return false;
-      }
-
-      // Mark if any store is big enough
-      if (store.getSize() > desiredMaxFileSize) {
-        foundABigStore = true;
-      }
+    if (!canSplit()) {
+      return false;
     }
-
-    return foundABigStore;
+    return isExceedSize(desiredMaxFileSize);
   }
 
   long getDesiredMaxFileSize() {
@@ -94,4 +90,33 @@ public class ConstantSizeRegionSplitPolicy extends RegionSplitPolicy {
   public boolean positiveJitterRate() {
     return this.jitterRate > 0;
   }
+
+  /**
+   * Compares the summed size of all stores (if {@code overallHRegionFiles} is set) or else
+   * each store's own size against {@code sizeToCheck}; logs at debug level on a match.
+   * @return true if the checked size is strictly greater than {@code sizeToCheck}
+   */
+  protected final boolean isExceedSize(long sizeToCheck) {
+    if (overallHRegionFiles) {
+      long sumSize = 0;
+      for (HStore store : region.getStores()) {
+        sumSize += store.getSize();
+      }
+      if (sumSize > sizeToCheck) {
+        LOG.debug("ShouldSplit because region size is big enough size={}, sizeToCheck={}",
+          StringUtils.humanSize(sumSize), StringUtils.humanSize(sizeToCheck));
+        return true;
+      }
+    } else {
+      for (HStore store : region.getStores()) {
+        long size = store.getSize();
+        if (size > sizeToCheck) {
+          LOG.debug("ShouldSplit because {} size={}, sizeToCheck={}", store.getColumnFamilyName(),
+            StringUtils.humanSize(size), StringUtils.humanSize(sizeToCheck));
+          return true;
+        }
+      }
+    }
+    return false;
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/IncreasingToUpperBoundRegionSplitPolicy.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/IncreasingToUpperBoundRegionSplitPolicy.java
index c40d6aa..72e8853 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/IncreasingToUpperBoundRegionSplitPolicy.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/IncreasingToUpperBoundRegionSplitPolicy.java
@@ -28,7 +28,6 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hbase.client.TableDescriptor;
 import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
-import org.apache.hadoop.hbase.procedure2.util.StringUtils;
 
 /**
  * Split size is the number of regions that are on this server that all are
@@ -70,31 +69,18 @@ public class IncreasingToUpperBoundRegionSplitPolicy extends ConstantSizeRegionS
 
   @Override
   protected boolean shouldSplit() {
-    boolean foundABigStore = false;
+    if (!canSplit()) {
+      return false;
+    }
     // Get count of regions that have the same common table as this.region
     int tableRegionsCount = getCountOfCommonTableRegions();
     // Get size to check
     long sizeToCheck = getSizeToCheck(tableRegionsCount);
-
-    for (HStore store : region.getStores()) {
-      // If any of the stores is unable to split (eg they contain reference files)
-      // then don't split
-      if (!store.canSplit()) {
-        return false;
-      }
-
-      // Mark if any store is big enough
-      long size = store.getSize();
-      if (size > sizeToCheck) {
-        LOG.debug("ShouldSplit because " + store.getColumnFamilyName() +
-          " size=" + StringUtils.humanSize(size) +
-          ", sizeToCheck=" + StringUtils.humanSize(sizeToCheck) +
-          ", regionsWithCommonTable=" + tableRegionsCount);
-        foundABigStore = true;
-      }
+    boolean shouldSplit = isExceedSize(sizeToCheck);
+    if (shouldSplit) {
+      LOG.debug("regionsWithCommonTable={}", tableRegionsCount);
     }
-
-    return foundABigStore;
+    return shouldSplit;
   }
 
   /**
@@ -129,4 +115,5 @@ public class IncreasingToUpperBoundRegionSplitPolicy extends ConstantSizeRegionS
                : Math.min(getDesiredMaxFileSize(),
                           initialSize * tableRegionsCount * tableRegionsCount * tableRegionsCount);
   }
+
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.java
index c89f888..3079925 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.java
@@ -75,9 +75,9 @@ public abstract class RegionSplitPolicy extends Configured {
    * @return {@code true} if the specified region can be split.
    */
   protected boolean canSplit() {
-    return !region.getRegionInfo().isMetaRegion() &&
+    return !region.getRegionInfo().isMetaRegion() && region.isAvailable() &&
       !TableName.NAMESPACE_TABLE_NAME.equals(region.getRegionInfo().getTable()) &&
-      region.isAvailable() && !region.hasReferences();
+      region.getStores().stream().allMatch(HStore::canSplit);
   }
 
   /**
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java
index 5ca693c..0ef3002 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java
@@ -65,6 +65,7 @@ public class TestRegionSplitPolicy {
     RegionInfo hri = RegionInfoBuilder.newBuilder(TABLENAME).build();
     mockRegion = mock(HRegion.class);
     doReturn(hri).when(mockRegion).getRegionInfo();
+    doReturn(true).when(mockRegion).isAvailable();
     stores = new ArrayList<>();
     doReturn(stores).when(mockRegion).getStores();
   }
@@ -154,6 +155,44 @@ public class TestRegionSplitPolicy {
   }
 
   @Test
+  public void testIsExceedSize() throws IOException {
+    // Configure ConstantSizeRegionSplitPolicy with overall-region size checking enabled
+    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
+      ConstantSizeRegionSplitPolicy.class.getName());
+    conf.set(HConstants.OVERALL_HREGION_FILES, "true");
+    // Now make it so the mock region has a RegionServerService that will
+    // return 'online regions'.
+    RegionServerServices rss = mock(RegionServerServices.class);
+    final List<HRegion> regions = new ArrayList<>();
+    doReturn(regions).when(rss).getRegions(TABLENAME);
+    when(mockRegion.getRegionServerServices()).thenReturn(rss);
+
+    TableDescriptor td = TableDescriptorBuilder.newBuilder(TABLENAME).build();
+    doReturn(td).when(mockRegion).getTableDescriptor();
+    ConstantSizeRegionSplitPolicy policy =
+      (ConstantSizeRegionSplitPolicy) RegionSplitPolicy.create(mockRegion, conf);
+    regions.add(mockRegion);
+
+    HStore mockStore1 = mock(HStore.class);
+    doReturn(100L).when(mockStore1).getSize();
+    HStore mockStore2 = mock(HStore.class);
+    doReturn(924L).when(mockStore2).getSize();
+    HStore mockStore3 = mock(HStore.class);
+    doReturn(925L).when(mockStore3).getSize();
+
+    // sum of store sizes (100 + 924 = 1024) is not greater than sizeToCheck
+    stores.add(mockStore1);
+    stores.add(mockStore2);
+    assertFalse(policy.isExceedSize(1024));
+    stores.clear();
+
+    // sum of store sizes (100 + 925 = 1025) is greater than sizeToCheck
+    stores.add(mockStore1);
+    stores.add(mockStore3);
+    assertTrue(policy.isExceedSize(1024));
+  }
+
+  @Test
   public void testBusyRegionSplitPolicy() throws Exception {
     doReturn(TableDescriptorBuilder.newBuilder(TABLENAME).build()).when(mockRegion)
       .getTableDescriptor();
diff --git a/src/main/asciidoc/_chapters/hbase-default.adoc b/src/main/asciidoc/_chapters/hbase-default.adoc
index cdefb5c..5768add 100644
--- a/src/main/asciidoc/_chapters/hbase-default.adoc
+++ b/src/main/asciidoc/_chapters/hbase-default.adoc
@@ -894,6 +894,17 @@ Time to sleep in between searches for work (in milliseconds).
 `10737418240`
 
 
+[[hbase.hregion.split.overallfiles]]
+*`hbase.hregion.split.overallfiles`*::
++
+.Description
+
+    If true, the split check compares the total size of all stores in a region against the split threshold, instead of each store's size individually.
++
+.Default
+`false`
+
+
 [[hbase.hregion.majorcompaction]]
 *`hbase.hregion.majorcompaction`*::
 +