You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by te...@apache.org on 2015/12/29 15:22:08 UTC

hbase git commit: HBASE-14867 SimpleRegionNormalizer needs to have better heuristics to trigger merge operation

Repository: hbase
Updated Branches:
  refs/heads/master 6868c6366 -> 1e4992c6e


HBASE-14867 SimpleRegionNormalizer needs to have better heuristics to trigger merge operation


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/1e4992c6
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/1e4992c6
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/1e4992c6

Branch: refs/heads/master
Commit: 1e4992c6eccb81166cdda842a68644fa962a3fdc
Parents: 6868c63
Author: tedyu <yu...@gmail.com>
Authored: Tue Dec 29 06:21:57 2015 -0800
Committer: tedyu <yu...@gmail.com>
Committed: Tue Dec 29 06:21:57 2015 -0800

----------------------------------------------------------------------
 .../normalizer/SimpleRegionNormalizer.java      | 106 +++++++++----------
 .../normalizer/TestSimpleRegionNormalizer.java  |  43 +++++++-
 2 files changed, 90 insertions(+), 59 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/1e4992c6/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java
index 659b3dc..fe6034b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java
@@ -27,8 +27,11 @@ import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.master.MasterServices;
-import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.Triple;
 
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
 import java.util.List;
 
 /**
@@ -66,6 +69,22 @@ public class SimpleRegionNormalizer implements RegionNormalizer {
     this.masterServices = masterServices;
   }
 
+  /*
+   * Orders triples by region size, smallest first.
+   * The first element of each triple is the region, the second is its size, and
+   * the third is the index of the region in the underlying list of table regions.
+   */
+  private Comparator<Triple<HRegionInfo, Long, Integer>> regionSizeComparator =
+      new Comparator<Triple<HRegionInfo, Long, Integer>>() {
+    @Override
+    public int compare(Triple<HRegionInfo, Long, Integer> pair,
+        Triple<HRegionInfo, Long, Integer> pair2) {
+      long sz = pair.getSecond();
+      long sz2 = pair2.getSecond();
+      return (sz < sz2) ? -1 : ((sz == sz2) ? 0 : 1);
+    }
+  };
+
   /**
    * Computes next most "urgent" normalization action on the table.
    * Action may be either a split, or a merge, or no action.
@@ -76,7 +95,7 @@ public class SimpleRegionNormalizer implements RegionNormalizer {
   @Override
   public NormalizationPlan computePlanForTable(TableName table) throws HBaseIOException {
     if (table == null || table.isSystemTable()) {
-      LOG.debug("Normalization of table " + table + " isn't allowed");
+      LOG.debug("Normalization of system table " + table + " isn't allowed");
       return EmptyNormalizationPlan.getInstance();
     }
 
@@ -95,57 +114,18 @@ public class SimpleRegionNormalizer implements RegionNormalizer {
       ", number of regions: " + tableRegions.size());
 
     long totalSizeMb = 0;
-    Pair<HRegionInfo, Long> largestRegion = new Pair<>();
-
-    // A is a smallest region, B is it's smallest neighbor
-    Pair<HRegionInfo, Long> smallestRegion = new Pair<>();
-    int smallestRegionIndex = 0;
 
+    ArrayList<Triple<HRegionInfo, Long, Integer>> regionsWithSize =
+        new ArrayList<Triple<HRegionInfo, Long, Integer>>(tableRegions.size());
     for (int i = 0; i < tableRegions.size(); i++) {
       HRegionInfo hri = tableRegions.get(i);
       long regionSize = getRegionSize(hri);
+      regionsWithSize.add(new Triple<HRegionInfo, Long, Integer>(hri, regionSize, i));
       totalSizeMb += regionSize;
-
-      if (largestRegion.getFirst() == null || regionSize > largestRegion.getSecond()) {
-        largestRegion.setFirst(hri);
-        largestRegion.setSecond(regionSize);
-      }
-
-      if (smallestRegion.getFirst() == null || regionSize < smallestRegion.getSecond()) {
-        smallestRegion.setFirst(hri);
-        smallestRegion.setSecond(regionSize);
-        smallestRegionIndex = i;
-      }
     }
+    Collections.sort(regionsWithSize, regionSizeComparator);
 
-    // now get smallest neighbor of smallest region
-    long leftNeighborSize = -1;
-
-    if (smallestRegionIndex > 0) {
-      leftNeighborSize = getRegionSize(tableRegions.get(smallestRegionIndex - 1));
-    }
-
-    long rightNeighborSize = -1;
-    if (smallestRegionIndex < tableRegions.size() - 1) {
-      rightNeighborSize = getRegionSize(tableRegions.get(smallestRegionIndex + 1));
-    }
-
-    Pair<HRegionInfo, Long> smallestNeighborOfSmallestRegion;
-    if (leftNeighborSize == -1) {
-      smallestNeighborOfSmallestRegion =
-        new Pair<>(tableRegions.get(smallestRegionIndex + 1), rightNeighborSize);
-    } else if (rightNeighborSize == -1) {
-      smallestNeighborOfSmallestRegion =
-        new Pair<>(tableRegions.get(smallestRegionIndex - 1), leftNeighborSize);
-    } else {
-      if (leftNeighborSize < rightNeighborSize) {
-        smallestNeighborOfSmallestRegion =
-          new Pair<>(tableRegions.get(smallestRegionIndex - 1), leftNeighborSize);
-      } else {
-        smallestNeighborOfSmallestRegion =
-          new Pair<>(tableRegions.get(smallestRegionIndex + 1), rightNeighborSize);
-      }
-    }
+    Triple<HRegionInfo, Long, Integer> largestRegion = regionsWithSize.get(tableRegions.size()-1);
 
     double avgRegionSize = totalSizeMb / (double) tableRegions.size();
 
@@ -159,19 +139,31 @@ public class SimpleRegionNormalizer implements RegionNormalizer {
         + largestRegion.getFirst().getRegionNameAsString() + " has size "
         + largestRegion.getSecond() + ", more than 2 times than avg size, splitting");
       return new SplitNormalizationPlan(largestRegion.getFirst(), null);
-    } else {
-      if (smallestRegion.getSecond() + smallestNeighborOfSmallestRegion.getSecond()
-          < avgRegionSize) {
-        LOG.debug("Table " + table + ", smallest region size: " + smallestRegion.getSecond()
-          + " and its smallest neighbor size: " + smallestNeighborOfSmallestRegion.getSecond()
-          + ", less than the avg size, merging them");
-        return new MergeNormalizationPlan(smallestRegion.getFirst(),
-          smallestNeighborOfSmallestRegion.getFirst());
-      } else {
-        LOG.debug("No normalization needed, regions look good for table: " + table);
-        return EmptyNormalizationPlan.getInstance();
+    }
+    int candidateIdx = 0;
+    // find the first pair, successive in ascending size order, whose original indices are adjacent
+    while (candidateIdx < tableRegions.size()-1) {
+      if (Math.abs(regionsWithSize.get(candidateIdx).getThird() -
+        regionsWithSize.get(candidateIdx + 1).getThird()) == 1) {
+        break;
       }
+      candidateIdx++;
+    }
+    if (candidateIdx == tableRegions.size()-1) {
+      LOG.debug("No neighboring regions found for table: " + table);
+      return EmptyNormalizationPlan.getInstance();
+    }
+    Triple<HRegionInfo, Long, Integer> candidateRegion = regionsWithSize.get(candidateIdx);
+    Triple<HRegionInfo, Long, Integer> candidateRegion2 = regionsWithSize.get(candidateIdx+1);
+    if (candidateRegion.getSecond() + candidateRegion2.getSecond() < avgRegionSize) {
+      LOG.debug("Table " + table + ", smallest region size: " + candidateRegion.getSecond()
+        + " and its smallest neighbor size: " + candidateRegion2.getSecond()
+        + ", less than the avg size, merging them");
+      return new MergeNormalizationPlan(candidateRegion.getFirst(),
+        candidateRegion2.getFirst());
     }
+    LOG.debug("No normalization needed, regions look good for table: " + table);
+    return EmptyNormalizationPlan.getInstance();
   }
 
   private long getRegionSize(HRegionInfo hri) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/1e4992c6/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java
index 211911e..970af43 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java
@@ -142,8 +142,8 @@ public class TestSimpleRegionNormalizer {
     hris.add(hri4);
     regionSizes.put(hri4.getRegionName(), 15);
 
-    HRegionInfo hri5 = new HRegionInfo(testTable, Bytes.toBytes("ddd"), Bytes.toBytes("eee"));
-    hris.add(hri4);
+    HRegionInfo hri5 = new HRegionInfo(testTable, Bytes.toBytes("eee"), Bytes.toBytes("fff"));
+    hris.add(hri5);
     regionSizes.put(hri5.getRegionName(), 16);
 
     setupMocksForNormalizer(regionSizes, hris);
@@ -154,6 +154,45 @@ public class TestSimpleRegionNormalizer {
     assertEquals(hri3, ((MergeNormalizationPlan) plan).getSecondRegion());
   }
 
+  // Test for situation illustrated in HBASE-14867
+  @Test
+  public void testMergeOfSecondSmallestRegions() throws HBaseIOException {
+    TableName testTable = TableName.valueOf("testMergeOfSmallRegions");
+    List<HRegionInfo> hris = new ArrayList<>();
+    Map<byte[], Integer> regionSizes = new HashMap<>();
+
+    HRegionInfo hri1 = new HRegionInfo(testTable, Bytes.toBytes("aaa"), Bytes.toBytes("bbb"));
+    hris.add(hri1);
+    regionSizes.put(hri1.getRegionName(), 1);
+
+    HRegionInfo hri2 = new HRegionInfo(testTable, Bytes.toBytes("bbb"), Bytes.toBytes("ccc"));
+    hris.add(hri2);
+    regionSizes.put(hri2.getRegionName(), 10000);
+
+    HRegionInfo hri3 = new HRegionInfo(testTable, Bytes.toBytes("ccc"), Bytes.toBytes("ddd"));
+    hris.add(hri3);
+    regionSizes.put(hri3.getRegionName(), 10000);
+
+    HRegionInfo hri4 = new HRegionInfo(testTable, Bytes.toBytes("ddd"), Bytes.toBytes("eee"));
+    hris.add(hri4);
+    regionSizes.put(hri4.getRegionName(), 10000);
+
+    HRegionInfo hri5 = new HRegionInfo(testTable, Bytes.toBytes("eee"), Bytes.toBytes("fff"));
+    hris.add(hri5);
+    regionSizes.put(hri5.getRegionName(), 2700);
+
+    HRegionInfo hri6 = new HRegionInfo(testTable, Bytes.toBytes("fff"), Bytes.toBytes("ggg"));
+    hris.add(hri6);
+    regionSizes.put(hri6.getRegionName(), 2700);
+
+    setupMocksForNormalizer(regionSizes, hris);
+    NormalizationPlan plan = normalizer.computePlanForTable(testTable);
+
+    assertTrue(plan instanceof MergeNormalizationPlan);
+    assertEquals(hri5, ((MergeNormalizationPlan) plan).getFirstRegion());
+    assertEquals(hri6, ((MergeNormalizationPlan) plan).getSecondRegion());
+  }
+
   @Test
   public void testMergeOfSmallNonAdjacentRegions() throws HBaseIOException {
     TableName testTable = TableName.valueOf("testMergeOfSmallRegions");