You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by hu...@apache.org on 2021/11/03 19:53:36 UTC

[hbase] branch branch-2.4 updated: HBASE-26311 Balancer gets stuck in cohosted replica distribution (#3805)

This is an automated email from the ASF dual-hosted git repository.

huaxiangsun pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new 8ea5484  HBASE-26311 Balancer gets stuck in cohosted replica distribution (#3805)
8ea5484 is described below

commit 8ea548441c347590baab25538a88126e9523a44a
Author: clarax <cl...@gmail.com>
AuthorDate: Wed Nov 3 12:52:51 2021 -0700

    HBASE-26311 Balancer gets stuck in cohosted replica distribution (#3805)
    
    Signed-off-by: Huaxiang Sun <hu...@apache.org>
---
 .../hbase/master/balancer/BaseLoadBalancer.java    | 59 ++++------------------
 .../hbase/master/balancer/DoubleArrayCost.java     | 23 ++++++---
 .../master/balancer/StochasticLoadBalancer.java    | 39 ++++++++++----
 .../hbase/master/balancer/BalancerTestBase.java    |  1 -
 .../master/balancer/TestBaseLoadBalancer.java      |  2 -
 .../hbase/master/balancer/TestDoubleArrayCost.java |  4 +-
 .../balancer/TestStochasticBalancerJmxMetrics.java |  1 -
 .../balancer/TestStochasticLoadBalancer.java       |  6 +--
 .../TestStochasticLoadBalancerBalanceCluster.java  |  1 -
 ...ochasticLoadBalancerRegionReplicaSameHosts.java |  1 -
 ...ochasticLoadBalancerRegionReplicaWithRacks.java |  1 -
 11 files changed, 61 insertions(+), 77 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
index 03713c9..e8c6b1a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
@@ -167,12 +167,8 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
     int[]   regionIndexToServerIndex;    //regionIndex -> serverIndex
     int[]   initialRegionIndexToServerIndex;    //regionIndex -> serverIndex (initial cluster state)
     int[]   regionIndexToTableIndex;     //regionIndex -> tableIndex
-    int[][] numRegionsPerServerPerTable; // serverIndex -> tableIndex -> # regions
+    int[][] numRegionsPerServerPerTable; // tableIndex -> serverIndex -> # regions
     int[] numRegionsPerTable; // tableIndex -> region count
-    double[] meanRegionsPerTable; // mean region count per table
-    double[] regionSkewByTable;       // skew on RS per by table
-    double[] minRegionSkewByTable;       // min skew on RS per by table
-    double[] maxRegionSkewByTable;       // max skew on RS per by table
     int[]   regionIndexToPrimaryIndex;   //regionIndex -> regionIndex of the primary
     boolean hasRegionReplicas = false;   //whether there is regions with replicas
 
@@ -382,42 +378,24 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
       }
 
       numTables = tables.size();
-      LOG.debug("Number of tables={}", numTables);
-      numRegionsPerServerPerTable = new int[numServers][numTables];
+      LOG.debug("Number of tables={}, number of hosts={}, number of racks={}", numTables,
+        numHosts, numRacks);
+      numRegionsPerServerPerTable = new int[numTables][numServers];
       numRegionsPerTable = new int[numTables];
 
-      for (int i = 0; i < numServers; i++) {
-        for (int j = 0; j < numTables; j++) {
+      for (int i = 0; i < numTables; i++) {
+        for (int j = 0; j < numServers; j++) {
           numRegionsPerServerPerTable[i][j] = 0;
         }
       }
 
       for (int i=0; i < regionIndexToServerIndex.length; i++) {
         if (regionIndexToServerIndex[i] >= 0) {
-          numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
+          numRegionsPerServerPerTable[regionIndexToTableIndex[i]][regionIndexToServerIndex[i]]++;
           numRegionsPerTable[regionIndexToTableIndex[i]]++;
         }
       }
 
-      // Avoid repeated computation for planning
-      meanRegionsPerTable = new double[numTables];
-      regionSkewByTable = new double[numTables];
-      maxRegionSkewByTable  = new double[numTables];
-      minRegionSkewByTable = new double[numTables];
-
-      for (int i = 0; i < numTables; i++) {
-        meanRegionsPerTable[i] = Double.valueOf(numRegionsPerTable[i]) / numServers;
-        minRegionSkewByTable[i] += DoubleArrayCost.getMinSkew(numRegionsPerTable[i], numServers);
-        maxRegionSkewByTable[i] += DoubleArrayCost.getMaxSkew(numRegionsPerTable[i], numServers);
-      }
-
-      for (int[] aNumRegionsPerServerPerTable : numRegionsPerServerPerTable) {
-        for (int tableIdx = 0; tableIdx < aNumRegionsPerServerPerTable.length; tableIdx++) {
-          regionSkewByTable[tableIdx] +=
-            Math.abs(aNumRegionsPerServerPerTable[tableIdx] - meanRegionsPerTable[tableIdx]);
-        }
-      }
-
       for (int i = 0; i < regions.length; i ++) {
         RegionInfo info = regions[i];
         if (RegionReplicaUtil.isDefaultReplica(info)) {
@@ -853,15 +831,9 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
       }
       int tableIndex = regionIndexToTableIndex[region];
       if (oldServer >= 0) {
-        numRegionsPerServerPerTable[oldServer][tableIndex]--;
-        // update regionSkewPerTable for the move from old server
-        regionSkewByTable[tableIndex] += getSkewChangeFor(oldServer, tableIndex, -1);
+        numRegionsPerServerPerTable[tableIndex][oldServer]--;
       }
-      numRegionsPerServerPerTable[newServer][tableIndex]++;
-
-      // update regionSkewPerTable for the move to new server
-      regionSkewByTable[tableIndex] += getSkewChangeFor(newServer, tableIndex, 1);
-
+      numRegionsPerServerPerTable[tableIndex][newServer]++;
       // update for servers
       int primary = regionIndexToPrimaryIndex[region];
       if (oldServer >= 0) {
@@ -1034,20 +1006,11 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
           .append(Arrays.toString(serverIndicesSortedByRegionCount))
           .append(", regionsPerServer=").append(Arrays.deepToString(regionsPerServer));
 
-      desc.append(", regionSkewByTable=").append(Arrays.toString(regionSkewByTable))
-          .append(", numRegions=").append(numRegions).append(", numServers=").append(numServers)
-          .append(", numTables=").append(numTables).append(", numMovedRegions=")
+      desc.append(", numRegions=").append(numRegions).append(", numServers=").append(numServers)
+        .append(", numTables=").append(numTables).append(", numMovedRegions=")
           .append(numMovedRegions).append('}');
       return desc.toString();
     }
-
-    private double getSkewChangeFor(int serverIndex, int tableIndex, double regionCountChange) {
-      double curSkew = Math.abs(numRegionsPerServerPerTable[serverIndex][tableIndex] -
-        meanRegionsPerTable[tableIndex]);
-      double oldSkew = Math.abs(numRegionsPerServerPerTable[serverIndex][tableIndex] -
-        regionCountChange - meanRegionsPerTable[tableIndex]);
-      return curSkew - oldSkew;
-    }
   }
 
   // slop for regions
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/DoubleArrayCost.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/DoubleArrayCost.java
index 93a20b9..17e3fc7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/DoubleArrayCost.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/DoubleArrayCost.java
@@ -66,6 +66,9 @@ final class DoubleArrayCost {
   }
 
   private static double computeCost(double[] stats) {
+    if (stats == null || stats.length == 0) {
+      return 0;
+    }
     double totalCost = 0;
     double total = getSum(stats);
 
@@ -74,10 +77,11 @@ final class DoubleArrayCost {
 
     for (int i = 0; i < stats.length; i++) {
       double n = stats[i];
-      double diff = Math.abs(mean - n);
+      double diff = (mean - n) * (mean - n);
       totalCost += diff;
     }
-
+    // No need to compute standard deviation with division by cluster size when scaling.
+    totalCost = Math.sqrt(totalCost);
     return StochasticLoadBalancer.scale(getMinSkew(total, count),
       getMaxSkew(total, count), totalCost);
   }
@@ -95,18 +99,22 @@ final class DoubleArrayCost {
    * @param total is total number of regions
    */
   public static double getMinSkew(double total, double numServers) {
+    if (numServers == 0) {
+      return 0;
+    }
     double mean = total / numServers;
     // It's possible that there aren't enough regions to go around
     double min;
     if (numServers > total) {
-      min = ((numServers - total) * mean + (1 - mean) * total) ;
+      min = ((numServers - total) * mean * mean + (1 - mean) * (1 - mean) * total);
     } else {
       // Some will have 1 more than everything else.
       int numHigh = (int) (total - (Math.floor(mean) * numServers));
       int numLow = (int) (numServers - numHigh);
-      min = numHigh * (Math.ceil(mean) - mean) + numLow * (mean - Math.floor(mean));
+      min = numHigh * (Math.ceil(mean) - mean) * (Math.ceil(mean) - mean) +
+        numLow * (mean - Math.floor(mean)) * (mean - Math.floor(mean));
     }
-    return min;
+    return Math.sqrt(min);
   }
 
   /**
@@ -116,7 +124,10 @@ final class DoubleArrayCost {
    * @param total is total number of regions
    */
   public static double getMaxSkew(double total, double numServers) {
+    if (numServers == 0) {
+      return 0;
+    }
     double mean = total / numServers;
-    return (total - mean) + (numServers - 1) * mean;
+    return Math.sqrt((total - mean) * (total - mean) + (numServers - 1) * mean * mean);
   }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
index d58bdf0..b874b54 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
@@ -884,7 +884,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
     static final float DEFAULT_MOVE_COST = 7;
     static final float DEFAULT_MOVE_COST_OFFPEAK = 3;
     private static final int DEFAULT_MAX_MOVES = 600;
-    private static final float DEFAULT_MAX_MOVE_PERCENT = 0.25f;
+    private static final float DEFAULT_MAX_MOVE_PERCENT = 1.0f;
 
     private final float maxMovesPercent;
     private final OffPeakHours offPeakHours;
@@ -959,14 +959,6 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
           costs[i] = cluster.regionsPerServer[i].length;
         }
       });
-      LOG.debug("{} sees a total of {} servers and {} regions.", getClass().getSimpleName(),
-        cluster.numServers, cluster.numRegions);
-      if (LOG.isTraceEnabled()) {
-        for (int i =0; i < cluster.numServers; i++) {
-          LOG.trace("{} sees server '{}' has {} regions", getClass().getSimpleName(),
-              cluster.servers[i], cluster.regionsPerServer[i].length);
-        }
-      }
     }
 
     @Override
@@ -1052,18 +1044,43 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
     private static final String TABLE_SKEW_COST_KEY =
         "hbase.master.balancer.stochastic.tableSkewCost";
     private static final float DEFAULT_TABLE_SKEW_COST = 35;
+    DoubleArrayCost[] costsPerTable;
 
     TableSkewCostFunction(Configuration conf) {
       super(conf);
       this.setMultiplier(conf.getFloat(TABLE_SKEW_COST_KEY, DEFAULT_TABLE_SKEW_COST));
     }
+    @Override
+    void init(Cluster cluster) {
+      super.init(cluster);
+      costsPerTable = new DoubleArrayCost[cluster.numTables];
+      for (int tableIdx = 0; tableIdx < cluster.numTables; tableIdx++) {
+        costsPerTable[tableIdx] = new DoubleArrayCost();
+        costsPerTable[tableIdx].prepare(cluster.numServers);
+        final int tableIndex = tableIdx;
+        costsPerTable[tableIdx].applyCostsChange(costs -> {
+          // Keep a cached deep copy for change-only recomputation
+          for (int i = 0; i < cluster.numServers; i++) {
+            costs[i] = cluster.numRegionsPerServerPerTable[tableIndex][i];
+          }
+        });
+      }
+    }
+
+    @Override
+    protected void regionMoved(int region, int oldServer, int newServer) {
+      int tableIdx = cluster.regionIndexToTableIndex[region];
+      costsPerTable[tableIdx].applyCostsChange(costs -> {
+        costs[oldServer] = cluster.numRegionsPerServerPerTable[tableIdx][oldServer];
+        costs[newServer] = cluster.numRegionsPerServerPerTable[tableIdx][newServer];
+      });
+    }
 
     @Override
     protected double cost() {
       double cost = 0;
       for (int tableIdx = 0; tableIdx < cluster.numTables; tableIdx++) {
-        cost += scale(cluster.minRegionSkewByTable[tableIdx],
-          cluster.maxRegionSkewByTable[tableIdx], cluster.regionSkewByTable[tableIdx]);
+        cost += costsPerTable[tableIdx].cost();
       }
       return cost;
     }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java
index a4823ae..3fdcf57 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java
@@ -69,7 +69,6 @@ public class BalancerTestBase {
   public static void beforeAllTests() throws Exception {
     conf = HBaseConfiguration.create();
     conf.setClass("hbase.util.ip.to.rack.determiner", MockMapping.class, DNSToSwitchMapping.class);
-    conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f);
     conf.setFloat("hbase.regions.slop", 0.0f);
     conf.setFloat("hbase.master.balancer.stochastic.localityCost", 0);
     loadBalancer = new StochasticLoadBalancer();
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBaseLoadBalancer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBaseLoadBalancer.java
index 0621116..a2d2643 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBaseLoadBalancer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBaseLoadBalancer.java
@@ -389,8 +389,6 @@ public class TestBaseLoadBalancer extends BalancerTestBase {
 
     // now move region1 from servers[0] to servers[2]
     cluster.doAction(new MoveRegionAction(0, 0, 2));
-    // check that the regionSkewByTable for "table" has increased to 2
-    assertEquals(2, cluster.regionSkewByTable[0], 0.01);
     // now repeat check whether moving region1 from servers[1] to servers[2]
     // would lower availability
     assertTrue(cluster.wouldLowerAvailability(hri1, servers[2]));
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestDoubleArrayCost.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestDoubleArrayCost.java
index f0b8db8..38834a8 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestDoubleArrayCost.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestDoubleArrayCost.java
@@ -62,6 +62,6 @@ public class TestDoubleArrayCost {
       }
       costs[100] = 100;
     });
-    assertEquals(0.5, cost.cost(), 0.01);
+    assertEquals(0.0708, cost.cost(), 0.01);
   }
-}
\ No newline at end of file
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticBalancerJmxMetrics.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticBalancerJmxMetrics.java
index ab1c76c..f85bd9e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticBalancerJmxMetrics.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticBalancerJmxMetrics.java
@@ -94,7 +94,6 @@ public class TestStochasticBalancerJmxMetrics extends BalancerTestBase {
     conf = UTIL.getConfiguration();
 
     conf.setClass("hbase.util.ip.to.rack.determiner", MockMapping.class, DNSToSwitchMapping.class);
-    conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f);
     conf.setFloat("hbase.regions.slop", 0.0f);
     conf.set(CoprocessorHost.REGIONSERVER_COPROCESSOR_CONF_KEY, JMXListener.class.getName());
     Random rand = new Random();
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java
index ea65f96..369ac3e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java
@@ -263,13 +263,13 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
       cluster.setNumRegions(10000);
       cluster.setNumMovedRegions(250);
       cost = costFunction.cost();
-      assertEquals(0.1f, cost, 0.001);
+      assertEquals(0.025f, cost, 0.001);
       cluster.setNumMovedRegions(1250);
       cost = costFunction.cost();
-      assertEquals(0.5f, cost, 0.001);
+      assertEquals(0.125f, cost, 0.001);
       cluster.setNumMovedRegions(2500);
       cost = costFunction.cost();
-      assertEquals(1.0f, cost, 0.01);
+      assertEquals(0.25f, cost, 0.01);
     }
   }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerBalanceCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerBalanceCluster.java
index 48a996e..125eda3 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerBalanceCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerBalanceCluster.java
@@ -52,7 +52,6 @@ public class TestStochasticLoadBalancerBalanceCluster extends BalancerTestBase {
   @Test
   public void testBalanceCluster() throws Exception {
     conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 3 * 60 * 1000); // 180 sec
-    conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
     conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 20000000L);
     loadBalancer.setConf(conf);
     for (int[] mockCluster : clusterStateMocks) {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplicaSameHosts.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplicaSameHosts.java
index 470230d..3a3d701 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplicaSameHosts.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplicaSameHosts.java
@@ -41,7 +41,6 @@ public class TestStochasticLoadBalancerRegionReplicaSameHosts extends BalancerTe
   public void testRegionReplicationOnMidClusterSameHosts() {
     conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L);
     conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 90 * 1000); // 90 sec
-    conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
     loadBalancer.setConf(conf);
     int numHosts = 30;
     int numRegions = 30 * 30;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplicaWithRacks.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplicaWithRacks.java
index 648d860..c1291a9 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplicaWithRacks.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplicaWithRacks.java
@@ -58,7 +58,6 @@ public class TestStochasticLoadBalancerRegionReplicaWithRacks extends BalancerTe
 
   @Test
   public void testRegionReplicationOnMidClusterWithRacks() {
-    conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 100000000L);
     conf.setBoolean("hbase.master.balancer.stochastic.runMaxSteps", true);
     conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
     conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 120 * 1000); // 120 sec