You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2021/04/16 05:41:00 UTC
[hbase] branch branch-2 updated: HBASE-25775 Use a special balancer to deal with maintenance mode (#3161)
This is an automated email from the ASF dual-hosted git repository.
zhangduo pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2 by this push:
new a177fb4 HBASE-25775 Use a special balancer to deal with maintenance mode (#3161)
a177fb4 is described below
commit a177fb4c62e3af40caa1046c52b1390c12022cc7
Author: Duo Zhang <zh...@apache.org>
AuthorDate: Fri Apr 16 09:50:24 2021 +0800
HBASE-25775 Use a special balancer to deal with maintenance mode (#3161)
Signed-off-by: Wellington Chevreuil <wc...@apache.org>
---
.../org/apache/hadoop/hbase/master/HMaster.java | 11 +-
.../hbase/master/balancer/BaseLoadBalancer.java | 51 ++++----
.../master/balancer/MaintenanceLoadBalancer.java | 131 +++++++++++++++++++++
.../hadoop/hbase/master/TestMasterRepairMode.java | 47 ++++----
4 files changed, 192 insertions(+), 48 deletions(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 1089adf..62d77b2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -107,6 +107,7 @@ import org.apache.hadoop.hbase.master.balancer.BalancerChore;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
+import org.apache.hadoop.hbase.master.balancer.MaintenanceLoadBalancer;
import org.apache.hadoop.hbase.master.cleaner.DirScanPool;
import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
@@ -670,9 +671,13 @@ public class HMaster extends HRegionServer implements MasterServices {
* Initialize all ZK based system trackers. But do not include {@link RegionServerTracker}, it
* should have already been initialized along with {@link ServerManager}.
*/
- @InterfaceAudience.Private
- protected void initializeZKBasedSystemTrackers()
- throws IOException, InterruptedException, KeeperException, ReplicationException {
+ private void initializeZKBasedSystemTrackers()
+ throws IOException, KeeperException, ReplicationException {
+ if (maintenanceMode) {
+ // in maintenance mode, always use MaintenanceLoadBalancer.
+ conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MaintenanceLoadBalancer.class,
+ LoadBalancer.class);
+ }
this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
this.loadBalancerTracker = new LoadBalancerTracker(zooKeeper, this);
this.loadBalancerTracker.start();
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
index f10f455..d925a79 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
@@ -35,6 +35,7 @@ import java.util.NavigableMap;
import java.util.Random;
import java.util.Set;
import java.util.TreeMap;
+import java.util.concurrent.ThreadLocalRandom;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.apache.commons.lang3.NotImplementedException;
@@ -1027,7 +1028,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
protected float overallSlop;
protected Configuration config = HBaseConfiguration.create();
protected RackManager rackManager;
- private static final Random RANDOM = new Random(System.currentTimeMillis());
private static final Logger LOG = LoggerFactory.getLogger(BaseLoadBalancer.class);
protected MetricsBalancer metricsBalancer = null;
protected ClusterMetrics clusterStatus = null;
@@ -1041,17 +1041,21 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
@Deprecated
protected boolean onlySystemTablesOnMaster;
- protected boolean maintenanceMode;
-
@Override
public void setConf(Configuration conf) {
this.config = conf;
setSlop(conf);
- if (slop < 0) slop = 0;
- else if (slop > 1) slop = 1;
+ if (slop < 0) {
+ slop = 0;
+ } else if (slop > 1) {
+ slop = 1;
+ }
- if (overallSlop < 0) overallSlop = 0;
- else if (overallSlop > 1) overallSlop = 1;
+ if (overallSlop < 0) {
+ overallSlop = 0;
+ } else if (overallSlop > 1) {
+ overallSlop = 1;
+ }
this.onlySystemTablesOnMaster = LoadBalancer.isSystemTablesOnlyOnMaster(this.config);
@@ -1061,8 +1065,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
}
this.isByTable = conf.getBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, isByTable);
// Print out base configs. Don't print overallSlop since it for simple balancer exclusively.
- LOG.info("slop={}, systemTablesOnMaster={}",
- this.slop, this.onlySystemTablesOnMaster);
+ LOG.info("slop={}, systemTablesOnMaster={}", this.slop, this.onlySystemTablesOnMaster);
}
protected void setSlop(Configuration conf) {
@@ -1079,8 +1082,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
*/
@Deprecated
public boolean shouldBeOnMaster(RegionInfo region) {
- return (this.maintenanceMode || this.onlySystemTablesOnMaster)
- && region.getTable().isSystemTable();
+ return this.onlySystemTablesOnMaster && region.getTable().isSystemTable();
}
/**
@@ -1147,7 +1149,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
protected Map<ServerName, List<RegionInfo>> assignMasterSystemRegions(
Collection<RegionInfo> regions, List<ServerName> servers) {
Map<ServerName, List<RegionInfo>> assignments = new TreeMap<>();
- if (this.maintenanceMode || this.onlySystemTablesOnMaster) {
+ if (this.onlySystemTablesOnMaster) {
if (masterServerName != null && servers.contains(masterServerName)) {
assignments.put(masterServerName, new ArrayList<>());
for (RegionInfo region : regions) {
@@ -1181,9 +1183,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
if (useRegionFinder) {
this.regionFinder.setServices(masterServices);
}
- if (this.services.isInMaintenanceMode()) {
- this.maintenanceMode = true;
- }
}
@Override
@@ -1298,7 +1297,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
/**
* only need assign system table
*/
- if (this.maintenanceMode || regions.isEmpty()) {
+ if (regions.isEmpty()) {
return assignments;
}
@@ -1438,7 +1437,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
regions = regions.entrySet().stream().filter(e -> !masterRegions.contains(e.getKey()))
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
}
- if (this.maintenanceMode || regions.isEmpty()) {
+ if (regions.isEmpty()) {
return assignments;
}
@@ -1585,8 +1584,9 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
final int maxIterations = numServers * 4;
int iterations = 0;
List<ServerName> usedSNs = new ArrayList<>(servers.size());
+ Random rand = ThreadLocalRandom.current();
do {
- int i = RANDOM.nextInt(numServers);
+ int i = rand.nextInt(numServers);
sn = servers.get(i);
if (!usedSNs.contains(sn)) {
usedSNs.add(sn);
@@ -1616,13 +1616,14 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
*/
private void roundRobinAssignment(Cluster cluster, List<RegionInfo> regions,
List<ServerName> servers, Map<ServerName, List<RegionInfo>> assignments) {
+ Random rand = ThreadLocalRandom.current();
List<RegionInfo> unassignedRegions = new ArrayList<>();
int numServers = servers.size();
int numRegions = regions.size();
int max = (int) Math.ceil((float) numRegions / numServers);
int serverIdx = 0;
if (numServers > 1) {
- serverIdx = RANDOM.nextInt(numServers);
+ serverIdx = rand.nextInt(numServers);
}
int regionIdx = 0;
for (int j = 0; j < numServers; j++) {
@@ -1644,17 +1645,17 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
List<RegionInfo> lastFewRegions = new ArrayList<>();
// assign the remaining by going through the list and try to assign to servers one-by-one
- serverIdx = RANDOM.nextInt(numServers);
- OUTER : for (RegionInfo region : unassignedRegions) {
+ serverIdx = rand.nextInt(numServers);
+ for (RegionInfo region : unassignedRegions) {
boolean assigned = false;
- INNER : for (int j = 0; j < numServers; j++) { // try all servers one by one
+ for (int j = 0; j < numServers; j++) { // try all servers one by one
ServerName server = servers.get((j + serverIdx) % numServers);
if (cluster.wouldLowerAvailability(region, server)) {
- continue INNER;
+ continue;
} else {
assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region);
cluster.doAssignRegion(region, server);
- serverIdx = (j + serverIdx + 1) % numServers; //remain from next server
+ serverIdx = (j + serverIdx + 1) % numServers; // remain from next server
assigned = true;
break;
}
@@ -1666,7 +1667,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
// just sprinkle the rest of the regions on random regionservers. The balanceCluster will
// make it optimal later. we can end up with this if numReplicas > numServers.
for (RegionInfo region : lastFewRegions) {
- int i = RANDOM.nextInt(numServers);
+ int i = rand.nextInt(numServers);
ServerName server = servers.get(i);
assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region);
cluster.doAssignRegion(region, server);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/MaintenanceLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/MaintenanceLoadBalancer.java
new file mode 100644
index 0000000..5c25272
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/MaintenanceLoadBalancer.java
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.balancer;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.hbase.ClusterMetrics;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.LoadBalancer;
+import org.apache.hadoop.hbase.master.MasterServices;
+import org.apache.hadoop.hbase.master.RegionPlan;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * a balancer which is only used in maintenance mode.
+ */
+@InterfaceAudience.Private
+public class MaintenanceLoadBalancer extends Configured implements LoadBalancer {
+
+ private volatile boolean stopped = false;
+
+ @Override
+ public void stop(String why) {
+ stopped = true;
+ }
+
+ @Override
+ public boolean isStopped() {
+ return stopped;
+ }
+
+ @Override
+ public void setClusterMetrics(ClusterMetrics st) {
+ }
+
+ @Override
+ public void setMasterServices(MasterServices masterServices) {
+ }
+
+ @Override
+ public List<RegionPlan> balanceCluster(
+ Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfAllTable) throws IOException {
+ // do not need to balance in maintenance mode
+ return Collections.emptyList();
+ }
+
+ @Override
+ public List<RegionPlan> balanceTable(TableName tableName,
+ Map<ServerName, List<RegionInfo>> loadOfOneTable) {
+ return Collections.emptyList();
+ }
+
+ private Map<ServerName, List<RegionInfo>> assign(Collection<RegionInfo> regions,
+ List<ServerName> servers) {
+ // should only have 1 region server in maintenance mode
+ assert servers.size() == 1;
+ List<RegionInfo> systemRegions =
+ regions.stream().filter(r -> r.getTable().isSystemTable()).collect(Collectors.toList());
+ if (!systemRegions.isEmpty()) {
+ return Collections.singletonMap(servers.get(0), systemRegions);
+ } else {
+ return Collections.emptyMap();
+ }
+ }
+
+ @Override
+ public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> regions,
+ List<ServerName> servers) {
+ return assign(regions, servers);
+ }
+
+ @Override
+ public Map<ServerName, List<RegionInfo>> retainAssignment(Map<RegionInfo, ServerName> regions,
+ List<ServerName> servers) {
+ return assign(regions.keySet(), servers);
+ }
+
+ @Override
+ public ServerName randomAssignment(RegionInfo regionInfo, List<ServerName> servers) {
+ // should only have 1 region server in maintenance mode
+ assert servers.size() == 1;
+ return regionInfo.getTable().isSystemTable() ? servers.get(0) : null;
+ }
+
+ @Override
+ public void initialize() {
+ }
+
+ @Override
+ public void regionOnline(RegionInfo regionInfo, ServerName sn) {
+ }
+
+ @Override
+ public void regionOffline(RegionInfo regionInfo) {
+ }
+
+ @Override
+ public void onConfigurationChange(Configuration conf) {
+ }
+
+ @Override
+ public void postMasterStartupInitialize() {
+ }
+
+ @Override
+ public void updateBalancerStatus(boolean status) {
+ }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterRepairMode.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterRepairMode.java
index 6e85448..f506816 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterRepairMode.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterRepairMode.java
@@ -18,9 +18,11 @@
package org.apache.hadoop.hbase.master;
import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertThrows;
import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
+
import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import org.apache.hadoop.conf.Configuration;
@@ -29,7 +31,10 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.StartMiniClusterOption;
import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.AsyncConnection;
+import org.apache.hadoop.hbase.client.AsyncTable;
import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
@@ -48,12 +53,12 @@ import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-@Category({MasterTests.class, LargeTests.class})
+@Category({ MasterTests.class, LargeTests.class })
public class TestMasterRepairMode {
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
- HBaseClassTestRule.forClass(TestMasterRepairMode.class);
+ HBaseClassTestRule.forClass(TestMasterRepairMode.class);
@Rule
public TestName name = new TestName();
@@ -84,16 +89,14 @@ public class TestMasterRepairMode {
public void testNewCluster() throws Exception {
enableMaintenanceMode();
- TEST_UTIL.startMiniCluster(StartMiniClusterOption.builder()
- .numRegionServers(0)
- .numDataNodes(3)
- .build());
+ TEST_UTIL.startMiniCluster(
+ StartMiniClusterOption.builder().numRegionServers(0).numDataNodes(3).build());
Connection conn = TEST_UTIL.getConnection();
assertTrue(conn.getAdmin().isMasterInMaintenanceMode());
try (Table table = conn.getTable(TableName.META_TABLE_NAME);
- ResultScanner scanner = table.getScanner(new Scan())) {
+ ResultScanner scanner = table.getScanner(new Scan())) {
assertNotNull("Could not read meta.", scanner.next());
}
}
@@ -113,25 +116,29 @@ public class TestMasterRepairMode {
LOG.info("Starting master-only");
enableMaintenanceMode();
- TEST_UTIL.startMiniHBaseCluster(StartMiniClusterOption.builder()
- .numRegionServers(0).createRootDir(false).build());
+ TEST_UTIL.startMiniHBaseCluster(
+ StartMiniClusterOption.builder().numRegionServers(0).createRootDir(false).build());
Connection conn = TEST_UTIL.getConnection();
assertTrue(conn.getAdmin().isMasterInMaintenanceMode());
try (Table table = conn.getTable(TableName.META_TABLE_NAME);
- ResultScanner scanner = table.getScanner(HConstants.TABLE_FAMILY);
- Stream<Result> results = StreamSupport.stream(scanner.spliterator(), false)) {
+ ResultScanner scanner = table.getScanner(HConstants.TABLE_FAMILY);
+ Stream<Result> results = StreamSupport.stream(scanner.spliterator(), false)) {
assertTrue("Did not find user table records while reading hbase:meta",
- results.anyMatch(r -> Arrays.equals(r.getRow(), testRepairMode.getName())));
+ results.anyMatch(r -> Arrays.equals(r.getRow(), testRepairMode.getName())));
}
-
- try (Table table = conn.getTable(testRepairMode);
- ResultScanner scanner = table.getScanner(new Scan())) {
- scanner.next();
- fail("Should not be able to access user-space tables in repair mode.");
- } catch (Exception e) {
- // Expected
+ try (AsyncConnection asyncConn =
+ ConnectionFactory.createAsyncConnection(TEST_UTIL.getConfiguration()).get()) {
+ // use async table so we can set the timeout and retry value to let the operation fail fast
+ AsyncTable<?> table = asyncConn.getTableBuilder(testRepairMode)
+ .setScanTimeout(5, TimeUnit.SECONDS).setMaxRetries(2).build();
+ assertThrows("Should not be able to access user-space tables in repair mode.",
+ Exception.class, () -> {
+ try (ResultScanner scanner = table.getScanner(new Scan())) {
+ scanner.next();
+ }
+ });
}
}
}