You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2022/01/07 15:51:13 UTC
[hbase] branch master updated: HBASE-26643 LoadBalancer should not return empty map (#4005)
This is an automated email from the ASF dual-hosted git repository.
zhangduo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/master by this push:
new 443bf30 HBASE-26643 LoadBalancer should not return empty map (#4005)
443bf30 is described below
commit 443bf30a5d12f74573447c1262f55009440bc122
Author: Duo Zhang <zh...@apache.org>
AuthorDate: Fri Jan 7 23:50:37 2022 +0800
HBASE-26643 LoadBalancer should not return empty map (#4005)
Signed-off-by: Yulin Niu <ni...@apache.org>
---
.../hbase/master/balancer/BaseLoadBalancer.java | 4 +-
.../procedure/TestCreateTableNoRegionServer.java | 162 +++++++++++++++++++++
2 files changed, 164 insertions(+), 2 deletions(-)
diff --git a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
index 01cb200..ac5ef44 100644
--- a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
+++ b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
@@ -164,7 +164,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
int numServers = servers == null ? 0 : servers.size();
if (numServers == 0) {
LOG.warn("Wanted to do round robin assignment but no servers to assign to");
- return Collections.emptyMap();
+ return Collections.singletonMap(BOGUS_SERVER_NAME, new ArrayList<>(regions));
}
// TODO: instead of retainAssignment() and roundRobinAssignment(), we should just run the
@@ -272,7 +272,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
int numServers = servers == null ? 0 : servers.size();
if (numServers == 0) {
LOG.warn("Wanted to do retain assignment but no servers to assign to");
- return Collections.emptyMap();
+ return Collections.singletonMap(BOGUS_SERVER_NAME, new ArrayList<>(regions.keySet()));
}
if (numServers == 1) { // Only one server, nothing fancy we can do here
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableNoRegionServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableNoRegionServer.java
new file mode 100644
index 0000000..a2d3aca
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableNoRegionServer.java
@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtil;
+import org.apache.hadoop.hbase.StartTestingClusterOption;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.AsyncAdmin;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
+import org.apache.hadoop.hbase.procedure2.Procedure;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.metrics2.impl.JmxCacheBuster;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.CreateTableState;
+
+@Category({ MasterTests.class, MediumTests.class })
+public class TestCreateTableNoRegionServer {
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestCreateTableNoRegionServer.class);
+
+ private static final Logger LOG = LoggerFactory.getLogger(TestCreateTableNoRegionServer.class);
+
+ private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
+
+ private static CountDownLatch ARRIVE;
+
+ private static CountDownLatch RESUME;
+
+ public static final class HMasterForTest extends HMaster {
+
+ public HMasterForTest(Configuration conf) throws IOException {
+ super(conf);
+ }
+
+ private boolean isInAssignRegionsState() {
+ try {
+ for (StackTraceElement e : Thread.currentThread().getStackTrace()) {
+ if (e.getClassName().equals(CreateTableProcedure.class.getName()) &&
+ e.getMethodName().equals("executeFromState")) {
+ for (Procedure<?> proc : getProcedures()) {
+ if (proc instanceof CreateTableProcedure && ((CreateTableProcedure) proc)
+ .getCurrentStateId() == CreateTableState.CREATE_TABLE_ASSIGN_REGIONS_VALUE) {
+ return true;
+ }
+ }
+ }
+ }
+ } catch (IOException e) {
+ throw new UncheckedIOException(e);
+ }
+ return false;
+ }
+
+ @Override
+ public AssignmentManager getAssignmentManager() {
+ if (ARRIVE != null && isInAssignRegionsState()) {
+ ARRIVE.countDown();
+ ARRIVE = null;
+ try {
+ RESUME.await();
+ } catch (InterruptedException e) {
+ }
+ }
+ return super.getAssignmentManager();
+ }
+ }
+
+ @BeforeClass
+ public static void setUp() throws Exception {
+ UTIL.startMiniCluster(
+ StartTestingClusterOption.builder().masterClass(HMasterForTest.class).build());
+ // this may cause dead lock if there is no live region server and want to start a new server.
+ // In JmxCacheBuster we will reinitialize the metrics system so it will get some metrics which
+ // will need to access meta, since there is no region server, the request will hang there for a
+ // long time while holding the lock of MetricsSystemImpl, but when start a new region server, we
+ // also need to update metrics in handleReportForDutyResponse, since we are all in the same
+ // process and uses the same metrics instance, we hit dead lock.
+ JmxCacheBuster.stop();
+ }
+
+ @AfterClass
+ public static void tearDown() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ @Test
+ public void testCreate() throws Exception {
+ TableDescriptor td = TableDescriptorBuilder.newBuilder(TableName.valueOf("test"))
+ .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f1")).build();
+ AsyncAdmin admin = UTIL.getAsyncConnection().getAdmin();
+ ARRIVE = new CountDownLatch(1);
+ RESUME = new CountDownLatch(1);
+ CompletableFuture<Void> future = admin.createTable(td);
+ ARRIVE.await();
+
+ UTIL.getMiniHBaseCluster().stopRegionServer(0).join();
+
+ // make sure we the region server is done.
+ UTIL.waitFor(30000,
+ () -> UTIL.getMiniHBaseCluster().getMaster().getServerManager().getOnlineServers().isEmpty());
+ RESUME.countDown();
+
+ Thread.sleep(10000);
+ // the procedure should still be in the CREATE_TABLE_ASSIGN_REGIONS state, but here, we just
+ // warn it as it may cause more serious problem later.
+ for (Procedure<?> proc : UTIL.getMiniHBaseCluster().getMaster().getProcedures()) {
+ if (proc instanceof CreateTableProcedure && !proc.isFinished() &&
+ ((CreateTableProcedure) proc)
+ .getCurrentStateId() != CreateTableState.CREATE_TABLE_ASSIGN_REGIONS_VALUE) {
+ LOG.warn("Create table procedure {} assigned regions without a region server!", proc);
+ }
+ }
+ UTIL.getMiniHBaseCluster().startRegionServer();
+ // the creation should finally be done
+ future.get(30, TimeUnit.SECONDS);
+ // make sure we could put to the table
+ try (Table table = UTIL.getConnection().getTableBuilder(td.getTableName(), null)
+ .setOperationTimeout(5000).build()) {
+ table.put(new Put(Bytes.toBytes(0)).addColumn(td.getColumnFamilies()[0].getName(),
+ Bytes.toBytes("q"), Bytes.toBytes(0)));
+ }
+ }
+}