You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ch...@apache.org on 2022/02/18 03:00:09 UTC

[hbase] branch branch-2.4 updated: HBASE-26712 Balancer encounters NPE in rare case (#4117)

This is an automated email from the ASF dual-hosted git repository.

chenglei pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new 9999f87  HBASE-26712 Balancer encounters NPE in rare case (#4117)
9999f87 is described below

commit 9999f875c92e7ddac8780d31e217058c50a445d9
Author: chenglei <ch...@apache.org>
AuthorDate: Fri Feb 18 10:59:29 2022 +0800

    HBASE-26712 Balancer encounters NPE in rare case (#4117)
---
 .../org/apache/hadoop/hbase/master/HMaster.java    |  24 ++-
 .../hbase/master/assignment/AssignmentManager.java |   4 +-
 .../hadoop/hbase/master/TestMasterBalancerNPE.java | 212 +++++++++++++++++++++
 3 files changed, 237 insertions(+), 3 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 9e7942b..bb8c4ef 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -22,6 +22,7 @@ import static org.apache.hadoop.hbase.HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS
 import static org.apache.hadoop.hbase.HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_ZK;
 import static org.apache.hadoop.hbase.util.DNS.MASTER_HOSTNAME_KEY;
 
+import com.google.errorprone.annotations.RestrictedApi;
 import com.google.protobuf.Descriptors;
 import com.google.protobuf.Service;
 import java.io.IOException;
@@ -332,6 +333,7 @@ public class HMaster extends HRegionServer implements MasterServices {
 
   private LoadBalancer balancer;
   private BalancerChore balancerChore;
+  private static boolean disableBalancerChoreForTest = false;
   private RegionNormalizerManager regionNormalizerManager;
   private ClusterStatusChore clusterStatusChore;
   private ClusterStatusPublisher clusterStatusPublisherChore = null;
@@ -1027,7 +1029,9 @@ public class HMaster extends HRegionServer implements MasterServices {
     this.clusterStatusChore = new ClusterStatusChore(this, balancer);
     getChoreService().scheduleChore(clusterStatusChore);
     this.balancerChore = new BalancerChore(this);
-    getChoreService().scheduleChore(balancerChore);
+    if (!disableBalancerChoreForTest) {
+      getChoreService().scheduleChore(balancerChore);
+    }
     if (regionNormalizerManager != null) {
       getChoreService().scheduleChore(regionNormalizerManager.getRegionNormalizerChore());
     }
@@ -3826,4 +3830,22 @@ public class HMaster extends HRegionServer implements MasterServices {
   public MetaLocationSyncer getMetaLocationSyncer() {
     return metaLocationSyncer;
   }
+
+  @RestrictedApi(explanation = "Should only be called in tests", link = "",
+      allowedOnPath = ".*/src/test/.*")
+  void setLoadBalancer(LoadBalancer loadBalancer) {
+    this.balancer = loadBalancer;
+  }
+
+  @RestrictedApi(explanation = "Should only be called in tests", link = "",
+      allowedOnPath = ".*/src/test/.*")
+  void setAssignmentManager(AssignmentManager assignmentManager) {
+    this.assignmentManager = assignmentManager;
+  }
+
+  @RestrictedApi(explanation = "Should only be called in tests", link = "",
+      allowedOnPath = ".*/src/test/.*")
+  static void setDisableBalancerChoreForTest(boolean disable) {
+    disableBalancerChoreForTest = disable;
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 0bb05e3..ba56ef5 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -755,9 +755,9 @@ public class AssignmentManager {
   public Future<byte[]> balance(RegionPlan regionPlan) throws HBaseIOException {
     ServerName current =
       this.getRegionStates().getRegionAssignments().get(regionPlan.getRegionInfo());
-    if (!current.equals(regionPlan.getSource())) {
+    if (current == null || !current.equals(regionPlan.getSource())) {
       LOG.debug("Skip region plan {}, source server not match, current region location is {}",
-        regionPlan, current);
+        regionPlan, current == null ? "(null)" : current);
       return null;
     }
     return moveAsync(regionPlan);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalancerNPE.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalancerNPE.java
new file mode 100644
index 0000000..c651672
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalancerNPE.java
@@ -0,0 +1,212 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.CyclicBarrier;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
+import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.mockito.Mockito;
+import org.mockito.invocation.InvocationOnMock;
+
+@Category({ MasterTests.class, MediumTests.class })
+public class TestMasterBalancerNPE {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestMasterBalancerNPE.class);
+
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static final byte[] FAMILYNAME = Bytes.toBytes("fam");
+  @Rule
+  public TestName name = new TestName();
+
+  @Before
+  public void setupConfiguration() {
+    /**
+     * Make {@link BalancerChore} not run,so does not disrupt the test.
+     */
+    HMaster.setDisableBalancerChoreForTest(true);
+  }
+
+  @After
+  public void shutdown() throws Exception {
+    HMaster.setDisableBalancerChoreForTest(false);
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  /**
+   * This test is for HBASE-26712, to make the region is unassigned just before
+   * {@link AssignmentManager#balance} is invoked on the region.
+   */
+  @Test
+  public void testBalancerNPE() throws Exception {
+    TEST_UTIL.startMiniCluster(2);
+    TEST_UTIL.getAdmin().balancerSwitch(false, true);
+    TableName tableName = createTable(name.getMethodName());
+    final HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
+    List<RegionInfo> regionInfos = TEST_UTIL.getAdmin().getRegions(tableName);
+    assertTrue(regionInfos.size() == 1);
+    final ServerName serverName1 =
+        TEST_UTIL.getMiniHBaseCluster().getRegionServer(0).getServerName();
+    final ServerName serverName2 =
+        TEST_UTIL.getMiniHBaseCluster().getRegionServer(1).getServerName();
+
+    final RegionInfo regionInfo = regionInfos.get(0);
+
+    StochasticLoadBalancer loadBalancer = (StochasticLoadBalancer)master.getLoadBalancer();
+    StochasticLoadBalancer spiedLoadBalancer = Mockito.spy(loadBalancer);
+    final AtomicReference<RegionPlan> regionPlanRef = new AtomicReference<RegionPlan>();
+
+    /**
+     * Mock {@link StochasticLoadBalancer#balanceTable} to return the {@link RegionPlan} to move the
+     * only region to the other RegionServer.
+     */
+    Mockito.doAnswer((InvocationOnMock invocation) -> {
+      @SuppressWarnings("unchecked")
+      Map<ServerName, List<RegionInfo>> regionServerNameToRegionInfos =
+          (Map<ServerName, List<RegionInfo>>) invocation.getArgument(1);
+
+
+      List<ServerName> assignedRegionServerNames = new ArrayList<ServerName>();
+      for (Map.Entry<ServerName, List<RegionInfo>> entry : regionServerNameToRegionInfos
+          .entrySet()) {
+        if (entry.getValue()!= null) {
+          entry.getValue().stream().forEach((reginInfo) -> {
+            if (reginInfo.getTable().equals(tableName)) {
+              assignedRegionServerNames.add(entry.getKey());
+            }
+          });
+        }
+      }
+      assertTrue(assignedRegionServerNames.size() == 1);
+      ServerName assignedRegionServerName = assignedRegionServerNames.get(0);
+      ServerName notAssignedRegionServerName =
+          assignedRegionServerName.equals(serverName1) ? serverName2 : serverName1;
+      RegionPlan regionPlan =
+          new RegionPlan(regionInfo, assignedRegionServerName, notAssignedRegionServerName);
+      regionPlanRef.set(regionPlan);
+      return Arrays.asList(regionPlan);
+    }).when(spiedLoadBalancer).balanceTable(Mockito.eq(HConstants.ENSEMBLE_TABLE_NAME),
+      Mockito.anyMap());
+
+    AssignmentManager assignmentManager = master.getAssignmentManager();
+    final AssignmentManager spiedAssignmentManager = Mockito.spy(assignmentManager);
+    final CyclicBarrier cyclicBarrier = new CyclicBarrier(2);
+
+    /**
+     * Override {@link AssignmentManager#balance} to invoke real {@link AssignmentManager#balance}
+     * after the region is successfully unassigned.
+     */
+    Mockito.doAnswer((InvocationOnMock invocation) -> {
+      RegionPlan regionPlan = invocation.getArgument(0);
+      RegionPlan referedRegionPlan = regionPlanRef.get();
+      assertTrue(referedRegionPlan != null);
+      if (referedRegionPlan.equals(regionPlan)) {
+        /**
+         * To make {@link AssignmentManager#unassign} could be invoked just before
+         * {@link AssignmentManager#balance} is invoked.
+         */
+        cyclicBarrier.await();
+        /**
+         * After {@link AssignmentManager#unassign} is completed,we could invoke
+         * {@link AssignmentManager#balance}.
+         */
+        cyclicBarrier.await();
+      }
+      /**
+       * Before HBASE-26712,here may throw NPE.
+       */
+      return invocation.callRealMethod();
+    }).when(spiedAssignmentManager).balance(Mockito.any());
+
+
+    try {
+      final AtomicReference<Throwable> exceptionRef = new AtomicReference<Throwable>(null);
+      Thread unassignThread = new Thread(() -> {
+        try {
+          /**
+           * To invoke {@link AssignmentManager#unassign} just before
+           * {@link AssignmentManager#balance} is invoked.
+           */
+          cyclicBarrier.await();
+          spiedAssignmentManager.unassign(regionInfo);
+          assertTrue(spiedAssignmentManager.getRegionStates().getRegionAssignments()
+              .get(regionInfo) == null);
+          /**
+           * After {@link AssignmentManager#unassign} is completed,we could invoke
+           * {@link AssignmentManager#balance}.
+           */
+          cyclicBarrier.await();
+        } catch (Exception e) {
+          exceptionRef.set(e);
+        }
+      });
+      unassignThread.setName("UnassignThread");
+      unassignThread.start();
+
+      master.setLoadBalancer(spiedLoadBalancer);
+      master.setAssignmentManager(spiedAssignmentManager);
+      /**
+       * enable balance
+       */
+      TEST_UTIL.getAdmin().balancerSwitch(true, false);
+      /**
+       * Before HBASE-26712,here invokes {@link AssignmentManager#balance(RegionPlan)}
+       * which may throw NPE.
+       */
+      master.balance();
+
+      unassignThread.join();
+      assertTrue(exceptionRef.get() == null);
+    } finally {
+      master.setLoadBalancer(loadBalancer);
+      master.setAssignmentManager(assignmentManager);
+    }
+  }
+
+  private TableName createTable(String table) throws IOException {
+    TableName tableName = TableName.valueOf(table);
+    TEST_UTIL.createTable(tableName, FAMILYNAME);
+    return tableName;
+  }
+}