You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hbase.apache.org by GitBox <gi...@apache.org> on 2020/08/28 02:55:19 UTC

[GitHub] [hbase] anoopsjohn commented on a change in pull request #2313: HBASE-24900 Make retain assignment configurable during SCP

anoopsjohn commented on a change in pull request #2313:
URL: https://github.com/apache/hbase/pull/2313#discussion_r478798975



##########
File path: hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
##########
@@ -416,13 +416,8 @@ public void reportTransition(MasterProcedureEnv env, RegionStateNode regionNode,
 
   // Should be called with RegionStateNode locked
   public void serverCrashed(MasterProcedureEnv env, RegionStateNode regionNode,
-      ServerName serverName) throws IOException {
-    // force to assign to a new candidate server
-    // AssignmentManager#regionClosedAbnormally will set region location to null
-    // TODO: the forceNewPlan flag not be persistent so if master crash then the flag will be lost.
-    // But assign to old server is not big deal because it not effect correctness.
-    // See HBASE-23035 for more details.
-    forceNewPlan = true;
+      ServerName serverName, boolean forceNewPlan) throws IOException {
+    this.forceNewPlan = forceNewPlan;

Review comment:
       On a method call, we set a state variable. !  Is that really wanted?  Seems before also this was happening but always set to true.  Now it can change.  I did not see the class how this var being used.  But can we check pls?

##########
File path: hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRetainAssignmentOnRestart.java
##########
@@ -0,0 +1,155 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.JVMClusterUtil;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Category({ MasterTests.class, MediumTests.class })
+public class TestRetainAssignmentOnRestart extends AbstractTestRestartCluster {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestRetainAssignmentOnRestart.class);
+
+  private static final Logger LOG = LoggerFactory.getLogger(TestRetainAssignmentOnRestart.class);
+
+  @Override
+  protected boolean splitWALCoordinatedByZk() {
+    return true;
+  }
+
+  /**
+   * This tests retaining assignments on a cluster restart
+   */
+  @Test
+  public void test() throws Exception {
+    // Set Zookeeper based connection registry since we will stop master and start a new master
+    // without populating the underlying config for the connection.
+    UTIL.getConfiguration().set(HConstants.CLIENT_CONNECTION_REGISTRY_IMPL_CONF_KEY,
+      HConstants.ZK_CONNECTION_REGISTRY_CLASS);
+    // Enable retain assignment during ServerCrashProcedure
+    UTIL.getConfiguration().setBoolean(ServerCrashProcedure.MASTER_SCP_RETAIN_ASSIGNMENT, true);
+    UTIL.startMiniCluster(2);
+    // Turn off balancer
+    UTIL.getMiniHBaseCluster().getMaster().getMasterRpcServices().synchronousBalanceSwitch(false);
+    LOG.info("\n\nCreating tables");
+    for (TableName TABLE : TABLES) {
+      UTIL.createTable(TABLE, FAMILY);
+    }
+    for (TableName TABLE : TABLES) {
+      UTIL.waitTableEnabled(TABLE);
+    }
+
+    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
+    UTIL.waitUntilNoRegionsInTransition(60000);
+
+    // We don't have to use SnapshotOfRegionAssignmentFromMeta.
+    // We use it here because AM used to use it to load all user region placements
+    SnapshotOfRegionAssignmentFromMeta snapshot =
+        new SnapshotOfRegionAssignmentFromMeta(master.getConnection());
+    snapshot.initialize();
+    Map<RegionInfo, ServerName> regionToRegionServerMap = snapshot.getRegionToRegionServerMap();
+
+    MiniHBaseCluster cluster = UTIL.getHBaseCluster();
+    List<JVMClusterUtil.RegionServerThread> threads = cluster.getLiveRegionServerThreads();
+    assertEquals(2, threads.size());
+    int[] rsPorts = new int[3];
+    for (int i = 0; i < 2; i++) {
+      rsPorts[i] = threads.get(i).getRegionServer().getServerName().getPort();
+    }
+    rsPorts[2] = cluster.getMaster().getServerName().getPort();
+    for (ServerName serverName : regionToRegionServerMap.values()) {
+      boolean found = false; // Test only, no need to optimize
+      for (int k = 0; k < 3 && !found; k++) {
+        found = serverName.getPort() == rsPorts[k];
+      }
+      assertTrue(found);
+    }
+
+    LOG.info("\n\nShutting down HBase cluster");
+    cluster.stopMaster(0);
+    cluster.shutdown();
+    cluster.waitUntilShutDown();
+
+    LOG.info("\n\nSleeping a bit");
+    Thread.sleep(2000);
+
+    LOG.info("\n\nStarting cluster the second time with the same ports");
+    cluster.getConf().setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 3);
+    master = cluster.startMaster().getMaster();
+    for (int i = 0; i < 3; i++) {

Review comment:
       Here at restart , making it to 3 RSs from 2??  The value for rsPorts[2] is HM port

##########
File path: hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
##########
@@ -65,6 +65,14 @@
     implements ServerProcedureInterface {
   private static final Logger LOG = LoggerFactory.getLogger(ServerCrashProcedure.class);
 
+  /**
+   * Configuration parameter to retain the region assignment during ServerCrashProcedure, see

Review comment:
       Can u add few more lines as code level comments here?  What is the pros and cons of setting this. Will be easy for some one coming in here later to understand clearly.

##########
File path: hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRetainAssignmentOnRestart.java
##########
@@ -0,0 +1,155 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.JVMClusterUtil;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Category({ MasterTests.class, MediumTests.class })
+public class TestRetainAssignmentOnRestart extends AbstractTestRestartCluster {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestRetainAssignmentOnRestart.class);
+
+  private static final Logger LOG = LoggerFactory.getLogger(TestRetainAssignmentOnRestart.class);
+
+  @Override
+  protected boolean splitWALCoordinatedByZk() {
+    return true;
+  }
+
+  /**
+   * This tests retaining assignments on a cluster restart
+   */
+  @Test
+  public void test() throws Exception {

Review comment:
       Give a better name for the test method?

##########
File path: hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRetainAssignmentOnRestart.java
##########
@@ -0,0 +1,155 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.JVMClusterUtil;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Category({ MasterTests.class, MediumTests.class })
+public class TestRetainAssignmentOnRestart extends AbstractTestRestartCluster {

Review comment:
       This tests the cluster restart case.  The config is applicable for single RS down and that getting restarted within the zk session time out.  Then also we will go with retain assign as per the config.  Can we add test for that?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org