You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by vj...@apache.org on 2020/06/10 07:44:15 UTC
[hbase] branch branch-2.3 updated: HBASE-24511 Ability to configure
timeout between RPC retry to RS from master (#1861)
This is an automated email from the ASF dual-hosted git repository.
vjasani pushed a commit to branch branch-2.3
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.3 by this push:
new 5989de4 HBASE-24511 Ability to configure timeout between RPC retry to RS from master (#1861)
5989de4 is described below
commit 5989de440220c7859fa1e89f4e27bcfb2b8a071e
Author: sguggilam <sa...@gmail.com>
AuthorDate: Wed Jun 10 00:23:08 2020 -0700
HBASE-24511 Ability to configure timeout between RPC retry to RS from master (#1861)
Signed-off-by: Viraj Jasani <vj...@apache.org>
---
.../master/procedure/RSProcedureDispatcher.java | 17 +++++++---
.../TestRegionServerReportForDuty.java | 38 +++++++++++++++++++++-
2 files changed, 50 insertions(+), 5 deletions(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java
index 74e00f5..3b4ee4c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java
@@ -235,12 +235,19 @@ public class RSProcedureDispatcher
private int numberOfAttemptsSoFar = 0;
private long maxWaitTime = -1;
+ private final long rsRpcRetryInterval;
+ private static final String RS_RPC_RETRY_INTERVAL_CONF_KEY =
+ "hbase.regionserver.rpc.retry.interval";
+ private static final int DEFAULT_RS_RPC_RETRY_INTERVAL = 100;
+
private ExecuteProceduresRequest.Builder request = null;
public ExecuteProceduresRemoteCall(final ServerName serverName,
final Set<RemoteProcedure> remoteProcedures) {
this.serverName = serverName;
this.remoteProcedures = remoteProcedures;
+ this.rsRpcRetryInterval = master.getConfiguration().getLong(RS_RPC_RETRY_INTERVAL_CONF_KEY,
+ DEFAULT_RS_RPC_RETRY_INTERVAL);
}
private AdminService.BlockingInterface getRsAdmin() throws IOException {
@@ -265,8 +272,8 @@ public class RSProcedureDispatcher
LOG.warn("Waiting a little before retrying {}, try={}, can wait up to {}ms",
serverName, numberOfAttemptsSoFar, remainingTime);
numberOfAttemptsSoFar++;
- // Retry every 100ms up to maximum wait time.
- submitTask(this, 100, TimeUnit.MILLISECONDS);
+ // Retry every rsRpcRetryInterval millis up to maximum wait time.
+ submitTask(this, rsRpcRetryInterval, TimeUnit.MILLISECONDS);
return true;
}
LOG.warn("{} is throwing ServerNotRunningYetException for {}ms; trying another server",
@@ -311,10 +318,12 @@ public class RSProcedureDispatcher
numberOfAttemptsSoFar++;
// Add some backoff here as the attempts rise otherwise if a stuck condition, will fill logs
// with failed attempts. None of our backoff classes -- RetryCounter or ClientBackoffPolicy
- // -- fit here nicely so just do something simple; increment by 100ms * retry^2 on each try
+ // -- fit here nicely so just do something simple; increment by rsRpcRetryInterval millis *
+ // retry^2 on each try
// up to max of 10 seconds (don't want to back off too much in case of situation change).
submitTask(this,
- Math.min(100 * (this.numberOfAttemptsSoFar * this.numberOfAttemptsSoFar), 10 * 1000),
+ Math.min(rsRpcRetryInterval * (this.numberOfAttemptsSoFar * this.numberOfAttemptsSoFar),
+ 10 * 1000),
TimeUnit.MILLISECONDS);
return true;
}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java
index dd1c085..0e1c280 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java
@@ -22,7 +22,8 @@ import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.io.StringWriter;
-
+import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
@@ -38,6 +39,7 @@ import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
+import org.apache.hadoop.hbase.util.Threads;
import org.apache.log4j.Appender;
import org.apache.log4j.Layout;
import org.apache.log4j.PatternLayout;
@@ -222,6 +224,40 @@ public class TestRegionServerReportForDuty {
tablesOnMaster? 3: 2);
}
+
+ /**
+ * Tests region server reportForDuty with RS RPC retry
+ */
+ @Test
+ public void testReportForDutyWithRSRpcRetry() throws Exception {
+ ScheduledThreadPoolExecutor scheduledThreadPoolExecutor =
+ new ScheduledThreadPoolExecutor(1, Threads.newDaemonThreadFactory("RSDelayedStart"));
+
+ // Start a master and wait for it to become the active/primary master.
+ // Use a random unique port
+ cluster.getConfiguration().setInt(HConstants.MASTER_PORT, HBaseTestingUtility.randomFreePort());
+ // Override the default RS RPC retry interval of 100ms to 300ms
+ cluster.getConfiguration().setLong("hbase.regionserver.rpc.retry.interval", 300);
+ // If tables are on master, the master also counts as an RS: min/max to start is 2, else 1
+ boolean tablesOnMaster = LoadBalancer.isTablesOnMaster(testUtil.getConfiguration());
+ cluster.getConfiguration().setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART,
+ tablesOnMaster ? 2 : 1);
+ cluster.getConfiguration().setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART,
+ tablesOnMaster ? 2 : 1);
+ master = cluster.addMaster();
+ rs = cluster.addRegionServer();
+ LOG.debug("Starting master: " + master.getMaster().getServerName());
+ master.start();
+ // Delay the RS start so the meta assignment fails on the first attempt and enters the retry block
+ scheduledThreadPoolExecutor.schedule(new Runnable() {
+ @Override
+ public void run() {
+ rs.start();
+ }
+ }, 1000, TimeUnit.MILLISECONDS);
+
+ waitForClusterOnline(master);
+ }
private void waitForClusterOnline(MasterThread master) throws InterruptedException {
while (true) {