You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by pa...@apache.org on 2020/12/29 16:56:05 UTC
[hbase] branch master updated: HBASE-25379 Make retry pause time
configurable for regionserver short operation RPC
(reportRegionStateTransition/reportProcedureDone) (#2757)
This is an automated email from the ASF dual-hosted git repository.
pankajkumar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/master by this push:
new c96fbf0 HBASE-25379 Make retry pause time configurable for regionserver short operation RPC (reportRegionStateTransition/reportProcedureDone) (#2757)
c96fbf0 is described below
commit c96fbf04077fa37555033f88939fdd69ac810b35
Author: Pankaj <pa...@apache.org>
AuthorDate: Tue Dec 29 22:25:36 2020 +0530
HBASE-25379 Make retry pause time configurable for regionserver short operation RPC (reportRegionStateTransition/reportProcedureDone) (#2757)
* HBASE-25379 Make retry pause time configurable for regionserver short operation RPC (reportRegionStateTransition/reportProcedureDone)
* HBASE-25379 RemoteProcedureResultReporter also should retry after the configured pause time
* Addressed the review comments
Signed-off-by: Yulin Niu <ni...@apache.org>
---
.../java/org/apache/hadoop/hbase/HConstants.java | 11 +++++++++++
.../hadoop/hbase/regionserver/HRegionServer.java | 23 +++++++++++++++++-----
.../RemoteProcedureResultReporter.java | 7 ++-----
3 files changed, 31 insertions(+), 10 deletions(-)
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index e1d3de9..05782fc 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -966,6 +966,17 @@ public final class HConstants {
public static final int DEFAULT_HBASE_RPC_SHORTOPERATION_TIMEOUT = 10000;
/**
+ * Retry pause time (in milliseconds) for short operation RPC
+ */
+ public static final String HBASE_RPC_SHORTOPERATION_RETRY_PAUSE_TIME =
+ "hbase.rpc.shortoperation.retry.pause.time";
+
+ /**
+ * Default value of {@link #HBASE_RPC_SHORTOPERATION_RETRY_PAUSE_TIME}
+ */
+ public static final long DEFAULT_HBASE_RPC_SHORTOPERATION_RETRY_PAUSE_TIME = 1000;
+
+ /**
* Value indicating the server name was saved with no sequence number.
*/
public static final long NO_SEQNUM = -1;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index 87d073c..bcb1436 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -435,6 +435,9 @@ public class HRegionServer extends Thread implements
private final int shortOperationTimeout;
+ // Time to pause if master says 'please hold'
+ private final long retryPauseTime;
+
private final RegionServerAccounting regionServerAccounting;
private SlowLogTableOpsChore slowLogTableOpsChore = null;
@@ -615,6 +618,9 @@ public class HRegionServer extends Thread implements
this.shortOperationTimeout = conf.getInt(HConstants.HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY,
HConstants.DEFAULT_HBASE_RPC_SHORTOPERATION_TIMEOUT);
+ this.retryPauseTime = conf.getLong(HConstants.HBASE_RPC_SHORTOPERATION_RETRY_PAUSE_TIME,
+ HConstants.DEFAULT_HBASE_RPC_SHORTOPERATION_RETRY_PAUSE_TIME);
+
this.abortRequested = new AtomicBoolean(false);
this.stopped = false;
@@ -2436,10 +2442,8 @@ public class HRegionServer extends Thread implements
final ReportRegionStateTransitionRequest request =
createReportRegionStateTransitionRequest(context);
- // Time to pause if master says 'please hold'. Make configurable if needed.
- final long initPauseTime = 1000;
int tries = 0;
- long pauseTime;
+ long pauseTime = this.retryPauseTime;
// Keep looping till we get an error. We want to send reports even though server is going down.
// Only go down if clusterConnection is null. It is set to null almost as last thing as the
// HRegionServer goes down.
@@ -2470,9 +2474,9 @@ public class HRegionServer extends Thread implements
|| ioe instanceof CallQueueTooBigException;
if (pause) {
// Do backoff else we flood the Master with requests.
- pauseTime = ConnectionUtils.getPauseTime(initPauseTime, tries);
+ pauseTime = ConnectionUtils.getPauseTime(this.retryPauseTime, tries);
} else {
- pauseTime = initPauseTime; // Reset.
+ pauseTime = this.retryPauseTime; // Reset.
}
LOG.info("Failed report transition " +
TextFormat.shortDebugString(request) + "; retry (#" + tries + ")" +
@@ -3938,4 +3942,13 @@ public class HRegionServer extends Thread implements
public CompactedHFilesDischarger getCompactedHFilesDischarger() {
return compactedFileDischarger;
}
+
+ /**
+ * Return pause time configured in {@link HConstants#HBASE_RPC_SHORTOPERATION_RETRY_PAUSE_TIME}
+ * @return pause time
+ */
+ @InterfaceAudience.Private
+ public long getRetryPauseTime() {
+ return this.retryPauseTime;
+ }
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RemoteProcedureResultReporter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RemoteProcedureResultReporter.java
index 981f090..63e050a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RemoteProcedureResultReporter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RemoteProcedureResultReporter.java
@@ -41,9 +41,6 @@ class RemoteProcedureResultReporter extends Thread {
private static final Logger LOG = LoggerFactory.getLogger(RemoteProcedureResultReporter.class);
- // Time to pause if master says 'please hold'. Make configurable if needed.
- private static final int INIT_PAUSE_TIME_MS = 1000;
-
private static final int MAX_BATCH = 100;
private final HRegionServer server;
@@ -98,9 +95,9 @@ class RemoteProcedureResultReporter extends Thread {
long pauseTime;
if (pause) {
// Do backoff else we flood the Master with requests.
- pauseTime = ConnectionUtils.getPauseTime(INIT_PAUSE_TIME_MS, tries);
+ pauseTime = ConnectionUtils.getPauseTime(server.getRetryPauseTime(), tries);
} else {
- pauseTime = INIT_PAUSE_TIME_MS; // Reset.
+ pauseTime = server.getRetryPauseTime(); // Reset.
}
LOG.info("Failed procedure report " + TextFormat.shortDebugString(request) + "; retry (#" +
tries + ")" + (pause ? " after " + pauseTime + "ms delay (Master is coming online...)."