You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by pa...@apache.org on 2020/12/29 17:04:30 UTC

[hbase] branch branch-2 updated: HBASE-25379 Make retry pause time configurable for regionserver short operation RPC (reportRegionStateTransition/reportProcedureDone) (#2757)

This is an automated email from the ASF dual-hosted git repository.

pankajkumar pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new 382ebf3  HBASE-25379 Make retry pause time configurable for regionserver short operation RPC (reportRegionStateTransition/reportProcedureDone) (#2757)
382ebf3 is described below

commit 382ebf3f4eeb3e8b5055dc957a16825b34c0ed7d
Author: Pankaj <pa...@apache.org>
AuthorDate: Tue Dec 29 22:25:36 2020 +0530

    HBASE-25379 Make retry pause time configurable for regionserver short operation RPC (reportRegionStateTransition/reportProcedureDone) (#2757)
    
    * HBASE-25379 Make retry pause time configurable for regionserver short operation RPC (reportRegionStateTransition/reportProcedureDone)
    * HBASE-25379 RemoteProcedureResultReporter also should retry after the configured pause time
    * Addressed the review comments
    
    Signed-off-by: Yulin Niu <ni...@apache.org>
    (cherry picked from commit c96fbf04077fa37555033f88939fdd69ac810b35)
---
 .../java/org/apache/hadoop/hbase/HConstants.java   | 11 +++++++++++
 .../hadoop/hbase/regionserver/HRegionServer.java   | 23 +++++++++++++++++-----
 .../RemoteProcedureResultReporter.java             |  7 ++-----
 3 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index c7b13af..dc43e8a 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -988,6 +988,17 @@ public final class HConstants {
   public static final int DEFAULT_HBASE_RPC_SHORTOPERATION_TIMEOUT = 10000;
 
   /**
+   * Retry pause time (in milliseconds) for short operation RPC
+   */
+  public static final String HBASE_RPC_SHORTOPERATION_RETRY_PAUSE_TIME =
+      "hbase.rpc.shortoperation.retry.pause.time";
+
+  /**
+   * Default value of {@link #HBASE_RPC_SHORTOPERATION_RETRY_PAUSE_TIME}
+   */
+  public static final long DEFAULT_HBASE_RPC_SHORTOPERATION_RETRY_PAUSE_TIME = 1000;
+
+  /**
    * Value indicating the server name was saved with no sequence number.
    */
   public static final long NO_SEQNUM = -1;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index 67a93a6..702d147 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -437,6 +437,9 @@ public class HRegionServer extends Thread implements
   private final int operationTimeout;
   private final int shortOperationTimeout;
 
+  // Time to pause if master says 'please hold'
+  private final long retryPauseTime;
+
   private final RegionServerAccounting regionServerAccounting;
 
   private SlowLogTableOpsChore slowLogTableOpsChore = null;
@@ -617,6 +620,9 @@ public class HRegionServer extends Thread implements
       this.shortOperationTimeout = conf.getInt(HConstants.HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY,
           HConstants.DEFAULT_HBASE_RPC_SHORTOPERATION_TIMEOUT);
 
+      this.retryPauseTime = conf.getLong(HConstants.HBASE_RPC_SHORTOPERATION_RETRY_PAUSE_TIME,
+        HConstants.DEFAULT_HBASE_RPC_SHORTOPERATION_RETRY_PAUSE_TIME);
+
       this.abortRequested = new AtomicBoolean(false);
       this.stopped = false;
 
@@ -2425,10 +2431,8 @@ public class HRegionServer extends Thread implements
     final ReportRegionStateTransitionRequest request =
         createReportRegionStateTransitionRequest(context);
 
-    // Time to pause if master says 'please hold'. Make configurable if needed.
-    final long initPauseTime = 1000;
     int tries = 0;
-    long pauseTime;
+    long pauseTime = this.retryPauseTime;
     // Keep looping till we get an error. We want to send reports even though server is going down.
     // Only go down if clusterConnection is null. It is set to null almost as last thing as the
     // HRegionServer does down.
@@ -2459,9 +2463,9 @@ public class HRegionServer extends Thread implements
                 || ioe instanceof CallQueueTooBigException;
         if (pause) {
           // Do backoff else we flood the Master with requests.
-          pauseTime = ConnectionUtils.getPauseTime(initPauseTime, tries);
+          pauseTime = ConnectionUtils.getPauseTime(this.retryPauseTime, tries);
         } else {
-          pauseTime = initPauseTime; // Reset.
+          pauseTime = this.retryPauseTime; // Reset.
         }
         LOG.info("Failed report transition " +
           TextFormat.shortDebugString(request) + "; retry (#" + tries + ")" +
@@ -3916,4 +3920,13 @@ public class HRegionServer extends Thread implements
   public CompactedHFilesDischarger getCompactedHFilesDischarger() {
     return compactedFileDischarger;
   }
+
+  /**
+   * Return pause time configured in {@link HConstants#HBASE_RPC_SHORTOPERATION_RETRY_PAUSE_TIME}
+   * @return pause time
+   */
+  @InterfaceAudience.Private
+  public long getRetryPauseTime() {
+    return this.retryPauseTime;
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RemoteProcedureResultReporter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RemoteProcedureResultReporter.java
index 981f090..63e050a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RemoteProcedureResultReporter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RemoteProcedureResultReporter.java
@@ -41,9 +41,6 @@ class RemoteProcedureResultReporter extends Thread {
 
   private static final Logger LOG = LoggerFactory.getLogger(RemoteProcedureResultReporter.class);
 
-  // Time to pause if master says 'please hold'. Make configurable if needed.
-  private static final int INIT_PAUSE_TIME_MS = 1000;
-
   private static final int MAX_BATCH = 100;
 
   private final HRegionServer server;
@@ -98,9 +95,9 @@ class RemoteProcedureResultReporter extends Thread {
         long pauseTime;
         if (pause) {
           // Do backoff else we flood the Master with requests.
-          pauseTime = ConnectionUtils.getPauseTime(INIT_PAUSE_TIME_MS, tries);
+          pauseTime = ConnectionUtils.getPauseTime(server.getRetryPauseTime(), tries);
         } else {
-          pauseTime = INIT_PAUSE_TIME_MS; // Reset.
+          pauseTime = server.getRetryPauseTime(); // Reset.
         }
         LOG.info("Failed procedure report " + TextFormat.shortDebugString(request) + "; retry (#" +
           tries + ")" + (pause ? " after " + pauseTime + "ms delay (Master is coming online...)."