You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2021/03/18 18:35:27 UTC

[hbase] branch branch-2.4 updated: HBASE-25677 Server+table counters on each scan #nextRaw invocation becomes a bottleneck when heavy load (#3061)

This is an automated email from the ASF dual-hosted git repository.

stack pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new 3331e83  HBASE-25677 Server+table counters on each scan #nextRaw invocation becomes a bottleneck when heavy load (#3061)
3331e83 is described below

commit 3331e8307ae0da8dc0779751aafd38f034837fb5
Author: Michael Stack <sa...@users.noreply.github.com>
AuthorDate: Thu Mar 18 11:33:45 2021 -0700

    HBASE-25677 Server+table counters on each scan #nextRaw invocation becomes a bottleneck when heavy load (#3061)
    
    Don't have every handler update regionserver metrics on each
    scan#nextRaw; instead, do a batch update just before Scan
    returns. Otherwise, all running handlers end up contending
    on metrics update.
    
    M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
     Update of regionserver metrics counters moved out to caller where
     can be done as a batch update instead of per-next.
    
    M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServer.java
     Class doc to encourage batch updating metrics.
     Remove the single update as unused anymore.
    
    M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
     Count calls to nextRaw. Update regionserver count in finally block when
     scan is done rather than per nextRaw call. Move all metrics updates to
     finally.
    
    Signed-off-by: Reid Chan <re...@apache.org>
    Signed-off-by: Baiqiang Zhao <ZhaoBQ>
---
 .../org/apache/hadoop/hbase/regionserver/HRegion.java |  3 ---
 .../hbase/regionserver/MetricsRegionServer.java       | 18 ++++++------------
 .../hadoop/hbase/regionserver/RSRpcServices.java      | 19 +++++++++++++------
 3 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index d54dc79..0f3649b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -7282,9 +7282,6 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
           metricsRegion.updateReadRequestCount();
         }
       }
-      if (rsServices != null && rsServices.getMetrics() != null) {
-        rsServices.getMetrics().updateReadQueryMeter(getRegionInfo().getTable());
-      }
 
       // If the size limit was reached it means a partial Result is being returned. Returning a
       // partial Result means that we should not reset the filters; filters should only be reset in
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServer.java
index 00324f3..f593f39 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServer.java
@@ -28,12 +28,11 @@ import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.yetus.audience.InterfaceStability;
 
 /**
- * <p>
- * This class is for maintaining the various regionserver statistics
- * and publishing them through the metrics interfaces.
- * </p>
+ * Maintains regionserver statistics and publishes them through the metrics interfaces.
  * This class has a number of metrics variables that are publicly accessible;
- * these variables (objects) have methods to update their values.
+ * these variables (objects) have methods to update their values. Batch your updates rather than
+ * call on each instance else all threads will do nothing but contend trying to maintain metric
+ * counters!
  */
 @InterfaceStability.Evolving
 @InterfaceAudience.Private
@@ -52,7 +51,9 @@ public class MetricsRegionServer {
 
   private MetricRegistry metricRegistry;
   private Timer bulkLoadTimer;
+  // Incremented once for each call to Scan#nextRaw
   private Meter serverReadQueryMeter;
+  // Incremented per write.
   private Meter serverWriteQueryMeter;
   protected long slowMetricTime;
   protected static final int DEFAULT_SLOW_METRIC_TIME = 1000; // milliseconds
@@ -272,13 +273,6 @@ public class MetricsRegionServer {
     this.serverReadQueryMeter.mark(count);
   }
 
-  public void updateReadQueryMeter(TableName tn) {
-    if (tableMetrics != null && tn != null) {
-      tableMetrics.updateTableReadQueryMeter(tn);
-    }
-    this.serverReadQueryMeter.mark();
-  }
-
   public void updateWriteQueryMeter(TableName tn, long count) {
     if (tableMetrics != null && tn != null) {
       tableMetrics.updateTableWriteQueryMeter(tn, count);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
index 59ca7c8..288ba76 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
@@ -3282,10 +3282,13 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
     // arbitrary 32. TODO: keep record of general size of results being returned.
     List<Cell> values = new ArrayList<>(32);
     region.startRegionOperation(Operation.SCAN);
+    long before = EnvironmentEdgeManager.currentTime();
+    // Used to check if we've matched the row limit set on the Scan
+    int numOfCompleteRows = 0;
+    // Count of times we call nextRaw; can be > numOfCompleteRows.
+    int numOfNextRawCalls = 0;
     try {
       int numOfResults = 0;
-      int numOfCompleteRows = 0;
-      long before = EnvironmentEdgeManager.currentTime();
       synchronized (scanner) {
         boolean stale = (region.getRegionInfo().getReplicaId() != 0);
         boolean clientHandlesPartials =
@@ -3341,6 +3344,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
 
           // Collect values to be returned here
           moreRows = scanner.nextRaw(values, scannerContext);
+          numOfNextRawCalls++;
 
           if (!values.isEmpty()) {
             if (limitOfRows > 0) {
@@ -3432,18 +3436,21 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
           builder.setScanMetrics(metricBuilder.build());
         }
       }
+    } finally {
+      region.closeRegionOperation();
+      // Update serverside metrics, even on error.
       long end = EnvironmentEdgeManager.currentTime();
       long responseCellSize = context != null ? context.getResponseCellSize() : 0;
       region.getMetrics().updateScanTime(end - before);
       final MetricsRegionServer metricsRegionServer = regionServer.getMetrics();
       if (metricsRegionServer != null) {
         metricsRegionServer.updateScanSize(
-            region.getTableDescriptor().getTableName(), responseCellSize);
+          region.getTableDescriptor().getTableName(), responseCellSize);
         metricsRegionServer.updateScanTime(
-            region.getTableDescriptor().getTableName(), end - before);
+          region.getTableDescriptor().getTableName(), end - before);
+        metricsRegionServer.updateReadQueryMeter(region.getRegionInfo().getTable(),
+          numOfNextRawCalls);
       }
-    } finally {
-      region.closeRegionOperation();
     }
     // coprocessor postNext hook
     if (region.getCoprocessorHost() != null) {