You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2019/12/09 13:08:09 UTC

[lucene-solr] branch branch_8x updated: SOLR-13979: Expose separate metrics for distributed and non-distributed requests.

This is an automated email from the ASF dual-hosted git repository.

ab pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new 349fe42  SOLR-13979: Expose separate metrics for distributed and non-distributed requests.
349fe42 is described below

commit 349fe428c4888d7140372087a4ffec8647a5fa5f
Author: Andrzej Bialecki <ab...@apache.org>
AuthorDate: Mon Dec 9 13:46:33 2019 +0100

    SOLR-13979: Expose separate metrics for distributed and non-distributed requests.
---
 solr/CHANGES.txt                                    |  2 ++
 .../org/apache/solr/handler/RequestHandlerBase.java | 21 ++++++++++++++++++++-
 .../src/performance-statistics-reference.adoc       | 14 ++++++++++++++
 3 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 9ce9357..68c49e5 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -93,6 +93,8 @@ Improvements
 
 * SOLR-13987: Admin UI should not rely on javascript eval() (rmuir, Kevin Risden)
 
+* SOLR-13979: Expose separate metrics for distributed and non-distributed requests. (ab)
+
 Optimizations
 ---------------------
 (No changes)
diff --git a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
index 4d9e96b..0cb7d58 100644
--- a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
+++ b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
@@ -30,6 +30,7 @@ import org.apache.solr.api.Api;
 import org.apache.solr.api.ApiBag;
 import org.apache.solr.api.ApiSupport;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.ShardParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
@@ -69,7 +70,11 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
   private Counter requests = new Counter();
   private final Map<String, Counter> shardPurposes = new ConcurrentHashMap<>();
   private Timer requestTimes = new Timer();
+  private Timer distribRequestTimes = new Timer();
+  private Timer localRequestTimes = new Timer();
   private Counter totalTime = new Counter();
+  private Counter distribTotalTime = new Counter();
+  private Counter localTotalTime = new Counter();
 
   private final long handlerStart;
 
@@ -156,8 +161,12 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
     MetricsMap metricsMap = new MetricsMap((detail, map) ->
         shardPurposes.forEach((k, v) -> map.put(k, v.getCount())));
     solrMetricsContext.gauge(this, metricsMap, true, "shardRequests", getCategory().toString(), scope);
-    requestTimes = solrMetricsContext.timer(this,"requestTimes", getCategory().toString(), scope);
+    requestTimes = solrMetricsContext.timer(this, "requestTimes", getCategory().toString(), scope);
+    distribRequestTimes = solrMetricsContext.timer(this, "requestTimes", getCategory().toString(), scope, "distrib");
+    localRequestTimes = solrMetricsContext.timer(this, "requestTimes", getCategory().toString(), scope, "local");
     totalTime = solrMetricsContext.counter(this, "totalTime", getCategory().toString(), scope);
+    distribTotalTime = solrMetricsContext.counter(this, "totalTime", getCategory().toString(), scope, "distrib");
+    localTotalTime = solrMetricsContext.counter(this, "totalTime", getCategory().toString(), scope, "local");
     solrMetricsContext.gauge(this, () -> handlerStart, true, "handlerStart", getCategory().toString(), scope);
   }
 
@@ -178,6 +187,9 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
   @Override
   public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
     requests.inc();
+    // requests are distributed by default when ZK is in use, unless indicated otherwise
+    boolean distrib = req.getParams().getBool(CommonParams.DISTRIB,
+        req.getCore() != null ? req.getCore().getCoreContainer().isZooKeeperAware() : false);
     if (req.getParams().getBool(ShardParams.IS_SHARD, false)) {
       shardPurposes.computeIfAbsent("total", name -> new Counter()).inc();
       int purpose = req.getParams().getInt(ShardParams.SHARDS_PURPOSE, 0);
@@ -189,6 +201,7 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
       }
     }
     Timer.Context timer = requestTimes.time();
+    Timer.Context dTimer = distrib ? distribRequestTimes.time() : localRequestTimes.time();
     try {
       if (pluginInfo != null && pluginInfo.attributes.containsKey(USEPARAM))
         req.getContext().put(USEPARAM, pluginInfo.attributes.get(USEPARAM));
@@ -247,8 +260,14 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
         }
       }
     } finally {
+      dTimer.stop();
       long elapsed = timer.stop();
       totalTime.inc(elapsed);
+      if (distrib) {
+        distribTotalTime.inc(elapsed);
+      } else {
+        localTotalTime.inc(elapsed);
+      }
     }
   }
 
diff --git a/solr/solr-ref-guide/src/performance-statistics-reference.adoc b/solr/solr-ref-guide/src/performance-statistics-reference.adoc
index 06959de..a5300fb 100644
--- a/solr/solr-ref-guide/src/performance-statistics-reference.adoc
+++ b/solr/solr-ref-guide/src/performance-statistics-reference.adoc
@@ -94,6 +94,20 @@ The table below shows the metric names and attributes to request:
 `UPDATE./update.handlerStart` |Epoch time when the handler was registered.
 |===
 
+*Distributed vs. Local Request Times*
+
+Processing of a single distributed request in SolrCloud usually requires making several requests to
+other nodes and other replicas. The common statistics listed above lump these timings together, even though
+they are very different in nature, thus making it difficult to measure the latency of distributed and
+local requests separately. Solr 8.4 introduced additional statistics that help to do this.
+
+These metrics are structured the same as the `requestTimes` and `totalTime` metrics above, but they use
+different full names, e.g., `QUERY./select.distrib.requestTimes` and `QUERY./select.local.requestTimes`.
+The metrics under the `distrib` path correspond to the time it takes for a (potentially) distributed
+request to complete all remote calls plus any local processing, and return the result to the caller.
+The metrics under the `local` path correspond to the time it takes for a local call (non-distributed,
+i.e., processed only by the Solr core where the handler operates) to complete.
+
 == Update Handler
 
 This section has information on the total number of adds and how many commits have been fired against a Solr core.