You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2019/12/09 12:46:55 UTC

[lucene-solr] branch master updated: SOLR-13979: Expose separate metrics for distributed and non-distributed requests.

This is an automated email from the ASF dual-hosted git repository.

ab pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new 86cab79  SOLR-13979: Expose separate metrics for distributed and non-distributed requests.
86cab79 is described below

commit 86cab79730b7471c4764f93c5265ae429cd5a27e
Author: Andrzej Bialecki <ab...@apache.org>
AuthorDate: Mon Dec 9 13:46:33 2019 +0100

    SOLR-13979: Expose separate metrics for distributed and non-distributed requests.
---
 solr/CHANGES.txt                                    |  2 ++
 .../org/apache/solr/handler/RequestHandlerBase.java | 21 +++++++++++++++++++--
 .../src/performance-statistics-reference.adoc       | 14 ++++++++++++++
 3 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index fc87ee2..f713305 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -165,6 +165,8 @@ Improvements
 
 * SOLR-13987: Admin UI should not rely on javascript eval() (rmuir, Kevin Risden)
 
+* SOLR-13979: Expose separate metrics for distributed and non-distributed requests. (ab)
+
 Optimizations
 ---------------------
 (No changes)
diff --git a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
index 7149821..e514c5f 100644
--- a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
+++ b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
@@ -19,7 +19,6 @@ package org.apache.solr.handler;
 import java.lang.invoke.MethodHandles;
 import java.util.Collection;
 import java.util.Map;
-import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 
 import com.codahale.metrics.Counter;
@@ -30,6 +29,7 @@ import org.apache.solr.api.Api;
 import org.apache.solr.api.ApiBag;
 import org.apache.solr.api.ApiSupport;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.ShardParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
@@ -68,7 +68,11 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
   private Counter requests = new Counter();
   private final Map<String, Counter> shardPurposes = new ConcurrentHashMap<>();
   private Timer requestTimes = new Timer();
+  private Timer distribRequestTimes = new Timer();
+  private Timer localRequestTimes = new Timer();
   private Counter totalTime = new Counter();
+  private Counter distribTotalTime = new Counter();
+  private Counter localTotalTime = new Counter();
 
   private final long handlerStart;
 
@@ -76,7 +80,6 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
 
   private PluginInfo pluginInfo;
 
-  private Set<String> metricNames = ConcurrentHashMap.newKeySet();
   protected SolrMetricsContext solrMetricsContext;
 
 
@@ -156,7 +159,11 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
         shardPurposes.forEach((k, v) -> map.put(k, v.getCount())));
     solrMetricsContext.gauge(metricsMap, true, "shardRequests", getCategory().toString(), scope);
     requestTimes = solrMetricsContext.timer("requestTimes", getCategory().toString(), scope);
+    distribRequestTimes = solrMetricsContext.timer("requestTimes", getCategory().toString(), scope, "distrib");
+    localRequestTimes = solrMetricsContext.timer("requestTimes", getCategory().toString(), scope, "local");
     totalTime = solrMetricsContext.counter("totalTime", getCategory().toString(), scope);
+    distribTotalTime = solrMetricsContext.counter("totalTime", getCategory().toString(), scope, "distrib");
+    localTotalTime = solrMetricsContext.counter("totalTime", getCategory().toString(), scope, "local");
     solrMetricsContext.gauge(() -> handlerStart, true, "handlerStart", getCategory().toString(), scope);
   }
 
@@ -177,6 +184,9 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
   @Override
   public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
     requests.inc();
+    // requests are distributed by default when ZK is in use, unless indicated otherwise
+    boolean distrib = req.getParams().getBool(CommonParams.DISTRIB,
+        req.getCore() != null ? req.getCore().getCoreContainer().isZooKeeperAware() : false);
     if (req.getParams().getBool(ShardParams.IS_SHARD, false)) {
       shardPurposes.computeIfAbsent("total", name -> new Counter()).inc();
       int purpose = req.getParams().getInt(ShardParams.SHARDS_PURPOSE, 0);
@@ -188,6 +198,7 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
       }
     }
     Timer.Context timer = requestTimes.time();
+    Timer.Context dTimer = distrib ? distribRequestTimes.time() : localRequestTimes.time();
     try {
       if (pluginInfo != null && pluginInfo.attributes.containsKey(USEPARAM))
         req.getContext().put(USEPARAM, pluginInfo.attributes.get(USEPARAM));
@@ -246,8 +257,14 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
         }
       }
     } finally {
+      dTimer.stop();
       long elapsed = timer.stop();
       totalTime.inc(elapsed);
+      if (distrib) {
+        distribTotalTime.inc(elapsed);
+      } else {
+        localTotalTime.inc(elapsed);
+      }
     }
   }
 
diff --git a/solr/solr-ref-guide/src/performance-statistics-reference.adoc b/solr/solr-ref-guide/src/performance-statistics-reference.adoc
index 06959de..a5300fb 100644
--- a/solr/solr-ref-guide/src/performance-statistics-reference.adoc
+++ b/solr/solr-ref-guide/src/performance-statistics-reference.adoc
@@ -94,6 +94,20 @@ The table below shows the metric names and attributes to request:
 `UPDATE./update.handlerStart` |Epoch time when the handler was registered.
 |===
 
+*Distributed vs. Local Request Times*
+
+Processing of a single distributed request in SolrCloud usually requires making several requests to
+other nodes and other replicas. The common statistics listed above lump these timings together, even though
+they are very different in nature, thus making it difficult to measure the latency of distributed and
+local requests separately. Solr 8.4 introduced additional statistics that help to do this.
+
+These metrics are structured the same as the `requestTimes` and `totalTime` metrics above, but they use
+different full names, e.g. `QUERY./select.distrib.requestTimes` and `QUERY./select.local.requestTimes`.
+The metrics under the `distrib` path correspond to the time it takes for a (potentially) distributed
+request to complete all remote calls plus any local processing, and return the result to the caller.
+The metrics under the `local` path correspond to the time it takes for a local call (non-distributed,
+i.e. being processed only by the Solr core where the handler operates) to complete.
+
 == Update Handler
 
 This section has information on the total number of adds and how many commits have been fired against a Solr core.