You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by no...@apache.org on 2022/08/29 18:08:00 UTC

[solr] branch branch_9x updated: SOLR-16337: implement Zk metrics (#986)

This is an automated email from the ASF dual-hosted git repository.

noble pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new bc38bc1e057 SOLR-16337: implement Zk metrics (#986)
bc38bc1e057 is described below

commit bc38bc1e0579328e46f0e3ff92ff5ace96d4513b
Author: Noble Paul <no...@users.noreply.github.com>
AuthorDate: Tue Aug 30 04:03:09 2022 +1000

    SOLR-16337: implement Zk metrics (#986)
---
 solr/CHANGES.txt                                   |   1 +
 .../java/org/apache/solr/core/CoreContainer.java   |   2 +
 .../src/java/org/apache/solr/core/ZkContainer.java |  26 ++++
 .../solr/metrics/SolrMetricsIntegrationTest.java   |  66 ++++++++++
 .../org/apache/solr/common/cloud/SolrZkClient.java | 137 ++++++++++++++++++---
 .../org/apache/solr/common/util/JavaBinCodec.java  |   8 ++
 .../org/apache/solr/common/util/TextWriter.java    |   6 +
 7 files changed, 228 insertions(+), 18 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 1a71fa4f676..701af2e889d 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -45,6 +45,7 @@ Improvements
 
 * SOLR-16323: The Docker image now uses the Solr User ID instead of the User Name, helps with non-root checks (Houston Putman)
 
+* SOLR-16337: implement Zk metrics (noble)
 Optimizations
 ---------------------
 * SOLR-16120: Optimise hl.fl expansion. (Christine Poerschke, David Smiley, Mike Drob)
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index a5b99edb38c..3c2aba578e3 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -751,6 +751,8 @@ public class CoreContainer {
 
     zkSys.initZooKeeper(this, cfg.getCloudConfig());
     if (isZooKeeperAware()) {
+      // initialize ZkClient metrics
+      zkSys.getZkMetricsProducer().initializeMetrics(solrMetricsContext, "zkClient");
       pkiAuthenticationSecurityBuilder =
           new PKIAuthenticationPlugin(
               this,
diff --git a/solr/core/src/java/org/apache/solr/core/ZkContainer.java b/solr/core/src/java/org/apache/solr/core/ZkContainer.java
index fef2afa7cd9..7f67496fb7e 100644
--- a/solr/core/src/java/org/apache/solr/core/ZkContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/ZkContainer.java
@@ -41,6 +41,9 @@ import org.apache.solr.common.cloud.ZooKeeperException;
 import org.apache.solr.common.util.ExecutorUtil;
 import org.apache.solr.common.util.SolrNamedThreadFactory;
 import org.apache.solr.logging.MDCLoggingContext;
+import org.apache.solr.metrics.MetricsMap;
+import org.apache.solr.metrics.SolrMetricProducer;
+import org.apache.solr.metrics.SolrMetricsContext;
 import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -66,6 +69,8 @@ public class ZkContainer {
   // see ZkController.zkRunOnly
   private boolean zkRunOnly = Boolean.getBoolean("zkRunOnly"); // expert
 
+  private SolrMetricProducer metricProducer;
+
   public ZkContainer() {}
 
   public void initZooKeeper(final CoreContainer cc, CloudConfig config) {
@@ -152,6 +157,23 @@ public class ZkContainer {
         }
 
         this.zkController = zkController;
+        MetricsMap metricsMap = new MetricsMap(zkController.getZkClient().getMetrics());
+        metricProducer =
+            new SolrMetricProducer() {
+              SolrMetricsContext ctx;
+
+              @Override
+              public void initializeMetrics(SolrMetricsContext parentContext, String scope) {
+                ctx = parentContext.getChildContext(this);
+                ctx.gauge(
+                    metricsMap, true, scope, null, SolrInfoBean.Category.CONTAINER.toString());
+              }
+
+              @Override
+              public SolrMetricsContext getSolrMetricsContext() {
+                return ctx;
+              }
+            };
       } catch (InterruptedException e) {
         // Restore the interrupted status
         Thread.currentThread().interrupt();
@@ -249,4 +271,8 @@ public class ZkContainer {
   public ExecutorService getCoreZkRegisterExecutorService() {
     return coreZkRegister;
   }
+
+  public SolrMetricProducer getZkMetricsProducer() {
+    return this.metricProducer;
+  }
 }
diff --git a/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java b/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
index 557d5a62c3e..42d80453dff 100644
--- a/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
@@ -24,10 +24,17 @@ import com.codahale.metrics.Timer;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.List;
 import java.util.Map;
 import java.util.Random;
+import java.util.Set;
+import org.apache.http.client.HttpClient;
 import org.apache.lucene.tests.util.TestUtil;
 import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.cloud.MiniSolrCloudCluster;
+import org.apache.solr.common.util.Utils;
 import org.apache.solr.core.CoreContainer;
 import org.apache.solr.core.NodeConfig;
 import org.apache.solr.core.PluginInfo;
@@ -216,4 +223,63 @@ public class SolrMetricsIntegrationTest extends SolrTestCaseJ4 {
     Gauge<?> g = (Gauge<?>) metrics.get("CONTAINER.fs.path");
     assertEquals(g.getValue(), cc.getSolrHome());
   }
+
+  @Test
+  public void testZkMetrics() throws Exception {
+    System.setProperty("metricsEnabled", "true");
+    MiniSolrCloudCluster cluster =
+        new MiniSolrCloudCluster.Builder(3, createTempDir())
+            .addConfig("conf", configset("conf2"))
+            .configure();
+    System.clearProperty("metricsEnabled");
+    try {
+      JettySolrRunner j = cluster.getRandomJetty(random());
+      String url = j.getBaseUrl() + "/admin/metrics?key=solr.node:CONTAINER.zknSolrClient&wt=json";
+      HttpClient httpClient = ((HttpSolrClient) j.newClient()).getHttpClient();
+      @SuppressWarnings("unchecked")
+      Map<String, Object> zkMmetrics =
+          (Map<String, Object>)
+              Utils.getObjectByPath(
+                  Utils.executeGET(httpClient, url, Utils.JSONCONSUMER),
+                  false,
+                  List.of("metrics", "solr.node:CONTAINER.zkClient"));
+
+      Set<String> allKeys =
+          Set.of(
+              "watchesFired",
+              "reads",
+              "writes",
+              "bytesRead",
+              "bytesWritten",
+              "multiOps",
+              "cumulativeMultiOps",
+              "childFetches",
+              "cumulativeChildrenFetched",
+              "existsChecks",
+              "deletes");
+
+      for (String k : allKeys) {
+        assertNotNull(zkMmetrics.get(k));
+      }
+      Utils.executeGET(
+          httpClient, j.getBaseURLV2() + "/cluster/zk/ls/live_nodes", Utils.JSONCONSUMER);
+      @SuppressWarnings("unchecked")
+      Map<String, Object> zkMmetricsNew =
+          (Map<String, Object>)
+              Utils.getObjectByPath(
+                  Utils.executeGET(httpClient, url, Utils.JSONCONSUMER),
+                  false,
+                  List.of("metrics", "solr.node:CONTAINER.zkClient"));
+
+      assertTrue(findDelta(zkMmetrics, zkMmetricsNew, "childFetches") >= 1);
+      assertTrue(findDelta(zkMmetrics, zkMmetricsNew, "cumulativeChildrenFetched") >= 3);
+      assertTrue(findDelta(zkMmetrics, zkMmetricsNew, "existsChecks") >= 4);
+    } finally {
+      cluster.shutdown();
+    }
+  }
+
+  private long findDelta(Map<String, Object> m1, Map<String, Object> m2, String k) {
+    return ((Number) m2.get(k)).longValue() - ((Number) m1.get(k)).longValue();
+  }
 }
diff --git a/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/SolrZkClient.java b/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/SolrZkClient.java
index 0222e2f115d..4a602a944cc 100644
--- a/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/SolrZkClient.java
+++ b/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/SolrZkClient.java
@@ -26,16 +26,20 @@ import java.nio.file.Path;
 import java.util.List;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.RejectedExecutionException;
+import java.util.concurrent.atomic.LongAdder;
 import java.util.function.BiFunction;
 import java.util.function.Function;
 import java.util.function.Predicate;
 import java.util.regex.Pattern;
 import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.common.MapWriter;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.StringUtils;
+import org.apache.solr.common.annotation.JsonProperty;
 import org.apache.solr.common.cloud.ConnectionManager.IsClosed;
 import org.apache.solr.common.util.ExecutorUtil;
 import org.apache.solr.common.util.ObjectReleaseTracker;
+import org.apache.solr.common.util.ReflectMapWriter;
 import org.apache.solr.common.util.SolrNamedThreadFactory;
 import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.KeeperException;
@@ -72,6 +76,12 @@ public class SolrZkClient implements Closeable {
 
   private ZkCmdExecutor zkCmdExecutor;
 
+  private final ZkMetrics metrics = new ZkMetrics();
+
+  public MapWriter getMetrics() {
+    return metrics::writeMap;
+  }
+
   private final ExecutorService zkCallbackExecutor =
       ExecutorUtil.newMDCAwareCachedThreadPool(new SolrNamedThreadFactory("zkCallback"));
   private final ExecutorService zkConnManagerCallbackExecutor =
@@ -284,6 +294,7 @@ public class SolrZkClient implements Closeable {
     } else {
       keeper.delete(path, version);
     }
+    metrics.deletes.increment();
   }
 
   /**
@@ -314,65 +325,96 @@ public class SolrZkClient implements Closeable {
    */
   public Stat exists(final String path, final Watcher watcher, boolean retryOnConnLoss)
       throws KeeperException, InterruptedException {
+    Stat result = null;
     if (retryOnConnLoss) {
-      return zkCmdExecutor.retryOperation(() -> keeper.exists(path, wrapWatcher(watcher)));
+      result = zkCmdExecutor.retryOperation(() -> keeper.exists(path, wrapWatcher(watcher)));
     } else {
-      return keeper.exists(path, wrapWatcher(watcher));
+      result = keeper.exists(path, wrapWatcher(watcher));
     }
+    metrics.existsChecks.increment();
+    return result;
   }
 
   /** Returns true if path exists */
   public Boolean exists(final String path, boolean retryOnConnLoss)
       throws KeeperException, InterruptedException {
+    Boolean result = null;
     if (retryOnConnLoss) {
-      return zkCmdExecutor.retryOperation(() -> keeper.exists(path, null) != null);
+      result = zkCmdExecutor.retryOperation(() -> keeper.exists(path, null) != null);
     } else {
-      return keeper.exists(path, null) != null;
+      result = keeper.exists(path, null) != null;
     }
+    metrics.existsChecks.increment();
+    return result;
   }
 
   /** Returns children of the node at the path */
   public List<String> getChildren(final String path, final Watcher watcher, boolean retryOnConnLoss)
       throws KeeperException, InterruptedException {
+    List<String> result = null;
     if (retryOnConnLoss) {
       return zkCmdExecutor.retryOperation(() -> keeper.getChildren(path, wrapWatcher(watcher)));
     } else {
-      return keeper.getChildren(path, wrapWatcher(watcher));
+      result = keeper.getChildren(path, wrapWatcher(watcher));
+    }
+
+    metrics.childFetches.increment();
+    if (result != null) {
+      metrics.cumulativeChildrenFetched.add(result.size());
     }
+    return result;
   }
 
   /** Returns children of the node at the path */
   public List<String> getChildren(
       final String path, final Watcher watcher, Stat stat, boolean retryOnConnLoss)
       throws KeeperException, InterruptedException {
+    List<String> result = null;
     if (retryOnConnLoss) {
-      return zkCmdExecutor.retryOperation(
-          () -> keeper.getChildren(path, wrapWatcher(watcher), stat));
+      result =
+          zkCmdExecutor.retryOperation(() -> keeper.getChildren(path, wrapWatcher(watcher), stat));
     } else {
-      return keeper.getChildren(path, wrapWatcher(watcher), stat);
+      result = keeper.getChildren(path, wrapWatcher(watcher), stat);
+    }
+    metrics.childFetches.increment();
+    if (result != null) {
+      metrics.cumulativeChildrenFetched.add(result.size());
     }
+    return result;
   }
 
   /** Returns node's data */
   public byte[] getData(
       final String path, final Watcher watcher, final Stat stat, boolean retryOnConnLoss)
       throws KeeperException, InterruptedException {
+    byte[] result = null;
     if (retryOnConnLoss) {
-      return zkCmdExecutor.retryOperation(() -> keeper.getData(path, wrapWatcher(watcher), stat));
+      result = zkCmdExecutor.retryOperation(() -> keeper.getData(path, wrapWatcher(watcher), stat));
     } else {
-      return keeper.getData(path, wrapWatcher(watcher), stat);
+      result = keeper.getData(path, wrapWatcher(watcher), stat);
     }
+    metrics.reads.increment();
+    if (result != null) {
+      metrics.bytesRead.add(result.length);
+    }
+    return result;
   }
 
   /** Returns node's state */
   public Stat setData(
       final String path, final byte data[], final int version, boolean retryOnConnLoss)
       throws KeeperException, InterruptedException {
+    Stat result = null;
     if (retryOnConnLoss) {
-      return zkCmdExecutor.retryOperation(() -> keeper.setData(path, data, version));
+      result = zkCmdExecutor.retryOperation(() -> keeper.setData(path, data, version));
     } else {
-      return keeper.setData(path, data, version);
+      result = keeper.setData(path, data, version);
+    }
+    metrics.writes.increment();
+    if (data != null) {
+      metrics.bytesWritten.add(data.length);
     }
+    return result;
   }
 
   public void atomicUpdate(String path, Function<byte[], byte[]> editor)
@@ -415,13 +457,20 @@ public class SolrZkClient implements Closeable {
   public String create(
       final String path, final byte[] data, final CreateMode createMode, boolean retryOnConnLoss)
       throws KeeperException, InterruptedException {
+    String result = null;
     if (retryOnConnLoss) {
-      return zkCmdExecutor.retryOperation(
-          () -> keeper.create(path, data, zkACLProvider.getACLsToAdd(path), createMode));
+      result =
+          zkCmdExecutor.retryOperation(
+              () -> keeper.create(path, data, zkACLProvider.getACLsToAdd(path), createMode));
     } else {
       List<ACL> acls = zkACLProvider.getACLsToAdd(path);
-      return keeper.create(path, data, acls, createMode);
+      result = keeper.create(path, data, acls, createMode);
+    }
+    metrics.writes.increment();
+    if (data != null) {
+      metrics.bytesWritten.add(data.length);
     }
+    return result;
   }
 
   /**
@@ -542,6 +591,10 @@ public class SolrZkClient implements Closeable {
       int skipPathParts)
       throws KeeperException, InterruptedException {
     log.debug("makePath: {}", path);
+    metrics.writes.increment();
+    if (data != null) {
+      metrics.bytesWritten.add(data.length);
+    }
     boolean retry = true;
 
     if (path.startsWith("/")) {
@@ -632,11 +685,17 @@ public class SolrZkClient implements Closeable {
 
   public List<OpResult> multi(final Iterable<Op> ops, boolean retryOnConnLoss)
       throws InterruptedException, KeeperException {
+    List<OpResult> result = null;
     if (retryOnConnLoss) {
-      return zkCmdExecutor.retryOperation(() -> keeper.multi(ops));
+      result = zkCmdExecutor.retryOperation(() -> keeper.multi(ops));
     } else {
-      return keeper.multi(ops);
+      result = keeper.multi(ops);
     }
+    metrics.multiOps.increment();
+    if (result != null) {
+      metrics.cumulativeMultiOps.add(result.size());
+    }
+    return result;
   }
 
   /** Fills string with printout of current ZooKeeper layout. */
@@ -919,7 +978,11 @@ public class SolrZkClient implements Closeable {
         if (watcher instanceof ConnectionManager) {
           zkConnManagerCallbackExecutor.submit(() -> watcher.process(event));
         } else {
-          zkCallbackExecutor.submit(() -> watcher.process(event));
+          zkCallbackExecutor.submit(
+              () -> {
+                metrics.watchesFired.increment();
+                watcher.process(event);
+              });
         }
       } catch (RejectedExecutionException e) {
         // If not a graceful shutdown
@@ -948,4 +1011,42 @@ public class SolrZkClient implements Closeable {
       return false;
     }
   }
+
+  // all fields of this class are public because ReflectMapWriter requires them to be.
+  // however the object itself is private and only this class can modify it
+  public static class ZkMetrics implements ReflectMapWriter {
+    @JsonProperty public final LongAdder watchesFired = new LongAdder();
+    @JsonProperty public final LongAdder reads = new LongAdder();
+    @JsonProperty public final LongAdder writes = new LongAdder();
+    @JsonProperty public final LongAdder bytesRead = new LongAdder();
+    @JsonProperty public final LongAdder bytesWritten = new LongAdder();
+
+    @JsonProperty public final LongAdder multiOps = new LongAdder();
+
+    @JsonProperty public final LongAdder cumulativeMultiOps = new LongAdder();
+
+    @JsonProperty public final LongAdder childFetches = new LongAdder();
+
+    @JsonProperty public final LongAdder cumulativeChildrenFetched = new LongAdder();
+
+    @JsonProperty public final LongAdder existsChecks = new LongAdder();
+
+    @JsonProperty public final LongAdder deletes = new LongAdder();
+
+    @Override
+    public void writeMap(EntryWriter ew) throws IOException {
+      ReflectMapWriter.super.writeMap(
+          new EntryWriter() {
+            @Override
+            public EntryWriter put(CharSequence k, Object v) throws IOException {
+              if (v instanceof LongAdder) {
+                ew.put(k, ((LongAdder) v).longValue());
+              } else {
+                ew.put(k, v);
+              }
+              return this;
+            }
+          });
+    }
+  }
 }
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java b/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
index 573026d2d88..23210417a5a 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
@@ -37,6 +37,8 @@ import java.util.Map.Entry;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.LongAccumulator;
+import java.util.concurrent.atomic.LongAdder;
 import java.util.function.BiConsumer;
 import java.util.function.Function;
 import java.util.function.Predicate;
@@ -1093,6 +1095,12 @@ public class JavaBinCodec implements PushWriter {
     } else if (val == END_OBJ) {
       writeTag(END);
       return true;
+    } else if (val instanceof LongAdder) {
+      daos.writeLong(((LongAdder) val).longValue());
+      return true;
+    } else if (val instanceof LongAccumulator) {
+      daos.writeLong(((LongAccumulator) val).longValue());
+      return true;
     }
     return false;
   }
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/TextWriter.java b/solr/solrj/src/java/org/apache/solr/common/util/TextWriter.java
index b68bc30c51d..3ec8df2168b 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/TextWriter.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/TextWriter.java
@@ -32,6 +32,8 @@ import java.util.Map;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.LongAccumulator;
+import java.util.concurrent.atomic.LongAdder;
 import org.apache.solr.common.EnumFieldValue;
 import org.apache.solr.common.IteratorWriter;
 import org.apache.solr.common.MapSerializable;
@@ -167,6 +169,10 @@ public interface TextWriter extends PushWriter {
       writeInt(name, ((AtomicInteger) val).get());
     } else if (val instanceof AtomicLong) {
       writeLong(name, ((AtomicLong) val).get());
+    } else if (val instanceof LongAdder) {
+      writeLong(name, val.toString());
+    } else if (val instanceof LongAccumulator) {
+      writeLong(name, val.toString());
     } else {
       // default... for debugging only
       writeStr(name, val.getClass().getName() + ':' + val.toString(), true);