You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ja...@apache.org on 2021/05/12 08:18:10 UTC

[solr] branch main updated: SOLR-15397 Expose zookeeper status in the exporter, and in grafana da… (#116)

This is an automated email from the ASF dual-hosted git repository.

janhoy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new 151539db SOLR-15397 Expose zookeeper status in the exporter, and in grafana da… (#116)
151539db is described below

commit 151539db89ba7b6c08c65d6b533463f1686f8cb7
Author: Jan Høydahl <ja...@users.noreply.github.com>
AuthorDate: Wed May 12 10:18:03 2021 +0200

    SOLR-15397 Expose zookeeper status in the exporter, and in grafana da… (#116)
---
 solr/CHANGES.txt                                   |   4 +-
 .../conf/grafana-solr-dashboard.json               | 129 ++++++++++++++++++++-
 .../conf/solr-exporter-config.xml                  |  45 +++++++
 .../solr/handler/admin/ZookeeperStatusHandler.java |  26 +++--
 .../admin/ZookeeperStatusHandlerFailureTest.java   |  74 ++++++++++++
 5 files changed, 267 insertions(+), 11 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index cccc193..73d4cac 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -320,6 +320,8 @@ New Features
 
 * SOLR-15365: Improved Grafana dashboard for Prometheus Exporter with new Solr Cluster row (janhoy)
 
+* SOLR-15397: Expose zookeeper status in the Prometheus exporter (janhoy)
+
 Improvements
 ---------------------
 * SOLR-15081: Metrics for a core: add SolrCloud "isLeader" and "replicaState".  (David Smiley)
@@ -347,8 +349,6 @@ Improvements
 
 * SOLR-11233: Add optional JAVA8_GC_LOG_FILE_OPTS for bin/solr. (Pranav Murugappan, Christine Poerschke)
 
-* SOLR-15365: Improved Grafana dashboard for Prometheus Exporter with a new "cluster" row (janhoy)
-
 * SOLR-15155: Let CloudHttp2SolrClient accept an external Http2SolrClient Builder (Tomás Fernández Löbbe)
 
 * SOLR-15156: [child] doc transformer's childFilter param no longer applies query syntax escaping.
diff --git a/solr/contrib/prometheus-exporter/conf/grafana-solr-dashboard.json b/solr/contrib/prometheus-exporter/conf/grafana-solr-dashboard.json
index 6b8e01f..c28bc49 100644
--- a/solr/contrib/prometheus-exporter/conf/grafana-solr-dashboard.json
+++ b/solr/contrib/prometheus-exporter/conf/grafana-solr-dashboard.json
@@ -124,7 +124,7 @@
       "fillGradient": 0,
       "gridPos": {
         "h": 8,
-        "w": 12,
+        "w": 6,
         "x": 0,
         "y": 1
       },
@@ -176,7 +176,7 @@
           "refId": "A"
         },
         {
-          "expr": "count(count by (node_name) (solr_collections_replica_state))",
+          "expr": "count(count by (node_name) (solr_collections_replica_state)) < solr_collections_live_nodes or solr_collections_live_nodes",
           "hide": false,
           "interval": "",
           "intervalFactor": 1,
@@ -228,6 +228,131 @@
     },
     {
       "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": "Prometheus",
+      "description": "",
+      "fieldConfig": {
+        "defaults": {
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "fill": 0,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 8,
+        "w": 6,
+        "x": 6,
+        "y": 1
+      },
+      "hiddenSeries": false,
+      "id": 221,
+      "legend": {
+        "alignAsTable": false,
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": true,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 2,
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "7.5.4",
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [
+        {
+          "$$hashKey": "object:395",
+          "alias": "Ensemble size",
+          "lines": true
+        },
+        {
+          "$$hashKey": "object:396",
+          "alias": "Healthy nodes",
+          "bars": true,
+          "color": "#5794F2",
+          "lines": false
+        }
+      ],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "exemplar": true,
+          "expr": "count(solr_zookeeper_nodestatus == 1)",
+          "format": "time_series",
+          "instant": false,
+          "interval": "",
+          "legendFormat": "Healthy nodes",
+          "refId": "A"
+        },
+        {
+          "exemplar": true,
+          "expr": "solr_zookeeper_ensemble_size",
+          "hide": false,
+          "interval": "",
+          "intervalFactor": 1,
+          "legendFormat": "Ensemble size",
+          "refId": "B"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [],
+      "timeShift": null,
+      "title": "Zookeepers",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "$$hashKey": "object:413",
+          "decimals": 0,
+          "format": "short",
+          "label": "",
+          "logBase": 1,
+          "max": null,
+          "min": "0",
+          "show": true
+        },
+        {
+          "$$hashKey": "object:414",
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
       "bars": true,
       "dashLength": 10,
       "dashes": false,
diff --git a/solr/contrib/prometheus-exporter/conf/solr-exporter-config.xml b/solr/contrib/prometheus-exporter/conf/solr-exporter-config.xml
index 3595c1c..d25e033 100644
--- a/solr/contrib/prometheus-exporter/conf/solr-exporter-config.xml
+++ b/solr/contrib/prometheus-exporter/conf/solr-exporter-config.xml
@@ -1053,6 +1053,51 @@
           </str>
         </arr>
       </lst>
+      <lst name="request">
+        <lst name="query">
+          <str name="path">/admin/zookeeper/status</str>
+        </lst>
+        <arr name="jsonQueries">
+          <str>
+            .zkStatus.ensembleSize as $value |
+            .zkStatus.mode as $mode |
+            {
+            name         : "solr_zookeeper_ensemble_size",
+            type         : "GAUGE",
+            help         : "See following URL: https://solr.apache.org/guide/cloud-screens.html#zk-status-view",
+            label_names  : [],
+            label_values : [],
+            value        : $value
+            }
+          </str>
+          <str>
+            .zkStatus.details[] as $object |
+            $object.host as $host |
+            $object.ok as $ok |
+            (if $object.clientPort != null and $ok then 1.0 else 0.0 end) as $value |
+            {
+            name         : "solr_zookeeper_nodestatus",
+            type         : "GAUGE",
+            help         : "See following URL: https://solr.apache.org/guide/cloud-screens.html#zk-status-view",
+            label_names  : ["host"],
+            label_values : [$host],
+            value        : $value
+            }
+          </str>
+          <str>
+            .zkStatus.status as $statusText |
+            (if $statusText == "green" then 1.0 else 0.0 end) as $value |
+            {
+            name         : "solr_zookeeper_status",
+            type         : "GAUGE",
+            help         : "See following URL: https://solr.apache.org/guide/cloud-screens.html#zk-status-view",
+            label_names  : ["status"],
+            label_values : [$statusText],
+            value        : $value
+            }
+          </str>
+        </arr>
+      </lst>
     </collections>
 
     <!--
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/ZookeeperStatusHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/ZookeeperStatusHandler.java
index 2c45f7f..2aa4655 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/ZookeeperStatusHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/ZookeeperStatusHandler.java
@@ -40,6 +40,7 @@ import org.apache.solr.core.CoreContainer;
 import org.apache.solr.handler.RequestHandlerBase;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.SolrQueryResponse;
+import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -79,8 +80,18 @@ public class ZookeeperStatusHandler extends RequestHandlerBase {
     NamedList values = rsp.getValues();
     if (cores.isZooKeeperAware()) {
       String zkHost = cores.getZkController().getZkServerAddress();
-      SolrZkClient zkClient = cores.getZkController().getZkClient();
-      final ZkDynamicConfig dynConfig = ZkDynamicConfig.parseLines(zkClient.getConfig());
+      ZkDynamicConfig dynConfig = null;
+      try {
+        SolrZkClient zkClient = cores.getZkController().getZkClient();
+        dynConfig = ZkDynamicConfig.parseLines(zkClient.getConfig());
+      } catch (SolrException e) {
+        if (!(e.getCause() instanceof KeeperException)) {
+          throw e;
+        }
+        if (log.isWarnEnabled()) {
+          log.warn("{} - Continuing with static connection string", e.toString());
+        }
+      }
       values.add("zkStatus", getZkStatus(zkHost, dynConfig));
     } else {
       throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The Zookeeper status API is only available in Cloud mode");
@@ -102,7 +113,7 @@ public class ZookeeperStatusHandler extends RequestHandlerBase {
     final List<String> errors = new ArrayList<>();
     String status = STATUS_NA;
 
-    if (zkDynamicConfig.size() == 0) {
+    if (zkDynamicConfig == null || zkDynamicConfig.size() == 0) {
       // Fallback to parsing zkHost for older zk servers without support for dynamic reconfiguration
       dynamicReconfig = false;
       zookeepers = hostsFromConnectionString;
@@ -147,9 +158,6 @@ public class ZookeeperStatusHandler extends RequestHandlerBase {
           stat.remove("errors");
         }
         details.add(stat);
-        if ("true".equals(String.valueOf(stat.get("ok")))) {
-          numOk++;
-        }
         String state = String.valueOf(stat.get("zk_server_state"));
         if ("follower".equals(state) || "observer".equals(state)) {
           followers++;
@@ -176,6 +184,7 @@ public class ZookeeperStatusHandler extends RequestHandlerBase {
       }
     }
     zkStatus.put("details", details);
+    numOk = (int) details.stream().filter(m -> ((boolean) ((HashMap<String, Object>) m).get("ok"))).count();
     zkStatus.put("dynamicReconfig", dynamicReconfig);
     if (followers+leaders > 0 && standalone > 0) {
       status = STATUS_RED;
@@ -201,7 +210,7 @@ public class ZookeeperStatusHandler extends RequestHandlerBase {
       errors.add("Leader reports " + reportedFollowers + " followers, but we only found " + followers + 
         ". Please check zkHost configuration");
     }
-    if (followers+leaders == 0 && standalone == 1) {
+    if (followers+leaders == 0 && (standalone == 1 || zookeepers.size() == 1)) {
       zkStatus.put("mode", "standalone");
     }
     if (followers+leaders > 0 && (zookeepers.size())%2 == 0) {
@@ -213,6 +222,9 @@ public class ZookeeperStatusHandler extends RequestHandlerBase {
     if (followers+leaders > 0 && standalone == 0) {
       zkStatus.put("mode", "ensemble");
     }
+    if (numOk == 0) {
+      status = STATUS_RED;
+    }
     if (status.equals(STATUS_NA)) {
       if (numOk == zookeepers.size()) {
         status = STATUS_GREEN;
diff --git a/solr/core/src/test/org/apache/solr/handler/admin/ZookeeperStatusHandlerFailureTest.java b/solr/core/src/test/org/apache/solr/handler/admin/ZookeeperStatusHandlerFailureTest.java
new file mode 100644
index 0000000..090117a
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/handler/admin/ZookeeperStatusHandlerFailureTest.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.handler.admin;
+
+import org.apache.solr.client.solrj.SolrRequest;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.request.GenericSolrRequest;
+import org.apache.solr.client.solrj.response.DelegationTokenResponse;
+import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
+public class ZookeeperStatusHandlerFailureTest extends SolrCloudTestCase {
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    configureCluster(1)
+        .addConfig("conf", configset("cloud-minimal"))
+        .configure();
+    // Kill the ZK right after cluster startup, so the test below queries a node whose Zookeeper is gone
+    cluster.getZkServer().shutdown();
+  }
+
+  /*
+   Test the monitoring endpoint when no Zookeeper is answering. There should still be a response, with status "red", mode "standalone" and the single host reported as not ok
+  */
+  @Test
+  public void monitorZookeeperAfterZkShutdown() throws IOException, SolrServerException, InterruptedException, ExecutionException, TimeoutException {
+    URL baseUrl = cluster.getJettySolrRunner(0).getBaseUrl();
+    try (HttpSolrClient solr = new HttpSolrClient.Builder(baseUrl.toString()).build()) { // closed even if the request or an assertion fails
+      GenericSolrRequest mntrReq = new GenericSolrRequest(SolrRequest.METHOD.GET, "/admin/zookeeper/status", new ModifiableSolrParams());
+      mntrReq.setResponseParser(new DelegationTokenResponse.JsonMapResponseParser());
+      NamedList<Object> nl = solr.httpUriRequest(mntrReq).future.get(10000, TimeUnit.MILLISECONDS);
+
+      assertEquals("zkStatus", nl.getName(1));
+      @SuppressWarnings({"unchecked"})
+      Map<String,Object> zkStatus = (Map<String,Object>) nl.get("zkStatus");
+      assertEquals("red", zkStatus.get("status"));
+      assertEquals("standalone", zkStatus.get("mode"));
+      assertEquals(1L, zkStatus.get("ensembleSize"));
+      @SuppressWarnings({"unchecked"})
+      List<Object> detailsList = (List<Object>)zkStatus.get("details");
+      assertEquals(1, detailsList.size());
+      @SuppressWarnings({"unchecked"})
+      Map<String,Object> details = (Map<String,Object>) detailsList.get(0);
+      assertEquals(false, details.get("ok"));
+    }
+  }
+}
\ No newline at end of file