You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ja...@apache.org on 2021/05/12 08:18:10 UTC
[solr] branch main updated: SOLR-15397 Expose zookeeper status in the exporter, and in grafana da… (#116)
This is an automated email from the ASF dual-hosted git repository.
janhoy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new 151539db SOLR-15397 Expose zookeeper status in the exporter, and in grafana da… (#116)
151539db is described below
commit 151539db89ba7b6c08c65d6b533463f1686f8cb7
Author: Jan Høydahl <ja...@users.noreply.github.com>
AuthorDate: Wed May 12 10:18:03 2021 +0200
SOLR-15397 Expose zookeeper status in the exporter, and in grafana da… (#116)
---
solr/CHANGES.txt | 4 +-
.../conf/grafana-solr-dashboard.json | 129 ++++++++++++++++++++-
.../conf/solr-exporter-config.xml | 45 +++++++
.../solr/handler/admin/ZookeeperStatusHandler.java | 26 +++--
.../admin/ZookeeperStatusHandlerFailureTest.java | 74 ++++++++++++
5 files changed, 267 insertions(+), 11 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index cccc193..73d4cac 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -320,6 +320,8 @@ New Features
* SOLR-15365: Improved Grafana dashboard for Prometheus Exporter with new Solr Cluster row (janhoy)
+* SOLR-15397: Expose zookeeper status in the Prometheus exporter (janhoy)
+
Improvements
---------------------
* SOLR-15081: Metrics for a core: add SolrCloud "isLeader" and "replicaState". (David Smiley)
@@ -347,8 +349,6 @@ Improvements
* SOLR-11233: Add optional JAVA8_GC_LOG_FILE_OPTS for bin/solr. (Pranav Murugappan, Christine Poerschke)
-* SOLR-15365: Improved Grafana dashboard for Prometheus Exporter with a new "cluster" row (janhoy)
-
* SOLR-15155: Let CloudHttp2SolrClient accept an external Http2SolrClient Builder (Tomás Fernández Löbbe)
* SOLR-15156: [child] doc transformer's childFilter param no longer applies query syntax escaping.
diff --git a/solr/contrib/prometheus-exporter/conf/grafana-solr-dashboard.json b/solr/contrib/prometheus-exporter/conf/grafana-solr-dashboard.json
index 6b8e01f..c28bc49 100644
--- a/solr/contrib/prometheus-exporter/conf/grafana-solr-dashboard.json
+++ b/solr/contrib/prometheus-exporter/conf/grafana-solr-dashboard.json
@@ -124,7 +124,7 @@
"fillGradient": 0,
"gridPos": {
"h": 8,
- "w": 12,
+ "w": 6,
"x": 0,
"y": 1
},
@@ -176,7 +176,7 @@
"refId": "A"
},
{
- "expr": "count(count by (node_name) (solr_collections_replica_state))",
+ "expr": "count(count by (node_name) (solr_collections_replica_state)) < solr_collections_live_nodes or solr_collections_live_nodes",
"hide": false,
"interval": "",
"intervalFactor": 1,
@@ -228,6 +228,131 @@
},
{
"aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 6,
+ "x": 6,
+ "y": 1
+ },
+ "hiddenSeries": false,
+ "id": 221,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.5.4",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:395",
+ "alias": "Ensemble size",
+ "lines": true
+ },
+ {
+ "$$hashKey": "object:396",
+ "alias": "Healthy nodes",
+ "bars": true,
+ "color": "#5794F2",
+ "lines": false
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "count(solr_zookeeper_nodestatus == 1)",
+ "format": "time_series",
+ "instant": false,
+ "interval": "",
+ "legendFormat": "Healthy nodes",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "solr_zookeeper_ensemble_size",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Ensemble size",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Zookeepers",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:413",
+ "decimals": 0,
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:414",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
"bars": true,
"dashLength": 10,
"dashes": false,
diff --git a/solr/contrib/prometheus-exporter/conf/solr-exporter-config.xml b/solr/contrib/prometheus-exporter/conf/solr-exporter-config.xml
index 3595c1c..d25e033 100644
--- a/solr/contrib/prometheus-exporter/conf/solr-exporter-config.xml
+++ b/solr/contrib/prometheus-exporter/conf/solr-exporter-config.xml
@@ -1053,6 +1053,51 @@
</str>
</arr>
</lst>
+ <lst name="request">
+ <lst name="query">
+ <str name="path">/admin/zookeeper/status</str>
+ </lst>
+ <arr name="jsonQueries">
+ <str>
+ .zkStatus.ensembleSize as $value |
+ .zkStatus.mode as $mode |
+ {
+ name : "solr_zookeeper_ensemble_size",
+ type : "GAUGE",
+ help : "See following URL: https://solr.apache.org/guide/cloud-screens.html#zk-status-view",
+ label_names : [],
+ label_values : [],
+ value : $value
+ }
+ </str>
+ <str>
+ .zkStatus.details[] as $object |
+ $object.host as $host |
+ $object.ok as $ok |
+ (if $object.clientPort != null and $ok then 1.0 else 0.0 end) as $value |
+ {
+ name : "solr_zookeeper_nodestatus",
+ type : "GAUGE",
+ help : "See following URL: https://solr.apache.org/guide/cloud-screens.html#zk-status-view",
+ label_names : ["host"],
+ label_values : [$host],
+ value : $value
+ }
+ </str>
+ <str>
+ .zkStatus.status as $statusText |
+ (if $statusText == "green" then 1.0 else 0.0 end) as $value |
+ {
+ name : "solr_zookeeper_status",
+ type : "GAUGE",
+ help : "See following URL: https://solr.apache.org/guide/cloud-screens.html#zk-status-view",
+ label_names : ["status"],
+ label_values : [$statusText],
+ value : $value
+ }
+ </str>
+ </arr>
+ </lst>
</collections>
<!--
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/ZookeeperStatusHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/ZookeeperStatusHandler.java
index 2c45f7f..2aa4655 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/ZookeeperStatusHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/ZookeeperStatusHandler.java
@@ -40,6 +40,7 @@ import org.apache.solr.core.CoreContainer;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
+import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -79,8 +80,18 @@ public class ZookeeperStatusHandler extends RequestHandlerBase {
NamedList values = rsp.getValues();
if (cores.isZooKeeperAware()) {
String zkHost = cores.getZkController().getZkServerAddress();
- SolrZkClient zkClient = cores.getZkController().getZkClient();
- final ZkDynamicConfig dynConfig = ZkDynamicConfig.parseLines(zkClient.getConfig());
+ ZkDynamicConfig dynConfig = null;
+ try {
+ SolrZkClient zkClient = cores.getZkController().getZkClient();
+ dynConfig = ZkDynamicConfig.parseLines(zkClient.getConfig());
+ } catch (SolrException e) {
+ if (!(e.getCause() instanceof KeeperException)) {
+ throw e;
+ }
+ if (log.isWarnEnabled()) {
+ log.warn("{} - Continuing with static connection string", e.toString());
+ }
+ }
values.add("zkStatus", getZkStatus(zkHost, dynConfig));
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The Zookeeper status API is only available in Cloud mode");
@@ -102,7 +113,7 @@ public class ZookeeperStatusHandler extends RequestHandlerBase {
final List<String> errors = new ArrayList<>();
String status = STATUS_NA;
- if (zkDynamicConfig.size() == 0) {
+ if (zkDynamicConfig == null || zkDynamicConfig.size() == 0) {
// Fallback to parsing zkHost for older zk servers without support for dynamic reconfiguration
dynamicReconfig = false;
zookeepers = hostsFromConnectionString;
@@ -147,9 +158,6 @@ public class ZookeeperStatusHandler extends RequestHandlerBase {
stat.remove("errors");
}
details.add(stat);
- if ("true".equals(String.valueOf(stat.get("ok")))) {
- numOk++;
- }
String state = String.valueOf(stat.get("zk_server_state"));
if ("follower".equals(state) || "observer".equals(state)) {
followers++;
@@ -176,6 +184,7 @@ public class ZookeeperStatusHandler extends RequestHandlerBase {
}
}
zkStatus.put("details", details);
+ numOk = (int) details.stream().filter(m -> ((boolean) ((HashMap<String, Object>) m).get("ok"))).count();
zkStatus.put("dynamicReconfig", dynamicReconfig);
if (followers+leaders > 0 && standalone > 0) {
status = STATUS_RED;
@@ -201,7 +210,7 @@ public class ZookeeperStatusHandler extends RequestHandlerBase {
errors.add("Leader reports " + reportedFollowers + " followers, but we only found " + followers +
". Please check zkHost configuration");
}
- if (followers+leaders == 0 && standalone == 1) {
+ if (followers+leaders == 0 && (standalone == 1 || zookeepers.size() == 1)) {
zkStatus.put("mode", "standalone");
}
if (followers+leaders > 0 && (zookeepers.size())%2 == 0) {
@@ -213,6 +222,9 @@ public class ZookeeperStatusHandler extends RequestHandlerBase {
if (followers+leaders > 0 && standalone == 0) {
zkStatus.put("mode", "ensemble");
}
+ if (numOk == 0) {
+ status = STATUS_RED;
+ }
if (status.equals(STATUS_NA)) {
if (numOk == zookeepers.size()) {
status = STATUS_GREEN;
diff --git a/solr/core/src/test/org/apache/solr/handler/admin/ZookeeperStatusHandlerFailureTest.java b/solr/core/src/test/org/apache/solr/handler/admin/ZookeeperStatusHandlerFailureTest.java
new file mode 100644
index 0000000..090117a
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/handler/admin/ZookeeperStatusHandlerFailureTest.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.handler.admin;
+
+import org.apache.solr.client.solrj.SolrRequest;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.request.GenericSolrRequest;
+import org.apache.solr.client.solrj.response.DelegationTokenResponse;
+import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
+public class ZookeeperStatusHandlerFailureTest extends SolrCloudTestCase {
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ configureCluster(1)
+ .addConfig("conf", configset("cloud-minimal"))
+ .configure();
+ // Kill the ZK
+ cluster.getZkServer().shutdown();
+ }
+
+ /*
+ Test the monitoring endpoint, when no Zookeeper is answering. There should still be a response
+ */
+ @Test
+ public void monitorZookeeperAfterZkShutdown() throws IOException, SolrServerException, InterruptedException, ExecutionException, TimeoutException {
+ URL baseUrl = cluster.getJettySolrRunner(0).getBaseUrl();
+ HttpSolrClient solr = new HttpSolrClient.Builder(baseUrl.toString()).build();
+ GenericSolrRequest mntrReq = new GenericSolrRequest(SolrRequest.METHOD.GET, "/admin/zookeeper/status", new ModifiableSolrParams());
+ mntrReq.setResponseParser(new DelegationTokenResponse.JsonMapResponseParser());
+ NamedList<Object> nl = solr.httpUriRequest(mntrReq).future.get(10000, TimeUnit.MILLISECONDS);
+
+ assertEquals("zkStatus", nl.getName(1));
+ @SuppressWarnings({"unchecked"})
+ Map<String,Object> zkStatus = (Map<String,Object>) nl.get("zkStatus");
+ assertEquals("red", zkStatus.get("status"));
+ assertEquals("standalone", zkStatus.get("mode"));
+ assertEquals(1L, zkStatus.get("ensembleSize"));
+ @SuppressWarnings({"unchecked"})
+ List<Object> detailsList = (List<Object>)zkStatus.get("details");
+ assertEquals(1, detailsList.size());
+ @SuppressWarnings({"unchecked"})
+ Map<String,Object> details = (Map<String,Object>) detailsList.get(0);
+ assertEquals(false, details.get("ok"));
+ solr.close();
+ }
+}
\ No newline at end of file