You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2020/07/30 07:39:58 UTC
[lucene-solr] 01/02: @465 Fix global metrics collection in a
tolerable way.
This is an automated email from the ASF dual-hosted git repository.
markrmiller pushed a commit to branch reference_impl_dev
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
commit ecdaa14c0506ad20d7cf5f1b32ad0d343a005dd4
Author: markrmiller@gmail.com <ma...@gmail.com>
AuthorDate: Thu Jul 30 02:36:55 2020 -0500
@465 Fix global metrics collection in a tolerable way.
---
.../cloud/autoscaling/sim/SimCloudManager.java | 5 +-
.../java/org/apache/solr/core/CoreContainer.java | 2 +-
.../solr/handler/admin/MetricsHistoryHandler.java | 67 +++-------------------
.../handler/admin/MetricsHistoryHandlerTest.java | 8 +--
4 files changed, 14 insertions(+), 68 deletions(-)
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
index 9f9e9a2..1a0319c 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
@@ -84,7 +84,6 @@ import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStreamBase;
-import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.IOUtils;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.ObjectCache;
@@ -102,7 +101,6 @@ import org.apache.solr.metrics.SolrMetricManager;
import org.apache.solr.metrics.SolrMetricsContext;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
-import org.apache.solr.common.util.SolrNamedThreadFactory;
import org.apache.solr.util.MockSearchableSolrClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -485,7 +483,8 @@ public class SimCloudManager implements SolrCloudManager {
// initialize history handler if this is the first node
if (metricsHistoryHandler == null && liveNodesSet.size() == 1) {
metricsHandler = new MetricsHandler(metricManager);
- metricsHistoryHandler = new MetricsHistoryHandler(nodeId, metricsHandler, solrClient, this, new HashMap<>());
+ metricsHistoryHandler = new MetricsHistoryHandler(nodeId, metricsHandler, solrClient, this, new HashMap<>(),
+ null);
SolrMetricsContext solrMetricsContext = new SolrMetricsContext(metricManager, SolrMetricManager.getRegistryName(SolrInfoBean.Group.node), metricTag);
metricsHistoryHandler.initializeMetrics(solrMetricsContext, CommonParams.METRICS_HISTORY_PATH);
}
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index 3a858c4..f0fc756 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -991,7 +991,7 @@ public class CoreContainer implements Closeable {
}
}
metricsHistoryHandler = new MetricsHistoryHandler(name, metricsHandler,
- client, cloudManager, initArgs);
+ client, cloudManager, initArgs, zkSys.getZkController().getOverseer());
containerHandlers.put(METRICS_HISTORY_PATH, metricsHistoryHandler);
metricsHistoryHandler.initializeMetrics(solrMetricsContext, METRICS_HISTORY_PATH);
}
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/MetricsHistoryHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/MetricsHistoryHandler.java
index 05234a3..097959e 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/MetricsHistoryHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/MetricsHistoryHandler.java
@@ -19,14 +19,11 @@ package org.apache.solr.handler.admin;
import javax.imageio.ImageIO;
import java.awt.Color;
import java.awt.image.BufferedImage;
-import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
-import java.net.MalformedURLException;
import java.net.URI;
-import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -38,7 +35,6 @@ import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
-import java.util.NoSuchElementException;
import java.util.Set;
import java.util.TimeZone;
import java.util.concurrent.ConcurrentHashMap;
@@ -57,14 +53,10 @@ import org.apache.solr.api.Api;
import org.apache.solr.api.ApiBag;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
-import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.cloud.NodeStateProvider;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo;
import org.apache.solr.client.solrj.cloud.autoscaling.Variable;
-import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData;
-import org.apache.solr.client.solrj.impl.HttpClientUtil;
-import org.apache.solr.cloud.LeaderElector;
import org.apache.solr.cloud.Overseer;
import org.apache.solr.common.ParWork;
import org.apache.solr.common.SolrException;
@@ -72,20 +64,17 @@ import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
-import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.Base64;
import org.apache.solr.common.util.ExecutorUtil;
-import org.apache.solr.common.util.JavaBinCodec;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.ObjectReleaseTracker;
import org.apache.solr.common.util.Pair;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.TimeSource;
-import org.apache.solr.common.util.Utils;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.metrics.SolrMetricManager;
import org.apache.solr.metrics.rrd.SolrRrdBackendFactory;
@@ -94,7 +83,6 @@ import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.security.AuthorizationContext;
import org.apache.solr.security.PermissionNameProvider;
import org.apache.solr.common.util.SolrNamedThreadFactory;
-import org.apache.zookeeper.KeeperException;
import org.rrd4j.ConsolFun;
import org.rrd4j.DsType;
import org.rrd4j.core.ArcDef;
@@ -104,7 +92,6 @@ import org.rrd4j.core.DsDef;
import org.rrd4j.core.FetchData;
import org.rrd4j.core.FetchRequest;
import org.rrd4j.core.RrdDb;
-import org.rrd4j.core.RrdDbPool;
import org.rrd4j.core.RrdDef;
import org.rrd4j.core.Sample;
import org.rrd4j.graph.RrdGraph;
@@ -113,7 +100,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static java.util.stream.Collectors.toMap;
-import static org.apache.solr.common.params.CommonParams.ID;
/**
* Collects metrics from all nodes in the system on a regular basis in a background thread.
@@ -170,6 +156,7 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
private final String overseerUrlScheme;
private final Map<String, RrdDb> knownDbs = new ConcurrentHashMap<>(16, 0.75f, 4);
+ private final Overseer overseer;
private ScheduledThreadPoolExecutor collectService;
private boolean logMissingCollection = true;
@@ -179,8 +166,9 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
private volatile String versionString;
public MetricsHistoryHandler(String nodeName, MetricsHandler metricsHandler,
- SolrClient solrClient, SolrCloudManager cloudManager, Map<String, Object> pluginArgs) {
-
+ SolrClient solrClient, SolrCloudManager cloudManager,
+ Map<String,Object> pluginArgs, Overseer overseer) {
+ this.overseer = overseer;
Map<String, Object> args = new HashMap<>();
// init from optional solr.xml config
if (pluginArgs != null) {
@@ -232,7 +220,7 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
}
if (enable) {
- collectService = (ScheduledThreadPoolExecutor) Executors.newScheduledThreadPool(1,
+ collectService = (ScheduledThreadPoolExecutor) Executors.newFixedThreadPool(1,
new SolrNamedThreadFactory("MetricsHistoryHandler"));
collectService.setRemoveOnCancelPolicy(true);
collectService.setExecuteExistingDelayedTasksAfterShutdownPolicy(false);
@@ -318,44 +306,6 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
return factory;
}
- private String getOverseerLeader() {
- // non-ZK node has no Overseer
- if (cloudManager == null) {
- return null;
- }
- ZkNodeProps props = null;
- try {
- VersionedData data = cloudManager.getDistribStateManager().getData(
- Overseer.OVERSEER_ELECT + "/leader");
- if (data != null && data.getData() != null) {
- props = ZkNodeProps.load(data.getData());
- }
- } catch (IOException | NoSuchElementException e) {
- log.warn("Could not obtain overseer's address, skipping.", e);
- return null;
- } catch (InterruptedException e) {
- ParWork.propegateInterrupt(e);
- return null;
- } catch (KeeperException e) {
- throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
- }
- if (props == null) {
- return null;
- }
- String oid = props.getStr(ID);
- if (oid == null) {
- return null;
- }
- String nodeName = null;
- try {
- nodeName = LeaderElector.getNodeName(oid);
- } catch (Exception e) {
- ParWork.propegateInterrupt(e);
- log.warn("Unknown format of leader id, skipping: {}", oid, e);
- return null;
- }
- return nodeName;
- }
private void collectMetrics() {
log.debug("-- collectMetrics");
@@ -462,10 +412,9 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
}
private void collectGlobalMetrics() {
- // nocommit - this stuff is slow and hackey and too hard to do righ this way
-// if (!amIOverseerLeader()) {
-// return;
-// }
+ if (overseer == null || !overseer.getUpdaterThread().isAlive()) {
+ return;
+ }
Set<String> nodes = new HashSet<>(cloudManager.getClusterStateProvider().getLiveNodes());
NodeStateProvider nodeStateProvider = cloudManager.getNodeStateProvider();
Set<String> collTags = new HashSet<>();
diff --git a/solr/core/src/test/org/apache/solr/handler/admin/MetricsHistoryHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/admin/MetricsHistoryHandlerTest.java
index e975675..224fb21 100644
--- a/solr/core/src/test/org/apache/solr/handler/admin/MetricsHistoryHandlerTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/admin/MetricsHistoryHandlerTest.java
@@ -33,7 +33,6 @@ import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.util.Pair;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.core.SolrInfoBean;
-import org.apache.solr.metrics.SolrCoreMetricManager;
import org.apache.solr.metrics.SolrMetricManager;
import org.apache.solr.metrics.SolrMetricsContext;
import org.apache.solr.util.LogLevel;
@@ -86,7 +85,7 @@ public class MetricsHistoryHandlerTest extends SolrCloudTestCase {
metricsHandler = new MetricsHandler(metricManager);
SolrMetricsContext solrMetricsContext = new SolrMetricsContext(metricManager, SolrInfoBean.Group.node.toString(), "");
handler = new MetricsHistoryHandler(cloudManager.getClusterStateProvider().getLiveNodes().iterator().next(),
- metricsHandler, solrClient, cloudManager, args);
+ metricsHandler, solrClient, cloudManager, args, null);
handler.initializeMetrics(solrMetricsContext, CommonParams.METRICS_HISTORY_PATH);
}
configureCluster(1)
@@ -98,7 +97,8 @@ public class MetricsHistoryHandlerTest extends SolrCloudTestCase {
metricManager = cluster.getJettySolrRunner(0).getCoreContainer().getMetricManager();
solrClient = cluster.getSolrClient();
metricsHandler = new MetricsHandler(metricManager);
- handler = new MetricsHistoryHandler(cluster.getJettySolrRunner(0).getNodeName(), metricsHandler, solrClient, cloudManager, args);
+ handler = new MetricsHistoryHandler(cluster.getJettySolrRunner(0).getNodeName(), metricsHandler, solrClient, cloudManager, args,
+ null);
SolrMetricsContext solrMetricsContext = new SolrMetricsContext(metricManager, SolrInfoBean.Group.node.toString(), "");
handler.initializeMetrics(solrMetricsContext, CommonParams.METRICS_HISTORY_PATH);
SPEED = 1;
@@ -109,8 +109,6 @@ public class MetricsHistoryHandlerTest extends SolrCloudTestCase {
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(CollectionAdminParams.SYSTEM_COLL,
"conf", 1, 1);
create.process(solrClient);
- CloudUtil.waitForState(cloudManager, "failed to create " + CollectionAdminParams.SYSTEM_COLL,
- CollectionAdminParams.SYSTEM_COLL, CloudUtil.clusterShape(1, 1));
}
@AfterClass