You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2020/07/30 07:39:58 UTC

[lucene-solr] 01/02: @465 Fix global metrics collection in a tolerable way.

This is an automated email from the ASF dual-hosted git repository.

markrmiller pushed a commit to branch reference_impl_dev
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit ecdaa14c0506ad20d7cf5f1b32ad0d343a005dd4
Author: markrmiller@gmail.com <ma...@gmail.com>
AuthorDate: Thu Jul 30 02:36:55 2020 -0500

    @465 Fix global metrics collection in a tolerable way.
---
 .../cloud/autoscaling/sim/SimCloudManager.java     |  5 +-
 .../java/org/apache/solr/core/CoreContainer.java   |  2 +-
 .../solr/handler/admin/MetricsHistoryHandler.java  | 67 +++-------------------
 .../handler/admin/MetricsHistoryHandlerTest.java   |  8 +--
 4 files changed, 14 insertions(+), 68 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
index 9f9e9a2..1a0319c 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
@@ -84,7 +84,6 @@ import org.apache.solr.common.params.CoreAdminParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.ContentStreamBase;
-import org.apache.solr.common.util.ExecutorUtil;
 import org.apache.solr.common.util.IOUtils;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.ObjectCache;
@@ -102,7 +101,6 @@ import org.apache.solr.metrics.SolrMetricManager;
 import org.apache.solr.metrics.SolrMetricsContext;
 import org.apache.solr.request.LocalSolrQueryRequest;
 import org.apache.solr.response.SolrQueryResponse;
-import org.apache.solr.common.util.SolrNamedThreadFactory;
 import org.apache.solr.util.MockSearchableSolrClient;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -485,7 +483,8 @@ public class SimCloudManager implements SolrCloudManager {
     // initialize history handler if this is the first node
     if (metricsHistoryHandler == null && liveNodesSet.size() == 1) {
       metricsHandler = new MetricsHandler(metricManager);
-      metricsHistoryHandler = new MetricsHistoryHandler(nodeId, metricsHandler, solrClient, this, new HashMap<>());
+      metricsHistoryHandler = new MetricsHistoryHandler(nodeId, metricsHandler, solrClient, this, new HashMap<>(),
+          null);
       SolrMetricsContext solrMetricsContext = new SolrMetricsContext(metricManager, SolrMetricManager.getRegistryName(SolrInfoBean.Group.node), metricTag);
       metricsHistoryHandler.initializeMetrics(solrMetricsContext, CommonParams.METRICS_HISTORY_PATH);
     }
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index 3a858c4..f0fc756 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -991,7 +991,7 @@ public class CoreContainer implements Closeable {
       }
     }
     metricsHistoryHandler = new MetricsHistoryHandler(name, metricsHandler,
-        client, cloudManager, initArgs);
+        client, cloudManager, initArgs, zkSys.getZkController().getOverseer());
     containerHandlers.put(METRICS_HISTORY_PATH, metricsHistoryHandler);
     metricsHistoryHandler.initializeMetrics(solrMetricsContext, METRICS_HISTORY_PATH);
   }
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/MetricsHistoryHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/MetricsHistoryHandler.java
index 05234a3..097959e 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/MetricsHistoryHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/MetricsHistoryHandler.java
@@ -19,14 +19,11 @@ package org.apache.solr.handler.admin;
 import javax.imageio.ImageIO;
 import java.awt.Color;
 import java.awt.image.BufferedImage;
-import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.Closeable;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
-import java.net.MalformedURLException;
 import java.net.URI;
-import java.net.URL;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -38,7 +35,6 @@ import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
-import java.util.NoSuchElementException;
 import java.util.Set;
 import java.util.TimeZone;
 import java.util.concurrent.ConcurrentHashMap;
@@ -57,14 +53,10 @@ import org.apache.solr.api.Api;
 import org.apache.solr.api.ApiBag;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrQuery;
-import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.cloud.NodeStateProvider;
 import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo;
 import org.apache.solr.client.solrj.cloud.autoscaling.Variable;
-import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData;
-import org.apache.solr.client.solrj.impl.HttpClientUtil;
-import org.apache.solr.cloud.LeaderElector;
 import org.apache.solr.cloud.Overseer;
 import org.apache.solr.common.ParWork;
 import org.apache.solr.common.SolrException;
@@ -72,20 +64,17 @@ import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.Slice;
-import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.params.CollectionAdminParams;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.Base64;
 import org.apache.solr.common.util.ExecutorUtil;
-import org.apache.solr.common.util.JavaBinCodec;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.ObjectReleaseTracker;
 import org.apache.solr.common.util.Pair;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.common.util.TimeSource;
-import org.apache.solr.common.util.Utils;
 import org.apache.solr.handler.RequestHandlerBase;
 import org.apache.solr.metrics.SolrMetricManager;
 import org.apache.solr.metrics.rrd.SolrRrdBackendFactory;
@@ -94,7 +83,6 @@ import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.security.AuthorizationContext;
 import org.apache.solr.security.PermissionNameProvider;
 import org.apache.solr.common.util.SolrNamedThreadFactory;
-import org.apache.zookeeper.KeeperException;
 import org.rrd4j.ConsolFun;
 import org.rrd4j.DsType;
 import org.rrd4j.core.ArcDef;
@@ -104,7 +92,6 @@ import org.rrd4j.core.DsDef;
 import org.rrd4j.core.FetchData;
 import org.rrd4j.core.FetchRequest;
 import org.rrd4j.core.RrdDb;
-import org.rrd4j.core.RrdDbPool;
 import org.rrd4j.core.RrdDef;
 import org.rrd4j.core.Sample;
 import org.rrd4j.graph.RrdGraph;
@@ -113,7 +100,6 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import static java.util.stream.Collectors.toMap;
-import static org.apache.solr.common.params.CommonParams.ID;
 
 /**
  * Collects metrics from all nodes in the system on a regular basis in a background thread.
@@ -170,6 +156,7 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
   private final String overseerUrlScheme;
 
   private final Map<String, RrdDb> knownDbs = new ConcurrentHashMap<>(16, 0.75f, 4);
+  private final Overseer overseer;
 
   private ScheduledThreadPoolExecutor collectService;
   private boolean logMissingCollection = true;
@@ -179,8 +166,9 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
   private volatile String versionString;
 
   public MetricsHistoryHandler(String nodeName, MetricsHandler metricsHandler,
-        SolrClient solrClient, SolrCloudManager cloudManager, Map<String, Object> pluginArgs) {
-
+      SolrClient solrClient, SolrCloudManager cloudManager,
+      Map<String,Object> pluginArgs, Overseer overseer) {
+    this.overseer = overseer;
     Map<String, Object> args = new HashMap<>();
     // init from optional solr.xml config
     if (pluginArgs != null) {
@@ -232,7 +220,7 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
     }
 
     if (enable) {
-      collectService = (ScheduledThreadPoolExecutor) Executors.newScheduledThreadPool(1,
+      collectService = (ScheduledThreadPoolExecutor) Executors.newFixedThreadPool(1,
           new SolrNamedThreadFactory("MetricsHistoryHandler"));
       collectService.setRemoveOnCancelPolicy(true);
       collectService.setExecuteExistingDelayedTasksAfterShutdownPolicy(false);
@@ -318,44 +306,6 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
     return factory;
   }
 
-  private String getOverseerLeader() {
-    // non-ZK node has no Overseer
-    if (cloudManager == null) {
-      return null;
-    }
-    ZkNodeProps props = null;
-    try {
-      VersionedData data = cloudManager.getDistribStateManager().getData(
-          Overseer.OVERSEER_ELECT + "/leader");
-      if (data != null && data.getData() != null) {
-        props = ZkNodeProps.load(data.getData());
-      }
-    } catch (IOException | NoSuchElementException e) {
-      log.warn("Could not obtain overseer's address, skipping.", e);
-      return null;
-    } catch (InterruptedException e) {
-      ParWork.propegateInterrupt(e);
-      return null;
-    } catch (KeeperException e) {
-      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
-    }
-    if (props == null) {
-      return null;
-    }
-    String oid = props.getStr(ID);
-    if (oid == null) {
-      return null;
-    }
-    String nodeName = null;
-    try {
-      nodeName = LeaderElector.getNodeName(oid);
-    } catch (Exception e) {
-      ParWork.propegateInterrupt(e);
-      log.warn("Unknown format of leader id, skipping: {}", oid, e);
-      return null;
-    }
-    return nodeName;
-  }
 
   private void collectMetrics() {
     log.debug("-- collectMetrics");
@@ -462,10 +412,9 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
   }
 
   private void collectGlobalMetrics() {
-    // nocommit - this stuff is slow and hackey and too hard to do righ this way
-//    if (!amIOverseerLeader()) {
-//      return;
-//    }
+    if (overseer == null || !overseer.getUpdaterThread().isAlive()) {
+      return;
+    }
     Set<String> nodes = new HashSet<>(cloudManager.getClusterStateProvider().getLiveNodes());
     NodeStateProvider nodeStateProvider = cloudManager.getNodeStateProvider();
     Set<String> collTags = new HashSet<>();
diff --git a/solr/core/src/test/org/apache/solr/handler/admin/MetricsHistoryHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/admin/MetricsHistoryHandlerTest.java
index e975675..224fb21 100644
--- a/solr/core/src/test/org/apache/solr/handler/admin/MetricsHistoryHandlerTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/admin/MetricsHistoryHandlerTest.java
@@ -33,7 +33,6 @@ import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.util.Pair;
 import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.core.SolrInfoBean;
-import org.apache.solr.metrics.SolrCoreMetricManager;
 import org.apache.solr.metrics.SolrMetricManager;
 import org.apache.solr.metrics.SolrMetricsContext;
 import org.apache.solr.util.LogLevel;
@@ -86,7 +85,7 @@ public class MetricsHistoryHandlerTest extends SolrCloudTestCase {
       metricsHandler = new MetricsHandler(metricManager);
       SolrMetricsContext solrMetricsContext = new SolrMetricsContext(metricManager, SolrInfoBean.Group.node.toString(), "");
       handler = new MetricsHistoryHandler(cloudManager.getClusterStateProvider().getLiveNodes().iterator().next(),
-          metricsHandler, solrClient, cloudManager, args);
+          metricsHandler, solrClient, cloudManager, args, null);
       handler.initializeMetrics(solrMetricsContext, CommonParams.METRICS_HISTORY_PATH);
     }
     configureCluster(1)
@@ -98,7 +97,8 @@ public class MetricsHistoryHandlerTest extends SolrCloudTestCase {
       metricManager = cluster.getJettySolrRunner(0).getCoreContainer().getMetricManager();
       solrClient = cluster.getSolrClient();
       metricsHandler = new MetricsHandler(metricManager);
-      handler = new MetricsHistoryHandler(cluster.getJettySolrRunner(0).getNodeName(), metricsHandler, solrClient, cloudManager, args);
+      handler = new MetricsHistoryHandler(cluster.getJettySolrRunner(0).getNodeName(), metricsHandler, solrClient, cloudManager, args,
+          null);
       SolrMetricsContext solrMetricsContext = new SolrMetricsContext(metricManager, SolrInfoBean.Group.node.toString(), "");
       handler.initializeMetrics(solrMetricsContext, CommonParams.METRICS_HISTORY_PATH);
       SPEED = 1;
@@ -109,8 +109,6 @@ public class MetricsHistoryHandlerTest extends SolrCloudTestCase {
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(CollectionAdminParams.SYSTEM_COLL,
         "conf", 1, 1);
     create.process(solrClient);
-    CloudUtil.waitForState(cloudManager, "failed to create " + CollectionAdminParams.SYSTEM_COLL,
-        CollectionAdminParams.SYSTEM_COLL, CloudUtil.clusterShape(1, 1));
   }
 
   @AfterClass