You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by no...@apache.org on 2022/06/01 05:50:58 UTC

[solr] branch main updated: SOLR-16146: Avoid loading all collections during node startup (#804)

This is an automated email from the ASF dual-hosted git repository.

noble pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new 51cf9776420 SOLR-16146:  Avoid loading all collections during node startup (#804)
51cf9776420 is described below

commit 51cf9776420b953a48fbea5194b1a4dc0fc39f17
Author: Noble Paul <no...@users.noreply.github.com>
AuthorDate: Wed Jun 1 01:50:54 2022 -0400

    SOLR-16146:  Avoid loading all collections during node startup (#804)
---
 solr/CHANGES.txt                                   |   2 +
 .../java/org/apache/solr/cloud/ZkController.java   |   5 +-
 .../handler/component/HttpShardHandlerFactory.java |   4 +
 .../src/java/org/apache/solr/common/MapWriter.java |   3 +
 .../solr/common/cloud/NodesSysPropsCacher.java     | 231 ++++++---------------
 .../solr/common/cloud/TestNodesSysPropsCacher.java |  56 +++++
 6 files changed, 132 insertions(+), 169 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 4a0cdd94fd3..7227324bc55 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -55,6 +55,8 @@ Optimizations
 * SOLR-14765: Optimize DocList creation for sort-irrelevant cases. This issue also reworks the building and caching of liveDocs
   in SolrIndexSearcher, and refines/clarifies the effect of `useFilterForSortedQuery` (Michael Gibney, Mike Drob, David Smiley)
 
+* SOLR-16146: Avoid loading all collections during node startup (noble)
+
 Bug Fixes
 ---------------------
 * SOLR-15918: Skip repetitive parent znode creation on config set upload (Mike Drob)
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index bb8936b78b7..5b55bc4ed8d 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -91,6 +91,7 @@ import org.apache.solr.core.NodeRoles;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.core.SolrCoreInitializationException;
 import org.apache.solr.handler.component.HttpShardHandler;
+import org.apache.solr.handler.component.HttpShardHandlerFactory;
 import org.apache.solr.logging.MDCLoggingContext;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.update.UpdateLog;
@@ -504,8 +505,8 @@ public class ZkController implements Closeable {
     this.overseerConfigSetQueue = overseer.getConfigSetQueue(zkClient);
     this.sysPropsCacher =
         new NodesSysPropsCacher(
-            getSolrCloudManager().getNodeStateProvider(), getNodeName(), zkStateReader);
-
+            ((HttpShardHandlerFactory) getCoreContainer().getShardHandlerFactory()).getClient(),
+            zkStateReader);
     assert ObjectReleaseTracker.track(this);
   }
 
diff --git a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java
index 275403426c5..977e3f180dc 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java
@@ -386,6 +386,10 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory
     }
   }
 
+  public Http2SolrClient getClient() {
+    return defaultClient;
+  }
+
   /**
    * Rebuilds the URL replacing the URL scheme of the passed URL with the configured scheme
    * replacement.If no scheme was configured, the passed URL's scheme is left alone.
diff --git a/solr/solrj/src/java/org/apache/solr/common/MapWriter.java b/solr/solrj/src/java/org/apache/solr/common/MapWriter.java
index 74fc498432f..9e2a25d00dd 100644
--- a/solr/solrj/src/java/org/apache/solr/common/MapWriter.java
+++ b/solr/solrj/src/java/org/apache/solr/common/MapWriter.java
@@ -19,6 +19,7 @@ package org.apache.solr.common;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -156,4 +157,6 @@ public interface MapWriter extends MapSerializable, NavigableObject {
       return (k, v) -> putNoEx(k, v);
     }
   }
+
+  MapWriter EMPTY = new MapWriterMap(Collections.emptyMap());
 }
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/NodesSysPropsCacher.java b/solr/solrj/src/java/org/apache/solr/common/cloud/NodesSysPropsCacher.java
index 26ef61fef86..550c9889a4d 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/NodesSysPropsCacher.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/NodesSysPropsCacher.java
@@ -17,193 +17,90 @@
 
 package org.apache.solr.common.cloud;
 
-import static org.apache.solr.common.cloud.rule.ImplicitSnitch.SYSPROP;
-
-import java.lang.invoke.MethodHandles;
-import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
+import java.util.LinkedHashMap;
 import java.util.Map;
-import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.stream.Collectors;
-import org.apache.solr.client.solrj.cloud.NodeStateProvider;
-import org.apache.solr.client.solrj.routing.PreferenceRule;
-import org.apache.solr.common.SolrCloseable;
-import org.apache.solr.common.params.ShardParams;
-import org.apache.solr.common.util.CommonTestInjection;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Caching other nodes system properties. The properties that will be cached based on the value
- * define in {@link org.apache.solr.common.cloud.ZkStateReader#DEFAULT_SHARD_PREFERENCES } of {@link
- * org.apache.solr.common.cloud.ZkStateReader#CLUSTER_PROPS }. If that key does not present then
- * this cacher will do nothing.
- *
- * <p>The cache will be refresh whenever /live_nodes get changed.
- */
-public class NodesSysPropsCacher implements SolrCloseable {
-  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-  private static final int NUM_RETRY = 5;
-
-  private final AtomicBoolean isRunning = new AtomicBoolean(false);
-  private final NodeStateProvider nodeStateProvider;
-  private Map<String, String> additionalProps = CommonTestInjection.injectAdditionalProps();
-  private final String currentNode;
-  private final ConcurrentHashMap<String, Map<String, Object>> cache = new ConcurrentHashMap<>();
-  private final AtomicInteger fetchCounting = new AtomicInteger(0);
-
-  private volatile boolean isClosed;
-  private volatile Collection<String> tags = new ArrayList<>();
-
-  public NodesSysPropsCacher(
-      NodeStateProvider nodeStateProvider, String currentNode, ZkStateReader stateReader) {
-    this.nodeStateProvider = nodeStateProvider;
-    this.currentNode = currentNode;
-
-    stateReader.registerClusterPropertiesListener(
-        properties -> {
-          Collection<String> tags = new ArrayList<>();
-          String shardPreferences =
-              (String) properties.getOrDefault(ZkStateReader.DEFAULT_SHARD_PREFERENCES, "");
-          if (shardPreferences.contains(ShardParams.SHARDS_PREFERENCE_NODE_WITH_SAME_SYSPROP)) {
-            try {
-              tags =
-                  PreferenceRule.from(shardPreferences).stream()
-                      .filter(
-                          r -> ShardParams.SHARDS_PREFERENCE_NODE_WITH_SAME_SYSPROP.equals(r.name))
-                      .map(r -> r.value)
-                      .collect(Collectors.toSet());
-            } catch (Exception e) {
-              log.info("Error on parsing shards preference:{}", shardPreferences);
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrRequest;
+import org.apache.solr.client.solrj.request.GenericSolrRequest;
+import org.apache.solr.common.MapWriter;
+import org.apache.solr.common.NavigableObject;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
+
+/** Fetch lazily and cache a node's system properties */
+public class NodesSysPropsCacher implements AutoCloseable {
+  private volatile boolean isClosed = false;
+  private final Map<String, Map<String, Object>> nodeVsTagsCache = new ConcurrentHashMap<>();
+  private ZkStateReader zkStateReader;
+  private final SolrClient solrClient;
+
+  public NodesSysPropsCacher(SolrClient solrClient, ZkStateReader zkStateReader) {
+    this.zkStateReader = zkStateReader;
+    this.solrClient = solrClient;
+    zkStateReader.registerLiveNodesListener(
+        (oldNodes, newNodes) -> {
+          for (String n : oldNodes) {
+            if (!newNodes.contains(n)) {
+              // this node has gone down, clear data
+              nodeVsTagsCache.remove(n);
             }
           }
-
-          if (tags.isEmpty()) {
-            pause();
-          } else {
-            start(tags);
-            // start fetching now
-            fetchSysProps(stateReader.getClusterState().getLiveNodes());
-          }
-          return isClosed;
-        });
-
-    stateReader.registerLiveNodesListener(
-        (oldLiveNodes, newLiveNodes) -> {
-          fetchSysProps(newLiveNodes);
           return isClosed;
         });
   }
 
-  private void start(Collection<String> tags) {
-    if (isClosed) return;
-    this.tags = tags;
-    isRunning.set(true);
-  }
-
-  private void fetchSysProps(Set<String> newLiveNodes) {
-    if (isRunning.get()) {
-      int fetchRound = fetchCounting.incrementAndGet();
-      // TODO smarter keeping caching entries by relying on Stat.cversion
-      cache.clear();
-      for (String node : newLiveNodes) {
-        // this might takes some times to finish, therefore if there are a latter change in listener
-        // triggering this method, skipping the old runner
-        if (isClosed && fetchRound != fetchCounting.get()) return;
-
-        if (currentNode.equals(node)) {
-          Map<String, String> props = new HashMap<>();
-          for (String tag : tags) {
-            String propName = tag.substring(SYSPROP.length());
-            if (additionalProps != null && additionalProps.containsKey(propName)) {
-              props.put(tag, additionalProps.get(propName));
-            } else {
-              props.put(tag, System.getProperty(propName));
-            }
-          }
-          cache.put(node, Collections.unmodifiableMap(props));
-        } else {
-          fetchRemoteProps(node, fetchRound);
-        }
+  public Map<String, Object> getSysProps(String nodeName, Collection<String> tags) {
+    Map<String, Object> cached =
+        nodeVsTagsCache.computeIfAbsent(nodeName, s -> new LinkedHashMap<>());
+    Map<String, Object> result = new LinkedHashMap<>();
+    for (String tag : tags) {
+      if (!cached.containsKey(tag)) {
+        // at least one property is missing. fetch properties from the node
+        Map<String, Object> props = fetchProps(nodeName, tags);
+        // make a copy
+        cached = new LinkedHashMap<>(cached);
+        // merge all properties
+        cached.putAll(props);
+        // update the cache with the new set of properties
+        nodeVsTagsCache.put(nodeName, cached);
+        return props;
+      } else {
+        result.put(tag, cached.get(tag));
       }
     }
+    return result;
   }
 
-  private void fetchRemoteProps(String node, int fetchRound) {
-    for (int i = 0; i < NUM_RETRY; i++) {
-      if (isClosed && fetchRound != fetchCounting.get()) return;
-
-      try {
-        Map<String, Object> props = nodeStateProvider.getNodeValues(node, tags);
-        cache.put(node, Collections.unmodifiableMap(props));
-        break;
-      } catch (Exception e) {
-        try {
-          // 1, 4, 9
-          int backOffTime = 1000 * (i + 1);
-          backOffTime = backOffTime * backOffTime;
-          backOffTime = Math.min(10000, backOffTime);
-          Thread.sleep(backOffTime);
-        } catch (InterruptedException e1) {
-          Thread.currentThread().interrupt();
-          log.info(
-              "Exception on caching node:{} system.properties:{}, retry {}/{}",
-              node,
-              tags,
-              i + 1,
-              NUM_RETRY,
-              e); // nowarn
-          break;
-        }
-        log.info(
-            "Exception on caching node:{} system.properties:{}, retry {}/{}",
-            node,
-            tags,
-            i + 1,
-            NUM_RETRY,
-            e); // nowarn
-      }
+  private Map<String, Object> fetchProps(String nodeName, Collection<String> tags) {
+    StringBuilder sb = new StringBuilder(zkStateReader.getBaseUrlForNodeName(nodeName));
+    sb.append("/admin/metrics?omitHeader=true&wt=javabin");
+    ModifiableSolrParams msp = new ModifiableSolrParams();
+    msp.add(CommonParams.OMIT_HEADER, "true");
+    LinkedHashMap<String, String> keys = new LinkedHashMap<>();
+    for (String tag : tags) {
+      String metricsKey = "solr.jvm:system.properties:" + tag;
+      keys.put(tag, metricsKey);
+      msp.add("key", metricsKey);
     }
-  }
 
-  public Map<String, Object> getSysProps(String node, Collection<String> tags) {
-    Map<String, Object> props = cache.get(node);
-    HashMap<String, Object> result = new HashMap<>();
-    if (props != null) {
-      for (String tag : tags) {
-        if (props.containsKey(tag)) {
-          result.put(tag, props.get(tag));
-        }
-      }
+    GenericSolrRequest req = new GenericSolrRequest(SolrRequest.METHOD.GET, "/admin/metrics", msp);
+    req.setBasePath(zkStateReader.getBaseUrlForNodeName(nodeName));
+    try {
+      LinkedHashMap<String, Object> result = new LinkedHashMap<>();
+      NavigableObject response = solrClient.request(req);
+      NavigableObject metrics = (NavigableObject) response._get("metrics", MapWriter.EMPTY);
+      keys.forEach((tag, key) -> result.put(tag, metrics._get(key, null)));
+      return result;
+    } catch (Exception e) {
+      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
     }
-    return result;
-  }
-
-  public int getCacheSize() {
-    return cache.size();
-  }
-
-  public boolean isRunning() {
-    return isRunning.get();
-  }
-
-  private void pause() {
-    isRunning.set(false);
-  }
-
-  @Override
-  public boolean isClosed() {
-    return isClosed;
   }
 
   @Override
   public void close() {
     isClosed = true;
-    pause();
   }
 }
diff --git a/solr/solrj/src/test/org/apache/solr/common/cloud/TestNodesSysPropsCacher.java b/solr/solrj/src/test/org/apache/solr/common/cloud/TestNodesSysPropsCacher.java
new file mode 100644
index 00000000000..44fd02a9dde
--- /dev/null
+++ b/solr/solrj/src/test/org/apache/solr/common/cloud/TestNodesSysPropsCacher.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.common.cloud;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.cloud.MiniSolrCloudCluster;
+import org.apache.solr.cloud.SolrCloudTestCase;
+import org.junit.Test;
+
+public class TestNodesSysPropsCacher extends SolrCloudTestCase {
+
+  @Test
+  public void testSysProps() throws Exception {
+    System.setProperty("metricsEnabled", "true");
+    MiniSolrCloudCluster cluster =
+        configureCluster(4)
+            .withJettyConfig(jetty -> jetty.enableV2(true))
+            .addConfig("config", getFile("solrj/solr/collection1/conf").toPath())
+            .configure();
+
+    System.clearProperty("metricsEnabled");
+    NodesSysPropsCacher nodesSysPropsCacher =
+        cluster.getRandomJetty(random()).getCoreContainer().getZkController().getSysPropsCacher();
+
+    try {
+      for (JettySolrRunner j : cluster.getJettySolrRunners()) {
+        List<String> tags = Arrays.asList("file.encoding", "java.vm.version");
+        Map<String, Object> props = nodesSysPropsCacher.getSysProps(j.getNodeName(), tags);
+        for (String tag : tags) assertNotNull(props.get(tag));
+        tags = Arrays.asList("file.encoding", "java.vm.version", "os.arch");
+        props = nodesSysPropsCacher.getSysProps(j.getNodeName(), tags);
+        for (String tag : tags) assertNotNull(props.get(tag));
+      }
+    } finally {
+      cluster.shutdown();
+    }
+  }
+}