You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ds...@apache.org on 2024/03/16 03:31:26 UTC

(solr) branch main updated: SOLR-17198: AttributeFetcher no longer fails when it observes multiple shard leaders (#2335)

This is an automated email from the ASF dual-hosted git repository.

dsmiley pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new a4fbad59fcf SOLR-17198: AttributeFetcher no longer fails when it observes multiple shard leaders (#2335)
a4fbad59fcf is described below

commit a4fbad59fcf61ad1b6b5fb02fd2803e1a52379c1
Author: pjmcarthur <92...@users.noreply.github.com>
AuthorDate: Fri Mar 15 20:31:20 2024 -0700

    SOLR-17198: AttributeFetcher no longer fails when it observes multiple shard leaders (#2335)
    
    AffinityPlacementFactory can fail if Shard leadership changes occur while it is collecting metrics.
    
    
    Co-authored-by: Paul McArthur <pm...@proton.me>
---
 solr/CHANGES.txt                                   |  4 +
 .../placement/impl/CollectionMetricsBuilder.java   | 19 +++--
 .../impl/CollectionMetricsBuilderTest.java         | 93 ++++++++++++++++++++++
 3 files changed, 108 insertions(+), 8 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index f55d4e82900..70571a9c2ed 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -27,6 +27,7 @@ Optimizations
 
 Bug Fixes
 ---------------------
+(No changes)
 
 Deprecation Removals
 ----------------------
@@ -139,6 +140,9 @@ Bug Fixes
 
 * SOLR-17197: Fix getting fieldType by its name in FileBasedSpellChecker (Andrey Bozhko via Eric Pugh)
 
+* SOLR-17198: AffinityPlacementFactory can fail if Shard leadership changes occur while it is collecting metrics.
+  (Paul McArthur)
+
 Dependency Upgrades
 ---------------------
 (No changes)
diff --git a/solr/core/src/java/org/apache/solr/cluster/placement/impl/CollectionMetricsBuilder.java b/solr/core/src/java/org/apache/solr/cluster/placement/impl/CollectionMetricsBuilder.java
index 1ef71279277..7fac922899b 100644
--- a/solr/core/src/java/org/apache/solr/cluster/placement/impl/CollectionMetricsBuilder.java
+++ b/solr/core/src/java/org/apache/solr/cluster/placement/impl/CollectionMetricsBuilder.java
@@ -16,6 +16,7 @@
  */
 package org.apache.solr.cluster.placement.impl;
 
+import java.lang.invoke.MethodHandles;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
@@ -24,10 +25,14 @@ import org.apache.solr.cluster.placement.CollectionMetrics;
 import org.apache.solr.cluster.placement.ReplicaMetric;
 import org.apache.solr.cluster.placement.ReplicaMetrics;
 import org.apache.solr.cluster.placement.ShardMetrics;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /** Builder class for constructing instances of {@link CollectionMetrics}. */
 public class CollectionMetricsBuilder {
 
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
   final Map<String, ShardMetricsBuilder> shardMetricsBuilders = new HashMap<>();
 
   public Map<String, ShardMetricsBuilder> getShardMetricsBuilders() {
@@ -78,15 +83,13 @@ public class CollectionMetricsBuilder {
             ReplicaMetrics metrics = replicaBuilder.build();
             metricsMap.put(name, metrics);
             if (replicaBuilder.leader) {
-              if (leaderMetricsBuilder == null) {
-                leaderMetricsBuilder = replicaBuilder;
-              } else if (!leaderMetricsBuilder.replicaName.equals(replicaBuilder.replicaName)) {
-                throw new RuntimeException(
-                    "two replicas claim to be the shard leader! existing="
-                        + leaderMetricsBuilder
-                        + " and current "
-                        + replicaBuilder);
+              if (leaderMetricsBuilder != null
+                  && !leaderMetricsBuilder.replicaName.equals(replicaBuilder.replicaName)) {
+                log.warn(
+                    "Multiple replicas claim to be shard leader, selecting the latest candidate ({}) for metrics purposes",
+                    replicaBuilder.replicaName);
               }
+              leaderMetricsBuilder = replicaBuilder;
             }
           });
       final ReplicaMetrics finalLeaderMetrics =
diff --git a/solr/core/src/test/org/apache/solr/cluster/placement/impl/CollectionMetricsBuilderTest.java b/solr/core/src/test/org/apache/solr/cluster/placement/impl/CollectionMetricsBuilderTest.java
new file mode 100644
index 00000000000..48ca86f0151
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cluster/placement/impl/CollectionMetricsBuilderTest.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.cluster.placement.impl;
+
+import java.util.Arrays;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.cluster.placement.CollectionMetrics;
+import org.apache.solr.cluster.placement.ReplicaMetric;
+import org.apache.solr.cluster.placement.ReplicaMetrics;
+import org.apache.solr.cluster.placement.ShardMetrics;
+import org.junit.Test;
+
+public class CollectionMetricsBuilderTest extends SolrTestCaseJ4 {
+
+  @Test
+  public void testMultipleShardLeaders() {
+    CollectionMetricsBuilder.ReplicaMetricsBuilder r1 =
+        createReplicaMetricsBuilder(
+            "r1", ReplicaMetricImpl.INDEX_SIZE_GB, 1.5 * MetricImpl.GB, true);
+    CollectionMetricsBuilder.ReplicaMetricsBuilder r2 =
+        createReplicaMetricsBuilder(
+            "r2", ReplicaMetricImpl.INDEX_SIZE_GB, 2.5 * MetricImpl.GB, true);
+
+    CollectionMetrics metrics = collectionMetricsFromShardReplicaBuilders("shard1", r1, r2);
+    ShardMetrics shardMetrics = metrics.getShardMetrics("shard1").get();
+
+    assertTrue("Shard metrics not found", shardMetrics.getLeaderMetrics().isPresent());
+    assertTrue(
+        "No metrics were present for leader replica", shardMetrics.getLeaderMetrics().isPresent());
+    ReplicaMetrics leaderMetrics = shardMetrics.getLeaderMetrics().get();
+
+    // Both replicas claimed to be shard leader, so either metric value is acceptable, and an
+    // exception should not be raised
+    Double indexSize = leaderMetrics.getReplicaMetric(ReplicaMetricImpl.INDEX_SIZE_GB).get();
+    assertTrue(
+        "Metric value " + indexSize + " should have matched one of the replica's values",
+        indexSize.equals(1.5) || indexSize.equals(2.5));
+  }
+
+  @Test
+  public void testNoShardLeader() {
+    CollectionMetricsBuilder.ReplicaMetricsBuilder r1 =
+        createReplicaMetricsBuilder(
+            "r1", ReplicaMetricImpl.INDEX_SIZE_GB, 1.5 * MetricImpl.GB, false);
+    CollectionMetricsBuilder.ReplicaMetricsBuilder r2 =
+        createReplicaMetricsBuilder(
+            "r2", ReplicaMetricImpl.INDEX_SIZE_GB, 2.5 * MetricImpl.GB, false);
+
+    CollectionMetrics metrics = collectionMetricsFromShardReplicaBuilders("shard1", r1, r2);
+    assertTrue("Shard metrics not found", metrics.getShardMetrics("shard1").isPresent());
+    ShardMetrics shardMetrics = metrics.getShardMetrics("shard1").get();
+
+    assertFalse(
+        "No replica was leader, so leader metrics should not be present",
+        shardMetrics.getLeaderMetrics().isPresent());
+  }
+
+  private <T> CollectionMetricsBuilder.ReplicaMetricsBuilder createReplicaMetricsBuilder(
+      final String name, final ReplicaMetric<T> metric, final T value, final boolean leader) {
+    CollectionMetricsBuilder.ReplicaMetricsBuilder replicaMetricsBuilder =
+        new CollectionMetricsBuilder.ReplicaMetricsBuilder(name);
+    replicaMetricsBuilder.addMetric(metric, value);
+    replicaMetricsBuilder.setLeader(leader);
+    return replicaMetricsBuilder;
+  }
+
+  private CollectionMetrics collectionMetricsFromShardReplicaBuilders(
+      String shardName, CollectionMetricsBuilder.ReplicaMetricsBuilder... replicaMetrics) {
+    CollectionMetricsBuilder.ShardMetricsBuilder shardMetricsBuilder =
+        new CollectionMetricsBuilder.ShardMetricsBuilder(shardName);
+    Arrays.stream(replicaMetrics)
+        .forEach(r -> shardMetricsBuilder.replicaMetricsBuilders.put(r.replicaName, r));
+
+    CollectionMetricsBuilder builder = new CollectionMetricsBuilder();
+    builder.shardMetricsBuilders.put(shardName, shardMetricsBuilder);
+
+    return builder.build();
+  }
+}