You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2021/11/16 21:33:17 UTC
[lucene-solr] branch branch_8_11 updated: SOLR-15795: Fix REPLACENODE to not use source node (#414)
This is an automated email from the ASF dual-hosted git repository.
houston pushed a commit to branch branch_8_11
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8_11 by this push:
new 7cd45b7 SOLR-15795: Fix REPLACENODE to not use source node (#414)
7cd45b7 is described below
commit 7cd45b750d4f7b1ae360e83fdf74a5f6ae183eaf
Author: Houston Putman <ho...@apache.org>
AuthorDate: Tue Nov 16 16:05:44 2021 -0500
SOLR-15795: Fix REPLACENODE to not use source node (#414)
---
solr/CHANGES.txt | 2 +
.../apache/solr/cloud/api/collections/Assign.java | 23 +++++++
.../solr/cloud/api/collections/ReplaceNodeCmd.java | 5 +-
.../org/apache/solr/cloud/ReplaceNodeTest.java | 73 ++++++++++++++--------
4 files changed, 75 insertions(+), 28 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index f922881..8fa5d06 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -24,6 +24,8 @@ Bug Fixes
---------------------
* SOLR-15635: Don't close hooks twice when SolrRequestInfo is cleared twice; or /export with classic join
closed fromCore if provided (Mikhail Khludnev, David Smiley)
+
+* SOLR-15795: Fix REPLACENODE to not use source node when choosing a target node for new replicas (Houston Putman)
Build
---------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/Assign.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/Assign.java
index 8f3b247..138f5af 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/Assign.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/Assign.java
@@ -453,6 +453,26 @@ public class Assign {
return nodeNameVsShardCount;
}
+ // throw an exception if all nodes in the supplied list are not live.
+ // Empty list will also fail.
+ // Returns the input
+ private static List<String> checkAnyLiveNodes(List<String> createNodeList, ClusterState clusterState) {
+ Set<String> liveNodes = clusterState.getLiveNodes();
+ if (createNodeList == null) {
+ createNodeList = Collections.emptyList();
+ }
+ boolean anyLiveNodes = false;
+ for (String node : createNodeList) {
+ anyLiveNodes |= liveNodes.contains(node);
+ }
+ if (!anyLiveNodes) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ "None of the node(s) specified " + createNodeList + " are currently active in "
+ + liveNodes + ", no action taken.");
+ }
+ return createNodeList; // unmodified, but return for inline use. Only modified if empty, and that will throw an error
+ }
+
/**
* Thrown if there is an exception while assigning nodes for replicas
*/
@@ -559,6 +579,9 @@ public class Assign {
nodeList = sortedNodeList.stream().map(replicaCount -> replicaCount.nodeName).collect(Collectors.toList());
}
+ // Throw an error if there aren't any live nodes.
+ checkAnyLiveNodes(nodeList, solrCloudManager.getClusterStateProvider().getClusterState());
+
int i = 0;
List<ReplicaPosition> result = new ArrayList<>();
for (String aShard : assignRequest.shardNames)
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/ReplaceNodeCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/ReplaceNodeCmd.java
index f1c1f8c..238adee 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/ReplaceNodeCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/ReplaceNodeCmd.java
@@ -28,6 +28,7 @@ import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper;
import org.apache.solr.cloud.ActiveReplicaWatcher;
@@ -81,6 +82,8 @@ public class ReplaceNodeCmd implements OverseerCollectionMessageHandler.Cmd {
}
if (target != null && !clusterState.liveNodesContain(target)) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Target Node: " + target + " is not live");
+ } else if (clusterState.getLiveNodes().size() <= 1) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No nodes other than the source node: " + source + " are live, therefore replicas cannot be moved");
}
List<ZkNodeProps> sourceReplicas = getReplicasOfNode(source, clusterState);
// how many leaders are we moving? for these replicas we have to make sure that either:
@@ -122,7 +125,7 @@ public class ReplaceNodeCmd implements OverseerCollectionMessageHandler.Cmd {
.assignNrtReplicas(numNrtReplicas)
.assignTlogReplicas(numTlogReplicas)
.assignPullReplicas(numPullReplicas)
- .onNodes(new ArrayList<>(ocmh.cloudManager.getClusterStateProvider().getLiveNodes()))
+ .onNodes(new ArrayList<>(ocmh.cloudManager.getClusterStateProvider().getLiveNodes().stream().filter(node -> !node.equals(source)).collect(Collectors.toList())))
.build();
Assign.AssignStrategyFactory assignStrategyFactory = new Assign.AssignStrategyFactory(ocmh.cloudManager);
Assign.AssignStrategy assignStrategy = assignStrategyFactory.create(clusterState, clusterState.getCollection(sourceCollection));
diff --git a/solr/core/src/test/org/apache/solr/cloud/ReplaceNodeTest.java b/solr/core/src/test/org/apache/solr/cloud/ReplaceNodeTest.java
index 7c1a7ba..ef7d607 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ReplaceNodeTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ReplaceNodeTest.java
@@ -22,7 +22,6 @@ import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumSet;
-import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -35,6 +34,7 @@ import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.CoreAdminRequest;
import org.apache.solr.client.solrj.response.CoreAdminResponse;
import org.apache.solr.client.solrj.response.RequestStatusState;
+import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
@@ -44,6 +44,7 @@ import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.metrics.MetricsMap;
import org.apache.solr.metrics.SolrMetricManager;
+import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
@@ -54,9 +55,12 @@ public class ReplaceNodeTest extends SolrCloudTestCase {
@BeforeClass
public static void setupCluster() throws Exception {
System.setProperty("metricsEnabled", "true");
- configureCluster(6)
- .addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-dynamic").resolve("conf"))
- .configure();
+ }
+
+ @Before
+ public void clearPreviousCluster() throws Exception {
+ // Clear the previous cluster before each test, since they use different numbers of nodes.
+ shutdownCluster();
}
protected String getSolrXml() {
@@ -65,6 +69,9 @@ public class ReplaceNodeTest extends SolrCloudTestCase {
@Test
public void test() throws Exception {
+ configureCluster(6)
+ .addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-dynamic").resolve("conf"))
+ .configure();
String coll = "replacenodetest_coll";
if (log.isInfoEnabled()) {
log.info("total_jettys: {}", cluster.getJettySolrRunners().size());
@@ -79,23 +86,23 @@ public class ReplaceNodeTest extends SolrCloudTestCase {
CollectionAdminRequest.Create create;
// NOTE: always using the createCollection that takes in 'int' for all types of replicas, so we never
// have to worry about null checking when comparing the Create command with the final Slices
-
+
// TODO: tlog replicas do not work correctly in tests due to fault TestInjection#waitForInSyncWithLeader
create = pickRandom(
- CollectionAdminRequest.createCollection(coll, "conf1", 5, 2,0,0),
- //CollectionAdminRequest.createCollection(coll, "conf1", 5, 1,1,0),
- //CollectionAdminRequest.createCollection(coll, "conf1", 5, 0,1,1),
- //CollectionAdminRequest.createCollection(coll, "conf1", 5, 1,0,1),
- //CollectionAdminRequest.createCollection(coll, "conf1", 5, 0,2,0),
- // check also replicationFactor 1
- CollectionAdminRequest.createCollection(coll, "conf1", 5, 1,0,0)
- //CollectionAdminRequest.createCollection(coll, "conf1", 5, 0,1,0)
+ CollectionAdminRequest.createCollection(coll, "conf1", 5, 2,0,0),
+ //CollectionAdminRequest.createCollection(coll, "conf1", 5, 1,1,0),
+ //CollectionAdminRequest.createCollection(coll, "conf1", 5, 0,1,1),
+ //CollectionAdminRequest.createCollection(coll, "conf1", 5, 1,0,1),
+ //CollectionAdminRequest.createCollection(coll, "conf1", 5, 0,2,0),
+ // check also replicationFactor 1
+ CollectionAdminRequest.createCollection(coll, "conf1", 5, 1,0,0)
+ //CollectionAdminRequest.createCollection(coll, "conf1", 5, 0,1,0)
);
create.setCreateNodeSet(StrUtils.join(l, ',')).setMaxShardsPerNode(3);
cloudClient.request(create);
-
+
cluster.waitForActiveCollection(coll, 5, 5 * (create.getNumNrtReplicas() + create.getNumPullReplicas() + create.getNumTlogReplicas()));
-
+
DocCollection collection = cloudClient.getZkStateReader().getClusterState().getCollection(coll);
log.debug("### Before decommission: {}", collection);
log.info("excluded_node : {} ", emptyNode);
@@ -108,13 +115,13 @@ public class ReplaceNodeTest extends SolrCloudTestCase {
success = true;
break;
}
- assertFalse(rsp.getRequestStatus() == RequestStatusState.FAILED);
+ assertNotSame(rsp.getRequestStatus(), RequestStatusState.FAILED);
Thread.sleep(50);
}
assertTrue(success);
try (HttpSolrClient coreclient = getHttpSolrClient(cloudClient.getZkStateReader().getBaseUrlForNodeName(node2bdecommissioned))) {
CoreAdminResponse status = CoreAdminRequest.getStatus(null, coreclient);
- assertTrue(status.getCoreStatus().size() == 0);
+ assertEquals(0, status.getCoreStatus().size());
}
Thread.sleep(5000);
@@ -139,7 +146,7 @@ public class ReplaceNodeTest extends SolrCloudTestCase {
success = true;
break;
}
- assertFalse(rsp.getRequestStatus() == RequestStatusState.FAILED);
+ assertNotSame(rsp.getRequestStatus(), RequestStatusState.FAILED);
Thread.sleep(50);
}
assertTrue(success);
@@ -157,14 +164,7 @@ public class ReplaceNodeTest extends SolrCloudTestCase {
}
// make sure all newly created replicas on node are active
List<Replica> newReplicas = collection.getReplicas(node2bdecommissioned);
- replicas.forEach(r -> {
- for (Iterator<Replica> it = newReplicas.iterator(); it.hasNext(); ) {
- Replica nr = it.next();
- if (nr.getName().equals(r.getName())) {
- it.remove();
- }
- }
- });
+ replicas.forEach(r -> newReplicas.removeIf(nr -> nr.getName().equals(r.getName())));
assertFalse(newReplicas.isEmpty());
for (Replica r : newReplicas) {
assertEquals(r.toString(), Replica.State.ACTIVE, r.getState());
@@ -173,7 +173,7 @@ public class ReplaceNodeTest extends SolrCloudTestCase {
replicas = collection.getReplicas(emptyNode);
if (replicas != null) {
for (Replica r : replicas) {
- assertFalse(r.toString(), Replica.State.ACTIVE.equals(r.getState()));
+ assertNotEquals(r.toString(), Replica.State.ACTIVE, r.getState());
}
}
@@ -211,6 +211,25 @@ public class ReplaceNodeTest extends SolrCloudTestCase {
}
+ @Test
+ public void testFailOnSingleNode() throws Exception {
+ configureCluster(1)
+ .addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-dynamic").resolve("conf"))
+ .configure();
+ String coll = "replacesinglenodetest_coll";
+ if (log.isInfoEnabled()) {
+ log.info("total_jettys: {}", cluster.getJettySolrRunners().size());
+ }
+
+ CloudSolrClient cloudClient = cluster.getSolrClient();
+ cloudClient.request(CollectionAdminRequest.createCollection(coll, "conf1", 5, 1,0,0).setMaxShardsPerNode(5));
+
+ cluster.waitForActiveCollection(coll, 5, 5);
+
+ String liveNode = cloudClient.getZkStateReader().getClusterState().getLiveNodes().iterator().next();
+ expectThrows(SolrException.class, () -> createReplaceNodeRequest(liveNode, null, null).process(cloudClient));
+ }
+
public static CollectionAdminRequest.AsyncCollectionAdminRequest createReplaceNodeRequest(String sourceNode, String targetNode, Boolean parallel) {
if (random().nextBoolean()) {
return new CollectionAdminRequest.ReplaceNode(sourceNode, targetNode).setParallel(parallel);