You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by no...@apache.org on 2023/03/13 11:25:00 UTC

[solr] branch main updated: SOLR-16653 : sub-shard remains inactive after split for NRT+PULL collection (#1434)

This is an automated email from the ASF dual-hosted git repository.

noble pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new c9d656a8aa7 SOLR-16653 :  sub-shard remains inactive after split for NRT+PULL collection (#1434)
c9d656a8aa7 is described below

commit c9d656a8aa7bc2f711d4a9007fc590faa9853fcf
Author: Noble Paul <no...@users.noreply.github.com>
AuthorDate: Mon Mar 13 22:24:54 2023 +1100

    SOLR-16653 :  sub-shard remains inactive after split for NRT+PULL collection (#1434)
---
 .../apache/solr/cloud/overseer/ReplicaMutator.java | 47 +++++++++--
 .../apache/solr/cloud/overseer/SliceMutator.java   | 17 ++--
 .../solr/cloud/SplitShardWithNodeRoleTest.java     | 98 ++++++++++++++++++++++
 3 files changed, 152 insertions(+), 10 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java b/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java
index 9f5fd99a2dd..5254c9671f1 100644
--- a/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java
+++ b/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java
@@ -439,15 +439,52 @@ public class ReplicaMutator {
     DocCollection newCollection = CollectionMutator.updateSlice(collectionName, collection, slice);
     log.debug("Collection is now: {}", newCollection);
     if (collection.isPerReplicaState() && oldReplica != null) {
-      if (!isAnyPropertyChanged(replica, oldReplica)) return ZkWriteCommand.NO_OP;
+      if (!persistStateJson(replica, oldReplica, collection)) {
+        if (log.isDebugEnabled()) {
+          log.debug(
+              "state.json is not persisted slice/replica : {}/{} \n , old : {}, \n new {}",
+              replica.shard,
+              replica.name,
+              Utils.toJSONString(oldReplica.getProperties()),
+              Utils.toJSONString(replica.getProperties()));
+        }
+        return ZkWriteCommand.NO_OP;
+      }
     }
     return new ZkWriteCommand(collectionName, newCollection);
   }
 
-  private boolean isAnyPropertyChanged(Replica replica, Replica oldReplica) {
-    if (!Objects.equals(replica.getBaseUrl(), oldReplica.getBaseUrl())) return true;
-    if (!Objects.equals(replica.getCoreName(), oldReplica.getCoreName())) return true;
-    if (!Objects.equals(replica.getNodeName(), oldReplica.getNodeName())) return true;
+  /** Whether it is required to persist the state.json */
+  private boolean persistStateJson(Replica newReplica, Replica oldReplica, DocCollection coll) {
+    if (!Objects.equals(newReplica.getBaseUrl(), oldReplica.getBaseUrl())) return true;
+    if (!Objects.equals(newReplica.getCoreName(), oldReplica.getCoreName())) return true;
+    if (!Objects.equals(newReplica.getNodeName(), oldReplica.getNodeName())) return true;
+    if (!Objects.equals(
+        newReplica.getProperties().get(ZkStateReader.FORCE_SET_STATE_PROP),
+        oldReplica.getProperties().get(ZkStateReader.FORCE_SET_STATE_PROP))) {
+      if (log.isInfoEnabled()) {
+        log.info(
+            "{} force_set_state is changed from {} -> {}",
+            newReplica.name,
+            oldReplica.getProperties().get(ZkStateReader.FORCE_SET_STATE_PROP),
+            newReplica.getProperties().get(ZkStateReader.FORCE_SET_STATE_PROP));
+      }
+      return true;
+    }
+    Slice slice = coll.getSlice(newReplica.getShard());
+    // the slice may be in recovery
+    if (slice.getState() == Slice.State.RECOVERY) {
+      if (log.isInfoEnabled()) {
+        log.info("{} slice state_is_recovery", slice.getName());
+      }
+      return true;
+    }
+    if (Objects.equals(oldReplica.getProperties().get("state"), "recovering")) {
+      if (log.isInfoEnabled()) {
+        log.info("{} state_is_recovering", newReplica.name);
+      }
+      return true;
+    }
     return false;
   }
 
diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/SliceMutator.java b/solr/core/src/java/org/apache/solr/cloud/overseer/SliceMutator.java
index 01514af713b..c83a965d5a1 100644
--- a/solr/core/src/java/org/apache/solr/cloud/overseer/SliceMutator.java
+++ b/solr/core/src/java/org/apache/solr/cloud/overseer/SliceMutator.java
@@ -98,11 +98,18 @@ public class SliceMutator {
         new Replica(
             coreNodeName,
             Utils.makeMap(
-                ZkStateReader.CORE_NAME_PROP, message.getStr(ZkStateReader.CORE_NAME_PROP),
-                ZkStateReader.STATE_PROP, message.getStr(ZkStateReader.STATE_PROP),
-                ZkStateReader.NODE_NAME_PROP, nodeName,
-                ZkStateReader.BASE_URL_PROP, baseUrl,
-                ZkStateReader.REPLICA_TYPE, message.get(ZkStateReader.REPLICA_TYPE)),
+                ZkStateReader.CORE_NAME_PROP,
+                message.getStr(ZkStateReader.CORE_NAME_PROP),
+                ZkStateReader.STATE_PROP,
+                message.getStr(ZkStateReader.STATE_PROP),
+                ZkStateReader.NODE_NAME_PROP,
+                nodeName,
+                ZkStateReader.BASE_URL_PROP,
+                baseUrl,
+                ZkStateReader.FORCE_SET_STATE_PROP,
+                "false",
+                ZkStateReader.REPLICA_TYPE,
+                message.get(ZkStateReader.REPLICA_TYPE)),
             coll,
             slice);
 
diff --git a/solr/core/src/test/org/apache/solr/cloud/SplitShardWithNodeRoleTest.java b/solr/core/src/test/org/apache/solr/cloud/SplitShardWithNodeRoleTest.java
new file mode 100644
index 00000000000..78bbbc8e015
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/SplitShardWithNodeRoleTest.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.cloud;
+
+import java.lang.invoke.MethodHandles;
+import java.util.HashSet;
+import java.util.Set;
+import org.apache.solr.client.solrj.impl.CloudSolrClient;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.core.NodeRoles;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class SplitShardWithNodeRoleTest extends SolrCloudTestCase {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    configureCluster(2).addConfig("conf", configset("cloud-minimal")).configure();
+    System.setProperty(NodeRoles.NODE_ROLES_PROP, "data:off,coordinator:on");
+
+    try {
+      cluster.startJettySolrRunner();
+      cluster.startJettySolrRunner();
+    } finally {
+      System.clearProperty(NodeRoles.NODE_ROLES_PROP);
+    }
+
+    Set<String> overseerNodes = new HashSet<>();
+    System.setProperty(NodeRoles.NODE_ROLES_PROP, "data:off,overseer:preferred");
+    try {
+      overseerNodes.add(cluster.startJettySolrRunner().getNodeName());
+      overseerNodes.add(cluster.startJettySolrRunner().getNodeName());
+    } finally {
+      System.clearProperty(NodeRoles.NODE_ROLES_PROP);
+    }
+    OverseerRolesTest.waitForNewOverseer(10, overseerNodes::contains, false);
+  }
+
+  @Test
+  public void testSolrClusterWithNodeRoleWithSingleReplica() throws Exception {
+    doSplit("coll_ONLY_NRT", 1, 1, 0);
+  }
+
+  @Test
+  public void testSolrClusterWithNodeRoleWithPull() throws Exception {
+    doSplit("coll_NRT_PULL", 1, 1, 1);
+  }
+
+  public void doSplit(String collName, int shard, int nrtReplica, int pullReplica)
+      throws Exception {
+    CloudSolrClient client = cluster.getSolrClient();
+    CollectionAdminRequest.createCollection(collName, "conf", shard, nrtReplica, 0, pullReplica)
+        .setPerReplicaState(true)
+        .process(cluster.getSolrClient());
+    cluster.waitForActiveCollection(collName, shard, nrtReplica + pullReplica);
+    UpdateRequest ur = new UpdateRequest();
+    for (int i = 0; i < 10; i++) {
+      SolrInputDocument doc2 = new SolrInputDocument();
+      doc2.addField("id", "" + i);
+      ur.add(doc2);
+    }
+
+    ur.commit(client, collName);
+
+    CollectionAdminRequest.SplitShard splitShard =
+        CollectionAdminRequest.splitShard(collName).setShardName("shard1");
+    splitShard.process(cluster.getSolrClient());
+    waitForState(
+        "Timed out waiting for sub shards to be active. Number of active shards="
+            + cluster
+                .getSolrClient()
+                .getClusterState()
+                .getCollection(collName)
+                .getActiveSlices()
+                .size(),
+        collName,
+        activeClusterShape(shard + 1, 3 * (nrtReplica + pullReplica)));
+  }
+}