You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2017/06/21 16:30:28 UTC
lucene-solr:jira/solr-10879: SOLR-10879: Make sure we don't lose
single replicas when deleting a node.
Repository: lucene-solr
Updated Branches:
refs/heads/jira/solr-10879 44d1f1fe3 -> a417f99a6
SOLR-10879: Make sure we don't lose single replicas when deleting a node.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/a417f99a
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/a417f99a
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/a417f99a
Branch: refs/heads/jira/solr-10879
Commit: a417f99a66d59917025d1e14207e74781326f961
Parents: 44d1f1f
Author: Andrzej Bialecki <ab...@apache.org>
Authored: Wed Jun 21 18:29:48 2017 +0200
Committer: Andrzej Bialecki <ab...@apache.org>
Committed: Wed Jun 21 18:29:48 2017 +0200
----------------------------------------------------------------------
.../org/apache/solr/cloud/DeleteNodeCmd.java | 43 +++++++++++++-
.../org/apache/solr/cloud/DeleteNodeTest.java | 61 +++++++++++++++++---
2 files changed, 93 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a417f99a/solr/core/src/java/org/apache/solr/cloud/DeleteNodeCmd.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/DeleteNodeCmd.java b/solr/core/src/java/org/apache/solr/cloud/DeleteNodeCmd.java
index afb95a2..51b0956 100644
--- a/solr/core/src/java/org/apache/solr/cloud/DeleteNodeCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/DeleteNodeCmd.java
@@ -19,6 +19,7 @@ package org.apache.solr.cloud;
import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.CountDownLatch;
@@ -26,7 +27,11 @@ import java.util.concurrent.TimeUnit;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkNodeProps;
+import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.NamedList;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
@@ -54,7 +59,40 @@ public class DeleteNodeCmd implements OverseerCollectionMessageHandler.Cmd {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Source Node: " + node + " is not live");
}
List<ZkNodeProps> sourceReplicas = ReplaceNodeCmd.getReplicasOfNode(node, state);
- cleanupReplicas(results, state, sourceReplicas, ocmh, node, message.getStr(ASYNC));
+ List<String> singleReplicas = verifyReplicaAvailability(sourceReplicas, state);
+ if (!singleReplicas.isEmpty()) {
+ results.add("failure", "Can't delete the only existing non-PULL replica(s) on node " + node + ": " + singleReplicas.toString());
+ } else {
+ cleanupReplicas(results, state, sourceReplicas, ocmh, node, message.getStr(ASYNC));
+ }
+ }
+
+ // collect names of replicas that cannot be deleted
+ static List<String> verifyReplicaAvailability(List<ZkNodeProps> sourceReplicas, ClusterState state) {
+ List<String> res = new ArrayList<>();
+ for (ZkNodeProps sourceReplica : sourceReplicas) {
+ String coll = sourceReplica.getStr(COLLECTION_PROP);
+ String shard = sourceReplica.getStr(SHARD_ID_PROP);
+ String replicaName = sourceReplica.getStr(ZkStateReader.REPLICA_PROP);
+ DocCollection collection = state.getCollection(coll);
+ Slice slice = collection.getSlice(shard);
+ if (slice.getReplicas().size() < 2) {
+ // can't delete the only replica in existence
+ res.add(coll + "/" + shard + "/" + replicaName + ", type=" + sourceReplica.getStr(ZkStateReader.REPLICA_TYPE));
+ } else { // check replica types
+ int otherNonPullReplicas = 0;
+ for (Replica r : slice.getReplicas()) {
+ if (!r.getName().equals(replicaName) && !r.getType().equals(Replica.Type.PULL)) {
+ otherNonPullReplicas++;
+ }
+ }
+ // can't delete - there are no other non-pull replicas
+ if (otherNonPullReplicas == 0) {
+ res.add(coll + "/" + shard + "/" + replicaName + ", type=" + sourceReplica.getStr(ZkStateReader.REPLICA_TYPE));
+ }
+ }
+ }
+ return res;
}
static void cleanupReplicas(NamedList results,
@@ -67,7 +105,8 @@ public class DeleteNodeCmd implements OverseerCollectionMessageHandler.Cmd {
for (ZkNodeProps sourceReplica : sourceReplicas) {
String coll = sourceReplica.getStr(COLLECTION_PROP);
String shard = sourceReplica.getStr(SHARD_ID_PROP);
- log.info("Deleting replica for collection={} shard={} on node={}", coll, shard, node);
+ String type = sourceReplica.getStr(ZkStateReader.REPLICA_TYPE);
+ log.info("Deleting replica type={} for collection={} shard={} on node={}", type, coll, shard, node);
NamedList deleteResult = new NamedList();
try {
if (async != null) sourceReplica = sourceReplica.plus(ASYNC, async);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a417f99a/solr/core/src/test/org/apache/solr/cloud/DeleteNodeTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/DeleteNodeTest.java b/solr/core/src/test/org/apache/solr/cloud/DeleteNodeTest.java
index 9f461f0..bd32710 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DeleteNodeTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DeleteNodeTest.java
@@ -18,19 +18,29 @@
package org.apache.solr.cloud;
+import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.List;
import java.util.Set;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.response.RequestStatusState;
+import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.StrUtils;
import org.junit.BeforeClass;
import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class DeleteNodeTest extends SolrCloudTestCase {
-
+ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
@BeforeClass
public static void setupCluster() throws Exception {
configureCluster(6)
@@ -47,28 +57,61 @@ public class DeleteNodeTest extends SolrCloudTestCase {
cluster.waitForAllNodes(5000);
CloudSolrClient cloudClient = cluster.getSolrClient();
String coll = "deletenodetest_coll";
- Set<String> liveNodes = cloudClient.getZkStateReader().getClusterState().getLiveNodes();
+ ClusterState state = cloudClient.getZkStateReader().getClusterState();
+ Set<String> liveNodes = state.getLiveNodes();
ArrayList<String> l = new ArrayList<>(liveNodes);
Collections.shuffle(l, random());
CollectionAdminRequest.Create create = pickRandom(
CollectionAdminRequest.createCollection(coll, "conf1", 5, 2, 0, 0),
CollectionAdminRequest.createCollection(coll, "conf1", 5, 1, 1, 0),
- CollectionAdminRequest.createCollection(coll, "conf1", 5, 0, 1, 1));
+ CollectionAdminRequest.createCollection(coll, "conf1", 5, 0, 1, 1),
+ // check RF=1
+ CollectionAdminRequest.createCollection(coll, "conf1", 5, 1, 0, 0),
+ CollectionAdminRequest.createCollection(coll, "conf1", 5, 0, 1, 0)
+ );
create.setCreateNodeSet(StrUtils.join(l, ',')).setMaxShardsPerNode(3);
cloudClient.request(create);
+ state = cloudClient.getZkStateReader().getClusterState();
String node2bdecommissioned = l.get(0);
+ // check what replicas are on the node, and whether the call should fail
+ boolean shouldFail = false;
+ DocCollection docColl = state.getCollection(coll);
+ log.info("#### DocCollection: " + docColl);
+ List<Replica> replicas = docColl.getReplicas(node2bdecommissioned);
+ if (replicas != null) {
+ for (Replica replica : replicas) {
+ String shard = docColl.getShardId(node2bdecommissioned, replica.getStr(ZkStateReader.CORE_NAME_PROP));
+ Slice slice = docColl.getSlice(shard);
+ boolean hasOtherNonPullReplicas = false;
+ for (Replica r: slice.getReplicas()) {
+ if (!r.getName().equals(replica.getName()) &&
+ !r.getNodeName().equals(node2bdecommissioned) &&
+ r.getType() != Replica.Type.PULL) {
+ hasOtherNonPullReplicas = true;
+ break;
+ }
+ }
+ if (!hasOtherNonPullReplicas) {
+ shouldFail = true;
+ break;
+ }
+ }
+ }
new CollectionAdminRequest.DeleteNode(node2bdecommissioned).processAsync("003", cloudClient);
CollectionAdminRequest.RequestStatus requestStatus = CollectionAdminRequest.requestStatus("003");
- boolean success = false;
+ CollectionAdminRequest.RequestStatusResponse rsp = null;
for (int i = 0; i < 200; i++) {
- CollectionAdminRequest.RequestStatusResponse rsp = requestStatus.process(cloudClient);
- if (rsp.getRequestStatus() == RequestStatusState.COMPLETED) {
- success = true;
+ rsp = requestStatus.process(cloudClient);
+ if (rsp.getRequestStatus() != RequestStatusState.RUNNING) {
break;
}
- assertFalse(rsp.getRequestStatus() == RequestStatusState.FAILED);
Thread.sleep(50);
}
- assertTrue(success);
+ log.info("####### DocCollection after: " + cloudClient.getZkStateReader().getClusterState().getCollection(coll));
+ if (shouldFail) {
+ assertTrue(String.valueOf(rsp), rsp.getRequestStatus() == RequestStatusState.FAILED);
+ } else {
+ assertFalse(String.valueOf(rsp), rsp.getRequestStatus() == RequestStatusState.FAILED);
+ }
}
}