You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/09/20 23:59:30 UTC
[24/29] lucene-solr:jira/http2: SOLR-12080: Improve error handling
of MoveReplicaCmd. Improve the test stability by avoiding killing overseer.
SOLR-12080: Improve error handling of MoveReplicaCmd. Improve the test stability
by avoiding killing overseer.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/52bdcf6b
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/52bdcf6b
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/52bdcf6b
Branch: refs/heads/jira/http2
Commit: 52bdcf6bb0b7645da55124298f6d268fe8f4a77a
Parents: 87cd5b1
Author: Andrzej Bialecki <ab...@apache.org>
Authored: Thu Sep 20 12:36:52 2018 +0200
Committer: Andrzej Bialecki <ab...@apache.org>
Committed: Thu Sep 20 12:36:52 2018 +0200
----------------------------------------------------------------------
.../solr/cloud/api/collections/MoveReplicaCmd.java | 13 +++++++++++--
.../test/org/apache/solr/cloud/MoveReplicaTest.java | 14 ++++++++++++--
2 files changed, 23 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/52bdcf6b/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java
index 4a9bd59..2df0f77 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java
@@ -169,7 +169,12 @@ public class MoveReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
removeReplicasProps.getProperties().put(CoreAdminParams.DELETE_INDEX, false);
if (async != null) removeReplicasProps.getProperties().put(ASYNC, async);
NamedList deleteResult = new NamedList();
- ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
+ try {
+ ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
+ } catch (SolrException e) {
+ // assume this failed completely so there's nothing to roll back
+ deleteResult.add("failure", e.toString());
+ }
if (deleteResult.get("failure") != null) {
String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s shard=%s name=%s, failure=%s",
coll.getName(), slice.getName(), replica.getName(), deleteResult.get("failure"));
@@ -304,7 +309,11 @@ public class MoveReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
REPLICA_PROP, replica.getName());
if (async != null) removeReplicasProps.getProperties().put(ASYNC, async);
NamedList deleteResult = new NamedList();
- ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
+ try {
+ ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
+ } catch (SolrException e) {
+ deleteResult.add("failure", e.toString());
+ }
if (deleteResult.get("failure") != null) {
String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s shard=%s name=%s, failure=%s",
coll.getName(), slice.getName(), replica.getName(), deleteResult.get("failure"));
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/52bdcf6b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
index 2b3be30..8f0f0e3 100644
--- a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
@@ -250,13 +250,22 @@ public class MoveReplicaTest extends SolrCloudTestCase {
addDocs(coll, 100);
- Replica replica = getRandomReplica(coll, cloudClient);
+ NamedList<Object> overSeerStatus = cluster.getSolrClient().request(CollectionAdminRequest.getOverseerStatus());
+ String overseerLeader = (String) overSeerStatus.get("leader");
+
+ // don't kill overseer in this test
+ Replica replica;
+ int count = 10;
+ do {
+ replica = getRandomReplica(coll, cloudClient);
+ } while (!replica.getNodeName().equals(overseerLeader) && count-- > 0);
+ assertNotNull("could not find non-overseer replica???", replica);
Set<String> liveNodes = cloudClient.getZkStateReader().getClusterState().getLiveNodes();
ArrayList<String> l = new ArrayList<>(liveNodes);
Collections.shuffle(l, random());
String targetNode = null;
for (String node : liveNodes) {
- if (!replica.getNodeName().equals(node)) {
+ if (!replica.getNodeName().equals(node) && !overseerLeader.equals(node)) {
targetNode = node;
break;
}
@@ -271,6 +280,7 @@ public class MoveReplicaTest extends SolrCloudTestCase {
for (int i = 0; i < cluster.getJettySolrRunners().size(); i++) {
if (cluster.getJettySolrRunner(i).getNodeName().equals(targetNode)) {
cluster.stopJettySolrRunner(i);
+ break;
}
}
CollectionAdminRequest.RequestStatus requestStatus = CollectionAdminRequest.requestStatus(asyncId);