You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/09/20 23:59:30 UTC

[24/29] lucene-solr:jira/http2: SOLR-12080: Improve error handling of MoveReplicaCmd. Improve the test stability by avoiding killing the overseer.

SOLR-12080: Improve error handling of MoveReplicaCmd. Improve the test stability
by avoiding killing the overseer.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/52bdcf6b
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/52bdcf6b
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/52bdcf6b

Branch: refs/heads/jira/http2
Commit: 52bdcf6bb0b7645da55124298f6d268fe8f4a77a
Parents: 87cd5b1
Author: Andrzej Bialecki <ab...@apache.org>
Authored: Thu Sep 20 12:36:52 2018 +0200
Committer: Andrzej Bialecki <ab...@apache.org>
Committed: Thu Sep 20 12:36:52 2018 +0200

----------------------------------------------------------------------
 .../solr/cloud/api/collections/MoveReplicaCmd.java    | 13 +++++++++++--
 .../test/org/apache/solr/cloud/MoveReplicaTest.java   | 14 ++++++++++++--
 2 files changed, 23 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/52bdcf6b/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java
index 4a9bd59..2df0f77 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java
@@ -169,7 +169,12 @@ public class MoveReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
       removeReplicasProps.getProperties().put(CoreAdminParams.DELETE_INDEX, false);
       if (async != null) removeReplicasProps.getProperties().put(ASYNC, async);
       NamedList deleteResult = new NamedList();
-      ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
+      try {
+        ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
+      } catch (SolrException e) {
+        // assume this failed completely so there's nothing to roll back
+        deleteResult.add("failure", e.toString());
+      }
       if (deleteResult.get("failure") != null) {
         String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s shard=%s name=%s, failure=%s",
             coll.getName(), slice.getName(), replica.getName(), deleteResult.get("failure"));
@@ -304,7 +309,11 @@ public class MoveReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
         REPLICA_PROP, replica.getName());
     if (async != null) removeReplicasProps.getProperties().put(ASYNC, async);
     NamedList deleteResult = new NamedList();
-    ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
+    try {
+      ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
+    } catch (SolrException e) {
+      deleteResult.add("failure", e.toString());
+    }
     if (deleteResult.get("failure") != null) {
       String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s shard=%s name=%s, failure=%s",
           coll.getName(), slice.getName(), replica.getName(), deleteResult.get("failure"));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/52bdcf6b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
index 2b3be30..8f0f0e3 100644
--- a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
@@ -250,13 +250,22 @@ public class MoveReplicaTest extends SolrCloudTestCase {
 
     addDocs(coll, 100);
 
-    Replica replica = getRandomReplica(coll, cloudClient);
+    NamedList<Object> overSeerStatus = cluster.getSolrClient().request(CollectionAdminRequest.getOverseerStatus());
+    String overseerLeader = (String) overSeerStatus.get("leader");
+
+    // don't kill overseer in this test
+    Replica replica;
+    int count = 10;
+    do {
+      replica = getRandomReplica(coll, cloudClient);
+    } while (!replica.getNodeName().equals(overseerLeader) && count-- > 0);
+    assertNotNull("could not find non-overseer replica???", replica);
     Set<String> liveNodes = cloudClient.getZkStateReader().getClusterState().getLiveNodes();
     ArrayList<String> l = new ArrayList<>(liveNodes);
     Collections.shuffle(l, random());
     String targetNode = null;
     for (String node : liveNodes) {
-      if (!replica.getNodeName().equals(node)) {
+      if (!replica.getNodeName().equals(node) && !overseerLeader.equals(node)) {
         targetNode = node;
         break;
       }
@@ -271,6 +280,7 @@ public class MoveReplicaTest extends SolrCloudTestCase {
     for (int i = 0; i < cluster.getJettySolrRunners().size(); i++) {
       if (cluster.getJettySolrRunner(i).getNodeName().equals(targetNode)) {
         cluster.stopJettySolrRunner(i);
+        break;
       }
     }
     CollectionAdminRequest.RequestStatus requestStatus = CollectionAdminRequest.requestStatus(asyncId);