You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2018/09/21 05:00:27 UTC

[2/4] lucene-solr:branch_7x: SOLR-12080: Improve error handling of MoveReplicaCmd. Improve the test stability by avoiding killing overseer.

SOLR-12080: Improve error handling of MoveReplicaCmd. Improve the test stability
by avoiding killing overseer.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/1b0b33ea
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/1b0b33ea
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/1b0b33ea

Branch: refs/heads/branch_7x
Commit: 1b0b33eaf1c0de8cd70b64dd27ef91e0f8d2482a
Parents: cb73d31
Author: Andrzej Bialecki <ab...@apache.org>
Authored: Thu Sep 20 12:36:52 2018 +0200
Committer: Andrzej Bialecki <ab...@apache.org>
Committed: Fri Sep 21 07:00:17 2018 +0200

----------------------------------------------------------------------
 .../solr/cloud/api/collections/MoveReplicaCmd.java    | 13 +++++++++++--
 .../test/org/apache/solr/cloud/MoveReplicaTest.java   | 14 ++++++++++++--
 2 files changed, 23 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1b0b33ea/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java
index 4a9bd59..2df0f77 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java
@@ -169,7 +169,12 @@ public class MoveReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
       removeReplicasProps.getProperties().put(CoreAdminParams.DELETE_INDEX, false);
       if (async != null) removeReplicasProps.getProperties().put(ASYNC, async);
       NamedList deleteResult = new NamedList();
-      ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
+      try {
+        ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
+      } catch (SolrException e) {
+        // assume this failed completely so there's nothing to roll back
+        deleteResult.add("failure", e.toString());
+      }
       if (deleteResult.get("failure") != null) {
         String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s shard=%s name=%s, failure=%s",
             coll.getName(), slice.getName(), replica.getName(), deleteResult.get("failure"));
@@ -304,7 +309,11 @@ public class MoveReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
         REPLICA_PROP, replica.getName());
     if (async != null) removeReplicasProps.getProperties().put(ASYNC, async);
     NamedList deleteResult = new NamedList();
-    ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
+    try {
+      ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
+    } catch (SolrException e) {
+      deleteResult.add("failure", e.toString());
+    }
     if (deleteResult.get("failure") != null) {
       String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s shard=%s name=%s, failure=%s",
           coll.getName(), slice.getName(), replica.getName(), deleteResult.get("failure"));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1b0b33ea/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
index 162e74f..91bc846 100644
--- a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
@@ -248,13 +248,22 @@ public class MoveReplicaTest extends SolrCloudTestCase {
 
     addDocs(coll, 100);
 
-    Replica replica = getRandomReplica(coll, cloudClient);
+    NamedList<Object> overSeerStatus = cluster.getSolrClient().request(CollectionAdminRequest.getOverseerStatus());
+    String overseerLeader = (String) overSeerStatus.get("leader");
+
+    // don't kill overseer in this test
+    Replica replica;
+    int count = 10;
+    do {
+      replica = getRandomReplica(coll, cloudClient);
+    } while (!replica.getNodeName().equals(overseerLeader) && count-- > 0);
+    assertNotNull("could not find non-overseer replica???", replica);
     Set<String> liveNodes = cloudClient.getZkStateReader().getClusterState().getLiveNodes();
     ArrayList<String> l = new ArrayList<>(liveNodes);
     Collections.shuffle(l, random());
     String targetNode = null;
     for (String node : liveNodes) {
-      if (!replica.getNodeName().equals(node)) {
+      if (!replica.getNodeName().equals(node) && !overseerLeader.equals(node)) {
         targetNode = node;
         break;
       }
@@ -269,6 +278,7 @@ public class MoveReplicaTest extends SolrCloudTestCase {
     for (int i = 0; i < cluster.getJettySolrRunners().size(); i++) {
       if (cluster.getJettySolrRunner(i).getNodeName().equals(targetNode)) {
         cluster.stopJettySolrRunner(i);
+        break;
       }
     }
     CollectionAdminRequest.RequestStatus requestStatus = CollectionAdminRequest.requestStatus(asyncId);