You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2017/06/19 10:13:17 UTC

lucene-solr:jira/solr-10878: SOLR-10878 Register a watcher before adding replica.

Repository: lucene-solr
Updated Branches:
  refs/heads/jira/solr-10878 08dadc7e4 -> 929986709


SOLR-10878 Register a watcher before adding replica.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/92998670
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/92998670
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/92998670

Branch: refs/heads/jira/solr-10878
Commit: 929986709d0323d68276bd1004baa8304f0dbd50
Parents: 08dadc7
Author: Andrzej Bialecki <ab...@apache.org>
Authored: Mon Jun 19 12:12:49 2017 +0200
Committer: Andrzej Bialecki <ab...@apache.org>
Committed: Mon Jun 19 12:12:49 2017 +0200

----------------------------------------------------------------------
 .../org/apache/solr/cloud/MoveReplicaCmd.java   | 21 +++++++++++++-------
 .../org/apache/solr/cloud/MoveReplicaTest.java  |  6 +++---
 2 files changed, 17 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/92998670/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java b/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
index 42bc8b3..5b47059 100644
--- a/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
@@ -167,22 +167,28 @@ public class MoveReplicaCmd implements Cmd{
         CoreAdminParams.NAME, newCoreName);
     if(async!=null) addReplicasProps.getProperties().put(ASYNC, async);
     NamedList addResult = new NamedList();
+    CountDownLatch countDownLatch = new CountDownLatch(1);
+    ReplaceNodeCmd.RecoveryWatcher watcher = null;
+    if (replica.equals(slice.getLeader())) {
+      watcher = new ReplaceNodeCmd.RecoveryWatcher(coll.getName(), slice.getName(),
+          replica.getName(), null, countDownLatch);
+      ocmh.zkStateReader.registerCollectionStateWatcher(coll.getName(), watcher);
+    }
     ZkNodeProps newReplica = ocmh.addReplica(clusterState, addReplicasProps, addResult, null);
     if (addResult.get("failure") != null) {
       String errorString = String.format(Locale.ROOT, "Failed to create replica for collection=%s shard=%s" +
           " on node=%s", coll.getName(), slice.getName(), targetNode);
       log.warn(errorString);
       results.add("failure", errorString);
+      if (watcher != null) { // unregister
+        ocmh.zkStateReader.registerCollectionStateWatcher(coll.getName(), watcher);
+      }
       return;
     }
     // wait for the other replica to be active if the source replica was a leader
-    if (replica.equals(slice.getLeader())) {
-      log.info("==== Waiting for leader replica to recover.");
-      CountDownLatch countDownLatch = new CountDownLatch(1);
-      ReplaceNodeCmd.RecoveryWatcher watcher = new ReplaceNodeCmd.RecoveryWatcher(coll.getName(), slice.getName(),
-          replica.getName(), newReplica.getStr(ZkStateReader.CORE_NAME_PROP), countDownLatch);
+    if (watcher != null) {
       try {
-        ocmh.zkStateReader.registerCollectionStateWatcher(coll.getName(), watcher);
+        log.info("==== Waiting for leader replica to recover.");
         if (!countDownLatch.await(timeout, TimeUnit.SECONDS)) {
           String errorString = String.format(Locale.ROOT, "Timed out waiting for leader replica to recover, collection=%s shard=%s" +
               " on node=%s", coll.getName(), slice.getName(), targetNode);
@@ -193,9 +199,10 @@ public class MoveReplicaCmd implements Cmd{
           log.info("Replica " + watcher.getRecoveredReplica() + " is active - deleting the source...");
         }
       } finally {
-        ocmh.zkStateReader.registerCollectionStateWatcher(coll.getName(), watcher);
+        ocmh.zkStateReader.removeCollectionStateWatcher(coll.getName(), watcher);
       }
     }
+
     ZkNodeProps removeReplicasProps = new ZkNodeProps(
         COLLECTION_PROP, coll.getName(),
         SHARD_ID_PROP, slice.getName(),

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/92998670/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
index 93aaaaa..0a396a4 100644
--- a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
@@ -97,7 +97,7 @@ public class MoveReplicaTest extends SolrCloudTestCase {
         break;
       }
       assertFalse(rsp.getRequestStatus() == RequestStatusState.FAILED);
-      Thread.sleep(50);
+      Thread.sleep(500);
     }
     assertTrue(success);
     checkNumOfCores(cloudClient, replica.getNodeName(), 0);
@@ -143,12 +143,12 @@ public class MoveReplicaTest extends SolrCloudTestCase {
     recovered = false;
     for (int i = 0; i < 300; i++) {
       DocCollection collState = getCollectionState(coll);
-      List<Replica> replicas = collState.getReplicas(targetNode);
+      List<Replica> replicas = collState.getReplicas(replica.getNodeName());
       boolean allActive = true;
       boolean hasLeaders = true;
       if (replicas != null && !replicas.isEmpty()) {
         for (Replica r : replicas) {
-          if (!r.isActive(Collections.singleton(targetNode))) {
+          if (!r.isActive(Collections.singleton(replica.getNodeName()))) {
             log.info("Not active yet: " + r);
             allActive = false;
           }