You are viewing a plain text version of this content. The canonical link to the original message was not preserved in this plain-text copy.
Posted to commits@lucene.apache.org by ab...@apache.org on 2017/06/19 13:22:13 UTC

lucene-solr:jira/solr-10878: SOLR-10878 Some cleanup.

Repository: lucene-solr
Updated Branches:
  refs/heads/jira/solr-10878 0ee6fb9ee -> 6f8039f60


SOLR-10878 Some cleanup.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/6f8039f6
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/6f8039f6
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/6f8039f6

Branch: refs/heads/jira/solr-10878
Commit: 6f8039f6096427ded2680fe99b2136cb090dc66e
Parents: 0ee6fb9
Author: Andrzej Bialecki <ab...@apache.org>
Authored: Mon Jun 19 15:21:53 2017 +0200
Committer: Andrzej Bialecki <ab...@apache.org>
Committed: Mon Jun 19 15:21:53 2017 +0200

----------------------------------------------------------------------
 .../org/apache/solr/cloud/AddReplicaCmd.java    |  2 +-
 .../org/apache/solr/cloud/MoveReplicaCmd.java   | 10 +++----
 .../cloud/OverseerCollectionMessageHandler.java | 28 ++++++++++++--------
 .../org/apache/solr/cloud/MoveReplicaTest.java  | 23 +++++++++++-----
 4 files changed, 39 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6f8039f6/solr/core/src/java/org/apache/solr/cloud/AddReplicaCmd.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/AddReplicaCmd.java b/solr/core/src/java/org/apache/solr/cloud/AddReplicaCmd.java
index 63acdd1..c42d073 100644
--- a/solr/core/src/java/org/apache/solr/cloud/AddReplicaCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/AddReplicaCmd.java
@@ -68,7 +68,7 @@ public class AddReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
 
   ZkNodeProps addReplica(ClusterState clusterState, ZkNodeProps message, NamedList results, Runnable onComplete)
       throws KeeperException, InterruptedException {
-    log.info("addReplica() : {}", Utils.toJSONString(message));
+    log.debug("addReplica() : {}", Utils.toJSONString(message));
     String collection = message.getStr(COLLECTION_PROP);
     String node = message.getStr(CoreAdminParams.NODE);
     String shard = message.getStr(SHARD_ID_PROP);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6f8039f6/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java b/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
index 5b47059..53d05e1 100644
--- a/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
@@ -59,7 +59,7 @@ public class MoveReplicaCmd implements Cmd{
   }
 
   private void moveReplica(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {
-    log.info("moveReplica() : {}", Utils.toJSONString(message));
+    log.debug("moveReplica() : {}", Utils.toJSONString(message));
     ocmh.checkRequired(message, COLLECTION_PROP, "targetNode");
     String collection = message.getStr(COLLECTION_PROP);
     String targetNode = message.getStr("targetNode");
@@ -174,7 +174,7 @@ public class MoveReplicaCmd implements Cmd{
           replica.getName(), null, countDownLatch);
       ocmh.zkStateReader.registerCollectionStateWatcher(coll.getName(), watcher);
     }
-    ZkNodeProps newReplica = ocmh.addReplica(clusterState, addReplicasProps, addResult, null);
+    ocmh.addReplica(clusterState, addReplicasProps, addResult, null);
     if (addResult.get("failure") != null) {
       String errorString = String.format(Locale.ROOT, "Failed to create replica for collection=%s shard=%s" +
           " on node=%s", coll.getName(), slice.getName(), targetNode);
@@ -188,15 +188,15 @@ public class MoveReplicaCmd implements Cmd{
     // wait for the other replica to be active if the source replica was a leader
     if (watcher != null) {
       try {
-        log.info("==== Waiting for leader replica to recover.");
+        log.debug("Waiting for leader's replica to recover.");
         if (!countDownLatch.await(timeout, TimeUnit.SECONDS)) {
-          String errorString = String.format(Locale.ROOT, "Timed out waiting for leader replica to recover, collection=%s shard=%s" +
+          String errorString = String.format(Locale.ROOT, "Timed out waiting for leader's replica to recover, collection=%s shard=%s" +
               " on node=%s", coll.getName(), slice.getName(), targetNode);
           log.warn(errorString);
           results.add("failure", errorString);
           return;
         } else {
-          log.info("Replica " + watcher.getRecoveredReplica() + " is active - deleting the source...");
+          log.debug("Replica " + watcher.getRecoveredReplica() + " is active - deleting the source...");
         }
       } finally {
         ocmh.zkStateReader.removeCollectionStateWatcher(coll.getName(), watcher);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6f8039f6/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
index 2f9875d..fc17307 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
@@ -425,19 +425,25 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
 
   boolean waitForCoreNodeGone(String collectionName, String shard, String replicaName, int timeoutms) throws InterruptedException {
     TimeOut timeout = new TimeOut(timeoutms, TimeUnit.MILLISECONDS);
-    while (! timeout.hasTimedOut()) {
-      Thread.sleep(100);
-      DocCollection docCollection = zkStateReader.getClusterState().getCollection(collectionName);
-      if (docCollection == null) { // someone already deleted the collection
-        return true;
-      }
-      Slice slice = docCollection.getSlice(shard);
-      if(slice == null || slice.getReplica(replicaName) == null) {
-        return true;
+    // TODO: remove this workaround for SOLR-9440
+    zkStateReader.registerCore(collectionName);
+    try {
+      while (! timeout.hasTimedOut()) {
+        Thread.sleep(100);
+        DocCollection docCollection = zkStateReader.getClusterState().getCollection(collectionName);
+        if (docCollection == null) { // someone already deleted the collection
+          return true;
+        }
+        Slice slice = docCollection.getSlice(shard);
+        if(slice == null || slice.getReplica(replicaName) == null) {
+          return true;
+        }
       }
+      // replica still exists after the timeout
+      return false;
+    } finally {
+      zkStateReader.unregisterCore(collectionName);
     }
-    // replica still exists after the timeout
-    return false;
   }
 
   void deleteCoreNode(String collectionName, String replicaName, Replica replica, String core) throws KeeperException, InterruptedException {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6f8039f6/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
index 0a396a4..c6beb46 100644
--- a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
@@ -20,6 +20,7 @@ package org.apache.solr.cloud;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.Set;
@@ -58,7 +59,7 @@ public class MoveReplicaTest extends SolrCloudTestCase {
     cluster.waitForAllNodes(5000);
     String coll = "movereplicatest_coll";
     log.info("total_jettys: " + cluster.getJettySolrRunners().size());
-    int REPLICATION = 1;
+    int REPLICATION = 2;
 
     CloudSolrClient cloudClient = cluster.getSolrClient();
 
@@ -101,18 +102,22 @@ public class MoveReplicaTest extends SolrCloudTestCase {
     }
     assertTrue(success);
     checkNumOfCores(cloudClient, replica.getNodeName(), 0);
-    checkNumOfCores(cloudClient, targetNode, REPLICATION);
+    assertTrue("should be at least one core on target node!", getNumOfCores(cloudClient, targetNode) > 0);
     // wait for recovery
     boolean recovered = false;
     for (int i = 0; i < 300; i++) {
       DocCollection collState = getCollectionState(coll);
-      List<Replica> replicas = collState.getReplicas(targetNode);
+      log.info("###### " + collState);
+      Collection<Replica> replicas = collState.getSlice(shardId).getReplicas();
       boolean allActive = true;
       boolean hasLeaders = true;
       if (replicas != null && !replicas.isEmpty()) {
         for (Replica r : replicas) {
+          if (!r.getNodeName().equals(targetNode)) {
+            continue;
+          }
           if (!r.isActive(Collections.singleton(targetNode))) {
-            log.info("Not active yet: " + r);
+            log.info("Not active: " + r);
             allActive = false;
           }
         }
@@ -125,6 +130,7 @@ public class MoveReplicaTest extends SolrCloudTestCase {
         }
       }
       if (allActive && hasLeaders) {
+        // check the number of active replicas
         assertEquals("total number of replicas", REPLICATION, replicas.size());
         recovered = true;
         break;
@@ -138,16 +144,19 @@ public class MoveReplicaTest extends SolrCloudTestCase {
     moveReplica = new CollectionAdminRequest.MoveReplica(coll, shardId, targetNode, replica.getNodeName());
     moveReplica.process(cloudClient);
     checkNumOfCores(cloudClient, replica.getNodeName(), 1);
-    checkNumOfCores(cloudClient, targetNode, REPLICATION - 1);
     // wait for recovery
     recovered = false;
     for (int i = 0; i < 300; i++) {
       DocCollection collState = getCollectionState(coll);
-      List<Replica> replicas = collState.getReplicas(replica.getNodeName());
+      log.info("###### " + collState);
+      Collection<Replica> replicas = collState.getSlice(shardId).getReplicas();
       boolean allActive = true;
       boolean hasLeaders = true;
       if (replicas != null && !replicas.isEmpty()) {
         for (Replica r : replicas) {
+          if (!r.getNodeName().equals(replica.getNodeName())) {
+            continue;
+          }
           if (!r.isActive(Collections.singleton(replica.getNodeName()))) {
             log.info("Not active yet: " + r);
             allActive = false;
@@ -162,7 +171,7 @@ public class MoveReplicaTest extends SolrCloudTestCase {
         }
       }
       if (allActive && hasLeaders) {
-        assertEquals("total number of replicas", 1, replicas.size());
+        assertEquals("total number of replicas", REPLICATION, replicas.size());
         recovered = true;
         break;
       } else {