You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2017/06/19 13:22:13 UTC
lucene-solr:jira/solr-10878: SOLR-10878 Some cleanup.
Repository: lucene-solr
Updated Branches:
refs/heads/jira/solr-10878 0ee6fb9ee -> 6f8039f60
SOLR-10878 Some cleanup.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/6f8039f6
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/6f8039f6
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/6f8039f6
Branch: refs/heads/jira/solr-10878
Commit: 6f8039f6096427ded2680fe99b2136cb090dc66e
Parents: 0ee6fb9
Author: Andrzej Bialecki <ab...@apache.org>
Authored: Mon Jun 19 15:21:53 2017 +0200
Committer: Andrzej Bialecki <ab...@apache.org>
Committed: Mon Jun 19 15:21:53 2017 +0200
----------------------------------------------------------------------
.../org/apache/solr/cloud/AddReplicaCmd.java | 2 +-
.../org/apache/solr/cloud/MoveReplicaCmd.java | 10 +++----
.../cloud/OverseerCollectionMessageHandler.java | 28 ++++++++++++--------
.../org/apache/solr/cloud/MoveReplicaTest.java | 23 +++++++++++-----
4 files changed, 39 insertions(+), 24 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6f8039f6/solr/core/src/java/org/apache/solr/cloud/AddReplicaCmd.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/AddReplicaCmd.java b/solr/core/src/java/org/apache/solr/cloud/AddReplicaCmd.java
index 63acdd1..c42d073 100644
--- a/solr/core/src/java/org/apache/solr/cloud/AddReplicaCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/AddReplicaCmd.java
@@ -68,7 +68,7 @@ public class AddReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
ZkNodeProps addReplica(ClusterState clusterState, ZkNodeProps message, NamedList results, Runnable onComplete)
throws KeeperException, InterruptedException {
- log.info("addReplica() : {}", Utils.toJSONString(message));
+ log.debug("addReplica() : {}", Utils.toJSONString(message));
String collection = message.getStr(COLLECTION_PROP);
String node = message.getStr(CoreAdminParams.NODE);
String shard = message.getStr(SHARD_ID_PROP);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6f8039f6/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java b/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
index 5b47059..53d05e1 100644
--- a/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
@@ -59,7 +59,7 @@ public class MoveReplicaCmd implements Cmd{
}
private void moveReplica(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {
- log.info("moveReplica() : {}", Utils.toJSONString(message));
+ log.debug("moveReplica() : {}", Utils.toJSONString(message));
ocmh.checkRequired(message, COLLECTION_PROP, "targetNode");
String collection = message.getStr(COLLECTION_PROP);
String targetNode = message.getStr("targetNode");
@@ -174,7 +174,7 @@ public class MoveReplicaCmd implements Cmd{
replica.getName(), null, countDownLatch);
ocmh.zkStateReader.registerCollectionStateWatcher(coll.getName(), watcher);
}
- ZkNodeProps newReplica = ocmh.addReplica(clusterState, addReplicasProps, addResult, null);
+ ocmh.addReplica(clusterState, addReplicasProps, addResult, null);
if (addResult.get("failure") != null) {
String errorString = String.format(Locale.ROOT, "Failed to create replica for collection=%s shard=%s" +
" on node=%s", coll.getName(), slice.getName(), targetNode);
@@ -188,15 +188,15 @@ public class MoveReplicaCmd implements Cmd{
// wait for the other replica to be active if the source replica was a leader
if (watcher != null) {
try {
- log.info("==== Waiting for leader replica to recover.");
+ log.debug("Waiting for leader's replica to recover.");
if (!countDownLatch.await(timeout, TimeUnit.SECONDS)) {
- String errorString = String.format(Locale.ROOT, "Timed out waiting for leader replica to recover, collection=%s shard=%s" +
+ String errorString = String.format(Locale.ROOT, "Timed out waiting for leader's replica to recover, collection=%s shard=%s" +
" on node=%s", coll.getName(), slice.getName(), targetNode);
log.warn(errorString);
results.add("failure", errorString);
return;
} else {
- log.info("Replica " + watcher.getRecoveredReplica() + " is active - deleting the source...");
+ log.debug("Replica " + watcher.getRecoveredReplica() + " is active - deleting the source...");
}
} finally {
ocmh.zkStateReader.removeCollectionStateWatcher(coll.getName(), watcher);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6f8039f6/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
index 2f9875d..fc17307 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
@@ -425,19 +425,25 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
boolean waitForCoreNodeGone(String collectionName, String shard, String replicaName, int timeoutms) throws InterruptedException {
TimeOut timeout = new TimeOut(timeoutms, TimeUnit.MILLISECONDS);
- while (! timeout.hasTimedOut()) {
- Thread.sleep(100);
- DocCollection docCollection = zkStateReader.getClusterState().getCollection(collectionName);
- if (docCollection == null) { // someone already deleted the collection
- return true;
- }
- Slice slice = docCollection.getSlice(shard);
- if(slice == null || slice.getReplica(replicaName) == null) {
- return true;
+ // TODO: remove this workaround for SOLR-9440
+ zkStateReader.registerCore(collectionName);
+ try {
+ while (! timeout.hasTimedOut()) {
+ Thread.sleep(100);
+ DocCollection docCollection = zkStateReader.getClusterState().getCollection(collectionName);
+ if (docCollection == null) { // someone already deleted the collection
+ return true;
+ }
+ Slice slice = docCollection.getSlice(shard);
+ if(slice == null || slice.getReplica(replicaName) == null) {
+ return true;
+ }
}
+ // replica still exists after the timeout
+ return false;
+ } finally {
+ zkStateReader.unregisterCore(collectionName);
}
- // replica still exists after the timeout
- return false;
}
void deleteCoreNode(String collectionName, String replicaName, Replica replica, String core) throws KeeperException, InterruptedException {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6f8039f6/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
index 0a396a4..c6beb46 100644
--- a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
@@ -20,6 +20,7 @@ package org.apache.solr.cloud;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Set;
@@ -58,7 +59,7 @@ public class MoveReplicaTest extends SolrCloudTestCase {
cluster.waitForAllNodes(5000);
String coll = "movereplicatest_coll";
log.info("total_jettys: " + cluster.getJettySolrRunners().size());
- int REPLICATION = 1;
+ int REPLICATION = 2;
CloudSolrClient cloudClient = cluster.getSolrClient();
@@ -101,18 +102,22 @@ public class MoveReplicaTest extends SolrCloudTestCase {
}
assertTrue(success);
checkNumOfCores(cloudClient, replica.getNodeName(), 0);
- checkNumOfCores(cloudClient, targetNode, REPLICATION);
+ assertTrue("should be at least one core on target node!", getNumOfCores(cloudClient, targetNode) > 0);
// wait for recovery
boolean recovered = false;
for (int i = 0; i < 300; i++) {
DocCollection collState = getCollectionState(coll);
- List<Replica> replicas = collState.getReplicas(targetNode);
+ log.info("###### " + collState);
+ Collection<Replica> replicas = collState.getSlice(shardId).getReplicas();
boolean allActive = true;
boolean hasLeaders = true;
if (replicas != null && !replicas.isEmpty()) {
for (Replica r : replicas) {
+ if (!r.getNodeName().equals(targetNode)) {
+ continue;
+ }
if (!r.isActive(Collections.singleton(targetNode))) {
- log.info("Not active yet: " + r);
+ log.info("Not active: " + r);
allActive = false;
}
}
@@ -125,6 +130,7 @@ public class MoveReplicaTest extends SolrCloudTestCase {
}
}
if (allActive && hasLeaders) {
+ // check the number of active replicas
assertEquals("total number of replicas", REPLICATION, replicas.size());
recovered = true;
break;
@@ -138,16 +144,19 @@ public class MoveReplicaTest extends SolrCloudTestCase {
moveReplica = new CollectionAdminRequest.MoveReplica(coll, shardId, targetNode, replica.getNodeName());
moveReplica.process(cloudClient);
checkNumOfCores(cloudClient, replica.getNodeName(), 1);
- checkNumOfCores(cloudClient, targetNode, REPLICATION - 1);
// wait for recovery
recovered = false;
for (int i = 0; i < 300; i++) {
DocCollection collState = getCollectionState(coll);
- List<Replica> replicas = collState.getReplicas(replica.getNodeName());
+ log.info("###### " + collState);
+ Collection<Replica> replicas = collState.getSlice(shardId).getReplicas();
boolean allActive = true;
boolean hasLeaders = true;
if (replicas != null && !replicas.isEmpty()) {
for (Replica r : replicas) {
+ if (!r.getNodeName().equals(replica.getNodeName())) {
+ continue;
+ }
if (!r.isActive(Collections.singleton(replica.getNodeName()))) {
log.info("Not active yet: " + r);
allActive = false;
@@ -162,7 +171,7 @@ public class MoveReplicaTest extends SolrCloudTestCase {
}
}
if (allActive && hasLeaders) {
- assertEquals("total number of replicas", 1, replicas.size());
+ assertEquals("total number of replicas", REPLICATION, replicas.size());
recovered = true;
break;
} else {