You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2020/11/16 11:24:45 UTC
[lucene-solr] 01/02: @1221 Enable basic move replica test.
This is an automated email from the ASF dual-hosted git repository.
markrmiller pushed a commit to branch reference_impl_dev
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
commit 6c5f9a27bca5f2e8c6dc4bc6e085bbe696ed0f9c
Author: markrmiller@gmail.com <ma...@gmail.com>
AuthorDate: Sun Nov 15 22:36:46 2020 -0600
@1221 Enable basic move replica test.
---
.../org/apache/solr/cloud/RecoveryStrategy.java | 2 +-
.../solr/cloud/api/collections/MoveReplicaCmd.java | 99 ++++++++++------------
.../java/org/apache/solr/core/CoreContainer.java | 21 +----
.../java/org/apache/solr/handler/IndexFetcher.java | 7 +-
.../java/org/apache/solr/servlet/HttpSolrCall.java | 32 +++----
.../org/apache/solr/cloud/MoveReplicaTest.java | 26 ++----
6 files changed, 77 insertions(+), 110 deletions(-)
diff --git a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
index e203010..6d3ea41 100644
--- a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
+++ b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
@@ -635,7 +635,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
// though
try {
CloudDescriptor cloudDesc = this.coreDescriptor.getCloudDescriptor();
- final Replica leader = zkStateReader.getLeaderRetry(cloudDesc.getCollectionName(), cloudDesc.getShardId(), 15000);
+ final Replica leader = zkStateReader.getLeaderRetry(cloudDesc.getCollectionName(), cloudDesc.getShardId(), 3000);
log.info("Begin buffering updates. core=[{}]", coreName);
// recalling buffer updates will drop the old buffer tlog
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java
index a9c5a0b..5455221 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java
@@ -121,7 +121,8 @@ public class MoveReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
Slice slice = coll.getSlice(shardId);
List<Replica> sliceReplicas = new ArrayList<>(slice.getReplicas(r -> sourceNode.equals(r.getNodeName())));
if (sliceReplicas.isEmpty()) {
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection: " + collection + " node: " + sourceNode + " does not have any replica belonging to shard: " + shardId);
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection: " + collection +
+ " node: " + sourceNode + " does not have any replica belonging to shard: " + shardId + " collection=" + coll);
}
Collections.shuffle(sliceReplicas, OverseerCollectionMessageHandler.RANDOM);
replica = sliceReplicas.iterator().next();
@@ -156,24 +157,26 @@ public class MoveReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
assert slice != null;
Object dataDir = replica.get("dataDir");
boolean isSharedFS = replica.getBool(ZkStateReader.SHARED_STORAGE_PROP, false) && dataDir != null;
- OverseerCollectionMessageHandler.Finalize finalizer = null;
+
+ AddReplicaCmd.Response resp = null;
if (isSharedFS && inPlaceMove) {
log.debug("-- moveHdfsReplica");
+ // nocommit TODO
moveHdfsReplica(clusterState, results, dataDir.toString(), targetNode, async, coll, replica, slice, timeout, waitForFinalState);
} else {
log.debug("-- moveNormalReplica (inPlaceMove={}, isSharedFS={}", inPlaceMove, isSharedFS);
- finalizer = moveNormalReplica(clusterState, results, targetNode, async, coll, replica, slice, timeout, waitForFinalState);
+ resp = moveNormalReplica(clusterState, results, targetNode, async, coll, replica, slice, timeout, waitForFinalState);
}
AddReplicaCmd.Response response = new AddReplicaCmd.Response();
- OverseerCollectionMessageHandler.Finalize finalIzer = finalizer;
+ OverseerCollectionMessageHandler.Finalize finalizer = resp.asyncFinalRunner;
response.asyncFinalRunner = new OverseerCollectionMessageHandler.Finalize() {
@Override
public AddReplicaCmd.Response call() {
- if (finalIzer != null) {
+ if (finalizer != null) {
try {
- finalIzer.call();
+ finalizer.call();
} catch (Exception e) {
log.error("Exception during MoveReplica", e);
}
@@ -183,7 +186,7 @@ public class MoveReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
}
};
- response.clusterState = clusterState;
+ response.clusterState = null;
return response;
}
@@ -281,7 +284,7 @@ public class MoveReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
}
@SuppressWarnings({"unchecked"})
- private OverseerCollectionMessageHandler.Finalize moveNormalReplica(ClusterState clusterState, @SuppressWarnings({"rawtypes"}) NamedList results, String targetNode, String async, DocCollection coll,
+ private AddReplicaCmd.Response moveNormalReplica(ClusterState clusterState, @SuppressWarnings({"rawtypes"}) NamedList results, String targetNode, String async, DocCollection coll,
Replica replica, Slice slice, int timeout, boolean waitForFinalState) throws Exception {
String newCoreName = Assign.buildSolrCoreName(coll, coll.getName(), slice.getName(), replica.getType());
ZkNodeProps addReplicasProps = new ZkNodeProps(COLLECTION_PROP, coll.getName(), SHARD_ID_PROP, slice.getName(), CoreAdminParams.NODE, targetNode, CoreAdminParams.NAME, newCoreName,
@@ -293,64 +296,56 @@ public class MoveReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
AddReplicaCmd.Response response = ocmh.addReplicaWithResp(clusterState, addReplicasProps, addResult);
- DocCollection docColl = response.clusterState.getCollectionOrNull(coll.getName());
- Map<String, DocCollection> collectionStates;
- if (docColl != null) {
- collectionStates = new HashMap<>();
- collectionStates.put(docColl.getName(), docColl);
- } else {
- collectionStates = new HashMap<>();
- }
- ClusterState cs = new ClusterState(response.clusterState.getLiveNodes(), collectionStates);
- ocmh.overseer.getZkStateWriter().enqueueUpdate(cs, null,false);
+ ocmh.overseer.getZkStateWriter().enqueueUpdate(response.clusterState, null,false);
ocmh.overseer.writePendingUpdates();
// wait for the other replica to be active if the source replica was a leader
+ AddReplicaCmd.Response finalResponse = new AddReplicaCmd.Response();
+
+ finalResponse.clusterState = response.clusterState;
+ finalResponse.asyncFinalRunner = () -> {
+ log.debug("Waiting for leader's replica to recover.");
- return new OverseerCollectionMessageHandler.Finalize() {
- @Override
- public AddReplicaCmd.Response call() throws Exception {
- log.debug("Waiting for leader's replica to recover.");
+ response.asyncFinalRunner.call();
+
+ if (addResult.get("failure") != null) {
+ String errorString = String
+ .format(Locale.ROOT, "Failed to create replica for collection=%s shard=%s" + " on node=%s, failure=%s", coll.getName(), slice.getName(), targetNode, addResult.get("failure"));
+ log.warn(errorString);
+ results.add("failure", errorString);
+
+ AddReplicaCmd.Response response1 = new AddReplicaCmd.Response();
+ return response1;
+ } else {
- response.asyncFinalRunner.call();
+ AddReplicaCmd.Response response1 = new AddReplicaCmd.Response();
- if (addResult.get("failure") != null) {
- String errorString = String
- .format(Locale.ROOT, "Failed to create replica for collection=%s shard=%s" + " on node=%s, failure=%s", coll.getName(), slice.getName(), targetNode, addResult.get("failure"));
+ ZkNodeProps removeReplicasProps = new ZkNodeProps(COLLECTION_PROP, coll.getName(), SHARD_ID_PROP, slice.getName(), REPLICA_PROP, replica.getName());
+ if (async != null) removeReplicasProps.getProperties().put(ASYNC, async);
+ @SuppressWarnings({"rawtypes"}) NamedList deleteResult = new NamedList();
+ try {
+ response1.clusterState = ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult).clusterState;
+ } catch (SolrException e) {
+ deleteResult.add("failure", e.toString());
+ }
+ if (deleteResult.get("failure") != null) {
+ String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s shard=%s name=%s, failure=%s", coll.getName(), slice.getName(), replica.getName(), deleteResult.get("failure"));
log.warn(errorString);
results.add("failure", errorString);
-
- AddReplicaCmd.Response response = new AddReplicaCmd.Response();
- return response;
} else {
-
- AddReplicaCmd.Response response = new AddReplicaCmd.Response();
-
- ZkNodeProps removeReplicasProps = new ZkNodeProps(COLLECTION_PROP, coll.getName(), SHARD_ID_PROP, slice.getName(), REPLICA_PROP, replica.getName());
- if (async != null) removeReplicasProps.getProperties().put(ASYNC, async);
- @SuppressWarnings({"rawtypes"}) NamedList deleteResult = new NamedList();
- try {
- response.clusterState = ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult).clusterState;
- } catch (SolrException e) {
- deleteResult.add("failure", e.toString());
- }
- if (deleteResult.get("failure") != null) {
- String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s shard=%s name=%s, failure=%s", coll.getName(), slice.getName(), replica.getName(), deleteResult.get("failure"));
- log.warn(errorString);
- results.add("failure", errorString);
- } else {
- String successString = String
- .format(Locale.ROOT, "MOVEREPLICA action completed successfully, moved replica=%s at node=%s " + "to replica=%s at node=%s", replica.getName(), replica.getNodeName(), newCoreName,
- targetNode);
- results.add("success", successString);
- }
-
- return response;
+ String successString = String
+ .format(Locale.ROOT, "MOVEREPLICA action completed successfully, moved replica=%s at node=%s " + "to replica=%s at node=%s", replica.getName(), replica.getNodeName(), newCoreName,
+ targetNode);
+ results.add("success", successString);
}
+
+ return response1;
}
};
+
+ return finalResponse;
}
}
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index 7d74696..4d4c36f 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -1346,11 +1346,11 @@ public class CoreContainer implements Closeable {
core = processCoreCreateException(e, dcore, coreConfig);
}
+ core.start();
registerCore(dcore, core, isZooKeeperAware(), false);
registered = true;
- core.start();
// always kick off recovery if we are in non-Cloud mode
if (!isZooKeeperAware() && core.getUpdateHandler().getUpdateLog() != null) {
@@ -1782,16 +1782,7 @@ public class CoreContainer implements Closeable {
if (cd == null) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Cannot unload non-existent core [" + name + "]");
}
- if (cd != null) {
- SolrCore.deleteUnloadedCore(cd, deleteDataDir, deleteInstanceDir);
- solrCores.removeCoreDescriptor(cd);
- coresLocator.delete(this, cd);
- if (core == null) {
- // transient core
- SolrCore.deleteUnloadedCore(cd, deleteDataDir, deleteInstanceDir);
- return;
- }
- }
+
} finally {
if (isZooKeeperAware()) {
// cancel recovery in cloud mode
@@ -1838,14 +1829,6 @@ public class CoreContainer implements Closeable {
} catch (TimeoutException e) {
log.error("Timeout waiting for SolrCore close on unload", e);
throw new SolrException(ErrorCode.SERVER_ERROR, "Timeout waiting for SolrCore close on unload", e);
- } finally {
- if (deleteInstanceDir && cd != null) {
- try {
- FileUtils.deleteDirectory(cd.getInstanceDir().toFile());
- } catch (IOException e) {
- SolrException.log(log, "Failed to delete instance dir for core:" + cd.getName() + " dir:" + cd.getInstanceDir());
- }
- }
}
}
diff --git a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
index c2fa9f6..bbe8dff 100644
--- a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
+++ b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
@@ -731,7 +731,12 @@ public class IndexFetcher {
}
try {
if (indexDir != null) {
- core.getDirectoryFactory().release(indexDir);
+ try {
+ core.getDirectoryFactory().release(indexDir);
+ } catch (IllegalArgumentException e) {
+ if (log.isDebugEnabled()) log.debug("Error releasing directory in IndexFetcher", e);
+ // could already be removed
+ }
}
} catch (Exception e) {
SolrException.log(log, e);
diff --git a/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java b/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java
index cfa4d7b..589695a 100644
--- a/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java
+++ b/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java
@@ -296,7 +296,7 @@ public class HttpSolrCall {
} else {
// if we couldn't find it locally, look on other nodes
if (idx > 0) {
- extractRemotePath(collectionName, origCorename);
+ extractRemotePath(null, origCorename);
if (action == REMOTEQUERY) {
path = path.substring(idx);
return;
@@ -1040,7 +1040,9 @@ public class HttpSolrCall {
return result;
}
- protected SolrCore getCoreByCollection(String collectionName, boolean isPreferLeader) {
+ protected SolrCore getCoreByCollection(String collectionName, boolean isPreferLeader) throws TimeoutException, InterruptedException {
+ ensureStatesAreAtLeastAtClient();
+
ZkStateReader zkStateReader = cores.getZkController().getZkStateReader();
ClusterState clusterState = zkStateReader.getClusterState();
@@ -1107,12 +1109,11 @@ public class HttpSolrCall {
final DocCollection docCollection = clusterState.getCollectionOrNull(collectionName, false);
Collection<Slice> slices = (docCollection != null) ? docCollection.getActiveSlices() : null;
List<Slice> activeSlices = new ArrayList<>();
- boolean byCoreName = false;
int totalReplicas = 0;
if (slices == null) {
- byCoreName = true;
+
activeSlices = new ArrayList<>();
getSlicesForCollections(clusterState, activeSlices, true);
if (activeSlices.isEmpty()) {
@@ -1135,8 +1136,8 @@ public class HttpSolrCall {
collectionsList = new ArrayList<>(collectionsList);
collectionsList.add(collectionName);
}
- String coreUrl = getCoreUrl(collectionName, origCorename, clusterState,
- activeSlices, byCoreName, true);
+ String coreUrl = getCoreUrl(origCorename,
+ activeSlices, true);
// Avoid getting into a recursive loop of requests being forwarded by
// stopping forwarding and erroring out after (totalReplicas) forwards
@@ -1145,16 +1146,14 @@ public class HttpSolrCall {
throw new SolrException(SolrException.ErrorCode.INVALID_STATE,
"No active replicas found for collection: " + collectionName);
}
- coreUrl = getCoreUrl(collectionName, origCorename, clusterState,
- activeSlices, byCoreName, false);
+ coreUrl = getCoreUrl(origCorename,
+ activeSlices, false);
}
return coreUrl;
}
- private String getCoreUrl(String collectionName,
- String origCorename, ClusterState clusterState, List<Slice> slices,
- boolean byCoreName, boolean activeReplicas) {
+ private String getCoreUrl(String origCorename, List<Slice> slices, boolean activeReplicas) {
String coreUrl;
Collections.shuffle(slices, random);
@@ -1167,7 +1166,7 @@ public class HttpSolrCall {
if (!activeReplicas || (cores.getZkController().zkStateReader.getLiveNodes().contains(replica.getNodeName())
&& replica.getState() == Replica.State.ACTIVE)) {
- if (byCoreName && (origCorename == null || !origCorename.equals(replica.getStr(CORE_NAME_PROP)))) {
+ if (!origCorename.equals(replica.getStr(CORE_NAME_PROP))) {
// if it's by core name, make sure they match
continue;
}
@@ -1176,14 +1175,7 @@ public class HttpSolrCall {
continue;
}
- if (origCorename != null) {
- coreUrl = replica.getBaseUrl() + "/" + origCorename;
- } else {
- coreUrl = replica.getCoreUrl();
- if (coreUrl.endsWith("/")) {
- coreUrl = coreUrl.substring(0, coreUrl.length() - 1);
- }
- }
+ coreUrl = replica.getCoreUrl();
return coreUrl;
}
diff --git a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
index f2d0d73..f197d8c 100644
--- a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
@@ -53,7 +53,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@LuceneTestCase.SuppressCodecs({"MockRandom", "Direct", "SimpleText"})
-@Ignore // nocommit
public class MoveReplicaTest extends SolrCloudTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@@ -119,9 +118,6 @@ public class MoveReplicaTest extends SolrCloudTestCase {
create.setMaxShardsPerNode(2);
cloudClient.request(create);
- // wait for recovery
- cluster.waitForActiveCollection(coll, create.getNumShards(), create.getNumShards() * create.getTotaleReplicaCount());
-
addDocs(coll, 100);
Replica replica = getRandomReplica(coll, cloudClient);
@@ -153,20 +149,17 @@ public class MoveReplicaTest extends SolrCloudTestCase {
CollectionAdminRequest.RequestStatus requestStatus = CollectionAdminRequest.requestStatus(asyncId);
// wait for async request success
boolean success = false;
- for (int i = 0; i < 600; i++) {
+ for (int i = 0; i < 200; i++) {
CollectionAdminRequest.RequestStatusResponse rsp = requestStatus.process(cloudClient);
if (rsp.getRequestStatus() == RequestStatusState.COMPLETED) {
success = true;
break;
}
assertNotSame(rsp.getRequestStatus(), RequestStatusState.FAILED);
- Thread.sleep(250);
+ Thread.sleep(50);
}
assertTrue(success);
- // wait for recovery
- cluster.waitForActiveCollection(coll, create.getNumShards(), create.getNumShards() * (create.getNumNrtReplicas() + create.getNumPullReplicas() + create.getNumTlogReplicas()));
-
assertEquals(100, cluster.getSolrClient().query(coll, new SolrQuery("*:*")).getResults().getNumFound());
// assertEquals("should be one less core on the source node!", sourceNumCores - 1, getNumOfCores(cloudClient, replica.getNodeName(), coll, replica.getType().name()));
@@ -183,14 +176,13 @@ public class MoveReplicaTest extends SolrCloudTestCase {
}
assertNotNull(targetNode);
- // nocommit I think above get node logic is flakey Collection: MoveReplicaTest_coll_true node: 127.0.0.1:35129_solr does not have any replica belonging to shard: s1
-// moveReplica = createMoveReplicaRequest(coll, replica, targetNode, shardId);
-// moveReplica.setInPlaceMove(inPlaceMove);
-// moveReplica.process(cloudClient);
-//
-// assertEquals(100, cluster.getSolrClient().query(coll, new SolrQuery("*:*")).getResults().getNumFound());
-//
-// checkNumOfCores(cloudClient, replica.getNodeName(), coll, sourceNumCores);
+ moveReplica = createMoveReplicaRequest(coll, replica, targetNode, shardId);
+ moveReplica.setInPlaceMove(inPlaceMove);
+ moveReplica.process(cloudClient);
+
+ assertEquals(100, cluster.getSolrClient().query(coll, new SolrQuery("*:*")).getResults().getNumFound());
+
+ checkNumOfCores(cloudClient, replica.getNodeName(), coll, sourceNumCores);
}
//Commented out 5-Dec-2017