You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2020/09/02 13:35:10 UTC
[lucene-solr] branch reference_impl updated: @702 Looking at rare fail where the coreNodeName is not found and sent.
This is an automated email from the ASF dual-hosted git repository.
markrmiller pushed a commit to branch reference_impl
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/reference_impl by this push:
new ce57d88 @702 Looking at rare fail where the coreNodeName is not found and sent.
ce57d88 is described below
commit ce57d88d4f857a4827b96e2af95f696752efa563
Author: markrmiller@gmail.com <ma...@gmail.com>
AuthorDate: Wed Sep 2 08:33:12 2020 -0500
@702 Looking at rare fail where the coreNodeName is not found and sent.
---
.../cloud/api/collections/CreateCollectionCmd.java | 7 +++---
.../OverseerCollectionMessageHandler.java | 29 ++++++++++++++--------
2 files changed, 23 insertions(+), 13 deletions(-)
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java
index 793b156..8a4ce7c 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java
@@ -98,11 +98,11 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
private final ZkStateReader zkStateReader;
private final SolrCloudManager cloudManager;
- public CreateCollectionCmd(OverseerCollectionMessageHandler ocmh, CoreContainer cc, SolrCloudManager cloudManager, ZkStateReader zkStateReader) {
+ public CreateCollectionCmd(OverseerCollectionMessageHandler ocmh, CoreContainer cc, SolrCloudManager cloudManager) {
this.ocmh = ocmh;
this.stateManager = ocmh.cloudManager.getDistribStateManager();
this.timeSource = ocmh.cloudManager.getTimeSource();
- this.zkStateReader = zkStateReader;
+ this.zkStateReader = ocmh.zkStateReader;
this.cloudManager = cloudManager;
}
@@ -328,8 +328,9 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
if(!isLegacyCloud) {
// wait for all replica entries to be created
Map<String,Replica> replicas = new HashMap<>();
+ zkStateReader.waitForState(collectionName, 10, TimeUnit.SECONDS, (n, c) -> c != null && c.getSlices().size() == shardNames.size());
zkStateReader.waitForState(collectionName, 5, TimeUnit.SECONDS, expectedReplicas(coresToCreate.size(), replicas)); // nocommit - timeout - keep this below containing timeouts - need central timeout stuff
- // nocommit, what if replicas comes back wrong?
+ // TODO what if replicas comes back wrong?
if (replicas.size() > 0) {
for (Map.Entry<String, ShardRequest> e : coresToCreate.entrySet()) {
ShardRequest sreq = e.getValue();
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerCollectionMessageHandler.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerCollectionMessageHandler.java
index b72f279..44823db 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerCollectionMessageHandler.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerCollectionMessageHandler.java
@@ -66,6 +66,7 @@ import org.apache.solr.common.util.TimeSource;
import org.apache.solr.common.util.Utils;
import org.apache.solr.handler.component.HttpShardHandlerFactory;
import org.apache.solr.handler.component.ShardHandler;
+import org.apache.solr.handler.component.ShardHandlerFactory;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.handler.component.ShardResponse;
import org.apache.solr.logging.MDCLoggingContext;
@@ -252,7 +253,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
.put(MIGRATESTATEFORMAT, this::migrateStateFormat)
.put(CREATESHARD, new CreateShardCmd(this))
.put(MIGRATE, new MigrateCmd(this))
- .put(CREATE, new CreateCollectionCmd(this, overseer.getCoreContainer(), cloudManager, zkStateReader))
+ .put(CREATE, new CreateCollectionCmd(this, overseer.getCoreContainer(), cloudManager))
.put(MODIFYCOLLECTION, this::modifyCollection)
.put(ADDREPLICAPROP, this::processReplicaAddPropertyCommand)
.put(DELETEREPLICAPROP, this::processReplicaDeletePropertyCommand)
@@ -817,7 +818,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
ClusterState clusterState = zkStateReader.getClusterState();
DocCollection coll = clusterState.getCollection(collectionName);
List<Replica> notLivesReplicas = new ArrayList<>();
- final ShardRequestTracker shardRequestTracker = new ShardRequestTracker(asyncId);
+ final ShardRequestTracker shardRequestTracker = new ShardRequestTracker(asyncId, adminPath, zkStateReader, shardHandlerFactory, overseer);
for (Slice slice : coll.getSlices()) {
notLivesReplicas.addAll(shardRequestTracker.sliceCmd(clusterState, params, stateMatcher, slice, shardHandler));
}
@@ -826,7 +827,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
return notLivesReplicas;
}
- private void processResponse(NamedList<Object> results, ShardResponse srsp, Set<String> okayExceptions) {
+ private static void processResponse(NamedList<Object> results, ShardResponse srsp, Set<String> okayExceptions) {
Throwable e = srsp.getException();
String nodeName = srsp.getNodeName();
SolrResponse solrResponse = srsp.getSolrResponse();
@@ -836,7 +837,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
}
@SuppressWarnings("deprecation")
- private void processResponse(NamedList<Object> results, Throwable e, String nodeName, SolrResponse solrResponse, String shard, Set<String> okayExceptions) {
+ private static void processResponse(NamedList<Object> results, Throwable e, String nodeName, SolrResponse solrResponse, String shard, Set<String> okayExceptions) {
String rootThrowable = null;
if (e instanceof BaseHttpSolrClient.RemoteSolrException) {
rootThrowable = ((BaseHttpSolrClient.RemoteSolrException) e).getRootThrowable();
@@ -870,7 +871,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
success.add(key, value);
}
- private NamedList<Object> waitForCoreAdminAsyncCallToComplete(String nodeName, String requestId) throws KeeperException, InterruptedException {
+ private static NamedList<Object> waitForCoreAdminAsyncCallToComplete(String nodeName, String requestId, String adminPath, ZkStateReader zkStateReader, HttpShardHandlerFactory shardHandlerFactory, Overseer overseer) throws KeeperException, InterruptedException {
ShardHandler shardHandler = shardHandlerFactory.getShardHandler(overseer.getCoreContainer().getUpdateShardHandler().getTheSharedHttpClient());
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminAction.REQUESTSTATUS.toString());
@@ -1029,19 +1030,27 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
static boolean INCLUDE_TOP_LEVEL_RESPONSE = true;
public ShardRequestTracker syncRequestTracker() {
- return new ShardRequestTracker(null);
+ return new ShardRequestTracker(null, adminPath, zkStateReader, shardHandlerFactory, overseer);
}
public ShardRequestTracker asyncRequestTracker(String asyncId) {
- return new ShardRequestTracker(asyncId);
+ return new ShardRequestTracker(asyncId, adminPath, zkStateReader, shardHandlerFactory, overseer);
}
- public class ShardRequestTracker{
+ public static class ShardRequestTracker{
private final String asyncId;
private final NamedList<String> shardAsyncIdByNode = new NamedList<String>();
+ private final String adminPath;
+ private final ZkStateReader zkStateReader;
+ private final HttpShardHandlerFactory shardHandlerFactory;
+ private final Overseer overseer;
- private ShardRequestTracker(String asyncId) {
+ private ShardRequestTracker(String asyncId, String adminPath, ZkStateReader reader, HttpShardHandlerFactory shardHandlerFactory, Overseer overseer) {
this.asyncId = asyncId;
+ this.adminPath = adminPath;
+ this.zkStateReader = reader;
+ this.shardHandlerFactory = shardHandlerFactory;
+ this.overseer = overseer;
}
/**
@@ -1128,7 +1137,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
final String node = nodeToAsync.getKey();
final String shardAsyncId = nodeToAsync.getValue();
log.debug("I am Waiting for :{}/{}", node, shardAsyncId);
- NamedList<Object> reqResult = waitForCoreAdminAsyncCallToComplete(node, shardAsyncId);
+ NamedList<Object> reqResult = waitForCoreAdminAsyncCallToComplete(node, shardAsyncId, adminPath, zkStateReader, shardHandlerFactory, overseer);
if (INCLUDE_TOP_LEVEL_RESPONSE) {
results.add(shardAsyncId, reqResult);
}