You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2020/09/02 13:35:10 UTC

[lucene-solr] branch reference_impl updated: @702 Looking at rare fail where the coreNodeName is not found and sent.

This is an automated email from the ASF dual-hosted git repository.

markrmiller pushed a commit to branch reference_impl
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/reference_impl by this push:
     new ce57d88  @702 Looking at rare fail where the coreNodeName is not found and sent.
ce57d88 is described below

commit ce57d88d4f857a4827b96e2af95f696752efa563
Author: markrmiller@gmail.com <ma...@gmail.com>
AuthorDate: Wed Sep 2 08:33:12 2020 -0500

    @702 Looking at rare fail where the coreNodeName is not found and sent.
---
 .../cloud/api/collections/CreateCollectionCmd.java |  7 +++---
 .../OverseerCollectionMessageHandler.java          | 29 ++++++++++++++--------
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java
index 793b156..8a4ce7c 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java
@@ -98,11 +98,11 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
   private final ZkStateReader zkStateReader;
   private final SolrCloudManager cloudManager;
 
-  public CreateCollectionCmd(OverseerCollectionMessageHandler ocmh, CoreContainer cc, SolrCloudManager cloudManager, ZkStateReader zkStateReader) {
+  public CreateCollectionCmd(OverseerCollectionMessageHandler ocmh, CoreContainer cc, SolrCloudManager cloudManager) {
     this.ocmh = ocmh;
     this.stateManager = ocmh.cloudManager.getDistribStateManager();
     this.timeSource = ocmh.cloudManager.getTimeSource();
-    this.zkStateReader = zkStateReader;
+    this.zkStateReader = ocmh.zkStateReader;
     this.cloudManager = cloudManager;
   }
 
@@ -328,8 +328,9 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
       if(!isLegacyCloud) {
         // wait for all replica entries to be created
         Map<String,Replica> replicas = new HashMap<>();
+        zkStateReader.waitForState(collectionName, 10, TimeUnit.SECONDS, (n, c) -> c != null && c.getSlices().size() == shardNames.size());
         zkStateReader.waitForState(collectionName, 5, TimeUnit.SECONDS, expectedReplicas(coresToCreate.size(), replicas)); // nocommit - timeout - keep this below containing timeouts - need central timeout stuff
-       // nocommit, what if replicas comes back wrong?
+        // TODO what if replicas comes back wrong?
         if (replicas.size() > 0) {
           for (Map.Entry<String, ShardRequest> e : coresToCreate.entrySet()) {
             ShardRequest sreq = e.getValue();
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerCollectionMessageHandler.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerCollectionMessageHandler.java
index b72f279..44823db 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerCollectionMessageHandler.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerCollectionMessageHandler.java
@@ -66,6 +66,7 @@ import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.common.util.Utils;
 import org.apache.solr.handler.component.HttpShardHandlerFactory;
 import org.apache.solr.handler.component.ShardHandler;
+import org.apache.solr.handler.component.ShardHandlerFactory;
 import org.apache.solr.handler.component.ShardRequest;
 import org.apache.solr.handler.component.ShardResponse;
 import org.apache.solr.logging.MDCLoggingContext;
@@ -252,7 +253,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
         .put(MIGRATESTATEFORMAT, this::migrateStateFormat)
         .put(CREATESHARD, new CreateShardCmd(this))
         .put(MIGRATE, new MigrateCmd(this))
-            .put(CREATE, new CreateCollectionCmd(this, overseer.getCoreContainer(), cloudManager, zkStateReader))
+            .put(CREATE, new CreateCollectionCmd(this, overseer.getCoreContainer(), cloudManager))
         .put(MODIFYCOLLECTION, this::modifyCollection)
         .put(ADDREPLICAPROP, this::processReplicaAddPropertyCommand)
         .put(DELETEREPLICAPROP, this::processReplicaDeletePropertyCommand)
@@ -817,7 +818,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
     ClusterState clusterState = zkStateReader.getClusterState();
     DocCollection coll = clusterState.getCollection(collectionName);
     List<Replica> notLivesReplicas = new ArrayList<>();
-    final ShardRequestTracker shardRequestTracker = new ShardRequestTracker(asyncId);
+    final ShardRequestTracker shardRequestTracker = new ShardRequestTracker(asyncId, adminPath, zkStateReader, shardHandlerFactory, overseer);
     for (Slice slice : coll.getSlices()) {
       notLivesReplicas.addAll(shardRequestTracker.sliceCmd(clusterState, params, stateMatcher, slice, shardHandler));
     }
@@ -826,7 +827,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
     return notLivesReplicas;
   }
 
-  private void processResponse(NamedList<Object> results, ShardResponse srsp, Set<String> okayExceptions) {
+  private static void processResponse(NamedList<Object> results, ShardResponse srsp, Set<String> okayExceptions) {
     Throwable e = srsp.getException();
     String nodeName = srsp.getNodeName();
     SolrResponse solrResponse = srsp.getSolrResponse();
@@ -836,7 +837,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
   }
 
   @SuppressWarnings("deprecation")
-  private void processResponse(NamedList<Object> results, Throwable e, String nodeName, SolrResponse solrResponse, String shard, Set<String> okayExceptions) {
+  private static void processResponse(NamedList<Object> results, Throwable e, String nodeName, SolrResponse solrResponse, String shard, Set<String> okayExceptions) {
     String rootThrowable = null;
     if (e instanceof BaseHttpSolrClient.RemoteSolrException) {
       rootThrowable = ((BaseHttpSolrClient.RemoteSolrException) e).getRootThrowable();
@@ -870,7 +871,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
     success.add(key, value);
   }
 
-  private NamedList<Object> waitForCoreAdminAsyncCallToComplete(String nodeName, String requestId) throws KeeperException, InterruptedException {
+  private static NamedList<Object> waitForCoreAdminAsyncCallToComplete(String nodeName, String requestId, String adminPath, ZkStateReader zkStateReader, HttpShardHandlerFactory shardHandlerFactory, Overseer overseer) throws KeeperException, InterruptedException {
     ShardHandler shardHandler = shardHandlerFactory.getShardHandler(overseer.getCoreContainer().getUpdateShardHandler().getTheSharedHttpClient());
     ModifiableSolrParams params = new ModifiableSolrParams();
     params.set(CoreAdminParams.ACTION, CoreAdminAction.REQUESTSTATUS.toString());
@@ -1029,19 +1030,27 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
   static boolean INCLUDE_TOP_LEVEL_RESPONSE = true;
 
   public ShardRequestTracker syncRequestTracker() {
-    return new ShardRequestTracker(null);
+    return new ShardRequestTracker(null, adminPath, zkStateReader, shardHandlerFactory, overseer);
   }
 
   public ShardRequestTracker asyncRequestTracker(String asyncId) {
-    return new ShardRequestTracker(asyncId);
+    return new ShardRequestTracker(asyncId, adminPath, zkStateReader, shardHandlerFactory, overseer);
   }
 
-  public class ShardRequestTracker{
+  public static class ShardRequestTracker{
     private final String asyncId;
     private final NamedList<String> shardAsyncIdByNode = new NamedList<String>();
+    private final String adminPath;
+    private final ZkStateReader zkStateReader;
+    private final HttpShardHandlerFactory shardHandlerFactory;
+    private final Overseer overseer;
 
-    private ShardRequestTracker(String asyncId) {
+    private ShardRequestTracker(String asyncId, String adminPath, ZkStateReader reader,  HttpShardHandlerFactory shardHandlerFactory,  Overseer overseer) {
       this.asyncId = asyncId;
+      this.adminPath = adminPath;
+      this.zkStateReader = reader;
+      this.shardHandlerFactory = shardHandlerFactory;
+      this.overseer = overseer;
     }
 
     /**
@@ -1128,7 +1137,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
         final String node = nodeToAsync.getKey();
         final String shardAsyncId = nodeToAsync.getValue();
         log.debug("I am Waiting for :{}/{}", node, shardAsyncId);
-        NamedList<Object> reqResult = waitForCoreAdminAsyncCallToComplete(node, shardAsyncId);
+        NamedList<Object> reqResult = waitForCoreAdminAsyncCallToComplete(node, shardAsyncId, adminPath, zkStateReader, shardHandlerFactory, overseer);
         if (INCLUDE_TOP_LEVEL_RESPONSE) {
           results.add(shardAsyncId, reqResult);
         }