You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2021/01/24 02:18:19 UTC

[lucene-solr] 12/16: @1288 Only check state in prep recovery and don't try to unload cores that look to have moved.

This is an automated email from the ASF dual-hosted git repository.

markrmiller pushed a commit to branch reference_impl_dev
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit 34cc96692eec95518146dbd4fec5f879c9ee1b7a
Author: markrmiller@gmail.com <ma...@gmail.com>
AuthorDate: Thu Jan 21 23:08:21 2021 -0600

    @1288 Only check state in prep recovery and don't try to unload cores that look to have moved.
---
 .../src/java/org/apache/solr/cloud/CloudUtil.java  |  19 ---
 .../java/org/apache/solr/core/CoreContainer.java   | 183 ++++++++++-----------
 .../apache/solr/handler/admin/PrepRecoveryOp.java  |   2 +-
 3 files changed, 86 insertions(+), 118 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java b/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java
index a0f6c51..547d7484 100644
--- a/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java
+++ b/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java
@@ -105,25 +105,6 @@ public class CloudUtil {
     }
   }
 
-  public static boolean checkIfValidCloudCore(CoreContainer cc, CoreDescriptor desc) {
-    if (desc.getCloudDescriptor() == null) return false;
-    ZkController zkController = cc.getZkController();
-    String coreName = desc.getName();
-
-    // if we see our core node name on a different base url, unload
-    final DocCollection docCollection = zkController.getClusterState().getCollectionOrNull(desc.getCloudDescriptor().getCollectionName());
-    if (docCollection == null || docCollection.getReplica(coreName) == null) {
-
-      try {
-        cc.unload(desc.getName(), true, true, true);
-      } catch (Exception e) {
-        log.error("unload exception", e);
-      }
-      return false;
-    }
-    return true;
-  }
-
   /**
    * Returns a displayable unified path to the given resource. For non-solrCloud that will be the
    * same as getConfigDir, but for Cloud it will be getConfigSetZkPath ending in a /
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index 54248c2..ab33eae 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -28,7 +28,6 @@ import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.store.Directory;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.cloud.SolrCloudManager;
-import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
 import org.apache.solr.client.solrj.impl.CloudHttp2SolrClient;
 import org.apache.solr.client.solrj.impl.HttpClientUtil;
 import org.apache.solr.client.solrj.impl.SolrHttpClientBuilder;
@@ -36,7 +35,6 @@ import org.apache.solr.client.solrj.impl.SolrHttpClientContextBuilder;
 import org.apache.solr.client.solrj.io.SolrClientCache;
 import org.apache.solr.client.solrj.util.SolrIdentifierValidator;
 import org.apache.solr.cloud.CloudDescriptor;
-import org.apache.solr.cloud.CloudUtil;
 import org.apache.solr.cloud.Overseer;
 import org.apache.solr.cloud.ZkController;
 import org.apache.solr.cloud.overseer.OverseerAction;
@@ -729,31 +727,29 @@ public class CoreContainer implements Closeable {
 
     try {
 
-        solrCores.load(loader);
+      solrCores.load(loader);
 
-        logging = LogWatcher.newRegisteredLogWatcher(cfg.getLogWatcherConfig(), loader);
+      logging = LogWatcher.newRegisteredLogWatcher(cfg.getLogWatcherConfig(), loader);
 
-        hostName = cfg.getNodeName();
+      hostName = cfg.getNodeName();
 
-        collectionsHandler = createHandler(COLLECTIONS_HANDLER_PATH, cfg.getCollectionsHandlerClass(), CollectionsHandler.class);
-        infoHandler = createHandler(INFO_HANDLER_PATH, cfg.getInfoHandlerClass(), InfoHandler.class);
-        coreAdminHandler = createHandler(CORES_HANDLER_PATH, cfg.getCoreAdminHandlerClass(), CoreAdminHandler.class);
-        configSetsHandler = createHandler(CONFIGSETS_HANDLER_PATH, cfg.getConfigSetsHandlerClass(), ConfigSetsHandler.class);
+      collectionsHandler = createHandler(COLLECTIONS_HANDLER_PATH, cfg.getCollectionsHandlerClass(), CollectionsHandler.class);
+      infoHandler = createHandler(INFO_HANDLER_PATH, cfg.getInfoHandlerClass(), InfoHandler.class);
+      coreAdminHandler = createHandler(CORES_HANDLER_PATH, cfg.getCoreAdminHandlerClass(), CoreAdminHandler.class);
+      configSetsHandler = createHandler(CONFIGSETS_HANDLER_PATH, cfg.getConfigSetsHandlerClass(), ConfigSetsHandler.class);
 
-        createHandler(ZK_PATH, ZookeeperInfoHandler.class.getName(), ZookeeperInfoHandler.class);
-        createHandler(ZK_STATUS_PATH, ZookeeperStatusHandler.class.getName(), ZookeeperStatusHandler.class);
+      createHandler(ZK_PATH, ZookeeperInfoHandler.class.getName(), ZookeeperInfoHandler.class);
+      createHandler(ZK_STATUS_PATH, ZookeeperStatusHandler.class.getName(), ZookeeperStatusHandler.class);
 
-
-        if (isZooKeeperAware()) {
-          try {
-            zkSys.start(this);
-          } catch (IOException e) {
-            throw new SolrException(ErrorCode.SERVER_ERROR, e);
-          } catch (KeeperException e) {
-            throw new SolrException(ErrorCode.SERVER_ERROR, e);
-          }
+      if (isZooKeeperAware()) {
+        try {
+          zkSys.start(this);
+        } catch (IOException e) {
+          throw new SolrException(ErrorCode.SERVER_ERROR, e);
+        } catch (KeeperException e) {
+          throw new SolrException(ErrorCode.SERVER_ERROR, e);
         }
-
+      }
 
       try (ParWork work = new ParWork(this, false, true)) {
 
@@ -807,7 +803,6 @@ public class CoreContainer implements Closeable {
 
         work.addCollect();
 
-
         work.collect("", () -> {
           solrClientCache = new SolrClientCache(isZooKeeperAware() ? zkSys.getZkController().getZkStateReader() : null, updateShardHandler.getTheSharedHttpClient());
         });
@@ -833,7 +828,6 @@ public class CoreContainer implements Closeable {
       throw new SolrException(ErrorCode.SERVER_ERROR, "Exception in CoreContainer load", e);
     }
 
-
     if (!containerHandlers.keySet().contains(CORES_HANDLER_PATH)) {
       throw new IllegalStateException("No core admin path was loaded " + CORES_HANDLER_PATH);
     }
@@ -877,98 +871,91 @@ public class CoreContainer implements Closeable {
     }
 
     List<Future<SolrCore>> coreLoadFutures = null;
-    try {
-      List<CoreDescriptor> cds = coresLocator.discover(this);
-      coreLoadFutures = new ArrayList<>(cds.size());
-      if (isZooKeeperAware()) {
-        cds = CoreSorter.sortCores(this, cds);
+
+    List<CoreDescriptor> cds = coresLocator.discover(this);
+    coreLoadFutures = new ArrayList<>(cds.size());
+    if (isZooKeeperAware()) {
+      cds = CoreSorter.sortCores(this, cds);
+    }
+    checkForDuplicateCoreNames(cds);
+    status |= CORE_DISCOVERY_COMPLETE;
+    startedLoadingCores = true;
+    for (final CoreDescriptor cd : cds) {
+//      if (isZooKeeperAware()) {
+//        String collection = cd.getCollectionName();
+//        try {
+//          zkSys.zkController.zkStateReader.waitForState(collection, 5, TimeUnit.SECONDS, (n, c) -> {
+//            if (c != null) {
+//              Replica replica = c.getReplica(cd.getName());
+//
+//              if (replica.getState().equals(State.DOWN)) {
+//                return true;
+//              }
+//
+//            }
+//            return false;
+//          });
+//        } catch (InterruptedException e) {
+//          ParWork.propagateInterrupt(e);
+//        } catch (TimeoutException e) {
+//          log.error("Timeout", e);
+//        }
+//      }
+
+      if (log.isDebugEnabled()) log.debug("Process core descriptor {} {} {}", cd.getName(), cd.isTransient(), cd.isLoadOnStartup());
+      if (cd.isTransient() || !cd.isLoadOnStartup()) {
+        solrCores.addCoreDescriptor(cd);
+      } else {
+        solrCores.markCoreAsLoading(cd);
       }
-      checkForDuplicateCoreNames(cds);
-      status |= CORE_DISCOVERY_COMPLETE;
+      if (cd.isLoadOnStartup()) {
 
-      for (final CoreDescriptor cd : cds) {
-        if (isZooKeeperAware()) {
-          String collection = cd.getCollectionName();
+        coreLoadFutures.add(solrCoreLoadExecutor.submit(() -> {
+          SolrCore core;
+          MDCLoggingContext.setCoreDescriptor(this, cd);
           try {
-            zkSys.zkController.zkStateReader.waitForState(collection, 5, TimeUnit.SECONDS, (n, c) -> {
-              if (c != null) {
-                Replica replica = c.getReplica(cd.getName());
-
-                if (replica.getState().equals(State.DOWN)) {
-                  return true;
-                }
-
-              }
-              return false;
-            });
-          } catch (InterruptedException e) {
-            ParWork.propagateInterrupt(e);
-          } catch (TimeoutException e) {
-            log.error("Timeout", e);
-          }
-        }
-
-        if (log.isDebugEnabled()) log.debug("Process core descriptor {} {} {}", cd.getName(), cd.isTransient(), cd.isLoadOnStartup());
-        if (cd.isTransient() || !cd.isLoadOnStartup()) {
-          solrCores.addCoreDescriptor(cd);
-        } else {
-          solrCores.markCoreAsLoading(cd);
-        }
-        if (cd.isLoadOnStartup()) {
-          if (isZooKeeperAware() && !CloudUtil.checkIfValidCloudCore(this, cd)) {
-            continue;
-          }
-
-          coreLoadFutures.add(solrCoreLoadExecutor.submit(() -> {
-            SolrCore core;
-            MDCLoggingContext.setCoreDescriptor(this, cd);
             try {
-              try {
 
-                core = createFromDescriptor(cd, false);
+              core = createFromDescriptor(cd, false);
 
-                if (core.getDirectoryFactory().isSharedStorage()) {
-                  if (isZooKeeperAware()) {
-                    zkSys.getZkController().throwErrorIfReplicaReplaced(cd);
-                  }
+              if (core.getDirectoryFactory().isSharedStorage()) {
+                if (isZooKeeperAware()) {
+                  zkSys.getZkController().throwErrorIfReplicaReplaced(cd);
                 }
-
-              } finally {
-                solrCores.markCoreAsNotLoading(cd);
-              } if (isZooKeeperAware()) {
-                new ZkController.RegisterCoreAsync(zkSys.zkController, cd, false).call();
               }
+
             } finally {
-              MDCLoggingContext.clear();
+              solrCores.markCoreAsNotLoading(cd);
             }
-            return core;
-          }));
-        }
-      }
-      if (isZooKeeperAware()) {
-
-        ParWork.getRootSharedExecutor().submit(() -> {
-          zkSys.getZkController().createEphemeralLiveNode();
-        });
+            if (isZooKeeperAware()) {
+              new ZkController.RegisterCoreAsync(zkSys.zkController, cd, false).call();
+            }
+          } finally {
+            MDCLoggingContext.clear();
+          }
+          return core;
+        }));
       }
-    } finally {
+    }
+    if (isZooKeeperAware()) {
+      zkSys.getZkController().createEphemeralLiveNode();
+    }
 
-      startedLoadingCores = true;
-      if (coreLoadFutures != null && !asyncSolrCoreLoad) {
-        for (Future<SolrCore> future : coreLoadFutures) {
-          try {
-            future.get();
-          } catch (InterruptedException e) {
-            ParWork.propagateInterrupt(e);
-          } catch (ExecutionException e) {
-            log.error("Error waiting for SolrCore to be loaded on startup", e.getCause());
-          }
+    if (coreLoadFutures != null && !asyncSolrCoreLoad) {
+      for (Future<SolrCore> future : coreLoadFutures) {
+        try {
+          future.get();
+        } catch (InterruptedException e) {
+          ParWork.propagateInterrupt(e);
+        } catch (ExecutionException e) {
+          log.error("Error waiting for SolrCore to be loaded on startup", e.getCause());
         }
       }
     }
+
     if (isZooKeeperAware()) {
 
-     // zkSys.getZkController().checkOverseerDesignate();
+      // zkSys.getZkController().checkOverseerDesignate();
       // initialize this handler here when SolrCloudManager is ready
     }
     // This is a bit redundant but these are two distinct concepts for all they're accomplished at the same time.
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/PrepRecoveryOp.java b/solr/core/src/java/org/apache/solr/handler/admin/PrepRecoveryOp.java
index b463137..ca07d0b 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/PrepRecoveryOp.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/PrepRecoveryOp.java
@@ -79,7 +79,7 @@ class PrepRecoveryOp implements CoreAdminHandler.CoreAdminOp {
         boolean isLive = false;
         if (replica != null) {
           isLive = coreContainer.getZkController().getZkStateReader().isNodeLive(replica.getNodeName());
-          if ((replica.getState() == waitForState || replica.getState() == Replica.State.ACTIVE) && isLive) {
+          if (replica.getState() == waitForState) {
             // if (log.isDebugEnabled()) log.debug("replica={} state={} waitForState={}", replica, replica.getState(), waitForState);
             log.info("replica={} state={} waitForState={} isLive={}", replica, replica.getState(), waitForState, coreContainer.getZkController().getZkStateReader().isNodeLive(replica.getNodeName()));
             return true;