You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2021/01/24 02:18:19 UTC
[lucene-solr] 12/16: @1288 Only check state in prep recovery and don't try to unload cores that look to have moved.
This is an automated email from the ASF dual-hosted git repository.
markrmiller pushed a commit to branch reference_impl_dev
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
commit 34cc96692eec95518146dbd4fec5f879c9ee1b7a
Author: markrmiller@gmail.com <ma...@gmail.com>
AuthorDate: Thu Jan 21 23:08:21 2021 -0600
@1288 Only check state in prep recovery and don't try to unload cores that look to have moved.
---
.../src/java/org/apache/solr/cloud/CloudUtil.java | 19 ---
.../java/org/apache/solr/core/CoreContainer.java | 183 ++++++++++-----------
.../apache/solr/handler/admin/PrepRecoveryOp.java | 2 +-
3 files changed, 86 insertions(+), 118 deletions(-)
diff --git a/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java b/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java
index a0f6c51..547d7484 100644
--- a/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java
+++ b/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java
@@ -105,25 +105,6 @@ public class CloudUtil {
}
}
- public static boolean checkIfValidCloudCore(CoreContainer cc, CoreDescriptor desc) {
- if (desc.getCloudDescriptor() == null) return false;
- ZkController zkController = cc.getZkController();
- String coreName = desc.getName();
-
- // if we see our core node name on a different base url, unload
- final DocCollection docCollection = zkController.getClusterState().getCollectionOrNull(desc.getCloudDescriptor().getCollectionName());
- if (docCollection == null || docCollection.getReplica(coreName) == null) {
-
- try {
- cc.unload(desc.getName(), true, true, true);
- } catch (Exception e) {
- log.error("unload exception", e);
- }
- return false;
- }
- return true;
- }
-
/**
* Returns a displayable unified path to the given resource. For non-solrCloud that will be the
* same as getConfigDir, but for Cloud it will be getConfigSetZkPath ending in a /
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index 54248c2..ab33eae 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -28,7 +28,6 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
-import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.impl.CloudHttp2SolrClient;
import org.apache.solr.client.solrj.impl.HttpClientUtil;
import org.apache.solr.client.solrj.impl.SolrHttpClientBuilder;
@@ -36,7 +35,6 @@ import org.apache.solr.client.solrj.impl.SolrHttpClientContextBuilder;
import org.apache.solr.client.solrj.io.SolrClientCache;
import org.apache.solr.client.solrj.util.SolrIdentifierValidator;
import org.apache.solr.cloud.CloudDescriptor;
-import org.apache.solr.cloud.CloudUtil;
import org.apache.solr.cloud.Overseer;
import org.apache.solr.cloud.ZkController;
import org.apache.solr.cloud.overseer.OverseerAction;
@@ -729,31 +727,29 @@ public class CoreContainer implements Closeable {
try {
- solrCores.load(loader);
+ solrCores.load(loader);
- logging = LogWatcher.newRegisteredLogWatcher(cfg.getLogWatcherConfig(), loader);
+ logging = LogWatcher.newRegisteredLogWatcher(cfg.getLogWatcherConfig(), loader);
- hostName = cfg.getNodeName();
+ hostName = cfg.getNodeName();
- collectionsHandler = createHandler(COLLECTIONS_HANDLER_PATH, cfg.getCollectionsHandlerClass(), CollectionsHandler.class);
- infoHandler = createHandler(INFO_HANDLER_PATH, cfg.getInfoHandlerClass(), InfoHandler.class);
- coreAdminHandler = createHandler(CORES_HANDLER_PATH, cfg.getCoreAdminHandlerClass(), CoreAdminHandler.class);
- configSetsHandler = createHandler(CONFIGSETS_HANDLER_PATH, cfg.getConfigSetsHandlerClass(), ConfigSetsHandler.class);
+ collectionsHandler = createHandler(COLLECTIONS_HANDLER_PATH, cfg.getCollectionsHandlerClass(), CollectionsHandler.class);
+ infoHandler = createHandler(INFO_HANDLER_PATH, cfg.getInfoHandlerClass(), InfoHandler.class);
+ coreAdminHandler = createHandler(CORES_HANDLER_PATH, cfg.getCoreAdminHandlerClass(), CoreAdminHandler.class);
+ configSetsHandler = createHandler(CONFIGSETS_HANDLER_PATH, cfg.getConfigSetsHandlerClass(), ConfigSetsHandler.class);
- createHandler(ZK_PATH, ZookeeperInfoHandler.class.getName(), ZookeeperInfoHandler.class);
- createHandler(ZK_STATUS_PATH, ZookeeperStatusHandler.class.getName(), ZookeeperStatusHandler.class);
+ createHandler(ZK_PATH, ZookeeperInfoHandler.class.getName(), ZookeeperInfoHandler.class);
+ createHandler(ZK_STATUS_PATH, ZookeeperStatusHandler.class.getName(), ZookeeperStatusHandler.class);
-
- if (isZooKeeperAware()) {
- try {
- zkSys.start(this);
- } catch (IOException e) {
- throw new SolrException(ErrorCode.SERVER_ERROR, e);
- } catch (KeeperException e) {
- throw new SolrException(ErrorCode.SERVER_ERROR, e);
- }
+ if (isZooKeeperAware()) {
+ try {
+ zkSys.start(this);
+ } catch (IOException e) {
+ throw new SolrException(ErrorCode.SERVER_ERROR, e);
+ } catch (KeeperException e) {
+ throw new SolrException(ErrorCode.SERVER_ERROR, e);
}
-
+ }
try (ParWork work = new ParWork(this, false, true)) {
@@ -807,7 +803,6 @@ public class CoreContainer implements Closeable {
work.addCollect();
-
work.collect("", () -> {
solrClientCache = new SolrClientCache(isZooKeeperAware() ? zkSys.getZkController().getZkStateReader() : null, updateShardHandler.getTheSharedHttpClient());
});
@@ -833,7 +828,6 @@ public class CoreContainer implements Closeable {
throw new SolrException(ErrorCode.SERVER_ERROR, "Exception in CoreContainer load", e);
}
-
if (!containerHandlers.keySet().contains(CORES_HANDLER_PATH)) {
throw new IllegalStateException("No core admin path was loaded " + CORES_HANDLER_PATH);
}
@@ -877,98 +871,91 @@ public class CoreContainer implements Closeable {
}
List<Future<SolrCore>> coreLoadFutures = null;
- try {
- List<CoreDescriptor> cds = coresLocator.discover(this);
- coreLoadFutures = new ArrayList<>(cds.size());
- if (isZooKeeperAware()) {
- cds = CoreSorter.sortCores(this, cds);
+
+ List<CoreDescriptor> cds = coresLocator.discover(this);
+ coreLoadFutures = new ArrayList<>(cds.size());
+ if (isZooKeeperAware()) {
+ cds = CoreSorter.sortCores(this, cds);
+ }
+ checkForDuplicateCoreNames(cds);
+ status |= CORE_DISCOVERY_COMPLETE;
+ startedLoadingCores = true;
+ for (final CoreDescriptor cd : cds) {
+// if (isZooKeeperAware()) {
+// String collection = cd.getCollectionName();
+// try {
+// zkSys.zkController.zkStateReader.waitForState(collection, 5, TimeUnit.SECONDS, (n, c) -> {
+// if (c != null) {
+// Replica replica = c.getReplica(cd.getName());
+//
+// if (replica.getState().equals(State.DOWN)) {
+// return true;
+// }
+//
+// }
+// return false;
+// });
+// } catch (InterruptedException e) {
+// ParWork.propagateInterrupt(e);
+// } catch (TimeoutException e) {
+// log.error("Timeout", e);
+// }
+// }
+
+ if (log.isDebugEnabled()) log.debug("Process core descriptor {} {} {}", cd.getName(), cd.isTransient(), cd.isLoadOnStartup());
+ if (cd.isTransient() || !cd.isLoadOnStartup()) {
+ solrCores.addCoreDescriptor(cd);
+ } else {
+ solrCores.markCoreAsLoading(cd);
}
- checkForDuplicateCoreNames(cds);
- status |= CORE_DISCOVERY_COMPLETE;
+ if (cd.isLoadOnStartup()) {
- for (final CoreDescriptor cd : cds) {
- if (isZooKeeperAware()) {
- String collection = cd.getCollectionName();
+ coreLoadFutures.add(solrCoreLoadExecutor.submit(() -> {
+ SolrCore core;
+ MDCLoggingContext.setCoreDescriptor(this, cd);
try {
- zkSys.zkController.zkStateReader.waitForState(collection, 5, TimeUnit.SECONDS, (n, c) -> {
- if (c != null) {
- Replica replica = c.getReplica(cd.getName());
-
- if (replica.getState().equals(State.DOWN)) {
- return true;
- }
-
- }
- return false;
- });
- } catch (InterruptedException e) {
- ParWork.propagateInterrupt(e);
- } catch (TimeoutException e) {
- log.error("Timeout", e);
- }
- }
-
- if (log.isDebugEnabled()) log.debug("Process core descriptor {} {} {}", cd.getName(), cd.isTransient(), cd.isLoadOnStartup());
- if (cd.isTransient() || !cd.isLoadOnStartup()) {
- solrCores.addCoreDescriptor(cd);
- } else {
- solrCores.markCoreAsLoading(cd);
- }
- if (cd.isLoadOnStartup()) {
- if (isZooKeeperAware() && !CloudUtil.checkIfValidCloudCore(this, cd)) {
- continue;
- }
-
- coreLoadFutures.add(solrCoreLoadExecutor.submit(() -> {
- SolrCore core;
- MDCLoggingContext.setCoreDescriptor(this, cd);
try {
- try {
- core = createFromDescriptor(cd, false);
+ core = createFromDescriptor(cd, false);
- if (core.getDirectoryFactory().isSharedStorage()) {
- if (isZooKeeperAware()) {
- zkSys.getZkController().throwErrorIfReplicaReplaced(cd);
- }
+ if (core.getDirectoryFactory().isSharedStorage()) {
+ if (isZooKeeperAware()) {
+ zkSys.getZkController().throwErrorIfReplicaReplaced(cd);
}
-
- } finally {
- solrCores.markCoreAsNotLoading(cd);
- } if (isZooKeeperAware()) {
- new ZkController.RegisterCoreAsync(zkSys.zkController, cd, false).call();
}
+
} finally {
- MDCLoggingContext.clear();
+ solrCores.markCoreAsNotLoading(cd);
}
- return core;
- }));
- }
- }
- if (isZooKeeperAware()) {
-
- ParWork.getRootSharedExecutor().submit(() -> {
- zkSys.getZkController().createEphemeralLiveNode();
- });
+ if (isZooKeeperAware()) {
+ new ZkController.RegisterCoreAsync(zkSys.zkController, cd, false).call();
+ }
+ } finally {
+ MDCLoggingContext.clear();
+ }
+ return core;
+ }));
}
- } finally {
+ }
+ if (isZooKeeperAware()) {
+ zkSys.getZkController().createEphemeralLiveNode();
+ }
- startedLoadingCores = true;
- if (coreLoadFutures != null && !asyncSolrCoreLoad) {
- for (Future<SolrCore> future : coreLoadFutures) {
- try {
- future.get();
- } catch (InterruptedException e) {
- ParWork.propagateInterrupt(e);
- } catch (ExecutionException e) {
- log.error("Error waiting for SolrCore to be loaded on startup", e.getCause());
- }
+ if (coreLoadFutures != null && !asyncSolrCoreLoad) {
+ for (Future<SolrCore> future : coreLoadFutures) {
+ try {
+ future.get();
+ } catch (InterruptedException e) {
+ ParWork.propagateInterrupt(e);
+ } catch (ExecutionException e) {
+ log.error("Error waiting for SolrCore to be loaded on startup", e.getCause());
}
}
}
+
if (isZooKeeperAware()) {
- // zkSys.getZkController().checkOverseerDesignate();
+ // zkSys.getZkController().checkOverseerDesignate();
// initialize this handler here when SolrCloudManager is ready
}
// This is a bit redundant but these are two distinct concepts for all they're accomplished at the same time.
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/PrepRecoveryOp.java b/solr/core/src/java/org/apache/solr/handler/admin/PrepRecoveryOp.java
index b463137..ca07d0b 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/PrepRecoveryOp.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/PrepRecoveryOp.java
@@ -79,7 +79,7 @@ class PrepRecoveryOp implements CoreAdminHandler.CoreAdminOp {
boolean isLive = false;
if (replica != null) {
isLive = coreContainer.getZkController().getZkStateReader().isNodeLive(replica.getNodeName());
- if ((replica.getState() == waitForState || replica.getState() == Replica.State.ACTIVE) && isLive) {
+ if (replica.getState() == waitForState) {
// if (log.isDebugEnabled()) log.debug("replica={} state={} waitForState={}", replica, replica.getState(), waitForState);
log.info("replica={} state={} waitForState={} isLive={}", replica, replica.getState(), waitForState, coreContainer.getZkController().getZkStateReader().isNodeLive(replica.getNodeName()));
return true;