You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ho...@apache.org on 2022/10/26 02:36:36 UTC
[solr] branch main updated: SOLR-16416: Register all handlers before doing overseer operations (#1129)
This is an automated email from the ASF dual-hosted git repository.
houston pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new 1738098b348 SOLR-16416: Register all handlers before doing overseer operations (#1129)
1738098b348 is described below
commit 1738098b348a27b3dd5c4f64e12115de20b74b17
Author: Houston Putman <ho...@apache.org>
AuthorDate: Tue Oct 25 22:36:31 2022 -0400
SOLR-16416: Register all handlers before doing overseer operations (#1129)
Also retry OverseerNodePrioritizer operations on failure.
---
solr/CHANGES.txt | 2 ++
.../apache/solr/cloud/OverseerNodePrioritizer.java | 35 +++++++++++++++++++---
.../java/org/apache/solr/core/CoreContainer.java | 29 ++++++++++--------
3 files changed, 49 insertions(+), 17 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index d0f86191d9c..4c4631a1879 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -264,6 +264,8 @@ Bug Fixes
* SOLR-16412: Race condition could trigger error on concurrent SizeLimitedDistributedMap cleanup (Patson Luk via noble)
+* SOLR-16416: OverseerNodePrioritizer now runs after all handlers are registered, and retries its overseer operations on failure. (Houston Putman)
+
Other Changes
---------------------
* SOLR-16351: Upgrade Carrot2 to 4.4.3, upgrade randomizedtesting to 2.8.0. (Dawid Weiss)
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerNodePrioritizer.java b/solr/core/src/java/org/apache/solr/cloud/OverseerNodePrioritizer.java
index de65877b7de..63dca3f1c61 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerNodePrioritizer.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerNodePrioritizer.java
@@ -21,6 +21,7 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.solr.client.solrj.impl.ZkDistribStateManager;
+import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CoreAdminParams;
@@ -117,11 +118,13 @@ public class OverseerNodePrioritizer {
}
if (!designateNodeId.equals(electionNodes.get(1))) { // checking if it is already at no:1
log.info("asking node {} to come join election at head", designateNodeId);
- invokeOverseerOp(designateNodeId, "rejoinAtHead"); // ask designate to come first
+ invokeOverseerOpWithRetries(
+ designateNodeId, "rejoinAtHead", 5); // ask designate to come first
if (log.isInfoEnabled()) {
log.info("asking the old first in line {} to rejoin election ", electionNodes.get(1));
}
- invokeOverseerOp(electionNodes.get(1), "rejoin"); // ask second inline to go behind
+ invokeOverseerOpWithRetries(
+ electionNodes.get(1), "rejoin", 5); // ask second inline to go behind
if (log.isInfoEnabled()) {
List<String> newElectionNodes =
OverseerTaskProcessor.getSortedElectionNodes(
@@ -133,6 +136,28 @@ public class OverseerNodePrioritizer {
overseer.sendQuitToOverseer(OverseerTaskProcessor.getLeaderId(zkStateReader.getZkClient()));
}
+ private void invokeOverseerOpWithRetries(String electionNode, String op, int retryCount) { // Calls invokeOverseerOp(electionNode, op), making at most retryCount attempts.
+ boolean successful = false; // set once an attempt completes without throwing
+ for (int i = 0; i < retryCount && !successful; i++) { // stops at the first success; a retryCount <= 0 means the op is never attempted
+ try {
+ invokeOverseerOp(electionNode, op);
+ successful = true;
+ } catch (SolrException e) { // only SolrException triggers a retry; any other exception propagates immediately
+ if (i < retryCount - 1) { // attempts remain: log the failure and pause before trying again
+ log.warn("Exception occurred while invoking Overseer Operation '{}'. Retrying.", op, e);
+ try {
+ Thread.sleep(100); // fixed 100 ms pause between attempts (no backoff)
+ } catch (InterruptedException ex) {
+ Thread.currentThread().interrupt(); // restore the interrupt flag so callers can observe it
+ break; // NOTE(review): abandons the remaining retries and returns normally although the op never succeeded; 'e' is dropped
+ }
+ } else {
+ throw e; // last attempt failed: propagate that attempt's exception to the caller
+ }
+ }
+ }
+ }
+
private void invokeOverseerOp(String electionNode, String op) {
ModifiableSolrParams params = new ModifiableSolrParams();
ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
@@ -149,8 +174,10 @@ public class OverseerNodePrioritizer {
shardHandler.submit(sreq, replica, sreq.params);
ShardResponse response = shardHandler.takeCompletedOrError();
if (response.getException() != null) {
- log.error(
- "Exception occurred while invoking Overseer Operation: {}", op, response.getException());
+ throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR,
+ "Exception occurred while invoking Overseer Operation: " + op,
+ response.getException());
}
}
}
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index 7fa16410e93..ec5f4fc3002 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -1055,19 +1055,6 @@ public class CoreContainer {
clusterSingletons.getSingletons().put(singleton.getName(), singleton);
}
});
-
- clusterSingletons.setReady();
- if (NodeRoles.MODE_PREFERRED.equals(nodeRoles.getRoleMode(NodeRoles.Role.OVERSEER))) {
- try {
- log.info("This node has been started as a preferred overseer");
- zkSys.getZkController().setPreferredOverseer();
- } catch (KeeperException | InterruptedException e) {
- throw new SolrException(ErrorCode.SERVER_ERROR, e);
- }
- }
- if (!distributedCollectionCommandRunner.isPresent()) {
- zkSys.getZkController().checkOverseerDesignate();
- }
}
final CoreContainer thisCCRef = this;
@@ -1085,6 +1072,22 @@ public class CoreContainer {
});
jerseyAppHandler = new ApplicationHandler(containerHandlers.getJerseyEndpoints());
+ // Do Node setup logic after all handlers have been registered.
+ if (isZooKeeperAware()) {
+ clusterSingletons.setReady();
+ if (NodeRoles.MODE_PREFERRED.equals(nodeRoles.getRoleMode(NodeRoles.Role.OVERSEER))) {
+ try {
+ log.info("This node has been started as a preferred overseer");
+ zkSys.getZkController().setPreferredOverseer();
+ } catch (KeeperException | InterruptedException e) {
+ throw new SolrException(ErrorCode.SERVER_ERROR, e);
+ }
+ }
+ if (!distributedCollectionCommandRunner.isPresent()) {
+ zkSys.getZkController().checkOverseerDesignate();
+ }
+ }
+
// This is a bit redundant but these are two distinct concepts for all they're accomplished at
// the same time.
status |= LOAD_COMPLETE | INITIAL_CORE_LOAD_COMPLETE;