You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ho...@apache.org on 2022/10/26 03:11:48 UTC
[solr] branch branch_9_1 updated: SOLR-16416: retry overseerPrioritizer ops on failure. (#1129)
This is an automated email from the ASF dual-hosted git repository.
houston pushed a commit to branch branch_9_1
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9_1 by this push:
new e6365906002 SOLR-16416: retry overseerPrioritizer ops on failure. (#1129)
e6365906002 is described below
commit e636590600206c8cae9f88fabab5f962fe984937
Author: Houston Putman <ho...@apache.org>
AuthorDate: Tue Oct 25 22:36:31 2022 -0400
SOLR-16416: retry overseerPrioritizer ops on failure. (#1129)
(cherry picked from commit 1738098b348a27b3dd5c4f64e12115de20b74b17)
---
solr/CHANGES.txt | 2 ++
.../apache/solr/cloud/OverseerNodePrioritizer.java | 35 +++++++++++++++++++---
2 files changed, 33 insertions(+), 4 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 1bdb0a248b3..f8c6ef82865 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -171,6 +171,8 @@ Bug Fixes
* SOLR-16412: Race condition could trigger error on concurrent SizeLimitedDistributedMap cleanup (Patson Luk via noble)
+* SOLR-16416: OverseerPrioritizer now runs after all handlers are registered, and retries on failures. (Houston Putman)
+
Other Changes
---------------------
* SOLR-16351: Upgrade Carrot2 to 4.4.3, upgrade randomizedtesting to 2.8.0. (Dawid Weiss)
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerNodePrioritizer.java b/solr/core/src/java/org/apache/solr/cloud/OverseerNodePrioritizer.java
index de65877b7de..63dca3f1c61 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerNodePrioritizer.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerNodePrioritizer.java
@@ -21,6 +21,7 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.solr.client.solrj.impl.ZkDistribStateManager;
+import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CoreAdminParams;
@@ -117,11 +118,13 @@ public class OverseerNodePrioritizer {
}
if (!designateNodeId.equals(electionNodes.get(1))) { // checking if it is already at no:1
log.info("asking node {} to come join election at head", designateNodeId);
- invokeOverseerOp(designateNodeId, "rejoinAtHead"); // ask designate to come first
+ invokeOverseerOpWithRetries(
+ designateNodeId, "rejoinAtHead", 5); // ask designate to come first
if (log.isInfoEnabled()) {
log.info("asking the old first in line {} to rejoin election ", electionNodes.get(1));
}
- invokeOverseerOp(electionNodes.get(1), "rejoin"); // ask second inline to go behind
+ invokeOverseerOpWithRetries(
+ electionNodes.get(1), "rejoin", 5); // ask second inline to go behind
if (log.isInfoEnabled()) {
List<String> newElectionNodes =
OverseerTaskProcessor.getSortedElectionNodes(
@@ -133,6 +136,28 @@ public class OverseerNodePrioritizer {
overseer.sendQuitToOverseer(OverseerTaskProcessor.getLeaderId(zkStateReader.getZkClient()));
}
+ private void invokeOverseerOpWithRetries(String electionNode, String op, int retryCount) {
+ boolean successful = false;
+ for (int i = 0; i < retryCount && !successful; i++) {
+ try {
+ invokeOverseerOp(electionNode, op);
+ successful = true;
+ } catch (SolrException e) {
+ if (i < retryCount - 1) {
+ log.warn("Exception occurred while invoking Overseer Operation '{}'. Retrying.", op, e);
+ try {
+ Thread.sleep(100);
+ } catch (InterruptedException ex) {
+ Thread.currentThread().interrupt();
+ break;
+ }
+ } else {
+ throw e;
+ }
+ }
+ }
+ }
+
private void invokeOverseerOp(String electionNode, String op) {
ModifiableSolrParams params = new ModifiableSolrParams();
ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
@@ -149,8 +174,10 @@ public class OverseerNodePrioritizer {
shardHandler.submit(sreq, replica, sreq.params);
ShardResponse response = shardHandler.takeCompletedOrError();
if (response.getException() != null) {
- log.error(
- "Exception occurred while invoking Overseer Operation: {}", op, response.getException());
+ throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR,
+ "Exception occurred while invoking Overseer Operation: " + op,
+ response.getException());
}
}
}