You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ho...@apache.org on 2022/10/26 03:11:48 UTC

[solr] branch branch_9_1 updated: SOLR-16416: retry overseerPrioritizer ops on failure. (#1129)

This is an automated email from the ASF dual-hosted git repository.

houston pushed a commit to branch branch_9_1
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_9_1 by this push:
     new e6365906002 SOLR-16416: retry overseerPrioritizer ops on failure. (#1129)
e6365906002 is described below

commit e636590600206c8cae9f88fabab5f962fe984937
Author: Houston Putman <ho...@apache.org>
AuthorDate: Tue Oct 25 22:36:31 2022 -0400

    SOLR-16416: retry overseerPrioritizer ops on failure. (#1129)
    
    (cherry picked from commit 1738098b348a27b3dd5c4f64e12115de20b74b17)
---
 solr/CHANGES.txt                                   |  2 ++
 .../apache/solr/cloud/OverseerNodePrioritizer.java | 35 +++++++++++++++++++---
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 1bdb0a248b3..f8c6ef82865 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -171,6 +171,8 @@ Bug Fixes
 
 * SOLR-16412: Race condition could trigger error on concurrent SizeLimitedDistributedMap cleanup (Patson Luk via noble)
 
+* SOLR-16416: OverseerPrioritizer now runs after all handlers are registered, and retries on failures. (Houston Putman)
+
 Other Changes
 ---------------------
 * SOLR-16351: Upgrade Carrot2 to 4.4.3, upgrade randomizedtesting to 2.8.0. (Dawid Weiss)
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerNodePrioritizer.java b/solr/core/src/java/org/apache/solr/cloud/OverseerNodePrioritizer.java
index de65877b7de..63dca3f1c61 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerNodePrioritizer.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerNodePrioritizer.java
@@ -21,6 +21,7 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import org.apache.solr.client.solrj.impl.ZkDistribStateManager;
+import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.CoreAdminParams;
@@ -117,11 +118,13 @@ public class OverseerNodePrioritizer {
     }
     if (!designateNodeId.equals(electionNodes.get(1))) { // checking if it is already at no:1
       log.info("asking node {} to come join election at head", designateNodeId);
-      invokeOverseerOp(designateNodeId, "rejoinAtHead"); // ask designate to come first
+      invokeOverseerOpWithRetries(
+          designateNodeId, "rejoinAtHead", 5); // ask designate to come first
       if (log.isInfoEnabled()) {
         log.info("asking the old first in line {} to rejoin election  ", electionNodes.get(1));
       }
-      invokeOverseerOp(electionNodes.get(1), "rejoin"); // ask second inline to go behind
+      invokeOverseerOpWithRetries(
+          electionNodes.get(1), "rejoin", 5); // ask second inline to go behind
       if (log.isInfoEnabled()) {
         List<String> newElectionNodes =
             OverseerTaskProcessor.getSortedElectionNodes(
@@ -133,6 +136,28 @@ public class OverseerNodePrioritizer {
     overseer.sendQuitToOverseer(OverseerTaskProcessor.getLeaderId(zkStateReader.getZkClient()));
   }
 
+  private void invokeOverseerOpWithRetries(String electionNode, String op, int retryCount) {
+    boolean successful = false;
+    for (int i = 0; i < retryCount && !successful; i++) {
+      try {
+        invokeOverseerOp(electionNode, op);
+        successful = true;
+      } catch (SolrException e) {
+        if (i < retryCount - 1) {
+          log.warn("Exception occurred while invoking Overseer Operation '{}'. Retrying.", op, e);
+          try {
+            Thread.sleep(100);
+          } catch (InterruptedException ex) {
+            Thread.currentThread().interrupt();
+            break;
+          }
+        } else {
+          throw e;
+        }
+      }
+    }
+  }
+
   private void invokeOverseerOp(String electionNode, String op) {
     ModifiableSolrParams params = new ModifiableSolrParams();
     ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
@@ -149,8 +174,10 @@ public class OverseerNodePrioritizer {
     shardHandler.submit(sreq, replica, sreq.params);
     ShardResponse response = shardHandler.takeCompletedOrError();
     if (response.getException() != null) {
-      log.error(
-          "Exception occurred while invoking Overseer Operation: {}", op, response.getException());
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR,
+          "Exception occurred while invoking Overseer Operation: " + op,
+          response.getException());
     }
   }
 }