You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2020/10/23 09:46:17 UTC

[lucene-solr] branch jira/solr-14942 updated: SOLR-14982: Added asserts on registration and de-registration and code comments

This is an automated email from the ASF dual-hosted git repository.

shalin pushed a commit to branch jira/solr-14942
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/jira/solr-14942 by this push:
     new ae1dee7  SOLR-14982: Added asserts on registration and de-registration and code comments
ae1dee7 is described below

commit ae1dee771f241cb58f0fb827aadbafb877e3fea8
Author: Shalin Shekhar Mangar <sh...@apache.org>
AuthorDate: Fri Oct 23 15:16:01 2020 +0530

    SOLR-14982: Added asserts on registration and de-registration and code comments
---
 .../src/java/org/apache/solr/core/CoreContainer.java    | 17 +++++++++++++++--
 .../src/java/org/apache/solr/servlet/HttpSolrCall.java  |  9 +++++++++
 .../src/java/org/apache/solr/update/SolrCoreState.java  |  8 ++++----
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index ff3d2a9..a6ae366 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -38,7 +38,11 @@ import java.util.Optional;
 import java.util.Properties;
 import java.util.Set;
 import java.util.UUID;
-import java.util.concurrent.*;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeoutException;
 import java.util.function.Supplier;
 
 import com.google.common.annotations.VisibleForTesting;
@@ -996,7 +1000,15 @@ public class CoreContainer {
       if (isZooKeeperAware()) {
         cancelCoreRecoveries();
         zkSys.zkController.preClose();
-        // doesn't need to unpause here since we are shutting down
+        /*
+         * Pause updates for all cores on this node and wait for all in-flight update requests to finish.
+         * Here, we (slightly) delay leader election so that in-flight update requests succeed and we can preserve consistency.
+         *
+         * Jetty already allows a grace period for in-flight requests to complete and our solr cores, searchers etc
+         * are reference counted to allow for graceful shutdown. So we don't worry about any other kind of requests.
+         *
+         * We do not need to unpause ever because the node is being shut down.
+         */
         getCores().parallelStream().forEach(solrCore -> {
           SolrCoreState solrCoreState = solrCore.getSolrCoreState();
           try {
@@ -1005,6 +1017,7 @@ public class CoreContainer {
             log.warn("Timed out waiting for in-flight update requests to complete for core: {}", solrCore.getName());
           } catch (InterruptedException e) {
             log.warn("Interrupted while waiting for in-flight update requests to complete for core: {}", solrCore.getName());
+            Thread.currentThread().interrupt();
           }
         });
         zkSys.zkController.tryCancelAllElections();
diff --git a/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java b/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java
index c5aaf86..6c03059 100644
--- a/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java
+++ b/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java
@@ -552,6 +552,14 @@ public class HttpSolrCall {
           remoteQuery(coreUrl + path, resp);
           return RETURN;
         case PROCESS:
+          /*
+           We track update requests so that we can preserve consistency by waiting for them to complete
+           on a node shutdown and then immediately trigger a leader election without waiting for the core to close.
+           See how the SolrCoreState#pauseUpdatesAndAwaitInflightRequests() method is used in CoreContainer#shutdown()
+
+           Also see https://issues.apache.org/jira/browse/SOLR-14942 for details on why we do not care for
+           other kinds of requests.
+          */
           if (handler instanceof UpdateRequestHandler && !core.getSolrCoreState().registerInFlightUpdate()) {
             throw new SolrException(ErrorCode.SERVER_ERROR, "Updates are temporarily paused for core: " + core.getName());
           }
@@ -591,6 +599,7 @@ public class HttpSolrCall {
             return RETURN;
           } finally {
             if (handler instanceof UpdateRequestHandler) {
+              // every registered request must also be de-registered
               core.getSolrCoreState().deregisterInFlightUpdate();
             }
           }
diff --git a/solr/core/src/java/org/apache/solr/update/SolrCoreState.java b/solr/core/src/java/org/apache/solr/update/SolrCoreState.java
index 5022596..580f9b4 100644
--- a/solr/core/src/java/org/apache/solr/update/SolrCoreState.java
+++ b/solr/core/src/java/org/apache/solr/update/SolrCoreState.java
@@ -124,9 +124,8 @@ public abstract class SolrCoreState {
   public void pauseUpdatesAndAwaitInflightRequests() throws TimeoutException, InterruptedException {
     if (pauseUpdateRequests.compareAndSet(false, true)) {
       int arrivalNumber = inflightUpdatesCounter.register();
-      if (arrivalNumber != -1) {
-        inflightUpdatesCounter.awaitAdvanceInterruptibly(inflightUpdatesCounter.arrive(), PAUSE_UPDATES_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS);
-      }
+      assert arrivalNumber >= 0 : "Registration of in-flight request should have succeeded but got arrival phase number < 0";
+      inflightUpdatesCounter.awaitAdvanceInterruptibly(inflightUpdatesCounter.arrive(), PAUSE_UPDATES_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS);
     }
   }
 
@@ -147,7 +146,8 @@ public abstract class SolrCoreState {
    * De-registers in-flight update requests to this core (marks them as completed)
    */
   public void deregisterInFlightUpdate() {
-    inflightUpdatesCounter.arriveAndDeregister();
+    int arrivalPhaseNumber = inflightUpdatesCounter.arriveAndDeregister();
+    assert arrivalPhaseNumber >= 0 : "inflightUpdatesCounter should not have been terminated";
   }
 
   public abstract Lock getCommitLock();