You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by so...@apache.org on 2020/06/03 19:13:30 UTC

[lucene-solr] 01/47: SOLR-14473: Improve Overseer Javadoc (#1510)

This is an automated email from the ASF dual-hosted git repository.

sokolov pushed a commit to branch jira/lucene-8962
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit 3cb8ebbc0b22ab2929a3040da20329426dffb91b
Author: murblanc <43...@users.noreply.github.com>
AuthorDate: Mon May 18 22:21:16 2020 +0200

    SOLR-14473: Improve Overseer Javadoc (#1510)
    
    Co-authored-by: Ilan Ginzburg <ig...@salesforce.com>
---
 .../src/java/org/apache/solr/cloud/Overseer.java   | 64 +++++++++++++++++++++-
 .../org/apache/solr/cloud/ZkDistributedQueue.java  | 11 +++-
 .../solr/common/params/CollectionParams.java       |  9 +++
 3 files changed, 80 insertions(+), 4 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java
index 9df6a2d..dd01368 100644
--- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java
+++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java
@@ -38,6 +38,7 @@ import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.client.solrj.impl.ClusterStateProvider;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.response.CollectionAdminResponse;
+import org.apache.solr.cloud.api.collections.CreateCollectionCmd;
 import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler;
 import org.apache.solr.cloud.autoscaling.OverseerTriggerThread;
 import org.apache.solr.cloud.overseer.ClusterStateMutator;
@@ -79,8 +80,61 @@ import org.slf4j.LoggerFactory;
 import com.codahale.metrics.Timer;
 
 /**
- * Cluster leader. Responsible for processing state updates, node assignments, creating/deleting
- * collections, shards, replicas and setting various properties.
+ * <p>Cluster leader. Responsible for processing state updates, node assignments, creating/deleting
+ * collections, shards, replicas and setting various properties.</p>
+ *
+ * <p>The <b>Overseer</b> is a single elected node in the SolrCloud cluster that is in charge of interactions with
+ * ZooKeeper that require global synchronization. It also hosts the Collection API implementation and the
+ * Autoscaling framework.</p>
+ *
+ * <p>The Overseer deals with:</p>
+ * <ul>
+ *   <li>Cluster State updates, i.e. updating Collections' <code>state.json</code> files in ZooKeeper, see {@link ClusterStateUpdater},</li>
+ *   <li>Collection API implementation, including Autoscaling replica placement computation, see
+ *   {@link OverseerCollectionConfigSetProcessor} and {@link OverseerCollectionMessageHandler} (and the example below),</li>
+ *   <li>Updating Config Sets, see {@link OverseerCollectionConfigSetProcessor} and {@link OverseerConfigSetMessageHandler},</li>
+ *   <li>Autoscaling triggers, see {@link org.apache.solr.cloud.autoscaling.OverseerTriggerThread}.</li>
+ * </ul>
+ *
+ * <p>The nodes in the cluster communicate with the Overseer over queues implemented in ZooKeeper. There are essentially
+ * two queues:</p>
+ * <ol>
+ *   <li>The <b>state update queue</b>, through which nodes request the Overseer to update the <code>state.json</code> file of a
+ *   Collection in ZooKeeper. This queue is in Zookeeper at <code>/overseer/queue</code>,</li>
+ *   <li>A queue shared between <b>Collection API and Config Set API</b> requests. This queue is in Zookeeper at
+ *   <code>/overseer/collection-queue-work</code>.</li>
+ * </ol>
+ *
+ * <p>An example of the steps involved in the Overseer processing a Collection creation API call:</p>
+ * <ol>
+ *   <li>Client uses the Collection API with <code>CREATE</code> action and reaches a node of the cluster,</li>
+ *   <li>The node (via {@link CollectionsHandler}) enqueues the request into the <code>/overseer/collection-queue-work</code>
+ *   queue in ZooKeepeer,</li>
+ *   <li>The {@link OverseerCollectionConfigSetProcessor} running on the Overseer node dequeues the message and using an
+ *   executor service with a maximum pool size of {@link OverseerTaskProcessor#MAX_PARALLEL_TASKS} hands it for processing
+ *   to {@link OverseerCollectionMessageHandler},</li>
+ *   <li>Command {@link CreateCollectionCmd} then executes and does:
+ *   <ol>
+ *     <li>Update some state directly in ZooKeeper (creating collection znode),</li>
+ *     <li>Compute replica placement on available nodes in the cluster,</li>
+ *     <li>Enqueue a state change request for creating the <code>state.json</code> file for the collection in ZooKeeper.
+ *     This is done by enqueuing a message in <code>/overseer/queue</code>,</li>
+ *     <li>The command then waits for the update to be seen in ZooKeeper...</li>
+ *   </ol></li>
+ *   <li>The {@link ClusterStateUpdater} (also running on the Overseer node) dequeues the state change message and creates the
+ *   <code>state.json</code> file in ZooKeeper for the Collection. All the work of the cluster state updater
+ *   (creations, updates, deletes) is done sequentially for the whole cluster by a single thread.</li>
+ *   <li>The {@link CreateCollectionCmd} sees the state change in
+ *   ZooKeeper and:
+ *   <ol start="5">
+ *     <li>Builds and sends requests to each node to create the appropriate cores for all the replicas of all shards
+ *     of the collection. Nodes create the replicas and set them to {@link org.apache.solr.common.cloud.Replica.State#ACTIVE}.</li>
+ *   </ol></li>
+ *   <li>The collection creation command has succeeded from the Overseer perspective,</li>
+ *   <li>{@link CollectionsHandler} checks the replicas in Zookeeper and verifies they are all
+ *   {@link org.apache.solr.common.cloud.Replica.State#ACTIVE},</li>
+ *   <li>The client receives a success return.</li>
+ * </ol>
  */
 public class Overseer implements SolrCloseable {
   public static final String QUEUE_OPERATION = "operation";
@@ -97,6 +151,12 @@ public class Overseer implements SolrCloseable {
 
   enum LeaderStatus {DONT_KNOW, NO, YES}
 
+  /**
+   * <p>This class is responsible for dequeueing state change requests from the ZooKeeper queue at <code>/overseer/queue</code>
+   * and executing the requested cluster change (essentially writing or updating <code>state.json</code> for a collection).</p>
+   *
+   * <p>The cluster state updater is a single thread dequeueing and executing requests.</p>
+   */
   private class ClusterStateUpdater implements Runnable, Closeable {
 
     private final ZkStateReader reader;
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkDistributedQueue.java b/solr/core/src/java/org/apache/solr/cloud/ZkDistributedQueue.java
index 465888f..53d799b 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkDistributedQueue.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkDistributedQueue.java
@@ -51,9 +51,16 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * A ZK-based distributed queue. Optimized for single-consumer,
+ * <p>A ZK-based distributed queue. Optimized for single-consumer,
  * multiple-producer: if there are multiple consumers on the same ZK queue,
- * the results should be correct but inefficient
+ * the results should be correct but inefficient.</p>
+ *
+ * <p>This implementation (with help from subclass {@link OverseerTaskQueue}) is used for the
+ * <code>/overseer/collection-queue-work</code> queue used for Collection and Config Set API calls to the Overseer.</p>
+ *
+ * <p><i>Implementation note:</i> In order to enqueue a message into this queue, a {@link CreateMode#EPHEMERAL_SEQUENTIAL} response node is created
+ * and watched at <code>/overseer/collection-queue-work/qnr-<i>monotonically_increasng_id</i></code>, then a corresponding
+ * {@link CreateMode#PERSISTENT} request node reusing the same id is created at <code>/overseer/collection-queue-work/qn-<i>response_id</i></code>.</p>
  */
 public class ZkDistributedQueue implements DistributedQueue {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/CollectionParams.java b/solr/solrj/src/java/org/apache/solr/common/params/CollectionParams.java
index 89f1600..3e8ee37 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/CollectionParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/CollectionParams.java
@@ -70,6 +70,15 @@ public interface CollectionParams {
     }
   }
 
+  /**
+   * <p>(Mostly) Collection API actions that can be sent by nodes to the Overseer over the <code>/overseer/collection-queue-work</code>
+   * ZooKeeper queue.</p>
+   *
+   * <p>Some of these actions are also used over the cluster state update queue at <code>/overseer/queue</code> and have a
+   * different (though related) meaning there. These actions are:
+   * {@link #CREATE}, {@link #DELETE}, {@link #CREATESHARD}, {@link #DELETESHARD}, {@link #ADDREPLICA}, {@link #ADDREPLICAPROP},
+   * {@link #DELETEREPLICAPROP}, {@link #BALANCESHARDUNIQUE}, {@link #MODIFYCOLLECTION} and {@link #MIGRATESTATEFORMAT}.</p>
+   */
   enum CollectionAction {
     CREATE(true, LockLevel.COLLECTION),
     DELETE(true, LockLevel.COLLECTION),