You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ds...@apache.org on 2024/03/15 02:06:25 UTC

(solr) branch branch_9x updated (7eb38d5e7dd -> 8c91cd064a1)

This is an automated email from the ASF dual-hosted git repository.

dsmiley pushed a change to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git


    from 7eb38d5e7dd SOLR-599: Better thread clean-up in unit tests (#2259)
     new 4098d6ae3e2 SOLR-16403: ClusterSingleton to remove inactive Shards (#1926)
     new 8c91cd064a1 Tests: avoid NPE in JettySolrRunner lifeCycleStopped (#2337)

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 solr/CHANGES.txt                                   |   2 +
 .../cluster/maintenance/InactiveShardRemover.java  | 219 ++++++++++++++++++++
 .../maintenance/InactiveShardRemoverConfig.java    |  60 ++++++
 .../solr/cluster/maintenance}/package-info.java    |   4 +-
 .../org/apache/solr/cloud/DeleteShardTest.java     |  52 +----
 .../maintenance/InactiveShardRemoverTest.java      | 225 +++++++++++++++++++++
 .../pages/cluster-singleton-plugins.adoc           |  93 +++++++++
 .../java/org/apache/solr/cloud/ShardTestUtil.java  |  57 ++++++
 .../org/apache/solr/embedded/JettySolrRunner.java  |   4 +-
 9 files changed, 672 insertions(+), 44 deletions(-)
 create mode 100644 solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemover.java
 create mode 100644 solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemoverConfig.java
 copy solr/{api/src/java/org/apache/solr/client/api => core/src/java/org/apache/solr/cluster/maintenance}/package-info.java (85%)
 create mode 100644 solr/core/src/test/org/apache/solr/cluster/maintenance/InactiveShardRemoverTest.java
 create mode 100644 solr/solr-ref-guide/modules/configuration-guide/pages/cluster-singleton-plugins.adoc
 create mode 100644 solr/test-framework/src/java/org/apache/solr/cloud/ShardTestUtil.java


(solr) 02/02: Tests: avoid NPE in JettySolrRunner lifeCycleStopped (#2337)

Posted by ds...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

dsmiley pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git

commit 8c91cd064a15af4ab4e19d2503e45c9db1d0f442
Author: David Smiley <ds...@apache.org>
AuthorDate: Thu Mar 14 16:38:33 2024 -0400

    Tests: avoid NPE in JettySolrRunner lifeCycleStopped (#2337)
    
    (cherry picked from commit 6697e8aa07acf9f4b6baae5fc3a747c21d3e07e8)
---
 .../src/java/org/apache/solr/embedded/JettySolrRunner.java            | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/solr/test-framework/src/java/org/apache/solr/embedded/JettySolrRunner.java b/solr/test-framework/src/java/org/apache/solr/embedded/JettySolrRunner.java
index d3fc76afff3..7edb5bc7fdc 100644
--- a/solr/test-framework/src/java/org/apache/solr/embedded/JettySolrRunner.java
+++ b/solr/test-framework/src/java/org/apache/solr/embedded/JettySolrRunner.java
@@ -384,7 +384,9 @@ public class JettySolrRunner {
 
             @Override
             public synchronized void lifeCycleStopped(LifeCycle arg0) {
-              coreContainerProvider.close();
+              if (coreContainerProvider != null) {
+                coreContainerProvider.close();
+              }
             }
 
             @Override


(solr) 01/02: SOLR-16403: ClusterSingleton to remove inactive Shards (#1926)

Posted by ds...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

dsmiley pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git

commit 4098d6ae3e298efdfa219d5a2629d69f60c5aff9
Author: pjmcarthur <92...@users.noreply.github.com>
AuthorDate: Thu Mar 14 12:49:48 2024 -0700

    SOLR-16403: ClusterSingleton to remove inactive Shards (#1926)
    
    ClusterSingleton that periodically removes state=INACTIVE shards.  These occur from shard splits.
    
    Co-authored-by: Paul McArthur <pm...@proton.me>
    (cherry picked from commit ca58f1aa90b351b69b0ec4184adbaaca03978573)
---
 solr/CHANGES.txt                                   |   2 +
 .../cluster/maintenance/InactiveShardRemover.java  | 219 ++++++++++++++++++++
 .../maintenance/InactiveShardRemoverConfig.java    |  60 ++++++
 .../solr/cluster/maintenance/package-info.java     |  19 ++
 .../org/apache/solr/cloud/DeleteShardTest.java     |  52 +----
 .../maintenance/InactiveShardRemoverTest.java      | 225 +++++++++++++++++++++
 .../pages/cluster-singleton-plugins.adoc           |  93 +++++++++
 .../java/org/apache/solr/cloud/ShardTestUtil.java  |  57 ++++++
 8 files changed, 686 insertions(+), 41 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 0a034818332..0cbbc4ea70e 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -11,6 +11,8 @@ New Features
 
 * SOLR-599: Add a new SolrJ client using the JDK’s built-in Http Client.  (James Dyer)
 
+* SOLR-16403: A new cluster singleton plugin to automatically remove inactive shards. (Paul McArthur, David Smiley)
+
 Improvements
 ---------------------
 * SOLR-17119: When registering or updating a ConfigurablePlugin through the `/cluster/plugin` API,
diff --git a/solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemover.java b/solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemover.java
new file mode 100644
index 00000000000..177663d1140
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemover.java
@@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cluster.maintenance;
+
+import com.google.common.annotations.VisibleForTesting;
+import java.lang.invoke.MethodHandles;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Objects;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import org.apache.solr.api.ConfigurablePlugin;
+import org.apache.solr.client.solrj.SolrResponse;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.cloud.ClusterSingleton;
+import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.util.ExecutorUtil;
+import org.apache.solr.common.util.SolrNamedThreadFactory;
+import org.apache.solr.core.CoreContainer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This Cluster Singleton can be configured to periodically find and remove {@link
+ * org.apache.solr.common.cloud.Slice.State#INACTIVE} Shards that are left behind after a Shard is
+ * split.
+ */
+public class InactiveShardRemover
+    implements ClusterSingleton, ConfigurablePlugin<InactiveShardRemoverConfig> {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  public static final String PLUGIN_NAME = ".inactive-shard-remover";
+
+  static class DeleteActor {
+
+    private final CoreContainer coreContainer;
+
+    DeleteActor(final CoreContainer coreContainer) {
+      this.coreContainer = coreContainer;
+    }
+
+    void delete(final Slice slice) {
+      CollectionAdminRequest.DeleteShard deleteRequest =
+          CollectionAdminRequest.deleteShard(slice.getCollection(), slice.getName());
+      try {
+        SolrResponse response =
+            coreContainer.getZkController().getSolrCloudManager().request(deleteRequest);
+        if (response.getException() != null) {
+          throw response.getException();
+        }
+      } catch (Exception e) {
+        log.warn("An exception occurred when deleting an inactive shard", e);
+      }
+    }
+  }
+
+  private State state = State.STOPPED;
+
+  private final CoreContainer coreContainer;
+
+  private final DeleteActor deleteActor;
+
+  private ScheduledExecutorService executor;
+
+  private long scheduleIntervalSeconds;
+
+  private long ttlSeconds;
+
+  private int maxDeletesPerCycle;
+
+  /** Constructor invoked via Reflection */
+  public InactiveShardRemover(final CoreContainer cc) {
+    this(cc, new DeleteActor(cc));
+  }
+
+  public InactiveShardRemover(final CoreContainer cc, final DeleteActor deleteActor) {
+    this.coreContainer = cc;
+    this.deleteActor = deleteActor;
+  }
+
+  @Override
+  public void configure(final InactiveShardRemoverConfig cfg) {
+    Objects.requireNonNull(cfg, "config must be specified");
+    cfg.validate();
+    this.scheduleIntervalSeconds = cfg.scheduleIntervalSeconds;
+    this.maxDeletesPerCycle = cfg.maxDeletesPerCycle;
+    this.ttlSeconds = cfg.ttlSeconds;
+  }
+
+  @Override
+  public String getName() {
+    return PLUGIN_NAME;
+  }
+
+  @Override
+  public State getState() {
+    return state;
+  }
+
+  @Override
+  public void start() throws Exception {
+    state = State.STARTING;
+    executor = Executors.newScheduledThreadPool(1, new SolrNamedThreadFactory(PLUGIN_NAME));
+    executor.scheduleAtFixedRate(
+        this::deleteInactiveSlices,
+        scheduleIntervalSeconds,
+        scheduleIntervalSeconds,
+        TimeUnit.SECONDS);
+    state = State.RUNNING;
+  }
+
+  @Override
+  public void stop() {
+    if (state == State.RUNNING) {
+      state = State.STOPPING;
+      ExecutorUtil.shutdownNowAndAwaitTermination(executor);
+    }
+    state = State.STOPPED;
+  }
+
+  @VisibleForTesting
+  void deleteInactiveSlices() {
+    final ClusterState clusterState = coreContainer.getZkController().getClusterState();
+    Collection<Slice> inactiveSlices =
+        clusterState.getCollectionsMap().values().stream()
+            .flatMap(v -> collectInactiveSlices(v).stream())
+            .collect(Collectors.toSet());
+
+    if (log.isInfoEnabled()) {
+      log.info(
+          "Found {} inactive Shards to delete, {} will be deleted",
+          inactiveSlices.size(),
+          Math.min(inactiveSlices.size(), maxDeletesPerCycle));
+    }
+
+    inactiveSlices.stream().limit(maxDeletesPerCycle).forEach(this::deleteShard);
+  }
+
+  private Collection<Slice> collectInactiveSlices(final DocCollection docCollection) {
+    final Collection<Slice> slices = new HashSet<>(docCollection.getSlices());
+    slices.removeAll(docCollection.getActiveSlices());
+    return slices.stream().filter(this::isExpired).collect(Collectors.toSet());
+  }
+
+  private void deleteShard(final Slice s) {
+    deleteActor.delete(s);
+  }
+
+  /**
+   * An Inactive Shard is expired if it has not undergone a state change in the period of time
+   * defined by {@link InactiveShardRemover#ttlSeconds}. If it is expired, it is eligible for
+   * removal.
+   */
+  private boolean isExpired(final Slice slice) {
+
+    final String collectionName = slice.getCollection();
+    final String sliceName = slice.getName();
+
+    if (slice.getState() != Slice.State.INACTIVE) {
+      return false;
+    }
+
+    final String lastChangeTimestamp = slice.getStr(ZkStateReader.STATE_TIMESTAMP_PROP);
+    if (lastChangeTimestamp == null || lastChangeTimestamp.isEmpty()) {
+      log.warn(
+          "Collection {} Shard {} has no last change timestamp and will not be deleted",
+          collectionName,
+          sliceName);
+      return false;
+    }
+
+    final long epochTimestampNs;
+    try {
+      epochTimestampNs = Long.parseLong(lastChangeTimestamp);
+    } catch (NumberFormatException e) {
+      log.warn(
+          "Collection {} Shard {} has an invalid last change timestamp and will not be deleted",
+          collectionName,
+          sliceName);
+      return false;
+    }
+
+    long currentEpochTimeNs =
+        coreContainer.getZkController().getSolrCloudManager().getTimeSource().getEpochTimeNs();
+    long delta = TimeUnit.NANOSECONDS.toSeconds(currentEpochTimeNs - epochTimestampNs);
+
+    boolean expired = delta >= ttlSeconds;
+    if (log.isDebugEnabled()) {
+      log.debug(
+          "collection {} shard {} last state change {} seconds ago. Expired={}",
+          slice.getCollection(),
+          slice.getName(),
+          delta,
+          expired);
+    }
+    return expired;
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemoverConfig.java b/solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemoverConfig.java
new file mode 100644
index 00000000000..22465e82359
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemoverConfig.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cluster.maintenance;
+
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.annotation.JsonProperty;
+import org.apache.solr.common.util.ReflectMapWriter;
+
+public class InactiveShardRemoverConfig implements ReflectMapWriter {
+
+  public static final long DEFAULT_SCHEDULE_INTERVAL_SECONDS = 900L; // 15 minutes
+
+  public static final long DEFAULT_TTL_SECONDS = 900L; // 15 minutes
+
+  public static final int DEFAULT_MAX_DELETES_PER_CYCLE = 20;
+
+  @JsonProperty public long scheduleIntervalSeconds;
+
+  @JsonProperty public long ttlSeconds;
+
+  @JsonProperty public int maxDeletesPerCycle;
+
+  /** Default constructor required for deserialization */
+  public InactiveShardRemoverConfig() {
+    this(DEFAULT_SCHEDULE_INTERVAL_SECONDS, DEFAULT_TTL_SECONDS, DEFAULT_MAX_DELETES_PER_CYCLE);
+  }
+
+  public InactiveShardRemoverConfig(
+      final long scheduleIntervalSeconds, final long ttlSeconds, final int maxDeletesPerCycle) {
+    this.scheduleIntervalSeconds = scheduleIntervalSeconds;
+    this.ttlSeconds = ttlSeconds;
+    this.maxDeletesPerCycle = maxDeletesPerCycle;
+  }
+
+  public void validate() {
+    if (scheduleIntervalSeconds <= 0) {
+      throw new SolrException(
+          SolrException.ErrorCode.BAD_REQUEST, "scheduleIntervalSeconds must be greater than 0");
+    }
+    if (maxDeletesPerCycle <= 0) {
+      throw new SolrException(
+          SolrException.ErrorCode.BAD_REQUEST, "maxDeletesPerCycle must be greater than 0");
+    }
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/cluster/maintenance/package-info.java b/solr/core/src/java/org/apache/solr/cluster/maintenance/package-info.java
new file mode 100644
index 00000000000..3001cb775c3
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/cluster/maintenance/package-info.java
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Cluster Singleton plugins that are used to perform maintenance tasks within the cluster. */
+package org.apache.solr.cluster.maintenance;
diff --git a/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java b/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java
index 63bd18ea46d..91ab353be1b 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java
@@ -18,17 +18,12 @@ package org.apache.solr.cloud;
 
 import java.io.IOException;
 import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.cloud.DistributedQueue;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.CoreStatus;
-import org.apache.solr.cloud.overseer.OverseerAction;
-import org.apache.solr.common.MapWriter;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.Slice.State;
-import org.apache.solr.common.cloud.ZkNodeProps;
-import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.util.FileUtils;
 import org.junit.After;
 import org.junit.Before;
@@ -68,7 +63,7 @@ public class DeleteShardTest extends SolrCloudTestCase {
             CollectionAdminRequest.deleteShard(collection, "shard1")
                 .process(cluster.getSolrClient()));
 
-    setSliceState(collection, "shard1", Slice.State.INACTIVE);
+    ShardTestUtil.setSliceState(cluster, collection, "shard1", Slice.State.INACTIVE);
 
     // Can delete an INACTIVE shard
     CollectionAdminRequest.deleteShard(collection, "shard1").process(cluster.getSolrClient());
@@ -76,46 +71,12 @@ public class DeleteShardTest extends SolrCloudTestCase {
         "Expected 'shard1' to be removed", collection, (n, c) -> c.getSlice("shard1") == null);
 
     // Can delete a shard under construction
-    setSliceState(collection, "shard2", Slice.State.CONSTRUCTION);
+    ShardTestUtil.setSliceState(cluster, collection, "shard2", Slice.State.CONSTRUCTION);
     CollectionAdminRequest.deleteShard(collection, "shard2").process(cluster.getSolrClient());
     waitForState(
         "Expected 'shard2' to be removed", collection, (n, c) -> c.getSlice("shard2") == null);
   }
 
-  protected void setSliceState(String collection, String slice, State state) throws Exception {
-
-    // TODO can this be encapsulated better somewhere?
-    MapWriter m =
-        ew ->
-            ew.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower())
-                .put(slice, state.toString())
-                .put(ZkStateReader.COLLECTION_PROP, collection);
-    final Overseer overseer = cluster.getOpenOverseer();
-    if (overseer.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
-      overseer
-          .getDistributedClusterStateUpdater()
-          .doSingleStateUpdate(
-              DistributedClusterStateUpdater.MutatingCommand.SliceUpdateShardState,
-              new ZkNodeProps(m),
-              cluster.getOpenOverseer().getSolrCloudManager(),
-              cluster.getOpenOverseer().getZkStateReader());
-    } else {
-      DistributedQueue inQueue =
-          cluster
-              .getJettySolrRunner(0)
-              .getCoreContainer()
-              .getZkController()
-              .getOverseer()
-              .getStateUpdateQueue();
-      inQueue.offer(m);
-    }
-
-    waitForState(
-        "Expected shard " + slice + " to be in state " + state,
-        collection,
-        (n, c) -> c.getSlice(slice).getState() == state);
-  }
-
   @Test
   public void testDirectoryCleanupAfterDeleteShard() throws IOException, SolrServerException {
 
@@ -162,4 +123,13 @@ public class DeleteShardTest extends SolrCloudTestCase {
         "Instance directory still exists", FileUtils.fileExists(coreStatus.getInstanceDirectory()));
     assertTrue("Data directory still exists", FileUtils.fileExists(coreStatus.getDataDirectory()));
   }
+
+  private void setSliceState(String collectionName, String shardId, Slice.State state)
+      throws Exception {
+    ShardTestUtil.setSliceState(cluster, collectionName, shardId, state);
+    waitForState(
+        "Expected shard " + shardId + " to be in state " + state,
+        collectionName,
+        (n, c) -> c.getSlice(shardId).getState() == state);
+  }
 }
diff --git a/solr/core/src/test/org/apache/solr/cluster/maintenance/InactiveShardRemoverTest.java b/solr/core/src/test/org/apache/solr/cluster/maintenance/InactiveShardRemoverTest.java
new file mode 100644
index 00000000000..fb8f2b95715
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cluster/maintenance/InactiveShardRemoverTest.java
@@ -0,0 +1,225 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cluster.maintenance;
+
+import static org.apache.solr.client.solrj.SolrRequest.METHOD.POST;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.V2Request;
+import org.apache.solr.client.solrj.request.beans.PluginMeta;
+import org.apache.solr.client.solrj.response.V2Response;
+import org.apache.solr.cloud.ShardTestUtil;
+import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.util.TimeSource;
+import org.apache.solr.core.CoreContainer;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class InactiveShardRemoverTest extends SolrCloudTestCase {
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    configureCluster(1)
+        .addConfig(
+            "conf", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
+        .configure();
+  }
+
+  @Test
+  public void testDeleteInactiveShard() throws Exception {
+
+    addPlugin(new InactiveShardRemoverConfig(1, 0, 2));
+    try {
+      final String collectionName = "testDeleteInactiveShard";
+      createCollection(collectionName, 1);
+
+      final String sliceName =
+          new ArrayList<>(getCollectionState(collectionName).getSlices()).get(0).getName();
+      ShardTestUtil.setSliceState(cluster, collectionName, sliceName, Slice.State.INACTIVE);
+
+      waitForState(
+          "Waiting for inactive shard to be deleted",
+          collectionName,
+          clusterShape(0, 0),
+          5,
+          TimeUnit.SECONDS);
+    } finally {
+      removePlugin();
+    }
+  }
+
+  @Test
+  public void testTtl() throws Exception {
+
+    final int ttlSeconds = 1 + random().nextInt(5);
+    final TimeSource timeSource = cluster.getOpenOverseer().getSolrCloudManager().getTimeSource();
+
+    addPlugin(new InactiveShardRemoverConfig(1, ttlSeconds, 1));
+    try {
+      final String collectionName = "testTtl";
+      createCollection(collectionName, 1);
+
+      final String sliceName =
+          new ArrayList<>(getCollectionState(collectionName).getSlices()).get(0).getName();
+      ShardTestUtil.setSliceState(cluster, collectionName, sliceName, Slice.State.INACTIVE);
+      waitForState(
+          "Expected shard " + sliceName + " to be in state " + Slice.State.INACTIVE,
+          collectionName,
+          (n, c) -> c.getSlice(sliceName).getState() == Slice.State.INACTIVE);
+
+      final long ttlStart = timeSource.getTimeNs();
+
+      waitForState(
+          "Waiting for InactiveShardRemover to delete inactive shard",
+          collectionName,
+          clusterShape(0, 0),
+          ttlSeconds + 5,
+          TimeUnit.SECONDS);
+
+      final long ttlEnd = timeSource.getTimeNs();
+      final long ttlPeriodSeconds = TimeUnit.NANOSECONDS.toSeconds(ttlEnd - ttlStart);
+
+      assertTrue(ttlPeriodSeconds >= ttlSeconds);
+    } finally {
+      removePlugin();
+    }
+  }
+
+  public void testMaxShardsToDeletePerCycle() throws Exception {
+
+    final CoreContainer cc = cluster.getOpenOverseer().getCoreContainer();
+
+    final int maxDeletesPerCycle = 5;
+    final InactiveShardRemover remover = new InactiveShardRemover(cc);
+    remover.configure(new InactiveShardRemoverConfig(1, 0, maxDeletesPerCycle));
+
+    // Remove across multiple collections
+    final String collection1 = "testMaxShardsToDeletePerCycle-1";
+    final String collection2 = "testMaxShardsToDeletePerCycle-2";
+    final int shardsPerCollection = 10;
+    final int totalShards = 2 * shardsPerCollection;
+
+    createCollection(collection1, shardsPerCollection);
+    createCollection(collection2, shardsPerCollection);
+
+    setAllShardsInactive(collection1);
+    setAllShardsInactive(collection2);
+
+    int cycle = 0;
+    int shardsDeleted = 0;
+    while (shardsDeleted < totalShards) {
+      cycle++;
+      remover.deleteInactiveSlices();
+      DocCollection coll1 = getCollectionState(collection1);
+      DocCollection coll2 = getCollectionState(collection2);
+
+      int remainingShards = coll1.getSlices().size() + coll2.getSlices().size();
+      if (remainingShards != totalShards - maxDeletesPerCycle * cycle) {
+        System.out.println(coll1);
+        System.out.println(coll2);
+      }
+      assertEquals(totalShards - maxDeletesPerCycle * cycle, remainingShards);
+      shardsDeleted = totalShards - remainingShards;
+    }
+  }
+
+  @Test
+  public void testConfigValidation() {
+
+    try {
+      new InactiveShardRemoverConfig(0, 0, 1).validate();
+      fail("Expected validation error for scheduleIntervalSeconds=0");
+    } catch (SolrException e) {
+      assertEquals(SolrException.ErrorCode.BAD_REQUEST.code, e.code());
+    }
+
+    try {
+      new InactiveShardRemoverConfig(1, 0, 0).validate();
+      fail("Expected validation error for maxDeletesPerCycle=0");
+    } catch (SolrException e) {
+      assertEquals(SolrException.ErrorCode.BAD_REQUEST.code, e.code());
+    }
+  }
+
+  private static void addPlugin(final InactiveShardRemoverConfig config)
+      throws SolrServerException, IOException {
+    PluginMeta plugin = pluginMeta(config);
+    pluginRequest(Collections.singletonMap("add", plugin));
+  }
+
+  private static void removePlugin() throws SolrServerException, IOException {
+    pluginRequest(Collections.singletonMap("remove", InactiveShardRemover.PLUGIN_NAME));
+  }
+
+  private static void pluginRequest(Map<String, Object> payload)
+      throws SolrServerException, IOException {
+    V2Request req =
+        new V2Request.Builder("/cluster/plugin").withMethod(POST).withPayload(payload).build();
+    V2Response rsp = req.process(cluster.getSolrClient());
+    assertEquals(0, rsp.getStatus());
+  }
+
+  private static PluginMeta pluginMeta(final InactiveShardRemoverConfig config) {
+    PluginMeta plugin = pluginMeta();
+    plugin.config = config;
+    return plugin;
+  }
+
+  private static PluginMeta pluginMeta() {
+    PluginMeta plugin = new PluginMeta();
+    plugin.klass = InactiveShardRemover.class.getName();
+    plugin.name = InactiveShardRemover.PLUGIN_NAME;
+    return plugin;
+  }
+
+  private void createCollection(final String collectionName, final int numShards)
+      throws SolrServerException, IOException {
+    CollectionAdminRequest.createCollection(collectionName, "conf", numShards, 1)
+        .process(cluster.getSolrClient());
+
+    cluster.waitForActiveCollection(collectionName, numShards, numShards);
+  }
+
+  private void setAllShardsInactive(final String collectionName) {
+    DocCollection collection = getCollectionState(collectionName);
+    collection.getSlices().stream()
+        .filter(s -> s.getState() != Slice.State.INACTIVE)
+        .forEach(
+            s -> {
+              try {
+                ShardTestUtil.setSliceState(
+                    cluster, s.getCollection(), s.getName(), Slice.State.INACTIVE);
+                waitForState(
+                    "Expected shard " + s + " to be in state " + Slice.State.INACTIVE,
+                    collection.getName(),
+                    (n, c) -> c.getSlice(s.getName()).getState() == Slice.State.INACTIVE);
+              } catch (Exception e) {
+                throw new RuntimeException(e);
+              }
+            });
+  }
+}
diff --git a/solr/solr-ref-guide/modules/configuration-guide/pages/cluster-singleton-plugins.adoc b/solr/solr-ref-guide/modules/configuration-guide/pages/cluster-singleton-plugins.adoc
new file mode 100644
index 00000000000..5afdcb0d18a
--- /dev/null
+++ b/solr/solr-ref-guide/modules/configuration-guide/pages/cluster-singleton-plugins.adoc
@@ -0,0 +1,93 @@
+= Cluster Singleton Plugins
+:toclevels: 3
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+The Solr distribution includes some Cluster Singleton plugins.
+Additional plugins can be added; they must implement the `ClusterSingleton` interface.
+A plugin's configuration entry may also contain a `config` element if the plugin implements the `ConfigurablePlugin` interface.
+
+== Plugin Configuration
+Cluster Singleton plugins can be configured in two ways, either by using the xref:cluster-plugins.adoc[cluster plugins API] or by declaring them in xref:configuring-solr-xml.adoc[solr.xml].
+
+All cluster plugins must be declared using the same method. It is not possible to declare some plugins in solr.xml and use the cluster plugins API to manage other plugins.
+
+== Cluster Singleton Plugins Included with Solr
+Solr includes the following plugins out-of-the-box.
+
+=== Inactive Shard Remover
+This plugin will periodically find and delete shards that have an INACTIVE shard state.
+Shards become INACTIVE when they are split; the documents they contained are then managed by two or more sub-shards.
+
+Configuration using the cluster plugin API
+[source,bash]
+----
+curl -X POST -H 'Content-type: application/json' -d '{
+    "add":{
+        "name": ".inactive-shard-remover",
+        "class": "org.apache.solr.cluster.maintenance.InactiveShardRemover",
+        "config": {
+          "scheduleIntervalSeconds": 3600,
+          "ttlSeconds": 1800,
+          "maxDeletesPerCycle": 20
+        }
+    }}' \
+  http://localhost:8983/api/cluster/plugin
+----
+
+Configuration in solr.xml
+[source,xml]
+----
+<clusterSingleton name=".inactive-shard-remover" class="org.apache.solr.cluster.maintenance.InactiveShardRemover">
+  <long name="scheduleIntervalSeconds">3600</long>
+  <long name="ttlSeconds">1800</long>
+  <int name="maxDeletesPerCycle">20</int>
+</clusterSingleton>
+----
+
+NOTE: The Inactive Shard Remover plugin configuration MUST use the predefined name `.inactive-shard-remover`.
+There can be only one (or none) of these configurations defined.
+
+==== Configuration
+
+`scheduleIntervalSeconds`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `900` Seconds
+|===
++
+This value determines how often the inactive shard remover will run.
+
+`ttlSeconds`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `900` Seconds
+|===
++
+This value defines the minimum period of time that a Shard must be INACTIVE before it is considered for deletion.
+
+`maxDeletesPerCycle`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `20`
+|===
++
+This is the maximum number of shards that will be deleted each time the inactive shard remover runs.
+If there are more Shards that could be deleted, they will be considered during the next cycle.
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/ShardTestUtil.java b/solr/test-framework/src/java/org/apache/solr/cloud/ShardTestUtil.java
new file mode 100644
index 00000000000..4faf9708098
--- /dev/null
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/ShardTestUtil.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.cloud;
+
+import org.apache.solr.client.solrj.cloud.DistributedQueue;
+import org.apache.solr.cloud.overseer.OverseerAction;
+import org.apache.solr.common.MapWriter;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.cloud.ZkNodeProps;
+import org.apache.solr.common.cloud.ZkStateReader;
+
+public class ShardTestUtil {
+  /** Requests a shard state change by issuing an UPDATESHARDSTATE message for the slice. */
+  public static void setSliceState(
+      MiniSolrCloudCluster cluster, String collection, String slice, Slice.State state)
+      throws Exception {
+    // Payload: lower-cased UPDATESHARDSTATE operation, slice -> state entry, and the collection.
+    MapWriter m =
+        ew ->
+            ew.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower())
+                .put(slice, state.toString())
+                .put(ZkStateReader.COLLECTION_PROP, collection);
+    final Overseer overseer = cluster.getOpenOverseer();
+    if (overseer.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
+      overseer
+          .getDistributedClusterStateUpdater()
+          .doSingleStateUpdate(
+              DistributedClusterStateUpdater.MutatingCommand.SliceUpdateShardState,
+              new ZkNodeProps(m),
+              overseer.getSolrCloudManager(), // reuse the Overseer resolved above
+              overseer.getZkStateReader());
+    } else { // non-distributed updates: enqueue the message on the Overseer state update queue
+      DistributedQueue inQueue =
+          cluster
+              .getJettySolrRunner(0)
+              .getCoreContainer()
+              .getZkController()
+              .getOverseer()
+              .getStateUpdateQueue();
+      inQueue.offer(m);
+    }
+  }
+}