You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ds...@apache.org on 2024/03/14 19:49:53 UTC
(solr) branch main updated: SOLR-16403: ClusterSingleton to remove inactive Shards (#1926)
This is an automated email from the ASF dual-hosted git repository.
dsmiley pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new ca58f1aa90b SOLR-16403: ClusterSingleton to remove inactive Shards (#1926)
ca58f1aa90b is described below
commit ca58f1aa90b351b69b0ec4184adbaaca03978573
Author: pjmcarthur <92...@users.noreply.github.com>
AuthorDate: Thu Mar 14 12:49:48 2024 -0700
SOLR-16403: ClusterSingleton to remove inactive Shards (#1926)
ClusterSingleton that periodically removes state=INACTIVE shards. These occur from shard splits.
Co-authored-by: Paul McArthur <pm...@proton.me>
---
solr/CHANGES.txt | 2 +
.../cluster/maintenance/InactiveShardRemover.java | 219 ++++++++++++++++++++
.../maintenance/InactiveShardRemoverConfig.java | 60 ++++++
.../solr/cluster/maintenance/package-info.java | 19 ++
.../org/apache/solr/cloud/DeleteShardTest.java | 52 +----
.../maintenance/InactiveShardRemoverTest.java | 225 +++++++++++++++++++++
.../pages/cluster-singleton-plugins.adoc | 93 +++++++++
.../java/org/apache/solr/cloud/ShardTestUtil.java | 57 ++++++
8 files changed, 686 insertions(+), 41 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 84d0b79dbd7..f55d4e82900 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -90,6 +90,8 @@ New Features
* SOLR-599: Add a new SolrJ client using the JDK’s built-in Http Client. (James Dyer)
+* SOLR-16403: A new cluster singleton plugin to automatically remove inactive shards. (Paul McArthur, David Smiley)
+
Improvements
---------------------
* SOLR-17119: When registering or updating a ConfigurablePlugin through the `/cluster/plugin` API,
diff --git a/solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemover.java b/solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemover.java
new file mode 100644
index 00000000000..177663d1140
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemover.java
@@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cluster.maintenance;
+
+import com.google.common.annotations.VisibleForTesting;
+import java.lang.invoke.MethodHandles;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Objects;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import org.apache.solr.api.ConfigurablePlugin;
+import org.apache.solr.client.solrj.SolrResponse;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.cloud.ClusterSingleton;
+import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.util.ExecutorUtil;
+import org.apache.solr.common.util.SolrNamedThreadFactory;
+import org.apache.solr.core.CoreContainer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This Cluster Singleton can be configured to periodically find and remove {@link
+ * org.apache.solr.common.cloud.Slice.State#INACTIVE} Shards that are left behind after a Shard is
+ * split
+ */
+public class InactiveShardRemover
+    implements ClusterSingleton, ConfigurablePlugin<InactiveShardRemoverConfig> {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  /** Predefined plugin name; at most one instance of this plugin may be configured. */
+  public static final String PLUGIN_NAME = ".inactive-shard-remover";
+
+  /** Issues the DELETESHARD admin request. Extracted as a seam so tests can intercept deletes. */
+  static class DeleteActor {
+
+    private final CoreContainer coreContainer;
+
+    DeleteActor(final CoreContainer coreContainer) {
+      this.coreContainer = coreContainer;
+    }
+
+    /** Deletes the given slice. Failures are logged, not propagated, so one bad
+     * shard does not abort the rest of a cycle. */
+    void delete(final Slice slice) {
+      CollectionAdminRequest.DeleteShard deleteRequest =
+          CollectionAdminRequest.deleteShard(slice.getCollection(), slice.getName());
+      try {
+        SolrResponse response =
+            coreContainer.getZkController().getSolrCloudManager().request(deleteRequest);
+        if (response.getException() != null) {
+          throw response.getException();
+        }
+      } catch (Exception e) {
+        log.warn("An exception occurred when deleting an inactive shard", e);
+      }
+    }
+  }
+
+  private State state = State.STOPPED;
+
+  private final CoreContainer coreContainer;
+
+  private final DeleteActor deleteActor;
+
+  // Created in start(), torn down in stop(); null while stopped.
+  private ScheduledExecutorService executor;
+
+  // How often deleteInactiveSlices() runs; set via configure().
+  private long scheduleIntervalSeconds;
+
+  // Minimum age (since last state change) before an INACTIVE shard is deleted.
+  private long ttlSeconds;
+
+  // Upper bound on deletions per cycle; remaining shards wait for the next cycle.
+  private int maxDeletesPerCycle;
+
+  /** Constructor invoked via Reflection */
+  public InactiveShardRemover(final CoreContainer cc) {
+    this(cc, new DeleteActor(cc));
+  }
+
+  public InactiveShardRemover(final CoreContainer cc, final DeleteActor deleteActor) {
+    this.coreContainer = cc;
+    this.deleteActor = deleteActor;
+  }
+
+  @Override
+  public void configure(final InactiveShardRemoverConfig cfg) {
+    Objects.requireNonNull(cfg, "config must be specified");
+    cfg.validate();
+    this.scheduleIntervalSeconds = cfg.scheduleIntervalSeconds;
+    this.maxDeletesPerCycle = cfg.maxDeletesPerCycle;
+    this.ttlSeconds = cfg.ttlSeconds;
+  }
+
+  @Override
+  public String getName() {
+    return PLUGIN_NAME;
+  }
+
+  @Override
+  public State getState() {
+    return state;
+  }
+
+  @Override
+  public void start() throws Exception {
+    state = State.STARTING;
+    executor = Executors.newScheduledThreadPool(1, new SolrNamedThreadFactory(PLUGIN_NAME));
+    executor.scheduleAtFixedRate(
+        this::deleteInactiveSlices,
+        scheduleIntervalSeconds,
+        scheduleIntervalSeconds,
+        TimeUnit.SECONDS);
+    state = State.RUNNING;
+  }
+
+  @Override
+  public void stop() {
+    if (state == State.RUNNING) {
+      state = State.STOPPING;
+    }
+    // Shut down whenever an executor was created, not only when state == RUNNING.
+    // If start() failed after creating the executor, state is still STARTING and the
+    // previous RUNNING-only guard would leak the scheduler thread.
+    if (executor != null) {
+      ExecutorUtil.shutdownNowAndAwaitTermination(executor);
+      executor = null;
+    }
+    state = State.STOPPED;
+  }
+
+  /** Runs one removal cycle: finds expired INACTIVE shards cluster-wide and deletes
+   * up to {@link #maxDeletesPerCycle} of them. */
+  @VisibleForTesting
+  void deleteInactiveSlices() {
+    final ClusterState clusterState = coreContainer.getZkController().getClusterState();
+    Collection<Slice> inactiveSlices =
+        clusterState.getCollectionsMap().values().stream()
+            .flatMap(v -> collectInactiveSlices(v).stream())
+            .collect(Collectors.toSet());
+
+    if (log.isInfoEnabled()) {
+      log.info(
+          "Found {} inactive Shards to delete, {} will be deleted",
+          inactiveSlices.size(),
+          Math.min(inactiveSlices.size(), maxDeletesPerCycle));
+    }
+
+    inactiveSlices.stream().limit(maxDeletesPerCycle).forEach(this::deleteShard);
+  }
+
+  /** Returns the non-active slices of the collection that have passed their TTL. */
+  private Collection<Slice> collectInactiveSlices(final DocCollection docCollection) {
+    final Collection<Slice> slices = new HashSet<>(docCollection.getSlices());
+    slices.removeAll(docCollection.getActiveSlices());
+    return slices.stream().filter(this::isExpired).collect(Collectors.toSet());
+  }
+
+  private void deleteShard(final Slice s) {
+    deleteActor.delete(s);
+  }
+
+  /**
+   * An Inactive Shard is expired if it has not undergone a state change in the period of time
+   * defined by {@link InactiveShardRemover#ttlSeconds}. If it is expired, it is eligible for
+   * removal.
+   */
+  private boolean isExpired(final Slice slice) {
+
+    final String collectionName = slice.getCollection();
+    final String sliceName = slice.getName();
+
+    // Only INACTIVE shards (left behind by a split) are ever candidates.
+    if (slice.getState() != Slice.State.INACTIVE) {
+      return false;
+    }
+
+    // The state timestamp is written by the Overseer on state changes; without it the
+    // shard's age is unknown, so play safe and keep it.
+    final String lastChangeTimestamp = slice.getStr(ZkStateReader.STATE_TIMESTAMP_PROP);
+    if (lastChangeTimestamp == null || lastChangeTimestamp.isEmpty()) {
+      log.warn(
+          "Collection {} Shard {} has no last change timestamp and will not be deleted",
+          collectionName,
+          sliceName);
+      return false;
+    }
+
+    final long epochTimestampNs;
+    try {
+      epochTimestampNs = Long.parseLong(lastChangeTimestamp);
+    } catch (NumberFormatException e) {
+      log.warn(
+          "Collection {} Shard {} has an invalid last change timestamp and will not be deleted",
+          collectionName,
+          sliceName);
+      return false;
+    }
+
+    // Compare using the cluster's TimeSource so tests can control the clock.
+    long currentEpochTimeNs =
+        coreContainer.getZkController().getSolrCloudManager().getTimeSource().getEpochTimeNs();
+    long delta = TimeUnit.NANOSECONDS.toSeconds(currentEpochTimeNs - epochTimestampNs);
+
+    boolean expired = delta >= ttlSeconds;
+    if (log.isDebugEnabled()) {
+      log.debug(
+          "collection {} shard {} last state change {} seconds ago. Expired={}",
+          slice.getCollection(),
+          slice.getName(),
+          delta,
+          expired);
+    }
+    return expired;
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemoverConfig.java b/solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemoverConfig.java
new file mode 100644
index 00000000000..22465e82359
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemoverConfig.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cluster.maintenance;
+
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.annotation.JsonProperty;
+import org.apache.solr.common.util.ReflectMapWriter;
+
+/** Configuration bean for {@link InactiveShardRemover}, deserialized from the plugin config. */
+public class InactiveShardRemoverConfig implements ReflectMapWriter {
+
+  public static final long DEFAULT_SCHEDULE_INTERVAL_SECONDS = 900L; // 15 minutes
+
+  public static final long DEFAULT_TTL_SECONDS = 900L; // 15 minutes
+
+  public static final int DEFAULT_MAX_DELETES_PER_CYCLE = 20;
+
+  /** How often (seconds) the remover runs; must be &gt; 0. */
+  @JsonProperty public long scheduleIntervalSeconds;
+
+  /** Minimum time (seconds) a shard must be INACTIVE before deletion; 0 means immediately. */
+  @JsonProperty public long ttlSeconds;
+
+  /** Maximum number of shards deleted per cycle; must be &gt; 0. */
+  @JsonProperty public int maxDeletesPerCycle;
+
+  /** Default constructor required for deserialization */
+  public InactiveShardRemoverConfig() {
+    this(DEFAULT_SCHEDULE_INTERVAL_SECONDS, DEFAULT_TTL_SECONDS, DEFAULT_MAX_DELETES_PER_CYCLE);
+  }
+
+  public InactiveShardRemoverConfig(
+      final long scheduleIntervalSeconds, final long ttlSeconds, final int maxDeletesPerCycle) {
+    this.scheduleIntervalSeconds = scheduleIntervalSeconds;
+    this.ttlSeconds = ttlSeconds;
+    this.maxDeletesPerCycle = maxDeletesPerCycle;
+  }
+
+  /**
+   * Validates the configured values, throwing a BAD_REQUEST {@link SolrException} on the first
+   * invalid field. Note ttlSeconds may be 0 (delete as soon as a shard is INACTIVE) but not
+   * negative.
+   */
+  public void validate() {
+    if (scheduleIntervalSeconds <= 0) {
+      throw new SolrException(
+          SolrException.ErrorCode.BAD_REQUEST, "scheduleIntervalSeconds must be greater than 0");
+    }
+    if (ttlSeconds < 0) {
+      throw new SolrException(
+          SolrException.ErrorCode.BAD_REQUEST, "ttlSeconds must not be negative");
+    }
+    if (maxDeletesPerCycle <= 0) {
+      throw new SolrException(
+          SolrException.ErrorCode.BAD_REQUEST, "maxDeletesPerCycle must be greater than 0");
+    }
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/cluster/maintenance/package-info.java b/solr/core/src/java/org/apache/solr/cluster/maintenance/package-info.java
new file mode 100644
index 00000000000..3001cb775c3
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/cluster/maintenance/package-info.java
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Cluster Singleton plugins that are used to perform maintenance tasks within the cluster. */
+package org.apache.solr.cluster.maintenance;
diff --git a/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java b/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java
index 63bd18ea46d..91ab353be1b 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java
@@ -18,17 +18,12 @@ package org.apache.solr.cloud;
import java.io.IOException;
import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.cloud.DistributedQueue;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.CoreStatus;
-import org.apache.solr.cloud.overseer.OverseerAction;
-import org.apache.solr.common.MapWriter;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.Slice.State;
-import org.apache.solr.common.cloud.ZkNodeProps;
-import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.util.FileUtils;
import org.junit.After;
import org.junit.Before;
@@ -68,7 +63,7 @@ public class DeleteShardTest extends SolrCloudTestCase {
CollectionAdminRequest.deleteShard(collection, "shard1")
.process(cluster.getSolrClient()));
- setSliceState(collection, "shard1", Slice.State.INACTIVE);
+ ShardTestUtil.setSliceState(cluster, collection, "shard1", Slice.State.INACTIVE);
// Can delete an INACTIVE shard
CollectionAdminRequest.deleteShard(collection, "shard1").process(cluster.getSolrClient());
@@ -76,46 +71,12 @@ public class DeleteShardTest extends SolrCloudTestCase {
"Expected 'shard1' to be removed", collection, (n, c) -> c.getSlice("shard1") == null);
// Can delete a shard under construction
- setSliceState(collection, "shard2", Slice.State.CONSTRUCTION);
+ ShardTestUtil.setSliceState(cluster, collection, "shard2", Slice.State.CONSTRUCTION);
CollectionAdminRequest.deleteShard(collection, "shard2").process(cluster.getSolrClient());
waitForState(
"Expected 'shard2' to be removed", collection, (n, c) -> c.getSlice("shard2") == null);
}
- protected void setSliceState(String collection, String slice, State state) throws Exception {
-
- // TODO can this be encapsulated better somewhere?
- MapWriter m =
- ew ->
- ew.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower())
- .put(slice, state.toString())
- .put(ZkStateReader.COLLECTION_PROP, collection);
- final Overseer overseer = cluster.getOpenOverseer();
- if (overseer.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
- overseer
- .getDistributedClusterStateUpdater()
- .doSingleStateUpdate(
- DistributedClusterStateUpdater.MutatingCommand.SliceUpdateShardState,
- new ZkNodeProps(m),
- cluster.getOpenOverseer().getSolrCloudManager(),
- cluster.getOpenOverseer().getZkStateReader());
- } else {
- DistributedQueue inQueue =
- cluster
- .getJettySolrRunner(0)
- .getCoreContainer()
- .getZkController()
- .getOverseer()
- .getStateUpdateQueue();
- inQueue.offer(m);
- }
-
- waitForState(
- "Expected shard " + slice + " to be in state " + state,
- collection,
- (n, c) -> c.getSlice(slice).getState() == state);
- }
-
@Test
public void testDirectoryCleanupAfterDeleteShard() throws IOException, SolrServerException {
@@ -162,4 +123,13 @@ public class DeleteShardTest extends SolrCloudTestCase {
"Instance directory still exists", FileUtils.fileExists(coreStatus.getInstanceDirectory()));
assertTrue("Data directory still exists", FileUtils.fileExists(coreStatus.getDataDirectory()));
}
+
+  /** Sets the slice's state via {@link ShardTestUtil} and waits until the change is visible. */
+  private void setSliceState(String collectionName, String shardId, Slice.State state)
+      throws Exception {
+    ShardTestUtil.setSliceState(cluster, collectionName, shardId, state);
+    waitForState(
+        "Expected shard " + shardId + " to be in state " + state,
+        collectionName,
+        (n, c) -> c.getSlice(shardId).getState() == state);
+  }
}
diff --git a/solr/core/src/test/org/apache/solr/cluster/maintenance/InactiveShardRemoverTest.java b/solr/core/src/test/org/apache/solr/cluster/maintenance/InactiveShardRemoverTest.java
new file mode 100644
index 00000000000..fb8f2b95715
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cluster/maintenance/InactiveShardRemoverTest.java
@@ -0,0 +1,225 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cluster.maintenance;
+
+import static org.apache.solr.client.solrj.SolrRequest.METHOD.POST;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.V2Request;
+import org.apache.solr.client.solrj.request.beans.PluginMeta;
+import org.apache.solr.client.solrj.response.V2Response;
+import org.apache.solr.cloud.ShardTestUtil;
+import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.util.TimeSource;
+import org.apache.solr.core.CoreContainer;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class InactiveShardRemoverTest extends SolrCloudTestCase {
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    configureCluster(1)
+        .addConfig(
+            "conf", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
+        .configure();
+  }
+
+  @Test
+  public void testDeleteInactiveShard() throws Exception {
+
+    // ttl=0 so the shard is eligible for deletion as soon as it becomes INACTIVE
+    addPlugin(new InactiveShardRemoverConfig(1, 0, 2));
+    try {
+      final String collectionName = "testDeleteInactiveShard";
+      createCollection(collectionName, 1);
+
+      final String sliceName =
+          new ArrayList<>(getCollectionState(collectionName).getSlices()).get(0).getName();
+      ShardTestUtil.setSliceState(cluster, collectionName, sliceName, Slice.State.INACTIVE);
+
+      waitForState(
+          "Waiting for inactive shard to be deleted",
+          collectionName,
+          clusterShape(0, 0),
+          5,
+          TimeUnit.SECONDS);
+    } finally {
+      removePlugin();
+    }
+  }
+
+  @Test
+  public void testTtl() throws Exception {
+
+    final int ttlSeconds = 1 + random().nextInt(5);
+    final TimeSource timeSource = cluster.getOpenOverseer().getSolrCloudManager().getTimeSource();
+
+    addPlugin(new InactiveShardRemoverConfig(1, ttlSeconds, 1));
+    try {
+      final String collectionName = "testTtl";
+      createCollection(collectionName, 1);
+
+      final String sliceName =
+          new ArrayList<>(getCollectionState(collectionName).getSlices()).get(0).getName();
+      ShardTestUtil.setSliceState(cluster, collectionName, sliceName, Slice.State.INACTIVE);
+      waitForState(
+          "Expected shard " + sliceName + " to be in state " + Slice.State.INACTIVE,
+          collectionName,
+          (n, c) -> c.getSlice(sliceName).getState() == Slice.State.INACTIVE);
+
+      final long ttlStart = timeSource.getTimeNs();
+
+      waitForState(
+          "Waiting for InactiveShardRemover to delete inactive shard",
+          collectionName,
+          clusterShape(0, 0),
+          ttlSeconds + 5,
+          TimeUnit.SECONDS);
+
+      final long ttlEnd = timeSource.getTimeNs();
+      final long ttlPeriodSeconds = TimeUnit.NANOSECONDS.toSeconds(ttlEnd - ttlStart);
+
+      // The shard must not have been removed before its TTL elapsed
+      assertTrue(ttlPeriodSeconds >= ttlSeconds);
+    } finally {
+      removePlugin();
+    }
+  }
+
+  @Test
+  public void testMaxShardsToDeletePerCycle() throws Exception {
+
+    final CoreContainer cc = cluster.getOpenOverseer().getCoreContainer();
+
+    // Drive the remover directly (not via the plugin registry) so each cycle is explicit
+    final int maxDeletesPerCycle = 5;
+    final InactiveShardRemover remover = new InactiveShardRemover(cc);
+    remover.configure(new InactiveShardRemoverConfig(1, 0, maxDeletesPerCycle));
+
+    // Remove across multiple collections
+    final String collection1 = "testMaxShardsToDeletePerCycle-1";
+    final String collection2 = "testMaxShardsToDeletePerCycle-2";
+    final int shardsPerCollection = 10;
+    final int totalShards = 2 * shardsPerCollection;
+
+    createCollection(collection1, shardsPerCollection);
+    createCollection(collection2, shardsPerCollection);
+
+    setAllShardsInactive(collection1);
+    setAllShardsInactive(collection2);
+
+    int cycle = 0;
+    int shardsDeleted = 0;
+    while (shardsDeleted < totalShards) {
+      cycle++;
+      remover.deleteInactiveSlices();
+      DocCollection coll1 = getCollectionState(collection1);
+      DocCollection coll2 = getCollectionState(collection2);
+
+      // Each cycle must delete exactly maxDeletesPerCycle shards in total
+      int remainingShards = coll1.getSlices().size() + coll2.getSlices().size();
+      assertEquals(
+          "unexpected remaining shard count after cycle "
+              + cycle
+              + "; coll1="
+              + coll1
+              + " coll2="
+              + coll2,
+          totalShards - maxDeletesPerCycle * cycle,
+          remainingShards);
+      shardsDeleted = totalShards - remainingShards;
+    }
+  }
+
+  @Test
+  public void testConfigValidation() {
+
+    try {
+      new InactiveShardRemoverConfig(0, 0, 1).validate();
+      fail("Expected validation error for scheduleIntervalSeconds=0");
+    } catch (SolrException e) {
+      assertEquals(SolrException.ErrorCode.BAD_REQUEST.code, e.code());
+    }
+
+    try {
+      new InactiveShardRemoverConfig(1, 0, 0).validate();
+      fail("Expected validation error for maxDeletesPerCycle=0");
+    } catch (SolrException e) {
+      assertEquals(SolrException.ErrorCode.BAD_REQUEST.code, e.code());
+    }
+  }
+
+  /** Registers the InactiveShardRemover plugin with the given config via the cluster plugin API. */
+  private static void addPlugin(final InactiveShardRemoverConfig config)
+      throws SolrServerException, IOException {
+    PluginMeta plugin = pluginMeta(config);
+    pluginRequest(Collections.singletonMap("add", plugin));
+  }
+
+  private static void removePlugin() throws SolrServerException, IOException {
+    pluginRequest(Collections.singletonMap("remove", InactiveShardRemover.PLUGIN_NAME));
+  }
+
+  private static void pluginRequest(Map<String, Object> payload)
+      throws SolrServerException, IOException {
+    V2Request req =
+        new V2Request.Builder("/cluster/plugin").withMethod(POST).withPayload(payload).build();
+    V2Response rsp = req.process(cluster.getSolrClient());
+    assertEquals(0, rsp.getStatus());
+  }
+
+  private static PluginMeta pluginMeta(final InactiveShardRemoverConfig config) {
+    PluginMeta plugin = pluginMeta();
+    plugin.config = config;
+    return plugin;
+  }
+
+  private static PluginMeta pluginMeta() {
+    PluginMeta plugin = new PluginMeta();
+    plugin.klass = InactiveShardRemover.class.getName();
+    plugin.name = InactiveShardRemover.PLUGIN_NAME;
+    return plugin;
+  }
+
+  private void createCollection(final String collectionName, final int numShards)
+      throws SolrServerException, IOException {
+    CollectionAdminRequest.createCollection(collectionName, "conf", numShards, 1)
+        .process(cluster.getSolrClient());
+
+    cluster.waitForActiveCollection(collectionName, numShards, numShards);
+  }
+
+  /** Marks every slice of the collection INACTIVE and waits for each change to be visible. */
+  private void setAllShardsInactive(final String collectionName) {
+    DocCollection collection = getCollectionState(collectionName);
+    collection.getSlices().stream()
+        .filter(s -> s.getState() != Slice.State.INACTIVE)
+        .forEach(
+            s -> {
+              try {
+                ShardTestUtil.setSliceState(
+                    cluster, s.getCollection(), s.getName(), Slice.State.INACTIVE);
+                waitForState(
+                    "Expected shard " + s + " to be in state " + Slice.State.INACTIVE,
+                    collection.getName(),
+                    (n, c) -> c.getSlice(s.getName()).getState() == Slice.State.INACTIVE);
+              } catch (Exception e) {
+                throw new RuntimeException(e);
+              }
+            });
+  }
+}
diff --git a/solr/solr-ref-guide/modules/configuration-guide/pages/cluster-singleton-plugins.adoc b/solr/solr-ref-guide/modules/configuration-guide/pages/cluster-singleton-plugins.adoc
new file mode 100644
index 00000000000..5afdcb0d18a
--- /dev/null
+++ b/solr/solr-ref-guide/modules/configuration-guide/pages/cluster-singleton-plugins.adoc
@@ -0,0 +1,93 @@
+= Cluster Singleton Plugins
+:toclevels: 3
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+The Solr distribution includes some Cluster Singleton plugins.
+Additional plugins can be added; they must implement the `ClusterSingleton` interface.
+The configuration entry may also contain a `config` element if the plugin implements the `ConfigurablePlugin` interface.
+
+== Plugin Configuration
+Cluster Singleton plugins can be configured in two ways, either by using the xref:cluster-plugins.adoc[cluster plugins API] or by declaring them in xref:configuring-solr-xml.adoc[solr.xml].
+
+All cluster plugins must be declared using the same method. It is not possible to declare some plugins in solr.xml and use the cluster plugins API to manage other plugins.
+
+== Cluster Singleton Plugins Included with Solr
+Solr includes the following plugins out-of-the-box.
+
+=== Inactive Shard Remover
+This plugin will periodically find and delete shards that have an INACTIVE shard state.
+Shards become INACTIVE when they are split, and the documents they contain are now managed by two or more sub-shards.
+
+Configuration using the cluster plugin API
+[source,bash]
+----
+curl -X POST -H 'Content-type: application/json' -d '{
+ "add":{
+ "name": ".inactive-shard-remover",
+ "class": "org.apache.solr.cluster.maintenance.InactiveShardRemover",
+ "config": {
+ "scheduleIntervalSeconds": 3600,
+ "ttlSeconds": 1800,
+ "maxDeletesPerCycle": 20
+ }
+ }}'
+ http://localhost:8983/api/cluster/plugin
+----
+
+Configuration in solr.xml
+[source,xml]
+----
+<clusterSingleton name=".inactive-shard-remover" class="org.apache.solr.cluster.maintenance.InactiveShardRemover">
+ <long name="scheduleIntervalSeconds">3600</long>
+ <long name="ttlSeconds">1800</long>
+ <int name="maxDeletesPerCycle">20</int>
+</clusterSingleton>
+----
+
+NOTE: The Inactive Shard Remover plugin configuration MUST use the predefined name `.inactive-shard-remover`.
+There can be only one (or none) of these configurations defined.
+
+==== Configuration
+
+`scheduleIntervalSeconds`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `900` Seconds
+|===
++
+This value determines how often the inactive shard remover will run.
+
+`ttlSeconds`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `900` Seconds
+|===
++
+This value defines the minimum period of time that a Shard must be INACTIVE before it is considered for deletion.
+
+`maxDeletesPerCycle`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `20`
+|===
++
+This is the maximum number of shards that will be deleted each time the inactive shard remover runs.
+If there are more Shards that could be deleted, they will be considered during the next cycle.
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/ShardTestUtil.java b/solr/test-framework/src/java/org/apache/solr/cloud/ShardTestUtil.java
new file mode 100644
index 00000000000..4faf9708098
--- /dev/null
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/ShardTestUtil.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.cloud;
+
+import org.apache.solr.client.solrj.cloud.DistributedQueue;
+import org.apache.solr.cloud.overseer.OverseerAction;
+import org.apache.solr.common.MapWriter;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.cloud.ZkNodeProps;
+import org.apache.solr.common.cloud.ZkStateReader;
+
+/** Test utilities for manipulating shard (slice) state in a {@link MiniSolrCloudCluster}. */
+public final class ShardTestUtil {
+
+  /** Static utility class; not instantiable. */
+  private ShardTestUtil() {}
+
+  /**
+   * Asks the Overseer (distributed or queue-based) to set the given slice to {@code state}.
+   * This submits the state change but does not wait for it to be reflected in cluster state;
+   * callers should wait (e.g. {@code waitForState}) if they need the change to be visible.
+   */
+  public static void setSliceState(
+      MiniSolrCloudCluster cluster, String collection, String slice, Slice.State state)
+      throws Exception {
+
+    MapWriter m =
+        ew ->
+            ew.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower())
+                .put(slice, state.toString())
+                .put(ZkStateReader.COLLECTION_PROP, collection);
+    final Overseer overseer = cluster.getOpenOverseer();
+    if (overseer.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
+      // Distributed updates are applied directly, without the Overseer queue
+      overseer
+          .getDistributedClusterStateUpdater()
+          .doSingleStateUpdate(
+              DistributedClusterStateUpdater.MutatingCommand.SliceUpdateShardState,
+              new ZkNodeProps(m),
+              cluster.getOpenOverseer().getSolrCloudManager(),
+              cluster.getOpenOverseer().getZkStateReader());
+    } else {
+      DistributedQueue inQueue =
+          cluster
+              .getJettySolrRunner(0)
+              .getCoreContainer()
+              .getZkController()
+              .getOverseer()
+              .getStateUpdateQueue();
+      inQueue.offer(m);
+    }
+  }
+}