You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ge...@apache.org on 2024/02/06 15:57:19 UTC
(solr) branch branch_9_5 updated: SOLR-17149: Fix backup/restore for large collections. (#2243)
This is an automated email from the ASF dual-hosted git repository.
gerlowskija pushed a commit to branch branch_9_5
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9_5 by this push:
new 20982b0d96a SOLR-17149: Fix backup/restore for large collections. (#2243)
20982b0d96a is described below
commit 20982b0d96a12dcbbf1b16e58bbe3013a7a8bd79
Author: Pierre Salagnac <82...@users.noreply.github.com>
AuthorDate: Tue Feb 6 16:08:48 2024 +0100
SOLR-17149: Fix backup/restore for large collections. (#2243)
9.4 introduced a new executor used for "expensive" operations that
relied on a bounded queue. This caused backup operations to fail on
collections large enough to hit this queue's capacity.
This commit fixes this problem by making the executor queue unbounded.
---------
Co-authored-by: Pierre Salagnac <ps...@salesforce.com>
Co-authored-by: Jason Gerlowski <ge...@apache.org>
---
solr/CHANGES.txt | 2 ++
solr/core/src/java/org/apache/solr/core/CoreContainer.java | 3 ++-
.../src/java/org/apache/solr/handler/admin/CoreAdminHandler.java | 4 +++-
.../cloud/api/collections/LocalFSCloudIncrementalBackupTest.java | 2 +-
.../src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java | 2 +-
.../cloud/api/collections/HdfsCloudIncrementalBackupTest.java | 2 +-
.../src/test/org/apache/solr/s3/S3IncrementalBackupTest.java | 2 +-
.../solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java | 9 +++++++--
.../cloud/api/collections/AbstractIncrementalBackupTest.java | 7 +++++--
9 files changed, 23 insertions(+), 10 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index b4ae5e496bc..2d704dd6ffb 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -110,6 +110,8 @@ Bug Fixes
* SOLR-17112: bin/solr script doesn't do ps properly on some systems. (Vincenzo D'Amore via Eric Pugh)
+* SOLR-17149: Backups on collections with too many shards fail due to restrictive Executor queue size (Pierre Salagnac via Jason Gerlowski)
+
Dependency Upgrades
---------------------
* PR#2055: Update org.mockito:mockito* from v5.5.0 to v5.8.0 (solrbot)
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index 3d7dff9ed13..86cb52f0710 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -414,7 +414,8 @@ public class CoreContainer {
new OrderedExecutor(
cfg.getReplayUpdatesThreads(),
ExecutorUtil.newMDCAwareCachedThreadPool(
- cfg.getReplayUpdatesThreads(),
+ cfg.getReplayUpdatesThreads(), // thread count
+ cfg.getReplayUpdatesThreads(), // queue size
new SolrNamedThreadFactory("replayUpdatesExecutor")));
this.appHandlersByConfigSetId = new JerseyAppHandlerCache();
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
index ca67a355b44..d2d823eb190 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
@@ -443,7 +443,9 @@ public class CoreAdminHandler extends RequestHandlerBase implements PermissionNa
// We keep the number number of max threads very low to have throttling for expensive tasks
private ExecutorService expensiveExecutor =
ExecutorUtil.newMDCAwareCachedThreadPool(
- 5, new SolrNamedThreadFactory("parallelCoreAdminAPIExpensiveExecutor"));
+ 5,
+ Integer.MAX_VALUE,
+ new SolrNamedThreadFactory("parallelCoreAdminAPIExpensiveExecutor"));
public CoreAdminAsyncTracker() {
HashMap<String, Map<String, TaskObject>> map = new HashMap<>(3, 1.0f);
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java
index 06b9ff3e0df..588a990560b 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java
@@ -80,7 +80,7 @@ public class LocalFSCloudIncrementalBackupTest extends AbstractIncrementalBackup
backupLocation = createTempDir("mybackup").toAbsolutePath().toString();
}
- configureCluster(NUM_SHARDS) // nodes
+ configureCluster(NUM_NODES) // nodes
.addConfig(
"conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
.withSolrXml(SOLR_XML.replace("ALLOWPATHS_TEMPLATE_VAL", backupLocation))
diff --git a/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java b/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java
index ac0b15a0187..8dd8f8f4f42 100644
--- a/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java
+++ b/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java
@@ -73,7 +73,7 @@ public class GCSIncrementalBackupTest extends AbstractIncrementalBackupTest {
@BeforeClass
public static void setupClass() throws Exception {
- configureCluster(NUM_SHARDS) // nodes
+ configureCluster(NUM_NODES) // nodes
.addConfig("conf1", getFile("conf/solrconfig.xml").getParentFile().toPath())
.withSolrXml(SOLR_XML)
.configure();
diff --git a/solr/modules/hdfs/src/test/org/apache/solr/hdfs/cloud/api/collections/HdfsCloudIncrementalBackupTest.java b/solr/modules/hdfs/src/test/org/apache/solr/hdfs/cloud/api/collections/HdfsCloudIncrementalBackupTest.java
index 9ae15f86e0b..5d78be2a423 100644
--- a/solr/modules/hdfs/src/test/org/apache/solr/hdfs/cloud/api/collections/HdfsCloudIncrementalBackupTest.java
+++ b/solr/modules/hdfs/src/test/org/apache/solr/hdfs/cloud/api/collections/HdfsCloudIncrementalBackupTest.java
@@ -122,7 +122,7 @@ public class HdfsCloudIncrementalBackupTest extends AbstractIncrementalBackupTes
System.setProperty("solr.hdfs.home", hdfsUri + "/solr");
useFactory("solr.StandardDirectoryFactory");
- configureCluster(NUM_SHARDS) // nodes
+ configureCluster(NUM_NODES) // nodes
.addConfig(
"conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
.withSolrXml(SOLR_XML)
diff --git a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java
index d76daa270f1..ffe0a7dfb8b 100644
--- a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java
+++ b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java
@@ -93,7 +93,7 @@ public class S3IncrementalBackupTest extends AbstractIncrementalBackupTest {
System.setProperty("aws.accessKeyId", "foo");
System.setProperty("aws.secretAccessKey", "bar");
- configureCluster(NUM_SHARDS) // nodes
+ configureCluster(NUM_NODES) // nodes
.addConfig("conf1", getFile("conf/solrconfig.xml").getParentFile().toPath())
.withSolrXml(
SOLR_XML
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java b/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java
index cfa7b176d50..16d67c7c15d 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java
@@ -168,9 +168,14 @@ public class ExecutorUtil {
}
public static ExecutorService newMDCAwareCachedThreadPool(
- int maxThreads, ThreadFactory threadFactory) {
+ int maxThreads, int queueCapacity, ThreadFactory threadFactory) {
return new MDCAwareThreadPoolExecutor(
- 0, maxThreads, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<>(maxThreads), threadFactory);
+ 0,
+ maxThreads,
+ 60L,
+ TimeUnit.SECONDS,
+ new LinkedBlockingQueue<>(queueCapacity),
+ threadFactory);
}
@SuppressForbidden(reason = "class customizes ThreadPoolExecutor so it can be used instead")
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
index 03acadab7ff..79a5f59d12f 100644
--- a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
@@ -88,7 +88,9 @@ public abstract class AbstractIncrementalBackupTest extends SolrCloudTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static long docsSeed; // see indexDocs()
+ protected static final int NUM_NODES = 2;
protected static final int NUM_SHARDS = 2; // granted we sometimes shard split to get more
+ protected static final int LARGE_NUM_SHARDS = 11; // Periodically chosen via randomization
protected static final int REPL_FACTOR = 2;
protected static final String BACKUPNAME_PREFIX = "mytestbackup";
protected static final String BACKUP_REPO_NAME = "trackingBackupRepository";
@@ -129,10 +131,11 @@ public abstract class AbstractIncrementalBackupTest extends SolrCloudTestCase {
setTestSuffix("testbackupincsimple");
final String backupCollectionName = getCollectionName();
final String restoreCollectionName = backupCollectionName + "_restore";
+ final int randomizedNumShards = rarely() ? LARGE_NUM_SHARDS : NUM_SHARDS;
CloudSolrClient solrClient = cluster.getSolrClient();
- CollectionAdminRequest.createCollection(backupCollectionName, "conf1", NUM_SHARDS, 1)
+ CollectionAdminRequest.createCollection(backupCollectionName, "conf1", randomizedNumShards, 1)
.process(solrClient);
int totalIndexedDocs = indexDocs(backupCollectionName, true);
String backupName = BACKUPNAME_PREFIX + testSuffix;
@@ -166,7 +169,7 @@ public abstract class AbstractIncrementalBackupTest extends SolrCloudTestCase {
log.info("Created backup with {} docs, took {}ms", numFound, timeTaken);
t = System.nanoTime();
- randomlyPrecreateRestoreCollection(restoreCollectionName, "conf1", NUM_SHARDS, 1);
+ randomlyPrecreateRestoreCollection(restoreCollectionName, "conf1", randomizedNumShards, 1);
CollectionAdminRequest.restoreCollection(restoreCollectionName, backupName)
.setBackupId(0)
.setLocation(backupLocation)