You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ge...@apache.org on 2024/02/06 15:08:56 UTC

(solr) branch main updated: SOLR-17149: Fix backup/restore for large collections. (#2243)

This is an automated email from the ASF dual-hosted git repository.

gerlowskija pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new c98a8f655ce SOLR-17149: Fix backup/restore for large collections. (#2243)
c98a8f655ce is described below

commit c98a8f655ceca19ff641bbf675dad74b42df1264
Author: Pierre Salagnac <82...@users.noreply.github.com>
AuthorDate: Tue Feb 6 16:08:48 2024 +0100

    SOLR-17149: Fix backup/restore for large collections. (#2243)
    
    9.4 introduced a new executor used for "expensive" operations, that
    relied on a bounded queue.  This caused backup operations to fail on
    collections large enough to hit this queue capacity.
    
    This commit fixes this problem by making the executor queue unbounded.
    
    ---------
    
    Co-authored-by: Pierre Salagnac <ps...@salesforce.com>
    Co-authored-by: Jason Gerlowski <ge...@apache.org>
---
 solr/CHANGES.txt                                                 | 2 ++
 solr/core/src/java/org/apache/solr/core/CoreContainer.java       | 3 ++-
 .../src/java/org/apache/solr/handler/admin/CoreAdminHandler.java | 4 +++-
 .../cloud/api/collections/LocalFSCloudIncrementalBackupTest.java | 2 +-
 .../src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java   | 2 +-
 .../cloud/api/collections/HdfsCloudIncrementalBackupTest.java    | 2 +-
 .../src/test/org/apache/solr/s3/S3IncrementalBackupTest.java     | 2 +-
 .../solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java | 9 +++++++--
 .../cloud/api/collections/AbstractIncrementalBackupTest.java     | 7 +++++--
 9 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index a6b4567eaa7..f7b8018b52b 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -222,6 +222,8 @@ Bug Fixes
 
 * SOLR-17112: bin/solr script doesn't do ps properly on some systems. (Vincenzo D'Amore via Eric Pugh)
 
+* SOLR-17149: Backups on collections with too many shards fail due to restrictive Executor queue size (Pierre Salagnac via Jason Gerlowski)
+
 Dependency Upgrades
 ---------------------
 * SOLR-17012: Update Apache Hadoop to 3.3.6 and Apache Curator to 5.5.0 (Kevin Risden)
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index 1da0802cf08..8af5ecc272e 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -412,7 +412,8 @@ public class CoreContainer {
         new OrderedExecutor(
             cfg.getReplayUpdatesThreads(),
             ExecutorUtil.newMDCAwareCachedThreadPool(
-                cfg.getReplayUpdatesThreads(),
+                cfg.getReplayUpdatesThreads(), // thread count
+                cfg.getReplayUpdatesThreads(), // queue size
                 new SolrNamedThreadFactory("replayUpdatesExecutor")));
     this.appHandlersByConfigSetId = new JerseyAppHandlerCache();
 
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
index ca67a355b44..d2d823eb190 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
@@ -443,7 +443,9 @@ public class CoreAdminHandler extends RequestHandlerBase implements PermissionNa
     // We keep the number number of max threads very low to have throttling for expensive tasks
     private ExecutorService expensiveExecutor =
         ExecutorUtil.newMDCAwareCachedThreadPool(
-            5, new SolrNamedThreadFactory("parallelCoreAdminAPIExpensiveExecutor"));
+            5,
+            Integer.MAX_VALUE,
+            new SolrNamedThreadFactory("parallelCoreAdminAPIExpensiveExecutor"));
 
     public CoreAdminAsyncTracker() {
       HashMap<String, Map<String, TaskObject>> map = new HashMap<>(3, 1.0f);
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java
index 869c90fd67d..06a664aef5a 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java
@@ -79,7 +79,7 @@ public class LocalFSCloudIncrementalBackupTest extends AbstractIncrementalBackup
       backupLocation = createTempDir("mybackup").toAbsolutePath().toString();
     }
 
-    configureCluster(NUM_SHARDS) // nodes
+    configureCluster(NUM_NODES) // nodes
         .addConfig(
             "conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
         .withSolrXml(SOLR_XML.replace("ALLOWPATHS_TEMPLATE_VAL", backupLocation))
diff --git a/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java b/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java
index 83b93b2ce58..4b145c601b4 100644
--- a/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java
+++ b/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java
@@ -72,7 +72,7 @@ public class GCSIncrementalBackupTest extends AbstractIncrementalBackupTest {
   @BeforeClass
   public static void setupClass() throws Exception {
 
-    configureCluster(NUM_SHARDS) // nodes
+    configureCluster(NUM_NODES) // nodes
         .addConfig("conf1", getFile("conf/solrconfig.xml").getParentFile().toPath())
         .withSolrXml(SOLR_XML)
         .configure();
diff --git a/solr/modules/hdfs/src/test/org/apache/solr/hdfs/cloud/api/collections/HdfsCloudIncrementalBackupTest.java b/solr/modules/hdfs/src/test/org/apache/solr/hdfs/cloud/api/collections/HdfsCloudIncrementalBackupTest.java
index aed608ef37b..508e3c42ee4 100644
--- a/solr/modules/hdfs/src/test/org/apache/solr/hdfs/cloud/api/collections/HdfsCloudIncrementalBackupTest.java
+++ b/solr/modules/hdfs/src/test/org/apache/solr/hdfs/cloud/api/collections/HdfsCloudIncrementalBackupTest.java
@@ -121,7 +121,7 @@ public class HdfsCloudIncrementalBackupTest extends AbstractIncrementalBackupTes
     System.setProperty("solr.hdfs.home", hdfsUri + "/solr");
     useFactory("solr.StandardDirectoryFactory");
 
-    configureCluster(NUM_SHARDS) // nodes
+    configureCluster(NUM_NODES) // nodes
         .addConfig(
             "conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
         .withSolrXml(SOLR_XML)
diff --git a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java
index 7214fe6418b..d10322253cd 100644
--- a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java
+++ b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java
@@ -94,7 +94,7 @@ public class S3IncrementalBackupTest extends AbstractIncrementalBackupTest {
 
     AbstractS3ClientTest.setS3ConfFile();
 
-    configureCluster(NUM_SHARDS) // nodes
+    configureCluster(NUM_NODES) // nodes
         .addConfig("conf1", getFile("conf/solrconfig.xml").getParentFile().toPath())
         .withSolrXml(
             SOLR_XML
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java b/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java
index f0d1f8bc698..c97658efebd 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java
@@ -181,9 +181,14 @@ public class ExecutorUtil {
   }
 
   public static ExecutorService newMDCAwareCachedThreadPool(
-      int maxThreads, ThreadFactory threadFactory) {
+      int maxThreads, int queueCapacity, ThreadFactory threadFactory) {
     return new MDCAwareThreadPoolExecutor(
-        0, maxThreads, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<>(maxThreads), threadFactory);
+        0,
+        maxThreads,
+        60L,
+        TimeUnit.SECONDS,
+        new LinkedBlockingQueue<>(queueCapacity),
+        threadFactory);
   }
 
   @SuppressForbidden(reason = "class customizes ThreadPoolExecutor so it can be used instead")
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
index 03acadab7ff..79a5f59d12f 100644
--- a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
@@ -88,7 +88,9 @@ public abstract class AbstractIncrementalBackupTest extends SolrCloudTestCase {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
   private static long docsSeed; // see indexDocs()
+  protected static final int NUM_NODES = 2;
   protected static final int NUM_SHARDS = 2; // granted we sometimes shard split to get more
+  protected static final int LARGE_NUM_SHARDS = 11; // Periodically chosen via randomization
   protected static final int REPL_FACTOR = 2;
   protected static final String BACKUPNAME_PREFIX = "mytestbackup";
   protected static final String BACKUP_REPO_NAME = "trackingBackupRepository";
@@ -129,10 +131,11 @@ public abstract class AbstractIncrementalBackupTest extends SolrCloudTestCase {
     setTestSuffix("testbackupincsimple");
     final String backupCollectionName = getCollectionName();
     final String restoreCollectionName = backupCollectionName + "_restore";
+    final int randomizedNumShards = rarely() ? LARGE_NUM_SHARDS : NUM_SHARDS;
 
     CloudSolrClient solrClient = cluster.getSolrClient();
 
-    CollectionAdminRequest.createCollection(backupCollectionName, "conf1", NUM_SHARDS, 1)
+    CollectionAdminRequest.createCollection(backupCollectionName, "conf1", randomizedNumShards, 1)
         .process(solrClient);
     int totalIndexedDocs = indexDocs(backupCollectionName, true);
     String backupName = BACKUPNAME_PREFIX + testSuffix;
@@ -166,7 +169,7 @@ public abstract class AbstractIncrementalBackupTest extends SolrCloudTestCase {
       log.info("Created backup with {} docs, took {}ms", numFound, timeTaken);
 
       t = System.nanoTime();
-      randomlyPrecreateRestoreCollection(restoreCollectionName, "conf1", NUM_SHARDS, 1);
+      randomlyPrecreateRestoreCollection(restoreCollectionName, "conf1", randomizedNumShards, 1);
       CollectionAdminRequest.restoreCollection(restoreCollectionName, backupName)
           .setBackupId(0)
           .setLocation(backupLocation)