You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by tf...@apache.org on 2023/06/28 21:59:04 UTC

[solr] branch branch_9x updated: SOLR-16844: Optionally skip backup of configset files (#1733)

This is an automated email from the ASF dual-hosted git repository.

tflobbe pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 361cee3fab1 SOLR-16844: Optionally skip backup of configset files (#1733)
361cee3fab1 is described below

commit 361cee3fab1a3da6a676988d12aed670de030e76
Author: Tomas Eduardo Fernandez Lobbe <tf...@apache.org>
AuthorDate: Wed Jun 28 14:52:14 2023 -0700

    SOLR-16844: Optionally skip backup of configset files (#1733)
---
 solr/CHANGES.txt                                   |   2 +
 .../solr/cloud/api/collections/BackupCmd.java      |   9 +-
 .../solr/cloud/api/collections/RestoreCmd.java     |   6 +-
 .../org/apache/solr/core/backup/BackupManager.java |   7 +-
 .../admin/api/CreateCollectionBackupAPI.java       |   3 +
 .../pages/collection-management.adoc               |   9 ++
 .../solrj/request/CollectionAdminRequest.java      |  18 ++++
 .../apache/solr/common/params/CoreAdminParams.java |   3 +
 .../collections/AbstractIncrementalBackupTest.java | 108 +++++++++++++++++++++
 .../apache/solr/core/TrackingBackupRepository.java |  15 +++
 10 files changed, 174 insertions(+), 6 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 1e43fe964ba..2706bb1b4cc 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -27,6 +27,8 @@ New Features
 
 * SOLR-16827: Add min/max scaling to the reranker (Joel Bernstein)
 
+* SOLR-16844: Added new parameter `backupConfigset` to collection Backup to optionally skip backing up configsets. (Tomás Fernández Löbbe)
+
 Improvements
 ---------------------
 
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/BackupCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/BackupCmd.java
index 5499767d69d..5d4aaf7fd79 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/BackupCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/BackupCmd.java
@@ -84,6 +84,7 @@ public class BackupCmd implements CollApiCmds.CollectionApiCommand {
     String backupName = message.getStr(NAME);
     String repo = message.getStr(CoreAdminParams.BACKUP_REPOSITORY);
     boolean incremental = message.getBool(CoreAdminParams.BACKUP_INCREMENTAL, true);
+    boolean backupConfigset = message.getBool(CoreAdminParams.BACKUP_CONFIGSET, true);
     String configName =
         ccc.getSolrCloudManager()
             .getClusterStateProvider()
@@ -145,8 +146,12 @@ public class BackupCmd implements CollApiCmds.CollectionApiCommand {
 
       log.info("Starting to backup ZK data for backupName={}", backupName);
 
-      // Download the configs
-      backupMgr.downloadConfigDir(configName, cc.getConfigSetService());
+      backupMgr.createZkStateDir();
+
+      if (backupConfigset) {
+        // Download the configs
+        backupMgr.downloadConfigDir(configName, cc.getConfigSetService());
+      }
 
       // Save the collection's state. Can be part of the monolithic clusterstate.json or a
       // individual state.json. Since we don't want to distinguish we extract the state and back it
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java
index 11bff6e23d5..2140078ed1e 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java
@@ -321,12 +321,14 @@ public class RestoreCmd implements CollApiCmds.CollectionApiCommand {
         ConfigSetService configSetService)
         throws IOException {
       if (configSetService.checkConfigExists(restoreConfigName)) {
-        log.warn(
+        log.info(
             "Config with name {} already exists. Skipping upload to Zookeeper and using existing config.",
             restoreConfigName);
         // TODO add overwrite option?
       } else {
-        log.info("Uploading config {}", restoreConfigName);
+        log.info(
+            "Config with name {} does not already exist in ZooKeeper. Will restore from Backup.",
+            restoreConfigName);
 
         backupMgr.uploadConfigDir(configName, restoreConfigName, configSetService);
       }
diff --git a/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java b/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java
index 806ba1f6b69..e193928214a 100644
--- a/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java
+++ b/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java
@@ -254,7 +254,7 @@ public class BackupManager {
       throws IOException {
     URI source = repository.resolveDirectory(getZkStateDir(), CONFIG_STATE_DIR, sourceConfigName);
     if (!repository.exists(source)) {
-      throw new IllegalArgumentException("Path " + source + " does not exist");
+      throw new IllegalArgumentException("Configset expected at " + source + " does not exist");
     }
     uploadConfigToSolrCloud(configSetService, source, targetConfigName, "");
   }
@@ -270,7 +270,6 @@ public class BackupManager {
   public void downloadConfigDir(String configName, ConfigSetService configSetService)
       throws IOException {
     URI dest = repository.resolveDirectory(getZkStateDir(), CONFIG_STATE_DIR, configName);
-    repository.createDirectory(getZkStateDir());
     repository.createDirectory(repository.resolveDirectory(getZkStateDir(), CONFIG_STATE_DIR));
     repository.createDirectory(dest);
 
@@ -404,4 +403,8 @@ public class BackupManager {
     }
     return zkStateDir;
   }
+
+  public void createZkStateDir() throws IOException {
+    repository.createDirectory(getZkStateDir());
+  }
 }
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/api/CreateCollectionBackupAPI.java b/solr/core/src/java/org/apache/solr/handler/admin/api/CreateCollectionBackupAPI.java
index 01cb621ac4b..4a5f1991fde 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/api/CreateCollectionBackupAPI.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/api/CreateCollectionBackupAPI.java
@@ -24,6 +24,7 @@ import static org.apache.solr.common.params.CollectionAdminParams.FOLLOW_ALIASES
 import static org.apache.solr.common.params.CollectionAdminParams.INDEX_BACKUP_STRATEGY;
 import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
 import static org.apache.solr.common.params.CommonParams.NAME;
+import static org.apache.solr.common.params.CoreAdminParams.BACKUP_CONFIGSET;
 import static org.apache.solr.common.params.CoreAdminParams.BACKUP_INCREMENTAL;
 import static org.apache.solr.common.params.CoreAdminParams.BACKUP_LOCATION;
 import static org.apache.solr.common.params.CoreAdminParams.BACKUP_REPOSITORY;
@@ -162,6 +163,7 @@ public class CreateCollectionBackupAPI extends BackupAPIBase {
     requestBody.backupStrategy = params.get(INDEX_BACKUP_STRATEGY);
     requestBody.snapshotName = params.get(COMMIT_NAME);
     requestBody.incremental = params.getBool(BACKUP_INCREMENTAL);
+    requestBody.backupConfigset = params.getBool(BACKUP_CONFIGSET);
     requestBody.maxNumBackupPoints = params.getInt(MAX_NUM_BACKUP_POINTS);
     requestBody.async = params.get(ASYNC);
 
@@ -186,6 +188,7 @@ public class CreateCollectionBackupAPI extends BackupAPIBase {
     @JsonProperty public String backupStrategy;
     @JsonProperty public String snapshotName;
     @JsonProperty public Boolean incremental;
+    @JsonProperty public Boolean backupConfigset;
     @JsonProperty public Integer maxNumBackupPoints;
     @JsonProperty public String async;
   }
diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/collection-management.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/collection-management.adoc
index 61e987509c6..12137859df3 100644
--- a/solr/solr-ref-guide/modules/deployment-guide/pages/collection-management.adoc
+++ b/solr/solr-ref-guide/modules/deployment-guide/pages/collection-management.adoc
@@ -1664,6 +1664,15 @@ The upper-bound on how many backups should be retained at the backup location.
 If the current number exceeds this bound, older backups will be deleted until only `maxNumBackupPoints` backups remain.
 This parameter has no effect if `incremental=false` is specified.
 
+`backupConfigset`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: true
+|===
++
+Indicates if configset files should be included with the index backup or not. Note that in order to restore a collection, the configset must either exist in ZooKeeper or be part of the backup. Only set this to `false` if you can restore configsets by other means external to Solr (i.e. you have it stored with your application source code, is part of your ZooKeeper backups, etc).
+
 `incremental`::
 +
 [%autowidth,frame=none]
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
index 83dbd1fae8b..c7814aab55b 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
@@ -1110,6 +1110,7 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
     protected Optional<String> indexBackupStrategy = Optional.empty();
     protected boolean incremental = true;
     protected Optional<Integer> maxNumBackupPoints = Optional.empty();
+    protected boolean backupConfigset = true;
 
     public Backup(String collection, String name) {
       super(CollectionAction.BACKUP, collection);
@@ -1188,6 +1189,22 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
       return this;
     }
 
+    /**
+     * Indicates weather the Backup operation should also backup the configset files (such as
+     * schema.xml, solrconfig.xml. etc).
+     *
+     * <p>Note that the configset is needed to restore the collection, so only set this to false if
+     * you know the cluster will have the configset available before restoring.
+     *
+     * @param backupConfigset {@code true} if you want to backup configsets (default). {@code false}
+     *     to skip configset files.
+     * @return {@code this} builder
+     */
+    public Backup setBackupConfigset(boolean backupConfigset) {
+      this.backupConfigset = backupConfigset;
+      return this;
+    }
+
     @Override
     public SolrParams getParams() {
       ModifiableSolrParams params = (ModifiableSolrParams) super.getParams();
@@ -1207,6 +1224,7 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
         params.set(CoreAdminParams.MAX_NUM_BACKUP_POINTS, maxNumBackupPoints.get());
       }
       params.set(CoreAdminParams.BACKUP_INCREMENTAL, incremental);
+      params.set(CoreAdminParams.BACKUP_CONFIGSET, backupConfigset);
       return params;
     }
   }
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java b/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java
index 59f184a92c8..4d9427f31bb 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java
@@ -150,6 +150,9 @@ public abstract class CoreAdminParams {
   /** A boolean parameter specifying if a core is being created as part of a new collection */
   public static final String NEW_COLLECTION = "newCollection";
 
+  /** A parameter to specify if Configsets should be included in the backup or not */
+  public static final String BACKUP_CONFIGSET = "backupConfigset";
+
   /**
    * Tells the CoreAdminHandler that the new Core will be a replica of a particular {@link
    * org.apache.solr.common.cloud.Replica.Type}
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
index 43bd98e378a..3e8a8d9dbae 100644
--- a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
@@ -348,6 +348,114 @@ public abstract class AbstractIncrementalBackupTest extends SolrCloudTestCase {
     }
   }
 
+  @Test
+  public void testSkipConfigset() throws Exception {
+    setTestSuffix("testskipconfigset");
+    final String backupCollectionName = getCollectionName();
+    final String restoreCollectionName = backupCollectionName + "-restore";
+
+    CloudSolrClient solrClient = cluster.getSolrClient();
+
+    CollectionAdminRequest.createCollection(backupCollectionName, "conf1", NUM_SHARDS, 1)
+        .process(solrClient);
+    int numDocs = indexDocs(backupCollectionName, true);
+    String backupName = BACKUPNAME_PREFIX + testSuffix;
+    try (BackupRepository repository =
+        cluster.getJettySolrRunner(0).getCoreContainer().newBackupRepository(BACKUP_REPO_NAME)) {
+      String backupLocation = repository.getBackupLocation(getBackupLocation());
+      CollectionAdminRequest.backupCollection(backupCollectionName, backupName)
+          .setLocation(backupLocation)
+          .setBackupConfigset(false)
+          .setRepositoryName(BACKUP_REPO_NAME)
+          .processAndWait(cluster.getSolrClient(), 100);
+
+      assertFalse(
+          "Configset shouldn't be part of the backup but found:\n"
+              + TrackingBackupRepository.directoriesCreated().stream()
+                  .map(URI::toString)
+                  .collect(Collectors.joining("\n")),
+          TrackingBackupRepository.directoriesCreated().stream()
+              .anyMatch(f -> f.getPath().contains("configs/conf1")));
+      assertFalse(
+          "Configset shouldn't be part of the backup but found:\n"
+              + TrackingBackupRepository.outputsCreated().stream()
+                  .map(URI::toString)
+                  .collect(Collectors.joining("\n")),
+          TrackingBackupRepository.outputsCreated().stream()
+              .anyMatch(f -> f.getPath().contains("configs/conf1")));
+      assertFalse(
+          "solrconfig.xml shouldn't be part of the backup but found:\n"
+              + TrackingBackupRepository.outputsCreated().stream()
+                  .map(URI::toString)
+                  .collect(Collectors.joining("\n")),
+          TrackingBackupRepository.outputsCreated().stream()
+              .anyMatch(f -> f.getPath().contains("solrconfig.xml")));
+      assertFalse(
+          "schema.xml shouldn't be part of the backup but found:\n"
+              + TrackingBackupRepository.outputsCreated().stream()
+                  .map(URI::toString)
+                  .collect(Collectors.joining("\n")),
+          TrackingBackupRepository.outputsCreated().stream()
+              .anyMatch(f -> f.getPath().contains("schema.xml")));
+
+      CollectionAdminRequest.restoreCollection(restoreCollectionName, backupName)
+          .setLocation(backupLocation)
+          .setRepositoryName(BACKUP_REPO_NAME)
+          .processAndWait(solrClient, 500);
+
+      AbstractDistribZkTestBase.waitForRecoveriesToFinish(
+          restoreCollectionName, ZkStateReader.from(solrClient), log.isDebugEnabled(), false, 3);
+      assertEquals(
+          numDocs,
+          cluster
+              .getSolrClient()
+              .query(restoreCollectionName, new SolrQuery("*:*"))
+              .getResults()
+              .getNumFound());
+    }
+
+    TrackingBackupRepository.clear();
+
+    try (BackupRepository repository =
+        cluster.getJettySolrRunner(0).getCoreContainer().newBackupRepository(BACKUP_REPO_NAME)) {
+      String backupLocation = repository.getBackupLocation(getBackupLocation());
+      CollectionAdminRequest.backupCollection(backupCollectionName, backupName)
+          .setLocation(backupLocation)
+          .setBackupConfigset(true)
+          .setRepositoryName(BACKUP_REPO_NAME)
+          .processAndWait(cluster.getSolrClient(), 100);
+
+      assertTrue(
+          "Configset should be part of the backup but not found:\n"
+              + TrackingBackupRepository.directoriesCreated().stream()
+                  .map(URI::toString)
+                  .collect(Collectors.joining("\n")),
+          TrackingBackupRepository.directoriesCreated().stream()
+              .anyMatch(f -> f.getPath().contains("configs/conf1")));
+      assertTrue(
+          "Configset should be part of the backup but not found:\n"
+              + TrackingBackupRepository.outputsCreated().stream()
+                  .map(URI::toString)
+                  .collect(Collectors.joining("\n")),
+          TrackingBackupRepository.outputsCreated().stream()
+              .anyMatch(f -> f.getPath().contains("configs/conf1")));
+      assertTrue(
+          "solrconfig.xml should be part of the backup but not found:\n"
+              + TrackingBackupRepository.outputsCreated().stream()
+                  .map(URI::toString)
+                  .collect(Collectors.joining("\n")),
+          TrackingBackupRepository.outputsCreated().stream()
+              .anyMatch(f -> f.getPath().contains("solrconfig.xml")));
+      assertTrue(
+          "schema.xml should be part of the backup but not found:\n"
+              + TrackingBackupRepository.outputsCreated().stream()
+                  .map(URI::toString)
+                  .collect(Collectors.joining("\n")),
+          TrackingBackupRepository.outputsCreated().stream()
+              .anyMatch(f -> f.getPath().contains("schema.xml")));
+    }
+  }
+
   protected void corruptIndexFiles() throws IOException {
     List<Slice> slices = new ArrayList<>(getCollectionState(getCollectionName()).getSlices());
     Replica leader = slices.get(random().nextInt(slices.size())).getLeader();
diff --git a/solr/test-framework/src/java/org/apache/solr/core/TrackingBackupRepository.java b/solr/test-framework/src/java/org/apache/solr/core/TrackingBackupRepository.java
index e02b16dcffb..5e28d7aff62 100644
--- a/solr/test-framework/src/java/org/apache/solr/core/TrackingBackupRepository.java
+++ b/solr/test-framework/src/java/org/apache/solr/core/TrackingBackupRepository.java
@@ -34,6 +34,9 @@ import org.apache.solr.core.backup.repository.BackupRepositoryFactory;
 
 public class TrackingBackupRepository implements BackupRepository {
   private static final List<URI> COPIED_FILES = Collections.synchronizedList(new ArrayList<>());
+  private static final List<URI> DIRECTORIES_CREATED =
+      Collections.synchronizedList(new ArrayList<>());
+  private static final List<URI> OUTPUTS_CREATED = Collections.synchronizedList(new ArrayList<>());
 
   private BackupRepository delegate;
 
@@ -84,11 +87,13 @@ public class TrackingBackupRepository implements BackupRepository {
 
   @Override
   public OutputStream createOutput(URI path) throws IOException {
+    OUTPUTS_CREATED.add(path);
     return delegate.createOutput(path);
   }
 
   @Override
   public void createDirectory(URI path) throws IOException {
+    DIRECTORIES_CREATED.add(path);
     delegate.createDirectory(path);
   }
 
@@ -136,9 +141,19 @@ public class TrackingBackupRepository implements BackupRepository {
     return new ArrayList<>(COPIED_FILES);
   }
 
+  public static List<URI> directoriesCreated() {
+    return new ArrayList<>(DIRECTORIES_CREATED);
+  }
+
+  public static List<URI> outputsCreated() {
+    return new ArrayList<>(OUTPUTS_CREATED);
+  }
+
   /** Clear all tracking data */
   public static void clear() {
     COPIED_FILES.clear();
+    DIRECTORIES_CREATED.clear();
+    OUTPUTS_CREATED.clear();
   }
 
   @Override