Posted to issues@solr.apache.org by GitBox <gi...@apache.org> on 2021/08/18 22:39:47 UTC

[GitHub] [solr-operator] HoustonPutman commented on a change in pull request #302: Allow users to take GCS-based backups

HoustonPutman commented on a change in pull request #302:
URL: https://github.com/apache/solr-operator/pull/302#discussion_r691586870



##########
File path: api/v1beta1/solrbackup_types.go
##########
@@ -38,12 +38,16 @@ type SolrBackupSpec struct {
 	// A reference to the SolrCloud to create a backup for
 	SolrCloud string `json:"solrCloud"`
 
+	// The name of the repository to use for the backup.  Defaults to "legacy_local_repository" if not specified (the
+	// auto-configured repository for legacy singleton volumes).
+	RepositoryName string `json:"repositoryName,omitempty"`

Review comment:
       ```suggestion
   	// +optional
   	RepositoryName string `json:"repositoryName,omitempty"`
   ```

##########
File path: api/v1beta1/solrcloud_types.go
##########
@@ -325,20 +329,158 @@ type SolrEphemeralDataStorageOptions struct {
 	EmptyDir *corev1.EmptyDirVolumeSource `json:"emptyDir,omitempty"`
 }
 
+func (backupOptions *SolrBackupRestoreOptions) IsManaged() bool {
+	return true
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetVolumeSource() corev1.VolumeSource {
+	return *backupOptions.Volume
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetDirectory() string {
+	return backupOptions.Directory
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetSolrMountPath() string {
+	return BaseBackupRestorePath
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetInitPodMountPath() string {
+	return BackupRestoreInitContainerPath
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetBackupPath(backupName string) string {
+	return backupOptions.GetSolrMountPath() + "/backups/" + backupName
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetVolumeName() string {
+	return BackupRestoreVolume
+}
+
 type SolrBackupRestoreOptions struct {
+	// TODO Do we need to support this (Volume) here for backcompat, or can it live exclusively in ManagedStorage?

Review comment:
       Volume and Directory do need to stay for back-compat, but we can deprecate them and auto-move them over to the new location.
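    For illustration, a rough sketch of what that auto-move could look like as a defaulting step (the method name and its placement are my assumptions, not part of this PR; it only uses the types and the "legacy_local_repository" default name defined in these hunks):

```go
// Hypothetical defaulting helper: migrate the deprecated top-level Volume/Directory
// into ManagedRepositories so existing SolrClouds keep working without spec changes.
func (backupOptions *SolrBackupRestoreOptions) withBackupRepositoryDefaults() (changed bool) {
	if backupOptions.Volume != nil && backupOptions.ManagedRepositories == nil {
		repos := []ManagedStorage{{
			Name:      "legacy_local_repository",
			Volume:    backupOptions.Volume,
			Directory: backupOptions.Directory,
		}}
		backupOptions.ManagedRepositories = &repos
		changed = true
	}
	return changed
}
```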

##########
File path: controllers/util/backup_util.go
##########
@@ -440,3 +483,13 @@ func RunExecForPod(podName string, namespace string, command []string, config re
 
 	return nil
 }
+
+// TODO Is the Mozilla license on hashicorp's go-version lib we're using here acceptable by Apache?  Is there a better option?
+func SupportsGcsBackups(versionStr string) (bool, error) {

Review comment:
       I'm not sure we even need to do this. We should definitely **heavily** document the constraint, but there is no guarantee that the tag or version of the solr image will reflect this. I would like to ensure we never infer the version of a solr image from its tag; I think that can get us into a tricky place.
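    For context, a minimal sketch of what such a check with hashicorp's go-version would look like (the `>= 8.9` constraint is my assumption about when GCS repositories landed in Solr), independent of whether we want to rely on image versions at all:

```go
import version "github.com/hashicorp/go-version"

// Sketch only: parse the configured Solr version string and test it against a
// constraint. As noted above, an image tag is not guaranteed to reflect the
// actual Solr version inside the image.
func SupportsGcsBackups(versionStr string) (bool, error) {
	v, err := version.NewVersion(versionStr)
	if err != nil {
		return false, err
	}
	constraint, err := version.NewConstraint(">= 8.9")
	if err != nil {
		return false, err
	}
	return constraint.Check(v), nil
}
```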

##########
File path: api/v1beta1/solrbackup_types.go
##########
@@ -38,12 +38,16 @@ type SolrBackupSpec struct {
 	// A reference to the SolrCloud to create a backup for
 	SolrCloud string `json:"solrCloud"`
 
+	// The name of the repository to use for the backup.  Defaults to "legacy_local_repository" if not specified (the
+	// auto-configured repository for legacy singleton volumes).
+	RepositoryName string `json:"repositoryName,omitempty"`
+
 	// The list of collections to backup. If empty, all collections in the cloud will be backed up.
 	// +optional
 	Collections []string `json:"collections,omitempty"`
 
 	// Persistence is the specification on how to persist the backup data.
-	Persistence PersistenceSource `json:"persistence"`
+	Persistence PersistenceSource `json:"persistence,omitempty"`

Review comment:
       ```suggestion
   	// +optional
   	Persistence *PersistenceSource `json:"persistence,omitempty"`
   ```
   
   This will make the field _more_ optional, though it will require code changes since this is now a pointer.
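    A quick sketch of the kind of nil guard that change would force in the backup controller (hypothetical placement, just to show the shape):

```go
// With Persistence now a *PersistenceSource, callers have to nil-check before use.
if backup.Spec.Persistence == nil {
	// No persistence requested for this backup; skip the persistence job entirely.
	return nil
}
persistenceJob := util.GenerateBackupPersistenceJobForCloud(backup, solrCloud)
// ... create/reconcile persistenceJob as before ...
```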

##########
File path: api/v1beta1/solrcloud_types.go
##########
@@ -325,20 +329,158 @@ type SolrEphemeralDataStorageOptions struct {
 	EmptyDir *corev1.EmptyDirVolumeSource `json:"emptyDir,omitempty"`
 }
 
+func (backupOptions *SolrBackupRestoreOptions) IsManaged() bool {
+	return true
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetVolumeSource() corev1.VolumeSource {
+	return *backupOptions.Volume
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetDirectory() string {
+	return backupOptions.Directory
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetSolrMountPath() string {
+	return BaseBackupRestorePath
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetInitPodMountPath() string {
+	return BackupRestoreInitContainerPath
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetBackupPath(backupName string) string {
+	return backupOptions.GetSolrMountPath() + "/backups/" + backupName
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetVolumeName() string {
+	return BackupRestoreVolume
+}
+
 type SolrBackupRestoreOptions struct {
+	// TODO Do we need to support this (Volume) here for backcompat, or can it live exclusively in ManagedStorage?
+
 	// This is a volumeSource for a volume that will be mounted to all solrNodes to store backups and load restores.
 	// The data within the volume will be namespaced for this instance, so feel free to use the same volume for multiple clouds.
 	// Since the volume will be mounted to all solrNodes, it must be able to be written from multiple pods.
 	// If a PVC reference is given, the PVC must have `accessModes: - ReadWriteMany`.
 	// Another option is to use an NFS volume.
-	Volume corev1.VolumeSource `json:"volume"`
+	Volume *corev1.VolumeSource `json:"volume,omitempty"`
+
+	// Allows specification of multiple different "repositories" for Solr to use when backing up data to GCS.
+	GcsRepositories *[]GcsStorage `json:"gcsRepositories,omitempty"`
 
+	// Allows specification of multiple different "repositories" for Solr to use when backing up data "locally".
+	// Repositories defined here are considered "managed" and can take advantage of special operator features, such as
+	// post-backup compression.
+	ManagedRepositories *[]ManagedStorage `json:"managedRepositories,omitempty"`
+
+	// TODO Do we need to support this here for backcompat, or can it live exclusively in ManagedStorage
 	// Select a custom directory name to mount the backup/restore data from the given volume.
 	// If not specified, then the name of the solrcloud will be used by default.
 	// +optional
 	Directory string `json:"directory,omitempty"`
 }
 
+type GcsStorage struct {
+	// A name used to identify this GCS storage profile.
+	Name string `json:"name"`
+
+	// The name of the GCS bucket that all backup data will be stored in
+	Bucket string `json:"bucket"`
+
+	// The name of a Kubernetes secret holding a Google cloud service account key
+	GcsCredentialSecret string `json:"gcsCredentialSecret"`
+	// JEGERLOW TODO Should 'baseLocation' be optional?
+	// A chroot within the bucket to store data in.  If specified this should already exist
+	BaseLocation string `json:"baseLocation"`

Review comment:
       yeah, this should be optional.
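    Something along these lines would match the other optional fields (a sketch, not a formal suggestion block):

```go
	// A chroot within the bucket to store backup data under. If specified, it should already exist.
	// +optional
	BaseLocation string `json:"baseLocation,omitempty"`
```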

##########
File path: api/v1beta1/solrcloud_types.go
##########
@@ -325,20 +329,158 @@ type SolrEphemeralDataStorageOptions struct {
 	EmptyDir *corev1.EmptyDirVolumeSource `json:"emptyDir,omitempty"`
 }
 
+func (backupOptions *SolrBackupRestoreOptions) IsManaged() bool {
+	return true
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetVolumeSource() corev1.VolumeSource {
+	return *backupOptions.Volume
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetDirectory() string {
+	return backupOptions.Directory
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetSolrMountPath() string {
+	return BaseBackupRestorePath
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetInitPodMountPath() string {
+	return BackupRestoreInitContainerPath
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetBackupPath(backupName string) string {
+	return backupOptions.GetSolrMountPath() + "/backups/" + backupName
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetVolumeName() string {
+	return BackupRestoreVolume
+}
+
 type SolrBackupRestoreOptions struct {
+	// TODO Do we need to support this (Volume) here for backcompat, or can it live exclusively in ManagedStorage?
+
 	// This is a volumeSource for a volume that will be mounted to all solrNodes to store backups and load restores.
 	// The data within the volume will be namespaced for this instance, so feel free to use the same volume for multiple clouds.
 	// Since the volume will be mounted to all solrNodes, it must be able to be written from multiple pods.
 	// If a PVC reference is given, the PVC must have `accessModes: - ReadWriteMany`.
 	// Another option is to use an NFS volume.
-	Volume corev1.VolumeSource `json:"volume"`
+	Volume *corev1.VolumeSource `json:"volume,omitempty"`
+
+	// Allows specification of multiple different "repositories" for Solr to use when backing up data to GCS.
+	GcsRepositories *[]GcsStorage `json:"gcsRepositories,omitempty"`
 
+	// Allows specification of multiple different "repositories" for Solr to use when backing up data "locally".
+	// Repositories defined here are considered "managed" and can take advantage of special operator features, such as
+	// post-backup compression.
+	ManagedRepositories *[]ManagedStorage `json:"managedRepositories,omitempty"`
+
+	// TODO Do we need to support this here for backcompat, or can it live exclusively in ManagedStorage
 	// Select a custom directory name to mount the backup/restore data from the given volume.
 	// If not specified, then the name of the solrcloud will be used by default.
 	// +optional
 	Directory string `json:"directory,omitempty"`
 }
 
+type GcsStorage struct {
+	// A name used to identify this GCS storage profile.
+	Name string `json:"name"`
+
+	// The name of the GCS bucket that all backup data will be stored in
+	Bucket string `json:"bucket"`
+
+	// The name of a Kubernetes secret holding a Google cloud service account key
+	GcsCredentialSecret string `json:"gcsCredentialSecret"`
+	// JEGERLOW TODO Should 'baseLocation' be optional?
+	// A chroot within the bucket to store data in.  If specified this should already exist
+	BaseLocation string `json:"baseLocation"`
+}
+
+func VolumeExistsWithName(needle string, haystack []corev1.Volume) bool {
+	for _, volume := range haystack {
+		if volume.Name == needle {
+			return true
+		}
+	}
+	return false
+}
+
+func (gcsRepository *GcsStorage) IsManaged() bool { return false }
+
+func (gcsRepository *GcsStorage) GetSolrMountPath() string {
+	return fmt.Sprintf("%s-%s-%s", BaseBackupRestorePath, "gcscredential", gcsRepository.Name)
+}
+
+func (gcsRepository *GcsStorage) GetInitPodMountPath() string {
+	return ""
+}
+
+func (gcsRepository *GcsStorage) GetBackupPath(backupName string) string {
+	if gcsRepository.BaseLocation != "" {
+		return gcsRepository.BaseLocation
+	}
+	return "/"
+}
+
+func (gcsRepository *GcsStorage) GetVolumeName() string {
+	return fmt.Sprintf("%s-%s", gcsRepository.Name, BackupRestoreCredentialVolume)
+}
+
+func (gcs *GcsStorage) IsCredentialVolumePresent(volumes []corev1.Volume) bool {
+	expectedVolumeName := gcs.GetVolumeName()
+	return VolumeExistsWithName(expectedVolumeName, volumes)
+}
+
+type ManagedStorage struct {
+	// A name used to identify this local storage profile.
+	Name string `json:"name"`
+
+	// This is a volumeSource for a volume that will be mounted to all solrNodes to store backups and load restores.
+	// The data within the volume will be namespaced for this instance, so feel free to use the same volume for multiple clouds.
+	// Since the volume will be mounted to all solrNodes, it must be able to be written from multiple pods.
+	// If a PVC reference is given, the PVC must have `accessModes: - ReadWriteMany`.
+	// Another option is to use an NFS volume.
+	Volume *corev1.VolumeSource `json:"volume,omitempty"`

Review comment:
       We don't want this to be optional, right? If it's a managed backup, they need a ReadWriteMany volume.
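    If it stays required, that would look roughly like the pre-existing field (sketch):

```go
	// Required for managed repositories: the volume is mounted on every solrNode,
	// so it must be writable from multiple pods (e.g. a ReadWriteMany PVC or NFS).
	Volume corev1.VolumeSource `json:"volume"`
```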

##########
File path: controllers/solrcloud_controller.go
##########
@@ -699,6 +695,41 @@ func reconcileCloudStatus(r *SolrCloudReconciler, solrCloud *solr.SolrCloud, log
 	return outOfDatePods, outOfDatePodsNotStarted, availableUpdatedPodCount, nil
 }
 
+func isPodReadyForBackup(pod corev1.Pod, backupOptions *solr.SolrBackupRestoreOptions) bool {

Review comment:
       I would think that this method should take a `repository`, because a pod might be ready for one repo but not another.
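    Roughly this shape, for example (the `solr.BackupRepository` interface name here is an assumption, not something this PR defines; the helpers it calls are from the hunks above):

```go
// Sketch: check readiness per repository by looking for the one volume that
// repository needs on the pod, instead of checking all backup options at once.
func isPodReadyForBackup(pod corev1.Pod, repository solr.BackupRepository) bool {
	return solr.VolumeExistsWithName(repository.GetVolumeName(), pod.Spec.Volumes)
}
```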

##########
File path: controllers/solrbackup_controller.go
##########
@@ -253,7 +283,21 @@ func persistSolrCloudBackups(r *SolrBackupReconciler, backup *solrv1beta1.SolrBa
 	}
 	now := metav1.Now()
 
-	persistenceJob := util.GenerateBackupPersistenceJobForCloud(backup, solrCloud)
+	backupRepository, found := util.GetBackupRepositoryByName(solrCloud.Spec.StorageOptions.BackupRestoreOptions, backup.Spec.RepositoryName)
+	if ! found {
+		err = fmt.Errorf("Unable to find backup repository to use for backup [%s] (which specified the repository " +
+			"[%s]).  solrcloud must define a repository matching that name (or have only 1 repository defined).",
+			backup.Name, backup.Spec.RepositoryName)
+		return err
+	}
+
+	managedBackupRepository, ok := backupRepository.(util.ManagedBackupRepository)
+	if ! ok {

Review comment:
       I'd rather the persistence stuff go inside this `if ok {`; otherwise it's a little hard to follow.
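    i.e. something like this, so the non-managed case falls straight through (a sketch of the flow only, with the body elided):

```go
if managedBackupRepository, ok := backupRepository.(util.ManagedBackupRepository); ok {
	// ... all of the persistence handling (job generation, status updates) lives here,
	// keyed off managedBackupRepository ...
	_ = managedBackupRepository // placeholder for the elided persistence logic
}
// non-managed repositories (e.g. GCS) simply skip local persistence
```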

##########
File path: api/v1beta1/solrcloud_types.go
##########
@@ -325,20 +329,158 @@ type SolrEphemeralDataStorageOptions struct {
 	EmptyDir *corev1.EmptyDirVolumeSource `json:"emptyDir,omitempty"`
 }
 
+func (backupOptions *SolrBackupRestoreOptions) IsManaged() bool {
+	return true
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetVolumeSource() corev1.VolumeSource {
+	return *backupOptions.Volume
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetDirectory() string {
+	return backupOptions.Directory
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetSolrMountPath() string {
+	return BaseBackupRestorePath
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetInitPodMountPath() string {
+	return BackupRestoreInitContainerPath
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetBackupPath(backupName string) string {
+	return backupOptions.GetSolrMountPath() + "/backups/" + backupName
+}
+
+func (backupOptions *SolrBackupRestoreOptions) GetVolumeName() string {
+	return BackupRestoreVolume
+}
+
 type SolrBackupRestoreOptions struct {
+	// TODO Do we need to support this (Volume) here for backcompat, or can it live exclusively in ManagedStorage?
+
 	// This is a volumeSource for a volume that will be mounted to all solrNodes to store backups and load restores.
 	// The data within the volume will be namespaced for this instance, so feel free to use the same volume for multiple clouds.
 	// Since the volume will be mounted to all solrNodes, it must be able to be written from multiple pods.
 	// If a PVC reference is given, the PVC must have `accessModes: - ReadWriteMany`.
 	// Another option is to use an NFS volume.
-	Volume corev1.VolumeSource `json:"volume"`
+	Volume *corev1.VolumeSource `json:"volume,omitempty"`
+
+	// Allows specification of multiple different "repositories" for Solr to use when backing up data to GCS.
+	GcsRepositories *[]GcsStorage `json:"gcsRepositories,omitempty"`
 
+	// Allows specification of multiple different "repositories" for Solr to use when backing up data "locally".
+	// Repositories defined here are considered "managed" and can take advantage of special operator features, such as
+	// post-backup compression.
+	ManagedRepositories *[]ManagedStorage `json:"managedRepositories,omitempty"`
+
+	// TODO Do we need to support this here for backcompat, or can it live exclusively in ManagedStorage
 	// Select a custom directory name to mount the backup/restore data from the given volume.
 	// If not specified, then the name of the solrcloud will be used by default.
 	// +optional
 	Directory string `json:"directory,omitempty"`
 }
 
+type GcsStorage struct {
+	// A name used to identify this GCS storage profile.
+	Name string `json:"name"`
+
+	// The name of the GCS bucket that all backup data will be stored in
+	Bucket string `json:"bucket"`
+
+	// The name of a Kubernetes secret holding a Google cloud service account key
+	GcsCredentialSecret string `json:"gcsCredentialSecret"`
+	// JEGERLOW TODO Should 'baseLocation' be optional?
+	// A chroot within the bucket to store data in.  If specified this should already exist
+	BaseLocation string `json:"baseLocation"`
+}
+
+func VolumeExistsWithName(needle string, haystack []corev1.Volume) bool {
+	for _, volume := range haystack {
+		if volume.Name == needle {
+			return true
+		}
+	}
+	return false
+}
+
+func (gcsRepository *GcsStorage) IsManaged() bool { return false }
+
+func (gcsRepository *GcsStorage) GetSolrMountPath() string {
+	return fmt.Sprintf("%s-%s-%s", BaseBackupRestorePath, "gcscredential", gcsRepository.Name)
+}
+
+func (gcsRepository *GcsStorage) GetInitPodMountPath() string {
+	return ""
+}
+
+func (gcsRepository *GcsStorage) GetBackupPath(backupName string) string {
+	if gcsRepository.BaseLocation != "" {
+		return gcsRepository.BaseLocation
+	}
+	return "/"
+}
+
+func (gcsRepository *GcsStorage) GetVolumeName() string {
+	return fmt.Sprintf("%s-%s", gcsRepository.Name, BackupRestoreCredentialVolume)
+}
+
+func (gcs *GcsStorage) IsCredentialVolumePresent(volumes []corev1.Volume) bool {
+	expectedVolumeName := gcs.GetVolumeName()
+	return VolumeExistsWithName(expectedVolumeName, volumes)
+}
+
+type ManagedStorage struct {
+	// A name used to identify this local storage profile.
+	Name string `json:"name"`
+
+	// This is a volumeSource for a volume that will be mounted to all solrNodes to store backups and load restores.
+	// The data within the volume will be namespaced for this instance, so feel free to use the same volume for multiple clouds.
+	// Since the volume will be mounted to all solrNodes, it must be able to be written from multiple pods.
+	// If a PVC reference is given, the PVC must have `accessModes: - ReadWriteMany`.
+	// Another option is to use an NFS volume.
+	Volume *corev1.VolumeSource `json:"volume,omitempty"`
+
+	// Select a custom directory name to mount the backup/restore data from the given volume.
+	// If not specified, then the name of the solrcloud will be used by default.
+	// +optional
+	Directory string `json:"directory,omitempty"`
+}
+
+func (managedRepository *ManagedStorage) IsManaged() bool { return true }

Review comment:
       I think it would be good to get a fair number of these methods out of the API part and move them somewhere else. I know there are a lot of similar examples in here already, but I need to go and refactor that sometime.
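    For example, these could become plain functions in `controllers/util` that take the API structs as input, keeping the CRD types as pure data (the package layout and names below are assumptions, not a proposal from this PR):

```go
package util

import (
	solr "github.com/apache/solr-operator/api/v1beta1"
)

// BackupPathForManagedRepo reproduces what GetBackupPath does today for managed
// repositories, just computed outside of the api/v1beta1 package.
func BackupPathForManagedRepo(repo *solr.ManagedStorage, backupName string) string {
	return solr.BaseBackupRestorePath + "/backups/" + backupName
}
```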



