Posted to issues@solr.apache.org by "HoustonPutman (via GitHub)" <gi...@apache.org> on 2023/04/24 20:36:59 UTC

[GitHub] [solr-operator] HoustonPutman opened a new pull request, #561: Add managed scale-down for SolrClouds

HoustonPutman opened a new pull request, #561:
URL: https://github.com/apache/solr-operator/pull/561

   Resolves #559 
   
   This also adds cluster-operation locking for SolrClouds, as described here: #560 
   
   TODO:
   
   - [ ] Implement scale-down of SolrClouds, moving replicas when needed
   - [ ] Add cluster-operation locking for SolrClouds, but do not move other cluster-operations logic. That can be done separately.
   - [ ] Add an option to do managed scale-downs; it should be enabled by default (a rough sketch follows this list)
   - [ ] Add integration tests for scaling down
   - [ ] Add documentation & upgrade notes
   - [ ] Add changelog entry
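
   As a rough illustration of the managed scale-down option above, the toggle could look something like this on the CRD. This is only a sketch based on the `vacatePodsOnScaleDown` field used in the diffs below, not the final API:

   ```go
   // Sketch only: the field name comes from this PR's diffs, the struct layout is assumed.
   package v1beta1

   type SolrAutoscalingOptions struct {
   	// VacatePodsOnScaleDown controls whether replicas are moved off of pods
   	// before those pods are deleted during a scale-down.
   	// A nil value is treated as true, so managed scale-down is the default.
   	VacatePodsOnScaleDown *bool `json:"vacatePodsOnScaleDown,omitempty"`
   }
   ```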




[GitHub] [solr-operator] HoustonPutman commented on a diff in pull request #561: Add managed scale-down for SolrClouds

Posted by "HoustonPutman (via GitHub)" <gi...@apache.org>.
HoustonPutman commented on code in PR #561:
URL: https://github.com/apache/solr-operator/pull/561#discussion_r1199181044


##########
controllers/solr_cluster_ops_util.go:
##########
@@ -210,9 +219,43 @@ func evictSinglePod(ctx context.Context, r *SolrCloudReconciler, instance *solrv
 	}
 
 	// Only evict from the pod if it contains replicas in the clusterState
-	if err, podIsEmpty = util.EvictReplicasForPodIfNecessary(ctx, instance, pod, podHasReplicas, "scaleDown", logger); err != nil {
+	if e, canDeletePod := util.EvictReplicasForPodIfNecessary(ctx, instance, pod, podHasReplicas, "scaleDown", logger); e != nil {
+		err = e
 		logger.Error(err, "Error while evicting replicas on Pod, when scaling down SolrCloud", "pod", pod.Name)
+	} else if canDeletePod {
+		// The pod previously had replicas, so loop back in the next reconcile to make sure that the pod doesn't
+		// have replicas anymore even if the previous evict command was successful.
+		// If there are still replicas, it will start the eviction process again
+		podIsEmpty = !podHasReplicas
 	}
 
 	return
 }
+
+func getReplicasForPod(ctx context.Context, cloud *solrv1beta1.SolrCloud, podName string, logger logr.Logger) (replicas []string, err error) {
+	clusterResp := &solr_api.SolrClusterStatusResponse{}
+	queryParams := url.Values{}
+	queryParams.Add("action", "CLUSTERSTATUS")
+	err = solr_api.CallCollectionsApi(ctx, cloud, queryParams, clusterResp)
+	if err == nil {
+		if hasError, apiErr := solr_api.CheckForCollectionsApiError("CLUSTERSTATUS", clusterResp.ResponseHeader); hasError {
+			err = apiErr
+		}
+	}
+	podNodeName := util.SolrNodeName(cloud, podName)
+	if err == nil {
+		for _, colState := range clusterResp.ClusterStatus.Collections {
+			for _, shardState := range colState.Shards {
+				for replica, replicaState := range shardState.Replicas {
+					if replicaState.NodeName == podNodeName {
+						replicas = append(replicas, replica)
+					}
+				}
+			}
+		}
+	}
+	if err != nil {

Review Comment:
   No reason, will change.





[GitHub] [solr-operator] radu-gheorghe commented on pull request #561: Add managed scale-down for SolrClouds

Posted by "radu-gheorghe (via GitHub)" <gi...@apache.org>.
radu-gheorghe commented on PR #561:
URL: https://github.com/apache/solr-operator/pull/561#issuecomment-1552539241

   Awesome! I will test it with HPA then, so I can get a jump start on that tutorial as well.




[GitHub] [solr-operator] tflobbe commented on a diff in pull request #561: Add managed scale-down for SolrClouds

Posted by "tflobbe (via GitHub)" <gi...@apache.org>.
tflobbe commented on code in PR #561:
URL: https://github.com/apache/solr-operator/pull/561#discussion_r1199178613


##########
controllers/solr_cluster_ops_util.go:
##########
@@ -210,9 +219,43 @@ func evictSinglePod(ctx context.Context, r *SolrCloudReconciler, instance *solrv
 	}
 
 	// Only evict from the pod if it contains replicas in the clusterState
-	if err, podIsEmpty = util.EvictReplicasForPodIfNecessary(ctx, instance, pod, podHasReplicas, "scaleDown", logger); err != nil {
+	if e, canDeletePod := util.EvictReplicasForPodIfNecessary(ctx, instance, pod, podHasReplicas, "scaleDown", logger); e != nil {
+		err = e
 		logger.Error(err, "Error while evicting replicas on Pod, when scaling down SolrCloud", "pod", pod.Name)
+	} else if canDeletePod {
+		// The pod previously had replicas, so loop back in the next reconcile to make sure that the pod doesn't
+		// have replicas anymore even if the previous evict command was successful.
+		// If there are still replicas, it will start the eviction process again
+		podIsEmpty = !podHasReplicas
 	}
 
 	return
 }
+
+func getReplicasForPod(ctx context.Context, cloud *solrv1beta1.SolrCloud, podName string, logger logr.Logger) (replicas []string, err error) {
+	clusterResp := &solr_api.SolrClusterStatusResponse{}
+	queryParams := url.Values{}
+	queryParams.Add("action", "CLUSTERSTATUS")
+	err = solr_api.CallCollectionsApi(ctx, cloud, queryParams, clusterResp)
+	if err == nil {
+		if hasError, apiErr := solr_api.CheckForCollectionsApiError("CLUSTERSTATUS", clusterResp.ResponseHeader); hasError {
+			err = apiErr
+		}
+	}
+	podNodeName := util.SolrNodeName(cloud, podName)
+	if err == nil {
+		for _, colState := range clusterResp.ClusterStatus.Collections {
+			for _, shardState := range colState.Shards {
+				for replica, replicaState := range shardState.Replicas {
+					if replicaState.NodeName == podNodeName {
+						replicas = append(replicas, replica)
+					}
+				}
+			}
+		}
+	}
+	if err != nil {

Review Comment:
   Why not `else`?
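
   Something like this, roughly (just a sketch; the body of the final error branch is cut off in the diff above, so it is only a placeholder here, and the imports/identifiers are the ones this file already uses):

   ```go
   func getReplicasForPod(ctx context.Context, cloud *solrv1beta1.SolrCloud, podName string, logger logr.Logger) (replicas []string, err error) {
   	clusterResp := &solr_api.SolrClusterStatusResponse{}
   	queryParams := url.Values{}
   	queryParams.Add("action", "CLUSTERSTATUS")
   	if err = solr_api.CallCollectionsApi(ctx, cloud, queryParams, clusterResp); err == nil {
   		if hasError, apiErr := solr_api.CheckForCollectionsApiError("CLUSTERSTATUS", clusterResp.ResponseHeader); hasError {
   			err = apiErr
   		}
   	}
   	podNodeName := util.SolrNodeName(cloud, podName)
   	if err == nil {
   		// Collect every replica in the clusterState that lives on this pod's node
   		for _, colState := range clusterResp.ClusterStatus.Collections {
   			for _, shardState := range colState.Shards {
   				for replica, replicaState := range shardState.Replicas {
   					if replicaState.NodeName == podNodeName {
   						replicas = append(replicas, replica)
   					}
   				}
   			}
   		}
   	} else {
   		// ... the error-handling body is elided in the quoted diff ...
   	}
   	return
   }
   ```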





[GitHub] [solr-operator] HoustonPutman commented on pull request #561: Add managed scale-down for SolrClouds

Posted by "HoustonPutman (via GitHub)" <gi...@apache.org>.
HoustonPutman commented on PR #561:
URL: https://github.com/apache/solr-operator/pull/561#issuecomment-1553890627

   @tflobbe, thank you for the wonderful suggestions! I think I have addressed all of them. I'll merge early next week in case you want to take another look, but no worries if you don't have time.




[GitHub] [solr-operator] HoustonPutman commented on a diff in pull request #561: Add managed scale-down for SolrClouds

Posted by "HoustonPutman (via GitHub)" <gi...@apache.org>.
HoustonPutman commented on code in PR #561:
URL: https://github.com/apache/solr-operator/pull/561#discussion_r1196546226


##########
docs/solr-cloud/autoscaling.md:
##########
@@ -0,0 +1,85 @@
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+ -->
+
+# SolrCloud Scaling
+_Since v0.8.0_
+
+Solr Clouds are complex distributed systems, and thus require additional help when trying to scale up or down.
+
+Scaling/Autoscaling can mean different things in different situations, and this is true even within the `SolrCloud.spec.autoscaling` section.
+- Replicas can be moved when new nodes are added or when nodes need to be taken down
+- Nodes can be added/removed if more or fewer resources are desired.
+
+The following sections describe all of the features that the Solr Operator currently supports to aid in scaling & autoscaling SolrClouds.
+
+## Configuration
+
+The `autoscaling` section in the SolrCloud CRD can be configured in the following ways:
+
+```yaml
+spec:
+  autoscaling:
+    vacatePodsOnScaleDown: true # Default: true
+```
+
+## Replica Movement
+
+Solr can be scaled up & down either manually or by a `HorizontalPodAutoscaler`; however, no matter how the `SolrCloud.Spec.Replicas` value
+changes, the Solr Operator must implement this change the same way.
+
+For now, replicas are not scaled up and down themselves; they are just moved to utilize new Solr pods or to vacate soon-to-be-deleted Solr pods.
+
+### Solr Pod Scale-Down
+
+When the desired number of Solr Pods (`SolrCloud.Spec.Replicas`) is decreased,
+the `SolrCloud.spec.autoscaling.vacatePodsOnScaleDown` option determines whether the Solr Operator should move replicas
+off of the pods that are about to be deleted.
+
+When a StatefulSet, which the Solr Operator uses to run Solr pods, has its size decreased by `x` pods, it's the last
+`x` pods that are deleted. So if a StatefulSet `tmp` has size 4, it will have pods `tmp-0`, `tmp-1`, `tmp-2` and `tmp-3`.
+If `tmp` is then scaled down to size 2, pods `tmp-2` and `tmp-3` will be deleted, because they are `tmp`'s last pods numerically.
+
+If Solr has replicas placed on the pods that will be deleted as a part of the scale-down, then it has a problem.
+Solr will expect that these replicas will eventually come back online, because they are a part of the clusterState.
+However, the Solr Operator does not expect these replicas to come back, because the cloud has been scaled down.
+Therefore, the safest option is to move the replicas off of these pods before the scale-down operation occurs.

Review Comment:
   Ahhh understood. Will use your line to make this more concise.
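
   For anyone following along, the ordinal behavior described in the quoted docs boils down to something like this (a tiny standalone sketch; the helper name is ours, not from the PR):

   ```go
   package main

   import "fmt"

   // podsRemovedOnScaleDown shows which pods a StatefulSet deletes when it is
   // scaled from `from` pods down to `to` pods: always the highest ordinals.
   func podsRemovedOnScaleDown(statefulSetName string, from, to int) []string {
   	removed := []string{}
   	for ordinal := to; ordinal < from; ordinal++ {
   		removed = append(removed, fmt.Sprintf("%s-%d", statefulSetName, ordinal))
   	}
   	return removed
   }

   func main() {
   	// Matches the docs example: scaling "tmp" from 4 to 2 removes tmp-2 and tmp-3.
   	fmt.Println(podsRemovedOnScaleDown("tmp", 4, 2)) // [tmp-2 tmp-3]
   }
   ```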





[GitHub] [solr-operator] HoustonPutman commented on a diff in pull request #561: Add managed scale-down for SolrClouds

Posted by "HoustonPutman (via GitHub)" <gi...@apache.org>.
HoustonPutman commented on code in PR #561:
URL: https://github.com/apache/solr-operator/pull/561#discussion_r1196576097


##########
controllers/solr_cluster_ops_util.go:
##########
@@ -0,0 +1,218 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package controllers
+
+import (
+	"context"
+	"errors"
+	solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
+	"github.com/apache/solr-operator/controllers/util"
+	"github.com/go-logr/logr"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/utils/pointer"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"strconv"
+	"time"
+)
+
+func determineScaleClusterOpLockIfNecessary(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (clusterOpLock string, clusterOpMetadata string, retryLaterDuration time.Duration, err error) {
+	desiredPods := int(*instance.Spec.Replicas)
+	configuredPods := int(*statefulSet.Spec.Replicas)
+	if desiredPods != configuredPods {
+		scaleTo := -1
+		// Start a scaling operation
+		if desiredPods < configuredPods {
+			// Scale down!
+			// The option is enabled by default, so treat "nil" like "true"
+			if instance.Spec.Autoscaling.VacatePodsOnScaleDown == nil || *instance.Spec.Autoscaling.VacatePodsOnScaleDown {
+				if desiredPods > 0 {
+					// We only support scaling down one pod at a time if not scaling down to 0 pods
+					scaleTo = configuredPods - 1
+				} else {
+					scaleTo = 0
+				}
+			} else {
+				// The cloud is not setup to use managed scale-down
+				err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
+			}
+		} else if desiredPods > configuredPods {
+			// Scale up!
+			// TODO: replicasScaleUp is not supported, so do not make a clusterOp out of it, just do the patch
+			err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
+		}
+		if scaleTo > -1 {
+			clusterOpLock = util.ScaleLock
+			clusterOpMetadata = strconv.Itoa(scaleTo)
+		}
+	}
+	return
+}
+
+func handleLockedClusterOpScale(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (retryLaterDuration time.Duration, err error) {
+	if scalingToNodes, hasAnn := statefulSet.Annotations[util.ClusterOpsMetadataAnnotation]; hasAnn {
+		if scalingToNodesInt, convErr := strconv.Atoi(scalingToNodes); convErr != nil {
+			logger.Error(convErr, "Could not convert statefulSet annotation to int for scale-down-to information", "annotation", util.ClusterOpsMetadataAnnotation, "value", scalingToNodes)
+			err = convErr
+		} else {
+			replicaManagementComplete := false
+			if scalingToNodesInt < int(*statefulSet.Spec.Replicas) {
+				// Manage scaling down the SolrCloud
+				replicaManagementComplete, err = handleManagedCloudScaleDown(ctx, r, instance, statefulSet, scalingToNodesInt, podList, logger)
+				// } else if scalingToNodesInt > int(*statefulSet.Spec.Replicas) {
+				// TODO: Utilize the scaled-up nodes in the future, however Solr does not currently have APIs for this.
+				// TODO: Think about the order of scale-up and restart when individual nodeService IPs are injected into the pods.
+				// TODO: Will likely want to do a scale-up of the service first, then do the rolling restart of the cluster, then utilize the node.
+			} else {
+				// This shouldn't happen. The ScalingToNodesAnnotation is removed when the statefulSet size changes, through a Patch.
+				// But if it does happen, we should just remove the annotation and move forward.
+				patchedStatefulSet := statefulSet.DeepCopy()
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsLockAnnotation)
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
+				if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+					logger.Error(err, "Error while patching StatefulSet to remove unneeded clusterLockOp annotation for scaling to the current amount of nodes")
+				} else {
+					statefulSet = patchedStatefulSet
+				}
+			}
+
+			// Scale down the statefulSet to represent the new number of utilizedPods, if it is lower than the current number of pods
+			// Also remove the "scalingToNodes" annotation, as that acts as a lock on the cluster, so that other operations,
+			// such as scale-up, pod updates and further scale-down cannot happen at the same time.
+			if replicaManagementComplete {
+				patchedStatefulSet := statefulSet.DeepCopy()
+				patchedStatefulSet.Spec.Replicas = pointer.Int32(int32(scalingToNodesInt))
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsLockAnnotation)
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
+				if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+					logger.Error(err, "Error while patching StatefulSet to scale down SolrCloud", "newUtilizedNodes", scalingToNodesInt)
+				}
+
+				// TODO: Create event for the CRD.
+			} else {
+				// Retry after five minutes to check if the replica management commands have been completed
+				// Retry after five seconds to check if the replica management commands have been completed
+			}
+		}
+		// If everything succeeded, the statefulSet will have an annotation updated
+		// and the reconcile loop will be called again.
+
+		return
+	} else {
+		err = errors.New("no clusterOpMetadata annotation is present in the statefulSet")
+		logger.Error(err, "Cannot perform scaling operation when no scale-to-nodes is provided via the clusterOpMetadata")
+		return time.Second * 10, err
+	}
+}
+
+// handleManagedCloudScaleDown does the logic of a managed and "locked" cloud scale down operation.
+// This will likely take many reconcile loops to complete, as it is moving replicas away from the nodes that will be scaled down.
+func handleManagedCloudScaleDown(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, scaleDownTo int, podList []corev1.Pod, logger logr.Logger) (replicaManagementComplete bool, err error) {
+	// Before doing anything to the pod, make sure that users cannot send requests to the pod anymore.
+	podStoppedReadinessConditions := map[corev1.PodConditionType]podReadinessConditionChange{
+		util.SolrIsNotStoppedReadinessCondition: {
+			reason:  ScaleDown,
+			message: "Pod is being deleted, traffic to the pod must be stopped",
+			status:  false,
+		},
+	}
+
+	if scaleDownTo == 0 {
+		// Delete all collections & data, the user wants no data left if scaling the solrcloud down to 0
+		// This is a much different operation to deleting the SolrCloud/StatefulSet all-together
+		replicaManagementComplete, err = evictAllPods(ctx, r, instance, podList, podStoppedReadinessConditions, logger)
+	} else {
+		// Only evict the last pod, even if we are trying to scale down multiple pods.
+		// Scale down will happen one pod at a time.
+		replicaManagementComplete, err = evictSinglePod(ctx, r, instance, scaleDownTo, podList, podStoppedReadinessConditions, logger)
+	}
+	// TODO: It would be great to support a multi-node scale down when Solr supports evicting many SolrNodes at once.
+
+	return
+}
+
+// scaleCloudUnmanaged does simple scaling of a SolrCloud without moving replicas.
+// This is not a "locked" cluster operation, and does not block other cluster operations from taking place.
+func scaleCloudUnmanaged(ctx context.Context, r *SolrCloudReconciler, statefulSet *appsv1.StatefulSet, scaleTo int, logger logr.Logger) (err error) {
+	// Before doing anything to the pod, make sure that users cannot send requests to the pod anymore.
+	patchedStatefulSet := statefulSet.DeepCopy()
+	patchedStatefulSet.Spec.Replicas = pointer.Int32(int32(scaleTo))
+	if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+		logger.Error(err, "Error while patching StatefulSet to scale SolrCloud.", "fromNodes", *statefulSet.Spec.Replicas, "toNodes", scaleTo)
+	}
+	return err
+}
+
+func evictAllPods(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, podList []corev1.Pod, readinessConditions map[corev1.PodConditionType]podReadinessConditionChange, logger logr.Logger) (podsAreEmpty bool, err error) {
+	// If there are no pods, we can't empty them. Just return true
+	if len(podList) == 0 {
+		return true, nil
+	}
+
+	for i, pod := range podList {
+		if updatedPod, e := EnsurePodReadinessConditions(ctx, r, &pod, readinessConditions, logger); e != nil {
+			err = e
+			return
+		} else {
+			podList[i] = *updatedPod
+		}
+	}
+
+	// Delete all collections & data, the user wants no data left if scaling the solrcloud down to 0
+	// This is a much different operation to deleting the SolrCloud/StatefulSet all-together
+	// TODO: Implement delete all collections. Currently just leave the data
+	//if err, podsAreEmpty = util.DeleteAllCollectionsIfNecessary(ctx, instance, "scaleDown", logger); err != nil {
+	//	logger.Error(err, "Error while evicting all collections in SolrCloud, when scaling down SolrCloud to 0 pods")
+	//}
+	podsAreEmpty = true
+
+	return
+}
+
+func evictSinglePod(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, scaleDownTo int, podList []corev1.Pod, readinessConditions map[corev1.PodConditionType]podReadinessConditionChange, logger logr.Logger) (podIsEmpty bool, err error) {
+	var pod *corev1.Pod
+	podName := instance.GetSolrPodName(scaleDownTo)
+	for _, p := range podList {
+		if p.Name == podName {
+			pod = &p
+			break
+		}
+	}
+
+	// TODO: Get whether the pod has replicas or not
+	podHasReplicas := true
+
+	// The pod doesn't exist, we cannot empty it

Review Comment:
   Maybe the pod never came up for some reason, or maybe Kubernetes pre-emptively deleted it for whatever reason. But you are right, most of these cases would result in the pod existing but not actually running.
   
   99.99% of the time this won't happen, but better safe than sorry.
   
   Maybe we should ensure that the pod is running before trying to evict... because if it can't be created due to resource constraints, there's no way that the eviction is going to work...
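
   Something like this hypothetical guard, maybe (not part of this PR, just to illustrate the idea):

   ```go
   // podIsEvictable is a hypothetical helper (not part of this PR) for the
   // "make sure the pod is running before trying to evict" idea above.
   // Assumes: corev1 "k8s.io/api/core/v1"
   func podIsEvictable(pod *corev1.Pod) bool {
   	// If the pod never came up (e.g. due to resource constraints) or was
   	// deleted out from under us, a REPLACENODE/DELETENODE eviction can never
   	// make progress, so there is no point issuing it yet.
   	return pod != nil && pod.Status.Phase == corev1.PodRunning
   }
   ```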





[GitHub] [solr-operator] HoustonPutman commented on pull request #561: Add managed scale-down for SolrClouds

Posted by "HoustonPutman (via GitHub)" <gi...@apache.org>.
HoustonPutman commented on PR #561:
URL: https://github.com/apache/solr-operator/pull/561#issuecomment-1551464496

   > I assume I can't test the whole thing because the Solr-side changes are not there yet, correct?
   
   You can!! There are even two integration tests for this. The scale-up logic isn't in Solr yet, but it's also not in the solr-operator yet 😉 
   
   > As I was writing this I realized Horizontal Pod Autoscaling covers the above.
   
   That is wonderful to hear! It would definitely be possible to add these to the operator, but much much better to leave it to the HPA 😅 
   
   
   > Maybe you know about these HPA options, but maybe most users don't? I just learned about them 😅 Maybe it's worth documenting a complete tutorial on autoscaling with HPA once all this work is done? I can help if that... helps 🙂
   
   Yeah even I don't. I'm not an HPA expert at all. Having a complete tutorial with autoscaling would be immense, and I will 100% take you up on that offer after this work is done!




[GitHub] [solr-operator] HoustonPutman commented on a diff in pull request #561: Add managed scale-down for SolrClouds

Posted by "HoustonPutman (via GitHub)" <gi...@apache.org>.
HoustonPutman commented on code in PR #561:
URL: https://github.com/apache/solr-operator/pull/561#discussion_r1196571087


##########
controllers/util/solr_update_util.go:
##########
@@ -503,82 +503,65 @@ func GetAllManagedSolrNodeNames(solrCloud *solr.SolrCloud) map[string]bool {
 }
 
 // EvictReplicasForPodIfNecessary takes a solr Pod and migrates all replicas off of that Pod, if the Pod is using ephemeral storage.
-// If the pod is using persistent storage, this function is a no-op.
-// This function MUST be idempotent and return the same list of pods given the same kubernetes/solr state.
-func EvictReplicasForPodIfNecessary(ctx context.Context, solrCloud *solr.SolrCloud, pod *corev1.Pod, podHasReplicas bool, logger logr.Logger) (err error, canDeletePod bool) {
-	var solrDataVolume *corev1.Volume
-
-	// TODO: Remove these checks after v0.7.0, since it will be taken care by the evictReplicas podReadinessCondition
-	dataVolumeName := solrCloud.DataVolumeName()
-	for _, volume := range pod.Spec.Volumes {
-		if volume.Name == dataVolumeName {
-			solrDataVolume = &volume
-			break
-		}
-	}
-
-	// Only evict if the Data volume is not persistent
-	if solrDataVolume != nil && solrDataVolume.VolumeSource.PersistentVolumeClaim == nil {
-		// If the Cloud has 1 or zero pods, and this is the "-0" pod, then delete the data since we can't move it anywhere else
-		// Otherwise, move the replicas to other pods
-		if (solrCloud.Spec.Replicas == nil || *solrCloud.Spec.Replicas < 2) && strings.HasSuffix(pod.Name, "-0") {
-			queryParams := url.Values{}
-			queryParams.Add("action", "DELETENODE")
-			queryParams.Add("node", SolrNodeName(solrCloud, pod.Name))
-			// TODO: Figure out a way to do this, since DeleteNode will not delete the last replica of every type...
-			canDeletePod = true
-		} else {
-			requestId := "move-replicas-" + pod.Name
-
-			// First check to see if the Async Replace request has started
-			if asyncState, message, asyncErr := solr_api.CheckAsyncRequest(ctx, solrCloud, requestId); asyncErr != nil {
-				err = asyncErr
-			} else if asyncState == "notfound" {
-				if podHasReplicas {
-					// Submit new Replace Node request
-					replaceResponse := &solr_api.SolrAsyncResponse{}
-					queryParams := url.Values{}
-					queryParams.Add("action", "REPLACENODE")
-					queryParams.Add("parallel", "true")
-					queryParams.Add("sourceNode", SolrNodeName(solrCloud, pod.Name))
-					queryParams.Add("async", requestId)
-					err = solr_api.CallCollectionsApi(ctx, solrCloud, queryParams, replaceResponse)
-					if hasError, apiErr := solr_api.CheckForCollectionsApiError("REPLACENODE", replaceResponse.ResponseHeader); hasError {
-						err = apiErr
-					}
-					if err == nil {
-						logger.Info("Migrating all replicas off of pod before deletion.", "requestId", requestId, "pod", pod.Name)
-					} else {
-						logger.Error(err, "Could not migrate all replicas off of pod before deletion. Will try again later.", "requestId", requestId, "message", message)
-					}
+// For updates this will only be called for pods using ephemeral data.
+// For scale-down operations, this can be called for pods using ephemeral or persistent data.
+func EvictReplicasForPodIfNecessary(ctx context.Context, solrCloud *solr.SolrCloud, pod *corev1.Pod, podHasReplicas bool, evictionReason string, logger logr.Logger) (err error, canDeletePod bool) {
+	logger = logger.WithValues("evictionReason", evictionReason)
+	// If the Cloud has 1 or zero pods, and this is the "-0" pod, then delete the data since we can't move it anywhere else
+	// Otherwise, move the replicas to other pods
+	if (solrCloud.Spec.Replicas == nil || *solrCloud.Spec.Replicas < 2) && strings.HasSuffix(pod.Name, "-0") {
+		queryParams := url.Values{}
+		queryParams.Add("action", "DELETENODE")
+		queryParams.Add("node", SolrNodeName(solrCloud, pod.Name))
+		// TODO: Figure out a way to do this, since DeleteNode will not delete the last replica of every type...

Review Comment:
   Ahhh yes.... So this relates very closely to https://github.com/apache/solr-operator/pull/561#discussion_r1195591555
   
   Should we be deleting the replica, or instead maybe scaling up to a second pod, then doing an update, then scaling back down? Either way, it's such a small edge case (running a single Solr pod without persistent data) that I'm not sure it's worth the effort. We should still document the edge case, though it's unrelated to this ticket. We should open another issue for this.





[GitHub] [solr-operator] radu-gheorghe commented on a diff in pull request #561: Add managed scale-down for SolrClouds

Posted by "radu-gheorghe (via GitHub)" <gi...@apache.org>.
radu-gheorghe commented on code in PR #561:
URL: https://github.com/apache/solr-operator/pull/561#discussion_r1196346376


##########
controllers/util/solr_update_util.go:
##########
@@ -503,82 +503,65 @@ func GetAllManagedSolrNodeNames(solrCloud *solr.SolrCloud) map[string]bool {
 }
 
 // EvictReplicasForPodIfNecessary takes a solr Pod and migrates all replicas off of that Pod, if the Pod is using ephemeral storage.
-// If the pod is using persistent storage, this function is a no-op.
-// This function MUST be idempotent and return the same list of pods given the same kubernetes/solr state.
-func EvictReplicasForPodIfNecessary(ctx context.Context, solrCloud *solr.SolrCloud, pod *corev1.Pod, podHasReplicas bool, logger logr.Logger) (err error, canDeletePod bool) {
-	var solrDataVolume *corev1.Volume
-
-	// TODO: Remove these checks after v0.7.0, since it will be taken care by the evictReplicas podReadinessCondition
-	dataVolumeName := solrCloud.DataVolumeName()
-	for _, volume := range pod.Spec.Volumes {
-		if volume.Name == dataVolumeName {
-			solrDataVolume = &volume
-			break
-		}
-	}
-
-	// Only evict if the Data volume is not persistent
-	if solrDataVolume != nil && solrDataVolume.VolumeSource.PersistentVolumeClaim == nil {
-		// If the Cloud has 1 or zero pods, and this is the "-0" pod, then delete the data since we can't move it anywhere else
-		// Otherwise, move the replicas to other pods
-		if (solrCloud.Spec.Replicas == nil || *solrCloud.Spec.Replicas < 2) && strings.HasSuffix(pod.Name, "-0") {
-			queryParams := url.Values{}
-			queryParams.Add("action", "DELETENODE")
-			queryParams.Add("node", SolrNodeName(solrCloud, pod.Name))
-			// TODO: Figure out a way to do this, since DeleteNode will not delete the last replica of every type...
-			canDeletePod = true
-		} else {
-			requestId := "move-replicas-" + pod.Name
-
-			// First check to see if the Async Replace request has started
-			if asyncState, message, asyncErr := solr_api.CheckAsyncRequest(ctx, solrCloud, requestId); asyncErr != nil {
-				err = asyncErr
-			} else if asyncState == "notfound" {
-				if podHasReplicas {
-					// Submit new Replace Node request
-					replaceResponse := &solr_api.SolrAsyncResponse{}
-					queryParams := url.Values{}
-					queryParams.Add("action", "REPLACENODE")
-					queryParams.Add("parallel", "true")
-					queryParams.Add("sourceNode", SolrNodeName(solrCloud, pod.Name))
-					queryParams.Add("async", requestId)
-					err = solr_api.CallCollectionsApi(ctx, solrCloud, queryParams, replaceResponse)
-					if hasError, apiErr := solr_api.CheckForCollectionsApiError("REPLACENODE", replaceResponse.ResponseHeader); hasError {
-						err = apiErr
-					}
-					if err == nil {
-						logger.Info("Migrating all replicas off of pod before deletion.", "requestId", requestId, "pod", pod.Name)
-					} else {
-						logger.Error(err, "Could not migrate all replicas off of pod before deletion. Will try again later.", "requestId", requestId, "message", message)
-					}
+// For updates this will only be called for pods using ephemeral data.
+// For scale-down operations, this can be called for pods using ephemeral or persistent data.
+func EvictReplicasForPodIfNecessary(ctx context.Context, solrCloud *solr.SolrCloud, pod *corev1.Pod, podHasReplicas bool, evictionReason string, logger logr.Logger) (err error, canDeletePod bool) {
+	logger = logger.WithValues("evictionReason", evictionReason)
+	// If the Cloud has 1 or zero pods, and this is the "-0" pod, then delete the data since we can't move it anywhere else
+	// Otherwise, move the replicas to other pods
+	if (solrCloud.Spec.Replicas == nil || *solrCloud.Spec.Replicas < 2) && strings.HasSuffix(pod.Name, "-0") {
+		queryParams := url.Values{}
+		queryParams.Add("action", "DELETENODE")
+		queryParams.Add("node", SolrNodeName(solrCloud, pod.Name))
+		// TODO: Figure out a way to do this, since DeleteNode will not delete the last replica of every type...

Review Comment:
   Maybe we can go crazy and just delete all the collections from the collections API's "LIST" output?
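
   Roughly something like this (a sketch only, not code from this PR; the `listResponse` type is hypothetical, and it assumes `CallCollectionsApi` accepts an arbitrary response struct; the other identifiers are the ones already used in this file):

   ```go
   // listResponse is a hypothetical shape for the Collections API "LIST" output.
   type listResponse struct {
   	Collections []string `json:"collections"`
   }

   // deleteAllCollections sketches "just delete everything LIST returns".
   func deleteAllCollections(ctx context.Context, solrCloud *solr.SolrCloud, logger logr.Logger) error {
   	list := &listResponse{}
   	listParams := url.Values{}
   	listParams.Add("action", "LIST")
   	if err := solr_api.CallCollectionsApi(ctx, solrCloud, listParams, list); err != nil {
   		return err
   	}
   	for _, collection := range list.Collections {
   		deleteParams := url.Values{}
   		deleteParams.Add("action", "DELETE")
   		deleteParams.Add("name", collection)
   		deleteResp := &solr_api.SolrAsyncResponse{}
   		if err := solr_api.CallCollectionsApi(ctx, solrCloud, deleteParams, deleteResp); err != nil {
   			return err
   		}
   		if hasError, apiErr := solr_api.CheckForCollectionsApiError("DELETE", deleteResp.ResponseHeader); hasError {
   			return apiErr
   		}
   		logger.Info("Deleted collection as part of scale-down to zero", "collection", collection)
   	}
   	return nil
   }
   ```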





[GitHub] [solr-operator] HoustonPutman commented on a diff in pull request #561: Add managed scale-down for SolrClouds

Posted by "HoustonPutman (via GitHub)" <gi...@apache.org>.
HoustonPutman commented on code in PR #561:
URL: https://github.com/apache/solr-operator/pull/561#discussion_r1198261300


##########
controllers/solr_cluster_ops_util.go:
##########
@@ -0,0 +1,218 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package controllers
+
+import (
+	"context"
+	"errors"
+	solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
+	"github.com/apache/solr-operator/controllers/util"
+	"github.com/go-logr/logr"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/utils/pointer"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"strconv"
+	"time"
+)
+
+func determineScaleClusterOpLockIfNecessary(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (clusterOpLock string, clusterOpMetadata string, retryLaterDuration time.Duration, err error) {
+	desiredPods := int(*instance.Spec.Replicas)
+	configuredPods := int(*statefulSet.Spec.Replicas)
+	if desiredPods != configuredPods {
+		scaleTo := -1
+		// Start a scaling operation
+		if desiredPods < configuredPods {
+			// Scale down!
+			// The option is enabled by default, so treat "nil" like "true"
+			if instance.Spec.Autoscaling.VacatePodsOnScaleDown == nil || *instance.Spec.Autoscaling.VacatePodsOnScaleDown {
+				if desiredPods > 0 {
+					// We only support scaling down one pod at a time if not scaling down to 0 pods
+					scaleTo = configuredPods - 1
+				} else {
+					scaleTo = 0
+				}
+			} else {
+				// The cloud is not setup to use managed scale-down
+				err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
+			}
+		} else if desiredPods > configuredPods {
+			// Scale up!
+			// TODO: replicasScaleUp is not supported, so do not make a clusterOp out of it, just do the patch
+			err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
+		}
+		if scaleTo > -1 {
+			clusterOpLock = util.ScaleLock
+			clusterOpMetadata = strconv.Itoa(scaleTo)
+		}
+	}
+	return
+}
+
+func handleLockedClusterOpScale(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (retryLaterDuration time.Duration, err error) {
+	if scalingToNodes, hasAnn := statefulSet.Annotations[util.ClusterOpsMetadataAnnotation]; hasAnn {
+		if scalingToNodesInt, convErr := strconv.Atoi(scalingToNodes); convErr != nil {
+			logger.Error(convErr, "Could not convert statefulSet annotation to int for scale-down-to information", "annotation", util.ClusterOpsMetadataAnnotation, "value", scalingToNodes)
+			err = convErr
+		} else {
+			replicaManagementComplete := false
+			if scalingToNodesInt < int(*statefulSet.Spec.Replicas) {
+				// Manage scaling down the SolrCloud
+				replicaManagementComplete, err = handleManagedCloudScaleDown(ctx, r, instance, statefulSet, scalingToNodesInt, podList, logger)
+				// } else if scalingToNodesInt > int(*statefulSet.Spec.Replicas) {
+				// TODO: Utilize the scaled-up nodes in the future, however Solr does not currently have APIs for this.
+				// TODO: Think about the order of scale-up and restart when individual nodeService IPs are injected into the pods.
+				// TODO: Will likely want to do a scale-up of the service first, then do the rolling restart of the cluster, then utilize the node.
+			} else {
+				// This shouldn't happen. The ScalingToNodesAnnotation is removed when the statefulSet size changes, through a Patch.
+				// But if it does happen, we should just remove the annotation and move forward.
+				patchedStatefulSet := statefulSet.DeepCopy()
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsLockAnnotation)
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
+				if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+					logger.Error(err, "Error while patching StatefulSet to remove unneeded clusterLockOp annotation for scaling to the current amount of nodes")
+				} else {
+					statefulSet = patchedStatefulSet
+				}
+			}
+
+			// Scale down the statefulSet to represent the new number of utilizedPods, if it is lower than the current number of pods
+			// Also remove the "scalingToNodes" annotation, as that acts as a lock on the cluster, so that other operations,
+			// such as scale-up, pod updates and further scale-down cannot happen at the same time.
+			if replicaManagementComplete {
+				patchedStatefulSet := statefulSet.DeepCopy()
+				patchedStatefulSet.Spec.Replicas = pointer.Int32(int32(scalingToNodesInt))
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsLockAnnotation)
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
+				if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+					logger.Error(err, "Error while patching StatefulSet to scale down SolrCloud", "newUtilizedNodes", scalingToNodesInt)
+				}
+
+				// TODO: Create event for the CRD.
+			} else {
+				// Retry after five minutes to check if the replica management commands have been completed
+				// Retry after five seconds to check if the replica management commands have been completed
+			}
+		}
+		// If everything succeeded, the statefulSet will have an annotation updated
+		// and the reconcile loop will be called again.
+
+		return
+	} else {
+		err = errors.New("no clusterOpMetadata annotation is present in the statefulSet")
+		logger.Error(err, "Cannot perform scaling operation when no scale-to-nodes is provided via the clusterOpMetadata")
+		return time.Second * 10, err
+	}
+}
+
+// handleManagedCloudScaleDown does the logic of a managed and "locked" cloud scale down operation.
+// This will likely take many reconcile loops to complete, as it is moving replicas away from the nodes that will be scaled down.
+func handleManagedCloudScaleDown(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, scaleDownTo int, podList []corev1.Pod, logger logr.Logger) (replicaManagementComplete bool, err error) {
+	// Before doing anything to the pod, make sure that users cannot send requests to the pod anymore.
+	podStoppedReadinessConditions := map[corev1.PodConditionType]podReadinessConditionChange{
+		util.SolrIsNotStoppedReadinessCondition: {
+			reason:  ScaleDown,
+			message: "Pod is being deleted, traffic to the pod must be stopped",
+			status:  false,
+		},
+	}
+
+	if scaleDownTo == 0 {
+		// Delete all collections & data, the user wants no data left if scaling the solrcloud down to 0
+		// This is a much different operation to deleting the SolrCloud/StatefulSet all-together
+		replicaManagementComplete, err = evictAllPods(ctx, r, instance, podList, podStoppedReadinessConditions, logger)
+	} else {
+		// Only evict the last pod, even if we are trying to scale down multiple pods.
+		// Scale down will happen one pod at a time.
+		replicaManagementComplete, err = evictSinglePod(ctx, r, instance, scaleDownTo, podList, podStoppedReadinessConditions, logger)
+	}
+	// TODO: It would be great to support a multi-node scale down when Solr supports evicting many SolrNodes at once.
+
+	return
+}
+
+// scaleCloudUnmanaged does simple scaling of a SolrCloud without moving replicas.
+// This is not a "locked" cluster operation, and does not block other cluster operations from taking place.
+func scaleCloudUnmanaged(ctx context.Context, r *SolrCloudReconciler, statefulSet *appsv1.StatefulSet, scaleTo int, logger logr.Logger) (err error) {
+	// Before doing anything to the pod, make sure that users cannot send requests to the pod anymore.
+	patchedStatefulSet := statefulSet.DeepCopy()
+	patchedStatefulSet.Spec.Replicas = pointer.Int32(int32(scaleTo))
+	if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+		logger.Error(err, "Error while patching StatefulSet to scale SolrCloud.", "fromNodes", *statefulSet.Spec.Replicas, "toNodes", scaleTo)
+	}
+	return err
+}
+
+func evictAllPods(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, podList []corev1.Pod, readinessConditions map[corev1.PodConditionType]podReadinessConditionChange, logger logr.Logger) (podsAreEmpty bool, err error) {
+	// If there are no pods, we can't empty them. Just return true
+	if len(podList) == 0 {
+		return true, nil
+	}
+
+	for i, pod := range podList {
+		if updatedPod, e := EnsurePodReadinessConditions(ctx, r, &pod, readinessConditions, logger); e != nil {
+			err = e
+			return
+		} else {
+			podList[i] = *updatedPod
+		}
+	}
+
+	// Delete all collections & data, the user wants no data left if scaling the solrcloud down to 0
+	// This is a much different operation to deleting the SolrCloud/StatefulSet all-together
+	// TODO: Implement delete all collections. Currently just leave the data
+	//if err, podsAreEmpty = util.DeleteAllCollectionsIfNecessary(ctx, instance, "scaleDown", logger); err != nil {
+	//	logger.Error(err, "Error while evicting all collections in SolrCloud, when scaling down SolrCloud to 0 pods")
+	//}
+	podsAreEmpty = true
+
+	return
+}
+
+func evictSinglePod(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, scaleDownTo int, podList []corev1.Pod, readinessConditions map[corev1.PodConditionType]podReadinessConditionChange, logger logr.Logger) (podIsEmpty bool, err error) {
+	var pod *corev1.Pod
+	podName := instance.GetSolrPodName(scaleDownTo)
+	for _, p := range podList {
+		if p.Name == podName {
+			pod = &p
+			break
+		}
+	}
+
+	// TODO: Get whether the pod has replicas or not
+	podHasReplicas := true
+
+	// The pod doesn't exist, we cannot empty it

Review Comment:
   Might as well just rely on the collections API command to fail then... No need to add the logic in ourselves.





[GitHub] [solr-operator] tflobbe commented on a diff in pull request #561: Add managed scale-down for SolrClouds

Posted by "tflobbe (via GitHub)" <gi...@apache.org>.
tflobbe commented on code in PR #561:
URL: https://github.com/apache/solr-operator/pull/561#discussion_r1195591555


##########
controllers/solr_cluster_ops_util.go:
##########
@@ -0,0 +1,218 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package controllers
+
+import (
+	"context"
+	"errors"
+	solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
+	"github.com/apache/solr-operator/controllers/util"
+	"github.com/go-logr/logr"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/utils/pointer"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"strconv"
+	"time"
+)
+
+func determineScaleClusterOpLockIfNecessary(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (clusterOpLock string, clusterOpMetadata string, retryLaterDuration time.Duration, err error) {
+	desiredPods := int(*instance.Spec.Replicas)
+	configuredPods := int(*statefulSet.Spec.Replicas)
+	if desiredPods != configuredPods {
+		scaleTo := -1
+		// Start a scaling operation
+		if desiredPods < configuredPods {
+			// Scale down!
+			// The option is enabled by default, so treat "nil" like "true"
+			if instance.Spec.Autoscaling.VacatePodsOnScaleDown == nil || *instance.Spec.Autoscaling.VacatePodsOnScaleDown {
+				if desiredPods > 0 {
+					// We only support scaling down one pod at a time if not scaling down to 0 pods
+					scaleTo = configuredPods - 1
+				} else {
+					scaleTo = 0
+				}
+			} else {
+				// The cloud is not setup to use managed scale-down
+				err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
+			}
+		} else if desiredPods > configuredPods {
+			// Scale up!
+			// TODO: replicasScaleUp is not supported, so do not make a clusterOp out of it, just do the patch
+			err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
+		}
+		if scaleTo > -1 {
+			clusterOpLock = util.ScaleLock
+			clusterOpMetadata = strconv.Itoa(scaleTo)
+		}
+	}
+	return
+}
+
+func handleLockedClusterOpScale(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (retryLaterDuration time.Duration, err error) {
+	if scalingToNodes, hasAnn := statefulSet.Annotations[util.ClusterOpsMetadataAnnotation]; hasAnn {
+		if scalingToNodesInt, convErr := strconv.Atoi(scalingToNodes); convErr != nil {
+			logger.Error(convErr, "Could not convert statefulSet annotation to int for scale-down-to information", "annotation", util.ClusterOpsMetadataAnnotation, "value", scalingToNodes)
+			err = convErr
+		} else {
+			replicaManagementComplete := false
+			if scalingToNodesInt < int(*statefulSet.Spec.Replicas) {
+				// Manage scaling down the SolrCloud
+				replicaManagementComplete, err = handleManagedCloudScaleDown(ctx, r, instance, statefulSet, scalingToNodesInt, podList, logger)
+				// } else if scalingToNodesInt > int(*statefulSet.Spec.Replicas) {
+				// TODO: Utilize the scaled-up nodes in the future, however Solr does not currently have APIs for this.
+				// TODO: Think about the order of scale-up and restart when individual nodeService IPs are injected into the pods.
+				// TODO: Will likely want to do a scale-up of the service first, then do the rolling restart of the cluster, then utilize the node.
+			} else {
+				// This shouldn't happen. The ScalingToNodesAnnotation is removed when the statefulSet size changes, through a Patch.
+				// But if it does happen, we should just remove the annotation and move forward.
+				patchedStatefulSet := statefulSet.DeepCopy()
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsLockAnnotation)
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
+				if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+					logger.Error(err, "Error while patching StatefulSet to remove unneeded clusterLockOp annotation for scaling to the current amount of nodes")
+				} else {
+					statefulSet = patchedStatefulSet
+				}
+			}
+
+			// Scale down the statefulSet to represent the new number of utilizedPods, if it is lower than the current number of pods
+			// Also remove the "scalingToNodes" annotation, as that acts as a lock on the cluster, so that other operations,
+			// such as scale-up, pod updates and further scale-down cannot happen at the same time.
+			if replicaManagementComplete {
+				patchedStatefulSet := statefulSet.DeepCopy()
+				patchedStatefulSet.Spec.Replicas = pointer.Int32(int32(scalingToNodesInt))
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsLockAnnotation)
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
+				if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+					logger.Error(err, "Error while patching StatefulSet to scale down SolrCloud", "newUtilizedNodes", scalingToNodesInt)
+				}
+
+				// TODO: Create event for the CRD.
+			} else {
+				// Retry after five minutes to check if the replica management commands have been completed
+				// Retry after five seconds to check if the replica management commands have been completed
+			}
+		}
+		// If everything succeeded, the statefulSet will have an annotation updated
+		// and the reconcile loop will be called again.
+
+		return
+	} else {
+		err = errors.New("no clusterOpMetadata annotation is present in the statefulSet")
+		logger.Error(err, "Cannot perform scaling operation when no scale-to-nodes is provided via the clusterOpMetadata")
+		return time.Second * 10, err
+	}
+}
+
+// handleManagedCloudScaleDown does the logic of a managed and "locked" cloud scale down operation.
+// This will likely take many reconcile loops to complete, as it is moving replicas away from the nodes that will be scaled down.
+func handleManagedCloudScaleDown(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, scaleDownTo int, podList []corev1.Pod, logger logr.Logger) (replicaManagementComplete bool, err error) {
+	// Before doing anything to the pod, make sure that users cannot send requests to the pod anymore.
+	podStoppedReadinessConditions := map[corev1.PodConditionType]podReadinessConditionChange{
+		util.SolrIsNotStoppedReadinessCondition: {
+			reason:  ScaleDown,
+			message: "Pod is being deleted, traffic to the pod must be stopped",
+			status:  false,
+		},
+	}
+
+	if scaleDownTo == 0 {
+		// Delete all collections & data, the user wants no data left if scaling the solrcloud down to 0
+		// This is a much different operation to deleting the SolrCloud/StatefulSet all-together
+		replicaManagementComplete, err = evictAllPods(ctx, r, instance, podList, podStoppedReadinessConditions, logger)

Review Comment:
   Question: Why is this so different from deleting the SolrCloud/StatefulSet? Isn't this going to delete all data and stop the pods?



##########
controllers/util/solr_update_util.go:
##########
@@ -503,82 +503,65 @@ func GetAllManagedSolrNodeNames(solrCloud *solr.SolrCloud) map[string]bool {
 }
 
 // EvictReplicasForPodIfNecessary takes a solr Pod and migrates all replicas off of that Pod, if the Pod is using ephemeral storage.
-// If the pod is using persistent storage, this function is a no-op.
-// This function MUST be idempotent and return the same list of pods given the same kubernetes/solr state.
-func EvictReplicasForPodIfNecessary(ctx context.Context, solrCloud *solr.SolrCloud, pod *corev1.Pod, podHasReplicas bool, logger logr.Logger) (err error, canDeletePod bool) {
-	var solrDataVolume *corev1.Volume
-
-	// TODO: Remove these checks after v0.7.0, since it will be taken care by the evictReplicas podReadinessCondition
-	dataVolumeName := solrCloud.DataVolumeName()
-	for _, volume := range pod.Spec.Volumes {
-		if volume.Name == dataVolumeName {
-			solrDataVolume = &volume
-			break
-		}
-	}
-
-	// Only evict if the Data volume is not persistent
-	if solrDataVolume != nil && solrDataVolume.VolumeSource.PersistentVolumeClaim == nil {
-		// If the Cloud has 1 or zero pods, and this is the "-0" pod, then delete the data since we can't move it anywhere else
-		// Otherwise, move the replicas to other pods
-		if (solrCloud.Spec.Replicas == nil || *solrCloud.Spec.Replicas < 2) && strings.HasSuffix(pod.Name, "-0") {
-			queryParams := url.Values{}
-			queryParams.Add("action", "DELETENODE")
-			queryParams.Add("node", SolrNodeName(solrCloud, pod.Name))
-			// TODO: Figure out a way to do this, since DeleteNode will not delete the last replica of every type...
-			canDeletePod = true
-		} else {
-			requestId := "move-replicas-" + pod.Name
-
-			// First check to see if the Async Replace request has started
-			if asyncState, message, asyncErr := solr_api.CheckAsyncRequest(ctx, solrCloud, requestId); asyncErr != nil {
-				err = asyncErr
-			} else if asyncState == "notfound" {
-				if podHasReplicas {
-					// Submit new Replace Node request
-					replaceResponse := &solr_api.SolrAsyncResponse{}
-					queryParams := url.Values{}
-					queryParams.Add("action", "REPLACENODE")
-					queryParams.Add("parallel", "true")
-					queryParams.Add("sourceNode", SolrNodeName(solrCloud, pod.Name))
-					queryParams.Add("async", requestId)
-					err = solr_api.CallCollectionsApi(ctx, solrCloud, queryParams, replaceResponse)
-					if hasError, apiErr := solr_api.CheckForCollectionsApiError("REPLACENODE", replaceResponse.ResponseHeader); hasError {
-						err = apiErr
-					}
-					if err == nil {
-						logger.Info("Migrating all replicas off of pod before deletion.", "requestId", requestId, "pod", pod.Name)
-					} else {
-						logger.Error(err, "Could not migrate all replicas off of pod before deletion. Will try again later.", "requestId", requestId, "message", message)
-					}
+// For updates this will only be called for pods using ephemeral data.
+// For scale-down operations, this can be called for pods using ephemeral or persistent data.
+func EvictReplicasForPodIfNecessary(ctx context.Context, solrCloud *solr.SolrCloud, pod *corev1.Pod, podHasReplicas bool, evictionReason string, logger logr.Logger) (err error, canDeletePod bool) {
+	logger = logger.WithValues("evictionReason", evictionReason)
+	// If the Cloud has 1 or zero pods, and this is the "-0" pod, then delete the data since we can't move it anywhere else
+	// Otherwise, move the replicas to other pods
+	if (solrCloud.Spec.Replicas == nil || *solrCloud.Spec.Replicas < 2) && strings.HasSuffix(pod.Name, "-0") {
+		queryParams := url.Values{}
+		queryParams.Add("action", "DELETENODE")
+		queryParams.Add("node", SolrNodeName(solrCloud, pod.Name))
+		// TODO: Figure out a way to do this, since DeleteNode will not delete the last replica of every type...
+		canDeletePod = true
+	} else {
+		requestId := "move-replicas-" + pod.Name
+
+		// First check to see if the Async Replace request has started
+		if asyncState, message, asyncErr := solr_api.CheckAsyncRequest(ctx, solrCloud, requestId); asyncErr != nil {
+			err = asyncErr
+		} else if asyncState == "notfound" {
+			if podHasReplicas {
+				// Submit new Replace Node request
+				replaceResponse := &solr_api.SolrAsyncResponse{}
+				queryParams := url.Values{}
+				queryParams.Add("action", "REPLACENODE")
+				queryParams.Add("parallel", "true")
+				queryParams.Add("sourceNode", SolrNodeName(solrCloud, pod.Name))
+				queryParams.Add("async", requestId)
+				err = solr_api.CallCollectionsApi(ctx, solrCloud, queryParams, replaceResponse)
+				if hasError, apiErr := solr_api.CheckForCollectionsApiError("REPLACENODE", replaceResponse.ResponseHeader); hasError {
+					err = apiErr
+				}
+				if err == nil {
+					logger.Info("Migrating all replicas off of pod before deletion.", "requestId", requestId, "pod", pod.Name)
 				} else {
-					canDeletePod = true
+					logger.Error(err, "Could not migrate all replicas off of pod before deletion. Will try again later.", "requestId", requestId, "message", message)
 				}
-
 			} else {
-				logger.Info("Found async status", "requestId", requestId, "state", asyncState)
-				// Only continue to delete the pod if the ReplaceNode request is complete and successful
-				if asyncState == "completed" {
-					canDeletePod = true
-					logger.Info("Migration of all replicas off of pod before deletion complete. Pod can now be deleted.", "pod", pod.Name)
-				} else if asyncState == "failed" {
-					logger.Info("Migration of all replicas off of pod before deletion failed. Will try again.", "pod", pod.Name, "message", message)
-				}
+				canDeletePod = true
+			}
 
-				// Delete the async request Id if the async request is successful or failed.
-				// If the request failed, this will cause a retry since the next reconcile won't find the async requestId in Solr.
-				if asyncState == "completed" || asyncState == "failed" {
-					if message, err = solr_api.DeleteAsyncRequest(ctx, solrCloud, requestId); err != nil {
-						logger.Error(err, "Could not delete Async request status.", "requestId", requestId, "message", message)
-					} else {
-						canDeletePod = false
-					}
+		} else {
+			logger.Info("Found async status", "requestId", requestId, "state", asyncState)
+			// Only continue to delete the pod if the ReplaceNode request is complete and successful
+			if asyncState == "completed" {
+				canDeletePod = true

Review Comment:
   Do you think it makes sense to check the pod again before proceeding to delete it? Maybe keep doing `REPLACENODE` until it's empty? There is still a race condition, but a much shorter one.
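
   A rough sketch of the kind of re-check I mean, in case it helps (not part of this PR; it assumes a small hypothetical helper in the same package, with field names borrowed from the cluster-status response types the operator parses elsewhere):

   ```go
   // nodeIsEmpty is a hypothetical helper: it re-reads CLUSTERSTATUS and reports whether
   // any replica still lives on the given Solr node.
   func nodeIsEmpty(ctx context.Context, solrCloud *solr.SolrCloud, nodeName string) (empty bool, err error) {
   	clusterResp := &solr_api.SolrClusterStatusResponse{}
   	queryParams := url.Values{}
   	queryParams.Add("action", "CLUSTERSTATUS")
   	if err = solr_api.CallCollectionsApi(ctx, solrCloud, queryParams, clusterResp); err == nil {
   		if hasError, apiErr := solr_api.CheckForCollectionsApiError("CLUSTERSTATUS", clusterResp.ResponseHeader); hasError {
   			err = apiErr
   		}
   	}
   	if err != nil {
   		return false, err
   	}
   	for _, collection := range clusterResp.ClusterStatus.Collections {
   		for _, shard := range collection.Shards {
   			for _, replica := range shard.Replicas {
   				if replica.NodeName == nodeName {
   					return false, nil
   				}
   			}
   		}
   	}
   	return true, nil
   }
   ```

   The `completed` branch would then only set `canDeletePod` once `nodeIsEmpty` returns true, and otherwise delete the async status so that the next reconcile submits a fresh `REPLACENODE`. That doesn't close the race entirely, but it shrinks the window to the gap between the CLUSTERSTATUS call and the pod deletion.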



##########
controllers/solr_cluster_ops_util.go:
##########
@@ -0,0 +1,218 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package controllers
+
+import (
+	"context"
+	"errors"
+	solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
+	"github.com/apache/solr-operator/controllers/util"
+	"github.com/go-logr/logr"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/utils/pointer"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"strconv"
+	"time"
+)
+
+func determineScaleClusterOpLockIfNecessary(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (clusterOpLock string, clusterOpMetadata string, retryLaterDuration time.Duration, err error) {
+	desiredPods := int(*instance.Spec.Replicas)
+	configuredPods := int(*statefulSet.Spec.Replicas)
+	if desiredPods != configuredPods {
+		scaleTo := -1
+		// Start a scaling operation
+		if desiredPods < configuredPods {
+			// Scale down!
+			// The option is enabled by default, so treat "nil" like "true"
+			if instance.Spec.Autoscaling.VacatePodsOnScaleDown == nil || *instance.Spec.Autoscaling.VacatePodsOnScaleDown {
+				if desiredPods > 0 {
+					// We only support scaling down one pod at a time if not scaling down to 0 pods
+					scaleTo = configuredPods - 1
+				} else {
+					scaleTo = 0
+				}
+			} else {
+				// The cloud is not setup to use managed scale-down
+				err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
+			}
+		} else if desiredPods > configuredPods {
+			// Scale up!
+			// TODO: replicasScaleUp is not supported, so do not make a clusterOp out of it, just do the patch
+			err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
+		}
+		if scaleTo > -1 {
+			clusterOpLock = util.ScaleLock
+			clusterOpMetadata = strconv.Itoa(scaleTo)
+		}
+	}
+	return
+}
+
+func handleLockedClusterOpScale(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (retryLaterDuration time.Duration, err error) {
+	if scalingToNodes, hasAnn := statefulSet.Annotations[util.ClusterOpsMetadataAnnotation]; hasAnn {
+		if scalingToNodesInt, convErr := strconv.Atoi(scalingToNodes); convErr != nil {
+			logger.Error(convErr, "Could not convert statefulSet annotation to int for scale-down-to information", "annotation", util.ClusterOpsMetadataAnnotation, "value", scalingToNodes)
+			err = convErr
+		} else {
+			replicaManagementComplete := false
+			if scalingToNodesInt < int(*statefulSet.Spec.Replicas) {
+				// Manage scaling down the SolrCloud
+				replicaManagementComplete, err = handleManagedCloudScaleDown(ctx, r, instance, statefulSet, scalingToNodesInt, podList, logger)
+				// } else if scalingToNodesInt > int(*statefulSet.Spec.Replicas) {
+				// TODO: Utilize the scaled-up nodes in the future, however Solr does not currently have APIs for this.
+				// TODO: Think about the order of scale-up and restart when individual nodeService IPs are injected into the pods.
+				// TODO: Will likely want to do a scale-up of the service first, then do the rolling restart of the cluster, then utilize the node.
+			} else {
+				// This shouldn't happen. The ScalingToNodesAnnotation is removed when the statefulSet size changes, through a Patch.
+				// But if it does happen, we should just remove the annotation and move forward.
+				patchedStatefulSet := statefulSet.DeepCopy()
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsLockAnnotation)
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
+				if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+					logger.Error(err, "Error while patching StatefulSet to remove unneeded clusterLockOp annotation for scaling to the current amount of nodes")
+				} else {
+					statefulSet = patchedStatefulSet
+				}
+			}
+
+			// Scale down the statefulSet to represent the new number of utilizedPods, if it is lower than the current number of pods
+			// Also remove the "scalingToNodes" annotation, as that acts as a lock on the cluster, so that other operations,
+			// such as scale-up, pod updates and further scale-down cannot happen at the same time.
+			if replicaManagementComplete {
+				patchedStatefulSet := statefulSet.DeepCopy()
+				patchedStatefulSet.Spec.Replicas = pointer.Int32(int32(scalingToNodesInt))
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsLockAnnotation)
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
+				if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+					logger.Error(err, "Error while patching StatefulSet to scale down SolrCloud", "newUtilizedNodes", scalingToNodesInt)
+				}
+
+				// TODO: Create event for the CRD.
+			} else {
+				// Retry after five seconds to check if the replica management commands have been completed
+				retryLaterDuration = time.Second * 5
+			}
+		}
+		// If everything succeeded, the statefulSet will have an annotation updated
+		// and the reconcile loop will be called again.
+
+		return
+	} else {
+		err = errors.New("no clusterOpMetadata annotation is present in the statefulSet")
+		logger.Error(err, "Cannot perform scaling operation when no scale-to-nodes is provided via the clusterOpMetadata")
+		return time.Second * 10, err
+	}
+}
+
+// handleManagedCloudScaleDown does the logic of a managed and "locked" cloud scale down operation.
+// This will likely take many reconcile loops to complete, as it is moving replicas away from the nodes that will be scaled down.
+func handleManagedCloudScaleDown(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, scaleDownTo int, podList []corev1.Pod, logger logr.Logger) (replicaManagementComplete bool, err error) {
+	// Before doing anything to the pod, make sure that users cannot send requests to the pod anymore.
+	podStoppedReadinessConditions := map[corev1.PodConditionType]podReadinessConditionChange{
+		util.SolrIsNotStoppedReadinessCondition: {
+			reason:  ScaleDown,
+			message: "Pod is being deleted, traffic to the pod must be stopped",
+			status:  false,
+		},
+	}
+
+	if scaleDownTo == 0 {
+		// Delete all collections & data, the user wants no data left if scaling the solrcloud down to 0
+		// This is a much different operation to deleting the SolrCloud/StatefulSet all-together
+		replicaManagementComplete, err = evictAllPods(ctx, r, instance, podList, podStoppedReadinessConditions, logger)
+	} else {
+		// Only evict the last pod, even if we are trying to scale down multiple pods.
+		// Scale down will happen one pod at a time.
+		replicaManagementComplete, err = evictSinglePod(ctx, r, instance, scaleDownTo, podList, podStoppedReadinessConditions, logger)
+	}
+	// TODO: It would be great to support a multi-node scale down when Solr supports evicting many SolrNodes at once.
+
+	return
+}
+
+// scaleCloudUnmanaged does simple scaling of a SolrCloud without moving replicas.
+// This is not a "locked" cluster operation, and does not block other cluster operations from taking place.
+func scaleCloudUnmanaged(ctx context.Context, r *SolrCloudReconciler, statefulSet *appsv1.StatefulSet, scaleTo int, logger logr.Logger) (err error) {
+	// Scale the StatefulSet directly by patching its replica count; replicas are not moved off of the pods first.
+	patchedStatefulSet := statefulSet.DeepCopy()
+	patchedStatefulSet.Spec.Replicas = pointer.Int32(int32(scaleTo))
+	if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+		logger.Error(err, "Error while patching StatefulSet to scale SolrCloud.", "fromNodes", *statefulSet.Spec.Replicas, "toNodes", scaleTo)
+	}
+	return err
+}
+
+func evictAllPods(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, podList []corev1.Pod, readinessConditions map[corev1.PodConditionType]podReadinessConditionChange, logger logr.Logger) (podsAreEmpty bool, err error) {
+	// If there are no pods, we can't empty them. Just return true
+	if len(podList) == 0 {
+		return true, nil
+	}
+
+	for i, pod := range podList {
+		if updatedPod, e := EnsurePodReadinessConditions(ctx, r, &pod, readinessConditions, logger); e != nil {
+			err = e
+			return
+		} else {
+			podList[i] = *updatedPod
+		}
+	}
+
+	// Delete all collections & data, the user wants no data left if scaling the solrcloud down to 0
+	// This is a much different operation to deleting the SolrCloud/StatefulSet all-together
+	// TODO: Implement delete all collections. Currently just leave the data
+	//if err, podsAreEmpty = util.DeleteAllCollectionsIfNecessary(ctx, instance, "scaleDown", logger); err != nil {
+	//	logger.Error(err, "Error while evicting all collections in SolrCloud, when scaling down SolrCloud to 0 pods")
+	//}
+	podsAreEmpty = true
+
+	return
+}
+
+func evictSinglePod(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, scaleDownTo int, podList []corev1.Pod, readinessConditions map[corev1.PodConditionType]podReadinessConditionChange, logger logr.Logger) (podIsEmpty bool, err error) {
+	var pod *corev1.Pod
+	podName := instance.GetSolrPodName(scaleDownTo)
+	for _, p := range podList {
+		if p.Name == podName {
+			pod = &p
+			break
+		}
+	}
+
+	// TODO: Get whether the pod has replicas or not
+	podHasReplicas := true
+
+	// The pod doesn't exist, we cannot empty it

Review Comment:
   Question: How can this happen?



##########
docs/solr-cloud/autoscaling.md:
##########
@@ -0,0 +1,85 @@
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+ -->
+
+# SolrCloud Scaling
+_Since v0.8.0_
+
+Solr Clouds are complex distributed systems, and thus require additional help when trying to scale up or down.
+
+Scaling/Autoscaling can mean different things in different situations, and this is true even within the `SolrCloud.spec.autoscaling` section.
+- Replicas can be moved when new nodes are added or when nodes need to be taken down
+- Nodes can be added/removed if more or less resources are desired.
+
+The following sections describe all the features that the Solr Operator currently supports to aid in scaling & autoscaling SolrClouds.
+
+## Configuration
+
+The `autoscaling` section in the SolrCloud CRD can be configured in the following ways
+
+```yaml
+spec:
+  autoscaling:
+    vacatePodsOnScaleDown: true # Default: true
+```
+
+## Replica Movement
+
+Solr can be scaled up & down either manually or by a `HorizontalPodAutoscaler`; however, no matter how the `SolrCloud.Spec.Replicas` value
+changes, the Solr Operator must implement this change the same way.
+
+For now, Replicas themselves are not scaled up and down; they are just moved to utilize new Solr pods or vacate soon-to-be-deleted Solr pods.
+
+### Solr Pod Scale-Down
+
+When the desired number of Solr Pods to run, `SolrCloud.Spec.Replicas`, is decreased,
+the `SolrCloud.spec.autoscaling.vacatePodsOnScaleDown` option determines whether the Solr Operator should move replicas
+off of the pods that are about to be deleted.
+
+When a StatefulSet, which the Solr Operator uses to run Solr pods, has its size decreased by `x` pods, it's the last
+`x` pods that are deleted. So if a StatefulSet `tmp` has size 4, it will have pods `tmp-0`, `tmp-1`, `tmp-2` and `tmp-3`.
+If that `tmp` then is scaled down to size 2, then pods `tmp-2` and `tmp-3` will be deleted because they are `tmp`'s last pods numerically.

Review Comment:
   > then pods `tmp-2` and `tmp-3` will be deleted because they are `tmp`'s last pods numerically
   
   Maybe:
   > then pod `tmp-3` will be deleted first, followed by `tmp-2`, because they are `tmp`'s last pods numerically



##########
docs/solr-cloud/autoscaling.md:
##########
@@ -0,0 +1,85 @@
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+ -->
+
+# SolrCloud Scaling
+_Since v0.8.0_
+
+Solr Clouds are complex distributed systems, and thus require additional help when trying to scale up or down.
+
+Scaling/Autoscaling can mean different things in different situations, and this is true even within the `SolrCloud.spec.autoscaling` section.
+- Replicas can be moved when new nodes are added or when nodes need to be taken down
+- Nodes can be added/removed if more or less resources are desired.
+
+The following sections describe all the features that the Solr Operator currently supports to aid in scaling & autoscaling SolrClouds.
+
+## Configuration
+
+The `autoscaling` section in the SolrCloud CRD can be configured in the following ways
+
+```yaml
+spec:
+  autoscaling:
+    vacatePodsOnScaleDown: true # Default: true
+```
+
+## Replica Movement
+
+Solr can be scaled up & down either manually or by a `HorizontalPodAutoscaler`; however, no matter how the `SolrCloud.Spec.Replicas` value
+changes, the Solr Operator must implement this change the same way.
+
+For now, Replicas themselves are not scaled up and down; they are just moved to utilize new Solr pods or vacate soon-to-be-deleted Solr pods.
+
+### Solr Pod Scale-Down
+
+When the desired number of Solr Pods to run, `SolrCloud.Spec.Replicas`, is decreased,
+the `SolrCloud.spec.autoscaling.vacatePodsOnScaleDown` option determines whether the Solr Operator should move replicas
+off of the pods that are about to be deleted.
+
+When a StatefulSet, which the Solr Operator uses to run Solr pods, has its size decreased by `x` pods, it's the last
+`x` pods that are deleted. So if a StatefulSet `tmp` has size 4, it will have pods `tmp-0`, `tmp-1`, `tmp-2` and `tmp-3`.
+If that `tmp` then is scaled down to size 2, then pods `tmp-2` and `tmp-3` will be deleted because they are `tmp`'s last pods numerically.
+
+If Solr has replicas placed on the pods that will be deleted as a part of the scale-down, then it has a problem.
+Solr will expect that these replicas will eventually come back online, because they are a part of the clusterState.
+However, the Solr Operator has no expectations for these replicas to come back, because the cloud has been scaled down.
+Therefore, the safest option is to move the replicas off of these pods before the scale-down operation occurs.

Review Comment:
   I think this isn't very clear. I think a simpler explanation of what the operator will do would be better, something like: 
   ```
   The Solr Operator can update the cluster state to handle the scale-down operation by moving replicas off of the soon-to-be-deleted pods
   ```
   Or something like that. I feel all the explanation of the difference between Solr and Solr Operator just adds confusion.



##########
controllers/solr_cluster_ops_util.go:
##########
@@ -0,0 +1,218 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package controllers
+
+import (
+	"context"
+	"errors"
+	solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
+	"github.com/apache/solr-operator/controllers/util"
+	"github.com/go-logr/logr"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/utils/pointer"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"strconv"
+	"time"
+)
+
+func determineScaleClusterOpLockIfNecessary(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (clusterOpLock string, clusterOpMetadata string, retryLaterDuration time.Duration, err error) {
+	desiredPods := int(*instance.Spec.Replicas)
+	configuredPods := int(*statefulSet.Spec.Replicas)
+	if desiredPods != configuredPods {
+		scaleTo := -1
+		// Start a scaling operation
+		if desiredPods < configuredPods {
+			// Scale down!
+			// The option is enabled by default, so treat "nil" like "true"
+			if instance.Spec.Autoscaling.VacatePodsOnScaleDown == nil || *instance.Spec.Autoscaling.VacatePodsOnScaleDown {
+				if desiredPods > 0 {
+					// We only support scaling down one pod at a time if not scaling down to 0 pods
+					scaleTo = configuredPods - 1
+				} else {
+					scaleTo = 0
+				}
+			} else {
+				// The cloud is not setup to use managed scale-down
+				err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
+			}
+		} else if desiredPods > configuredPods {
+			// Scale up!
+			// TODO: replicasScaleUp is not supported, so do not make a clusterOp out of it, just do the patch
+			err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
+		}
+		if scaleTo > -1 {
+			clusterOpLock = util.ScaleLock
+			clusterOpMetadata = strconv.Itoa(scaleTo)
+		}
+	}
+	return
+}
+
+func handleLockedClusterOpScale(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (retryLaterDuration time.Duration, err error) {
+	if scalingToNodes, hasAnn := statefulSet.Annotations[util.ClusterOpsMetadataAnnotation]; hasAnn {
+		if scalingToNodesInt, convErr := strconv.Atoi(scalingToNodes); convErr != nil {
+			logger.Error(convErr, "Could not convert statefulSet annotation to int for scale-down-to information", "annotation", util.ClusterOpsMetadataAnnotation, "value", scalingToNodes)
+			err = convErr
+		} else {
+			replicaManagementComplete := false
+			if scalingToNodesInt < int(*statefulSet.Spec.Replicas) {
+				// Manage scaling down the SolrCloud
+				replicaManagementComplete, err = handleManagedCloudScaleDown(ctx, r, instance, statefulSet, scalingToNodesInt, podList, logger)
+				// } else if scalingToNodesInt > int(*statefulSet.Spec.Replicas) {
+				// TODO: Utilize the scaled-up nodes in the future, however Solr does not currently have APIs for this.
+				// TODO: Think about the order of scale-up and restart when individual nodeService IPs are injected into the pods.
+				// TODO: Will likely want to do a scale-up of the service first, then do the rolling restart of the cluster, then utilize the node.
+			} else {
+				// This shouldn't happen. The ScalingToNodesAnnotation is removed when the statefulSet size changes, through a Patch.
+				// But if it does happen, we should just remove the annotation and move forward.
+				patchedStatefulSet := statefulSet.DeepCopy()
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsLockAnnotation)
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
+				if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+					logger.Error(err, "Error while patching StatefulSet to remove unneeded clusterLockOp annotation for scaling to the current amount of nodes")
+				} else {
+					statefulSet = patchedStatefulSet
+				}
+			}
+
+			// Scale down the statefulSet to represent the new number of utilizedPods, if it is lower than the current number of pods
+			// Also remove the "scalingToNodes" annotation, as that acts as a lock on the cluster, so that other operations,
+			// such as scale-up, pod updates and further scale-down cannot happen at the same time.

Review Comment:
   Isn't there a `lock` annotation being used (as I see below)?



##########
docs/solr-cloud/autoscaling.md:
##########
@@ -0,0 +1,85 @@
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+ -->
+
+# SolrCloud Scaling
+_Since v0.8.0_
+
+Solr Clouds are complex distributed systems, and thus require additional help when trying to scale up or down.
+
+Scaling/Autoscaling can mean different things in different situations, and this is true even within the `SolrCloud.spec.autoscaling` section.
+- Replicas can be moved when new nodes are added or when nodes need to be taken down
+- Nodes can be added/removed if more or less resources are desired.
+
+The following sections describe all the features that the Solr Operator currently supports to aid in scaling & autoscaling SolrClouds.
+
+## Configuration
+
+The `autoscaling` section in the SolrCloud CRD can be configured in the following ways
+
+```yaml
+spec:
+  autoscaling:
+    vacatePodsOnScaleDown: true # Default: true
+```
+
+## Replica Movement
+
+Solr can be scaled up & down either manually or by a `HorizontalPodAutoscaler`; however, no matter how the `SolrCloud.Spec.Replicas` value
+changes, the Solr Operator must implement this change the same way.
+
+For now, Replicas themselves are not scaled up and down; they are just moved to utilize new Solr pods or vacate soon-to-be-deleted Solr pods.
+
+### Solr Pod Scale-Down
+
+When the desired number of Solr Pods to run, `SolrCloud.Spec.Replicas`, is decreased,
+the `SolrCloud.spec.autoscaling.vacatePodsOnScaleDown` option determines whether the Solr Operator should move replicas
+off of the pods that are about to be deleted.
+
+When a StatefulSet, which the Solr Operator uses to run Solr pods, has its size decreased by `x` pods, it's the last
+`x` pods that are deleted. So if a StatefulSet `tmp` has size 4, it will have pods `tmp-0`, `tmp-1`, `tmp-2` and `tmp-3`.
+If that `tmp` then is scaled down to size 2, then pods `tmp-2` and `tmp-3` will be deleted because they are `tmp`'s last pods numerically.
+
+If Solr has replicas placed on the pods that will be deleted as a part of the scale-down, then it has a problem.
+Solr will expect that these replicas will eventually come back online, because they are a part of the clusterState.
+However, the Solr Operator has no expectations for these replicas to come back, because the cloud has been scaled down.
+Therefore, the safest option is to move the replicas off of these pods before the scale-down operation occurs.
+
+If the `autoscaling.vacatePodsOnScaleDown` option is not enabled, then whenever `SolrCloud.Spec.Replicas` is decreased,
+that change will be reflected in the StatefulSet immediately.
+Pods will be deleted even if replicas live on those pods.
+
+If the `autoscaling.vacatePodsOnScaleDown` option is enabled, which it is by default, then the following steps occur:
+1. Acquire a cluster-ops lock on the SolrCloud. (This means other cluster operations, such as a rolling restart, cannot occur during the scale down operation)
+1. Scale down the last pod.
+   1. Mark the pod as "notReady" so that traffic is diverted away from this pod.

Review Comment:
   Question: Is this only an ingress change? Traffic that goes directly to the pod should continue to succeed, right? Otherwise, there will be update errors and replicas going DOWN unnecessarily.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@solr.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@solr.apache.org
For additional commands, e-mail: issues-help@solr.apache.org


[GitHub] [solr-operator] radu-gheorghe commented on a diff in pull request #561: Add managed scale-down for SolrClouds

Posted by "radu-gheorghe (via GitHub)" <gi...@apache.org>.
radu-gheorghe commented on code in PR #561:
URL: https://github.com/apache/solr-operator/pull/561#discussion_r1196311716


##########
controllers/solr_cluster_ops_util.go:
##########
@@ -0,0 +1,218 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package controllers
+
+import (
+	"context"
+	"errors"
+	solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
+	"github.com/apache/solr-operator/controllers/util"
+	"github.com/go-logr/logr"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/utils/pointer"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"strconv"
+	"time"
+)
+
+func determineScaleClusterOpLockIfNecessary(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (clusterOpLock string, clusterOpMetadata string, retryLaterDuration time.Duration, err error) {
+	desiredPods := int(*instance.Spec.Replicas)
+	configuredPods := int(*statefulSet.Spec.Replicas)
+	if desiredPods != configuredPods {
+		scaleTo := -1
+		// Start a scaling operation
+		if desiredPods < configuredPods {
+			// Scale down!
+			// The option is enabled by default, so treat "nil" like "true"
+			if instance.Spec.Autoscaling.VacatePodsOnScaleDown == nil || *instance.Spec.Autoscaling.VacatePodsOnScaleDown {
+				if desiredPods > 0 {
+					// We only support one scaling down one pod at-a-time if not scaling down to 0 pods
+					// We only support scaling down one pod at a time if not scaling down to 0 pods
+				} else {
+					scaleTo = 0
+				}
+			} else {
+				// The cloud is not setup to use managed scale-down
+				err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
+			}
+		} else if desiredPods > configuredPods {
+			// Scale up!
+			// TODO: replicasScaleUp is not supported, so do not make a clusterOp out of it, just do the patch
+			err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
+		}
+		if scaleTo > -1 {
+			clusterOpLock = util.ScaleLock
+			clusterOpMetadata = strconv.Itoa(scaleTo)
+		}
+	}
+	return
+}
+
+func handleLockedClusterOpScale(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (retryLaterDuration time.Duration, err error) {
+	if scalingToNodes, hasAnn := statefulSet.Annotations[util.ClusterOpsMetadataAnnotation]; hasAnn {
+		if scalingToNodesInt, convErr := strconv.Atoi(scalingToNodes); convErr != nil {
+			logger.Error(convErr, "Could not convert statefulSet annotation to int for scale-down-to information", "annotation", util.ClusterOpsMetadataAnnotation, "value", scalingToNodes)
+			err = convErr
+		} else {
+			replicaManagementComplete := false
+			if scalingToNodesInt < int(*statefulSet.Spec.Replicas) {
+				// Manage scaling down the SolrCloud
+				replicaManagementComplete, err = handleManagedCloudScaleDown(ctx, r, instance, statefulSet, scalingToNodesInt, podList, logger)
+				// } else if scalingToNodesInt > int(*statefulSet.Spec.Replicas) {
+				// TODO: Utilize the scaled-up nodes in the future, however Solr does not currently have APIs for this.
+				// TODO: Think about the order of scale-up and restart when individual nodeService IPs are injected into the pods.
+				// TODO: Will likely want to do a scale-up of the service first, then do the rolling restart of the cluster, then utilize the node.
+			} else {
+				// This shouldn't happen. The ScalingToNodesAnnotation is removed when the statefulSet size changes, through a Patch.
+				// But if it does happen, we should just remove the annotation and move forward.
+				patchedStatefulSet := statefulSet.DeepCopy()
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsLockAnnotation)
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
+				if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+					logger.Error(err, "Error while patching StatefulSet to remove unneeded clusterLockOp annotation for scaling to the current amount of nodes")
+				} else {
+					statefulSet = patchedStatefulSet
+				}
+			}
+
+			// Scale down the statefulSet to represent the new number of utilizedPods, if it is lower than the current number of pods
+			// Also remove the "scalingToNodes" annotation, as that acts as a lock on the cluster, so that other operations,
+			// such as scale-up, pod updates and further scale-down cannot happen at the same time.
+			if replicaManagementComplete {
+				patchedStatefulSet := statefulSet.DeepCopy()
+				patchedStatefulSet.Spec.Replicas = pointer.Int32(int32(scalingToNodesInt))
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsLockAnnotation)
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
+				if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+					logger.Error(err, "Error while patching StatefulSet to scale down SolrCloud", "newUtilizedNodes", scalingToNodesInt)
+				}
+
+				// TODO: Create event for the CRD.
+			} else {
+				// Retry after five minutes to check if the replica management commands have been completed
+				// Retry after five seconds to check if the replica management commands have been completed
+			}
+		}
+		// If everything succeeded, the statefulSet will have an annotation updated
+		// and the reconcile loop will be called again.
+
+		return
+	} else {
+		err = errors.New("no clusterOpMetadata annotation is present in the statefulSet")
+		logger.Error(err, "Cannot perform scaling operation when no scale-to-nodes is provided via the clusterOpMetadata")
+		return time.Second * 10, err
+	}
+}
+
+// handleManagedCloudScaleDown does the logic of a managed and "locked" cloud scale down operation.
+// This will likely take many reconcile loops to complete, as it is moving replicas away from the nodes that will be scaled down.
+func handleManagedCloudScaleDown(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, scaleDownTo int, podList []corev1.Pod, logger logr.Logger) (replicaManagementComplete bool, err error) {
+	// Before doing anything to the pod, make sure that users cannot send requests to the pod anymore.
+	podStoppedReadinessConditions := map[corev1.PodConditionType]podReadinessConditionChange{
+		util.SolrIsNotStoppedReadinessCondition: {
+			reason:  ScaleDown,
+			message: "Pod is being deleted, traffic to the pod must be stopped",
+			status:  false,
+		},
+	}
+
+	if scaleDownTo == 0 {
+		// Delete all collections & data, the user wants no data left if scaling the solrcloud down to 0
+		// This is a much different operation to deleting the SolrCloud/StatefulSet all-together
+		replicaManagementComplete, err = evictAllPods(ctx, r, instance, podList, podStoppedReadinessConditions, logger)
+	} else {
+		// Only evict the last pod, even if we are trying to scale down multiple pods.
+		// Scale down will happen one pod at a time.
+		replicaManagementComplete, err = evictSinglePod(ctx, r, instance, scaleDownTo, podList, podStoppedReadinessConditions, logger)
+	}
+	// TODO: It would be great to support a multi-node scale down when Solr supports evicting many SolrNodes at once.

Review Comment:
   I doubt this is ever going to help, because conditions may change (e.g. load might increase).
   
   Although, one could argue that scaling can be done in steps. For example, if you have 6 nodes and 12 shards in total, it makes sense to go from 6 to 4, skipping 5, to keep the load balanced (12 shards spread evenly across 6 or 4 nodes, but not across 5).
   
   Just venting :)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@solr.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@solr.apache.org
For additional commands, e-mail: issues-help@solr.apache.org


[GitHub] [solr-operator] HoustonPutman commented on a diff in pull request #561: Add managed scale-down for SolrClouds

Posted by "HoustonPutman (via GitHub)" <gi...@apache.org>.
HoustonPutman commented on code in PR #561:
URL: https://github.com/apache/solr-operator/pull/561#discussion_r1196567570


##########
controllers/solr_cluster_ops_util.go:
##########
@@ -0,0 +1,218 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package controllers
+
+import (
+	"context"
+	"errors"
+	solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
+	"github.com/apache/solr-operator/controllers/util"
+	"github.com/go-logr/logr"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/utils/pointer"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"strconv"
+	"time"
+)
+
+func determineScaleClusterOpLockIfNecessary(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (clusterOpLock string, clusterOpMetadata string, retryLaterDuration time.Duration, err error) {
+	desiredPods := int(*instance.Spec.Replicas)
+	configuredPods := int(*statefulSet.Spec.Replicas)
+	if desiredPods != configuredPods {
+		scaleTo := -1
+		// Start a scaling operation
+		if desiredPods < configuredPods {
+			// Scale down!
+			// The option is enabled by default, so treat "nil" like "true"
+			if instance.Spec.Autoscaling.VacatePodsOnScaleDown == nil || *instance.Spec.Autoscaling.VacatePodsOnScaleDown {
+				if desiredPods > 0 {
+					// We only support scaling down one pod at a time if not scaling down to 0 pods
+					scaleTo = configuredPods - 1
+				} else {
+					scaleTo = 0
+				}
+			} else {
+				// The cloud is not setup to use managed scale-down
+				err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
+			}
+		} else if desiredPods > configuredPods {
+			// Scale up!
+			// TODO: replicasScaleUp is not supported, so do not make a clusterOp out of it, just do the patch
+			err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
+		}
+		if scaleTo > -1 {
+			clusterOpLock = util.ScaleLock
+			clusterOpMetadata = strconv.Itoa(scaleTo)
+		}
+	}
+	return
+}
+
+func handleLockedClusterOpScale(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (retryLaterDuration time.Duration, err error) {
+	if scalingToNodes, hasAnn := statefulSet.Annotations[util.ClusterOpsMetadataAnnotation]; hasAnn {
+		if scalingToNodesInt, convErr := strconv.Atoi(scalingToNodes); convErr != nil {
+			logger.Error(convErr, "Could not convert statefulSet annotation to int for scale-down-to information", "annotation", util.ClusterOpsMetadataAnnotation, "value", scalingToNodes)
+			err = convErr
+		} else {
+			replicaManagementComplete := false
+			if scalingToNodesInt < int(*statefulSet.Spec.Replicas) {
+				// Manage scaling down the SolrCloud
+				replicaManagementComplete, err = handleManagedCloudScaleDown(ctx, r, instance, statefulSet, scalingToNodesInt, podList, logger)
+				// } else if scalingToNodesInt > int(*statefulSet.Spec.Replicas) {
+				// TODO: Utilize the scaled-up nodes in the future, however Solr does not currently have APIs for this.
+				// TODO: Think about the order of scale-up and restart when individual nodeService IPs are injected into the pods.
+				// TODO: Will likely want to do a scale-up of the service first, then do the rolling restart of the cluster, then utilize the node.
+			} else {
+				// This shouldn't happen. The ScalingToNodesAnnotation is removed when the statefulSet size changes, through a Patch.
+				// But if it does happen, we should just remove the annotation and move forward.
+				patchedStatefulSet := statefulSet.DeepCopy()
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsLockAnnotation)
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
+				if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+					logger.Error(err, "Error while patching StatefulSet to remove unneeded clusterLockOp annotation for scaling to the current amount of nodes")
+				} else {
+					statefulSet = patchedStatefulSet
+				}
+			}
+
+			// Scale down the statefulSet to represent the new number of utilizedPods, if it is lower than the current number of pods
+			// Also remove the "scalingToNodes" annotation, as that acts as a lock on the cluster, so that other operations,
+			// such as scale-up, pod updates and further scale-down cannot happen at the same time.
+			if replicaManagementComplete {
+				patchedStatefulSet := statefulSet.DeepCopy()
+				patchedStatefulSet.Spec.Replicas = pointer.Int32(int32(scalingToNodesInt))
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsLockAnnotation)
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
+				if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+					logger.Error(err, "Error while patching StatefulSet to scale down SolrCloud", "newUtilizedNodes", scalingToNodesInt)
+				}
+
+				// TODO: Create event for the CRD.
+			} else {
+				// Retry after five seconds to check if the replica management commands have been completed
+				retryLaterDuration = time.Second * 5
+			}
+		}
+		// If everything succeeded, the statefulSet will have an annotation updated
+		// and the reconcile loop will be called again.
+
+		return
+	} else {
+		err = errors.New("no clusterOpMetadata annotation is present in the statefulSet")
+		logger.Error(err, "Cannot perform scaling operation when no scale-to-nodes is provided via the clusterOpMetadata")
+		return time.Second * 10, err
+	}
+}
+
+// handleManagedCloudScaleDown does the logic of a managed and "locked" cloud scale down operation.
+// This will likely take many reconcile loops to complete, as it is moving replicas away from the nodes that will be scaled down.
+func handleManagedCloudScaleDown(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, scaleDownTo int, podList []corev1.Pod, logger logr.Logger) (replicaManagementComplete bool, err error) {
+	// Before doing anything to the pod, make sure that users cannot send requests to the pod anymore.
+	podStoppedReadinessConditions := map[corev1.PodConditionType]podReadinessConditionChange{
+		util.SolrIsNotStoppedReadinessCondition: {
+			reason:  ScaleDown,
+			message: "Pod is being deleted, traffic to the pod must be stopped",
+			status:  false,
+		},
+	}
+
+	if scaleDownTo == 0 {
+		// Delete all collections & data, the user wants no data left if scaling the solrcloud down to 0
+		// This is a much different operation to deleting the SolrCloud/StatefulSet all-together
+		replicaManagementComplete, err = evictAllPods(ctx, r, instance, podList, podStoppedReadinessConditions, logger)

Review Comment:
   Wonderful question. So I should make this more clear here, rather than obfuscating it.
   
   Currently I have not actually implemented data deletion. evictAllPods is basically a NO-OP that I left around for future improvement. If you scale down to 0, the operator just leaves the data and deletes all of the pods. This is actually the same as deleting the StatefulSet (though slightly different with regard to PVC lifespan). The only way the data would still live is if you are using persistent storage and using the `RetainOnDelete` option for PVCs.
   
   The idea was that maybe we should just explicitly delete all collections before scaling to 0, since they might want to scale back up and at that point the collections in ZK will no longer exist. I didn't implement this and left it for a future option. But we can make this a blocker for the next release if you think it's worthwhile.
   
   I'm also just unsure if collection deletion is what we should be doing. Scaling down nodes to me doesn't include deleting data, now that I'm thinking about it more. Either way though, this code path should be made more clear as to what it actually does, which is basically a NO-OP currently.
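
   For reference, if we did go the explicit-deletion route, I would expect it to be a fairly small helper, roughly along these lines (very rough sketch, not in this PR; `listCollections` is a placeholder for a Collections API `LIST` call that we don't have a helper for yet):

   ```go
   // deleteAllCollections sketches deleting every collection before scaling to zero pods,
   // so that a later scale-up does not start from cluster state that points at data which no longer exists.
   func deleteAllCollections(ctx context.Context, solrCloud *solr.SolrCloud, logger logr.Logger) error {
   	collections, err := listCollections(ctx, solrCloud) // hypothetical helper wrapping action=LIST
   	if err != nil {
   		return err
   	}
   	for _, collection := range collections {
   		deleteResponse := &solr_api.SolrAsyncResponse{}
   		queryParams := url.Values{}
   		queryParams.Add("action", "DELETE")
   		queryParams.Add("name", collection)
   		if err = solr_api.CallCollectionsApi(ctx, solrCloud, queryParams, deleteResponse); err == nil {
   			if hasError, apiErr := solr_api.CheckForCollectionsApiError("DELETE", deleteResponse.ResponseHeader); hasError {
   				err = apiErr
   			}
   		}
   		if err != nil {
   			logger.Error(err, "Could not delete collection before scaling to zero pods", "collection", collection)
   			return err
   		}
   	}
   	return nil
   }
   ```

   That said, I'm still leaning towards scale-down not deleting data at all, so treat the above as an illustration of the option rather than a plan.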



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@solr.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@solr.apache.org
For additional commands, e-mail: issues-help@solr.apache.org


[GitHub] [solr-operator] HoustonPutman merged pull request #561: Add managed scale-down for SolrClouds

Posted by "HoustonPutman (via GitHub)" <gi...@apache.org>.
HoustonPutman merged PR #561:
URL: https://github.com/apache/solr-operator/pull/561


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@solr.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@solr.apache.org
For additional commands, e-mail: issues-help@solr.apache.org


[GitHub] [solr-operator] HoustonPutman commented on a diff in pull request #561: Add managed scale-down for SolrClouds

Posted by "HoustonPutman (via GitHub)" <gi...@apache.org>.
HoustonPutman commented on code in PR #561:
URL: https://github.com/apache/solr-operator/pull/561#discussion_r1196554404


##########
controllers/util/solr_update_util.go:
##########
@@ -503,82 +503,65 @@ func GetAllManagedSolrNodeNames(solrCloud *solr.SolrCloud) map[string]bool {
 }
 
 // EvictReplicasForPodIfNecessary takes a solr Pod and migrates all replicas off of that Pod, if the Pod is using ephemeral storage.
-// If the pod is using persistent storage, this function is a no-op.
-// This function MUST be idempotent and return the same list of pods given the same kubernetes/solr state.
-func EvictReplicasForPodIfNecessary(ctx context.Context, solrCloud *solr.SolrCloud, pod *corev1.Pod, podHasReplicas bool, logger logr.Logger) (err error, canDeletePod bool) {
-	var solrDataVolume *corev1.Volume
-
-	// TODO: Remove these checks after v0.7.0, since it will be taken care by the evictReplicas podReadinessCondition
-	dataVolumeName := solrCloud.DataVolumeName()
-	for _, volume := range pod.Spec.Volumes {
-		if volume.Name == dataVolumeName {
-			solrDataVolume = &volume
-			break
-		}
-	}
-
-	// Only evict if the Data volume is not persistent
-	if solrDataVolume != nil && solrDataVolume.VolumeSource.PersistentVolumeClaim == nil {
-		// If the Cloud has 1 or zero pods, and this is the "-0" pod, then delete the data since we can't move it anywhere else
-		// Otherwise, move the replicas to other pods
-		if (solrCloud.Spec.Replicas == nil || *solrCloud.Spec.Replicas < 2) && strings.HasSuffix(pod.Name, "-0") {
-			queryParams := url.Values{}
-			queryParams.Add("action", "DELETENODE")
-			queryParams.Add("node", SolrNodeName(solrCloud, pod.Name))
-			// TODO: Figure out a way to do this, since DeleteNode will not delete the last replica of every type...
-			canDeletePod = true
-		} else {
-			requestId := "move-replicas-" + pod.Name
-
-			// First check to see if the Async Replace request has started
-			if asyncState, message, asyncErr := solr_api.CheckAsyncRequest(ctx, solrCloud, requestId); asyncErr != nil {
-				err = asyncErr
-			} else if asyncState == "notfound" {
-				if podHasReplicas {
-					// Submit new Replace Node request
-					replaceResponse := &solr_api.SolrAsyncResponse{}
-					queryParams := url.Values{}
-					queryParams.Add("action", "REPLACENODE")
-					queryParams.Add("parallel", "true")
-					queryParams.Add("sourceNode", SolrNodeName(solrCloud, pod.Name))
-					queryParams.Add("async", requestId)
-					err = solr_api.CallCollectionsApi(ctx, solrCloud, queryParams, replaceResponse)
-					if hasError, apiErr := solr_api.CheckForCollectionsApiError("REPLACENODE", replaceResponse.ResponseHeader); hasError {
-						err = apiErr
-					}
-					if err == nil {
-						logger.Info("Migrating all replicas off of pod before deletion.", "requestId", requestId, "pod", pod.Name)
-					} else {
-						logger.Error(err, "Could not migrate all replicas off of pod before deletion. Will try again later.", "requestId", requestId, "message", message)
-					}
+// For updates this will only be called for pods using ephemeral data.
+// For scale-down operations, this can be called for pods using ephemeral or persistent data.
+func EvictReplicasForPodIfNecessary(ctx context.Context, solrCloud *solr.SolrCloud, pod *corev1.Pod, podHasReplicas bool, evictionReason string, logger logr.Logger) (err error, canDeletePod bool) {
+	logger = logger.WithValues("evictionReason", evictionReason)
+	// If the Cloud has 1 or zero pods, and this is the "-0" pod, then delete the data since we can't move it anywhere else
+	// Otherwise, move the replicas to other pods
+	if (solrCloud.Spec.Replicas == nil || *solrCloud.Spec.Replicas < 2) && strings.HasSuffix(pod.Name, "-0") {
+		queryParams := url.Values{}
+		queryParams.Add("action", "DELETENODE")
+		queryParams.Add("node", SolrNodeName(solrCloud, pod.Name))
+		// TODO: Figure out a way to do this, since DeleteNode will not delete the last replica of every type...
+		canDeletePod = true
+	} else {
+		requestId := "move-replicas-" + pod.Name
+
+		// First check to see if the Async Replace request has started
+		if asyncState, message, asyncErr := solr_api.CheckAsyncRequest(ctx, solrCloud, requestId); asyncErr != nil {
+			err = asyncErr
+		} else if asyncState == "notfound" {
+			if podHasReplicas {
+				// Submit new Replace Node request
+				replaceResponse := &solr_api.SolrAsyncResponse{}
+				queryParams := url.Values{}
+				queryParams.Add("action", "REPLACENODE")
+				queryParams.Add("parallel", "true")
+				queryParams.Add("sourceNode", SolrNodeName(solrCloud, pod.Name))
+				queryParams.Add("async", requestId)
+				err = solr_api.CallCollectionsApi(ctx, solrCloud, queryParams, replaceResponse)
+				if hasError, apiErr := solr_api.CheckForCollectionsApiError("REPLACENODE", replaceResponse.ResponseHeader); hasError {
+					err = apiErr
+				}
+				if err == nil {
+					logger.Info("Migrating all replicas off of pod before deletion.", "requestId", requestId, "pod", pod.Name)
 				} else {
-					canDeletePod = true
+					logger.Error(err, "Could not migrate all replicas off of pod before deletion. Will try again later.", "requestId", requestId, "message", message)
 				}
-
 			} else {
-				logger.Info("Found async status", "requestId", requestId, "state", asyncState)
-				// Only continue to delete the pod if the ReplaceNode request is complete and successful
-				if asyncState == "completed" {
-					canDeletePod = true
-					logger.Info("Migration of all replicas off of pod before deletion complete. Pod can now be deleted.", "pod", pod.Name)
-				} else if asyncState == "failed" {
-					logger.Info("Migration of all replicas off of pod before deletion failed. Will try again.", "pod", pod.Name, "message", message)
-				}
+				canDeletePod = true
+			}
 
-				// Delete the async request Id if the async request is successful or failed.
-				// If the request failed, this will cause a retry since the next reconcile won't find the async requestId in Solr.
-				if asyncState == "completed" || asyncState == "failed" {
-					if message, err = solr_api.DeleteAsyncRequest(ctx, solrCloud, requestId); err != nil {
-						logger.Error(err, "Could not delete Async request status.", "requestId", requestId, "message", message)
-					} else {
-						canDeletePod = false
-					}
+		} else {
+			logger.Info("Found async status", "requestId", requestId, "state", asyncState)
+			// Only continue to delete the pod if the ReplaceNode request is complete and successful
+			if asyncState == "completed" {
+				canDeletePod = true

Review Comment:
   Yeah, that's certainly fair. I would love to see a Solr feature where we can say "Don't put replicas on this node", so that we could set that flag first and then the check wouldn't be necessary. But you are right.
   
   If I remember correctly, that's how it worked initially. So it shouldn't be a problem to go back to that.
   
   If we do this here, we should certainly do it the same way in the scale down operations. Currently it just waits for the command to succeed, just like here.
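
   For illustration, here is a minimal sketch of that "re-check before delete" approach, in the style of the helpers quoted above. Imports are omitted, and `podStillHasReplicas` is a hypothetical helper (not part of this PR) that would re-query CLUSTERSTATUS for the pod's node; the `solr_api` calls mirror the signatures shown in the diff.

   ```go
   // Sketch only: after the async REPLACENODE request completes, re-verify that the
   // pod is actually empty before allowing deletion, since replicas could have been
   // placed on the node after the request was submitted.
   func canSafelyDeletePod(ctx context.Context, solrCloud *solr.SolrCloud, pod *corev1.Pod, requestId string, logger logr.Logger) (bool, error) {
   	asyncState, message, err := solr_api.CheckAsyncRequest(ctx, solrCloud, requestId)
   	if err != nil || asyncState != "completed" {
   		// Still running, failed, or errored; a failed request is retried on the next reconcile.
   		return false, err
   	}
   	// podStillHasReplicas is a hypothetical helper that re-queries CLUSTERSTATUS.
   	hasReplicas, err := podStillHasReplicas(ctx, solrCloud, pod.Name)
   	if err != nil || hasReplicas {
   		return false, err
   	}
   	// Clean up the async request id so that future reconciles start fresh.
   	if message, err = solr_api.DeleteAsyncRequest(ctx, solrCloud, requestId); err != nil {
   		logger.Error(err, "Could not delete Async request status.", "requestId", requestId, "message", message)
   		return false, err
   	}
   	return true, nil
   }
   ```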






[GitHub] [solr-operator] HoustonPutman commented on pull request #561: Add managed scale-down for SolrClouds

Posted by "HoustonPutman (via GitHub)" <gi...@apache.org>.
HoustonPutman commented on PR #561:
URL: https://github.com/apache/solr-operator/pull/561#issuecomment-1545872689

   Review request: @radu-gheorghe and @tflobbe. I'll finish this up today or Monday




[GitHub] [solr-operator] HoustonPutman commented on pull request #561: Add managed scale-down for SolrClouds

Posted by "HoustonPutman (via GitHub)" <gi...@apache.org>.
HoustonPutman commented on PR #561:
URL: https://github.com/apache/solr-operator/pull/561#issuecomment-1548467236

   Cool, docs and e2e tests are pretty good to go. There might be optimizations that can be made down the line, but I think this is complete for now!
   
   Overall, the only thing still on my mind is that we might want to change the `autoscaling` section to just be called `scaling`. I'm not exactly sure though, and would welcome opinions.
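
   To make the comparison concrete, the rename under discussion would only change the section name, not the fields inside it. The `scaling` name below is purely hypothetical at this point:

   ```yaml
   # Current field name in this PR:
   spec:
     autoscaling:
       vacatePodsOnScaleDown: true

   # Hypothetical alternative being discussed (not implemented):
   # spec:
   #   scaling:
   #     vacatePodsOnScaleDown: true
   ```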




[GitHub] [solr-operator] HoustonPutman commented on a diff in pull request #561: Add managed scale-down for SolrClouds

Posted by "HoustonPutman (via GitHub)" <gi...@apache.org>.
HoustonPutman commented on code in PR #561:
URL: https://github.com/apache/solr-operator/pull/561#discussion_r1198251842


##########
controllers/solr_cluster_ops_util.go:
##########
@@ -0,0 +1,218 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package controllers
+
+import (
+	"context"
+	"errors"
+	solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
+	"github.com/apache/solr-operator/controllers/util"
+	"github.com/go-logr/logr"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/utils/pointer"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"strconv"
+	"time"
+)
+
+func determineScaleClusterOpLockIfNecessary(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (clusterOpLock string, clusterOpMetadata string, retryLaterDuration time.Duration, err error) {
+	desiredPods := int(*instance.Spec.Replicas)
+	configuredPods := int(*statefulSet.Spec.Replicas)
+	if desiredPods != configuredPods {
+		scaleTo := -1
+		// Start a scaling operation
+		if desiredPods < configuredPods {
+			// Scale down!
+			// The option is enabled by default, so treat "nil" like "true"
+			if instance.Spec.Autoscaling.VacatePodsOnScaleDown == nil || *instance.Spec.Autoscaling.VacatePodsOnScaleDown {
+				if desiredPods > 0 {
+					// We only support scaling down one pod at a time if not scaling down to 0 pods
+					scaleTo = configuredPods - 1
+				} else {
+					scaleTo = 0
+				}
+			} else {
+				// The cloud is not setup to use managed scale-down
+				err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
+			}
+		} else if desiredPods > configuredPods {
+			// Scale up!
+			// TODO: replicasScaleUp is not supported, so do not make a clusterOp out of it, just do the patch
+			err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
+		}
+		if scaleTo > -1 {
+			clusterOpLock = util.ScaleLock
+			clusterOpMetadata = strconv.Itoa(scaleTo)
+		}
+	}
+	return
+}
+
+func handleLockedClusterOpScale(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (retryLaterDuration time.Duration, err error) {
+	if scalingToNodes, hasAnn := statefulSet.Annotations[util.ClusterOpsMetadataAnnotation]; hasAnn {
+		if scalingToNodesInt, convErr := strconv.Atoi(scalingToNodes); convErr != nil {
+			logger.Error(convErr, "Could not convert statefulSet annotation to int for scale-down-to information", "annotation", util.ClusterOpsMetadataAnnotation, "value", scalingToNodes)
+			err = convErr
+		} else {
+			replicaManagementComplete := false
+			if scalingToNodesInt < int(*statefulSet.Spec.Replicas) {
+				// Manage scaling down the SolrCloud
+				replicaManagementComplete, err = handleManagedCloudScaleDown(ctx, r, instance, statefulSet, scalingToNodesInt, podList, logger)
+				// } else if scalingToNodesInt > int(*statefulSet.Spec.Replicas) {
+				// TODO: Utilize the scaled-up nodes in the future, however Solr does not currently have APIs for this.
+				// TODO: Think about the order of scale-up and restart when individual nodeService IPs are injected into the pods.
+				// TODO: Will likely want to do a scale-up of the service first, then do the rolling restart of the cluster, then utilize the node.
+			} else {
+				// This shouldn't happen. The ScalingToNodesAnnotation is removed when the statefulSet size changes, through a Patch.
+				// But if it does happen, we should just remove the annotation and move forward.
+				patchedStatefulSet := statefulSet.DeepCopy()
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsLockAnnotation)
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
+				if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+					logger.Error(err, "Error while patching StatefulSet to remove unneeded clusterLockOp annotation for scaling to the current amount of nodes")
+				} else {
+					statefulSet = patchedStatefulSet
+				}
+			}
+
+			// Scale down the statefulSet to represent the new number of utilizedPods, if it is lower than the current number of pods
+			// Also remove the "scalingToNodes" annotation, as that acts as a lock on the cluster, so that other operations,
+			// such as scale-up, pod updates and further scale-down cannot happen at the same time.
+			if replicaManagementComplete {
+				patchedStatefulSet := statefulSet.DeepCopy()
+				patchedStatefulSet.Spec.Replicas = pointer.Int32(int32(scalingToNodesInt))
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsLockAnnotation)
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
+				if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+					logger.Error(err, "Error while patching StatefulSet to scale down SolrCloud", "newUtilizedNodes", scalingToNodesInt)
+				}
+
+				// TODO: Create event for the CRD.
+			} else {
+				// Retry after five seconds to check if the replica management commands have been completed
+				retryLaterDuration = time.Second * 5
+			}
+		}
+		// If everything succeeded, the statefulSet will have an annotation updated
+		// and the reconcile loop will be called again.
+
+		return
+	} else {
+		err = errors.New("no clusterOpMetadata annotation is present in the statefulSet")
+		logger.Error(err, "Cannot perform scaling operation when no scale-to-nodes is provided via the clusterOpMetadata")
+		return time.Second * 10, err
+	}
+}
+
+// handleManagedCloudScaleDown does the logic of a managed and "locked" cloud scale down operation.
+// This will likely take many reconcile loops to complete, as it is moving replicas away from the nodes that will be scaled down.
+func handleManagedCloudScaleDown(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, scaleDownTo int, podList []corev1.Pod, logger logr.Logger) (replicaManagementComplete bool, err error) {
+	// Before doing anything to the pod, make sure that users cannot send requests to the pod anymore.
+	podStoppedReadinessConditions := map[corev1.PodConditionType]podReadinessConditionChange{
+		util.SolrIsNotStoppedReadinessCondition: {
+			reason:  ScaleDown,
+			message: "Pod is being deleted, traffic to the pod must be stopped",
+			status:  false,
+		},
+	}
+
+	if scaleDownTo == 0 {
+		// Delete all collections & data, the user wants no data left if scaling the solrcloud down to 0
+		// This is a much different operation to deleting the SolrCloud/StatefulSet all-together
+		replicaManagementComplete, err = evictAllPods(ctx, r, instance, podList, podStoppedReadinessConditions, logger)

Review Comment:
   Yeah I made the code much more clear, but we aren't explicitly deleting all of the data. We can do that further down the line as an enhancement if we want to.
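
   If that enhancement is ever picked up, a rough sketch of explicit cleanup could look like the following, using the Collections API DELETE action. This is not part of this PR; the list of collections is assumed to come from a prior CLUSTERSTATUS call, and imports are omitted.

   ```go
   // Sketch only (possible future enhancement, not in this PR): explicitly delete all
   // collections before scaling a SolrCloud down to zero pods.
   func deleteAllCollections(ctx context.Context, solrCloud *solr.SolrCloud, collections []string) error {
   	for _, collection := range collections {
   		queryParams := url.Values{}
   		queryParams.Add("action", "DELETE") // Collections API command to delete a collection
   		queryParams.Add("name", collection)
   		deleteResponse := &solr_api.SolrAsyncResponse{}
   		if err := solr_api.CallCollectionsApi(ctx, solrCloud, queryParams, deleteResponse); err != nil {
   			return err
   		}
   		if hasError, apiErr := solr_api.CheckForCollectionsApiError("DELETE", deleteResponse.ResponseHeader); hasError {
   			return apiErr
   		}
   	}
   	return nil
   }
   ```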





[GitHub] [solr-operator] HoustonPutman commented on a diff in pull request #561: Add managed scale-down for SolrClouds

Posted by "HoustonPutman (via GitHub)" <gi...@apache.org>.
HoustonPutman commented on code in PR #561:
URL: https://github.com/apache/solr-operator/pull/561#discussion_r1196543849


##########
docs/solr-cloud/autoscaling.md:
##########
@@ -0,0 +1,85 @@
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+ -->
+
+# SolrCloud Scaling
+_Since v0.8.0_
+
+Solr Clouds are complex distributed systems, and thus require additional help when trying to scale up or down.
+
+Scaling/Autoscaling can mean different things in different situations, and this is true even within the `SolrCloud.spec.autoscaling` section.
+- Replicas can be moved when new nodes are added or when nodes need to be taken down
+- Nodes can be added/removed if more or less resources are desired.
+
+The following sections describe all the features that the Solr Operator currently supports to aid in scaling & autoscaling SolrClouds.
+
+## Configuration
+
+The `autoscaling` section in the SolrCloud CRD can be configured in the following ways:
+
+```yaml
+spec:
+  autoscaling:
+    vacatePodsOnScaleDown: true # Default: true
+```
+
+## Replica Movement
+
+Solr can be scaled up & down either manually or by a `HorizontalPodAutoscaler`; however, no matter how the `SolrCloud.Spec.Replicas` value
+changes, the Solr Operator must implement this change the same way.
+
+For now, Replicas are not scaled up and down themselves; they are just moved to utilize new Solr pods or vacate soon-to-be-deleted Solr pods.
+
+### Solr Pod Scale-Down
+
+When the desired number of Solr Pods to run (`SolrCloud.Spec.Replicas`) is decreased,
+the `SolrCloud.spec.autoscaling.vacatePodsOnScaleDown` option determines whether the Solr Operator should move replicas
+off of the pods that are about to be deleted.
+
+When a StatefulSet, which the Solr Operator uses to run Solr pods, has its size decreased by `x` pods, it's the last
+`x` pods that are deleted. So if a StatefulSet `tmp` has size 4, it will have pods `tmp-0`, `tmp-1`, `tmp-2` and `tmp-3`.
+If `tmp` is then scaled down to size 2, pods `tmp-2` and `tmp-3` will be deleted, because they are numerically the last of `tmp`'s pods.
+
+If Solr has replicas placed on the pods that will be deleted as a part of the scale-down, then it has a problem.
+Solr will expect that these replicas will eventually come back online, because they are a part of the clusterState.
+However, the Solr Operator has no expectations for these replicas to come back, because the cloud has been scaled down.
+Therefore, the safest option is to move the replicas off of these pods before the scale-down operation occurs.
+
+If the `autoscaling.vacatePodsOnScaleDown` option is not enabled, then whenever `SolrCloud.Spec.Replicas` is decreased,
+that change will be reflected in the StatefulSet immediately.
+Pods will be deleted even if replicas live on those pods.
+
+If the `autoscaling.vacatePodsOnScaleDown` option is enabled, which it is by default, then the following steps occur:
+1. Acquire a cluster-ops lock on the SolrCloud. (This means other cluster operations, such as a rolling restart, cannot occur during the scale down operation)
+1. Scale down the last pod.
+   1. Mark the pod as "notReady" so that traffic is diverted away from this pod.

Review Comment:
   This isn't actually a change; it's how any operator-managed pod-stop has happened since the last release. See https://github.com/apache/solr-operator/pull/530.
   
   But to actually answer your question, it does not affect traffic going to the headless service or the individual node services, so traffic going directly to the pod will never be stopped. The only service that actually uses the readiness conditions is the common service (ultimately used by the common ingress endpoint).
   
   I can make the documentation clearer here.
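
   As a concrete illustration of why headless-service traffic is unaffected: a headless Service can publish pod addresses regardless of readiness via `publishNotReadyAddresses`. The manifest below is a generic Kubernetes sketch with placeholder names, not the operator's exact generated Service:

   ```yaml
   # Sketch of a headless Service that keeps resolving pods even when they are "notReady".
   apiVersion: v1
   kind: Service
   metadata:
     name: example-solrcloud-headless   # placeholder name
   spec:
     clusterIP: None                    # headless: DNS returns pod IPs directly
     publishNotReadyAddresses: true     # keep publishing pods that fail readiness checks
     selector:
       solr-cloud: example              # placeholder label
     ports:
       - name: solr-client
         port: 8983
   ```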



##########
controllers/solr_cluster_ops_util.go:
##########
@@ -0,0 +1,218 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package controllers
+
+import (
+	"context"
+	"errors"
+	solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
+	"github.com/apache/solr-operator/controllers/util"
+	"github.com/go-logr/logr"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/utils/pointer"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"strconv"
+	"time"
+)
+
+func determineScaleClusterOpLockIfNecessary(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (clusterOpLock string, clusterOpMetadata string, retryLaterDuration time.Duration, err error) {
+	desiredPods := int(*instance.Spec.Replicas)
+	configuredPods := int(*statefulSet.Spec.Replicas)
+	if desiredPods != configuredPods {
+		scaleTo := -1
+		// Start a scaling operation
+		if desiredPods < configuredPods {
+			// Scale down!
+			// The option is enabled by default, so treat "nil" like "true"
+			if instance.Spec.Autoscaling.VacatePodsOnScaleDown == nil || *instance.Spec.Autoscaling.VacatePodsOnScaleDown {
+				if desiredPods > 0 {
+					// We only support scaling down one pod at a time if not scaling down to 0 pods
+					scaleTo = configuredPods - 1
+				} else {
+					scaleTo = 0
+				}
+			} else {
+				// The cloud is not setup to use managed scale-down
+				err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
+			}
+		} else if desiredPods > configuredPods {
+			// Scale up!
+			// TODO: replicasScaleUp is not supported, so do not make a clusterOp out of it, just do the patch
+			err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
+		}
+		if scaleTo > -1 {
+			clusterOpLock = util.ScaleLock
+			clusterOpMetadata = strconv.Itoa(scaleTo)
+		}
+	}
+	return
+}
+
+func handleLockedClusterOpScale(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (retryLaterDuration time.Duration, err error) {
+	if scalingToNodes, hasAnn := statefulSet.Annotations[util.ClusterOpsMetadataAnnotation]; hasAnn {
+		if scalingToNodesInt, convErr := strconv.Atoi(scalingToNodes); convErr != nil {
+			logger.Error(convErr, "Could not convert statefulSet annotation to int for scale-down-to information", "annotation", util.ClusterOpsMetadataAnnotation, "value", scalingToNodes)
+			err = convErr
+		} else {
+			replicaManagementComplete := false
+			if scalingToNodesInt < int(*statefulSet.Spec.Replicas) {
+				// Manage scaling down the SolrCloud
+				replicaManagementComplete, err = handleManagedCloudScaleDown(ctx, r, instance, statefulSet, scalingToNodesInt, podList, logger)
+				// } else if scalingToNodesInt > int(*statefulSet.Spec.Replicas) {
+				// TODO: Utilize the scaled-up nodes in the future, however Solr does not currently have APIs for this.
+				// TODO: Think about the order of scale-up and restart when individual nodeService IPs are injected into the pods.
+				// TODO: Will likely want to do a scale-up of the service first, then do the rolling restart of the cluster, then utilize the node.
+			} else {
+				// This shouldn't happen. The ScalingToNodesAnnotation is removed when the statefulSet size changes, through a Patch.
+				// But if it does happen, we should just remove the annotation and move forward.
+				patchedStatefulSet := statefulSet.DeepCopy()
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsLockAnnotation)
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
+				if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+					logger.Error(err, "Error while patching StatefulSet to remove unneeded clusterLockOp annotation for scaling to the current amount of nodes")
+				} else {
+					statefulSet = patchedStatefulSet
+				}
+			}
+
+			// Scale down the statefulSet to represent the new number of utilizedPods, if it is lower than the current number of pods
+			// Also remove the "scalingToNodes" annotation, as that acts as a lock on the cluster, so that other operations,
+			// such as scale-up, pod updates and further scale-down cannot happen at the same time.
+			if replicaManagementComplete {
+				patchedStatefulSet := statefulSet.DeepCopy()
+				patchedStatefulSet.Spec.Replicas = pointer.Int32(int32(scalingToNodesInt))
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsLockAnnotation)
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
+				if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+					logger.Error(err, "Error while patching StatefulSet to scale down SolrCloud", "newUtilizedNodes", scalingToNodesInt)
+				}
+
+				// TODO: Create event for the CRD.
+			} else {
+				// Retry after five seconds to check if the replica management commands have been completed
+				retryLaterDuration = time.Second * 5
+			}
+		}
+		// If everything succeeded, the statefulSet will have an annotation updated
+		// and the reconcile loop will be called again.
+
+		return
+	} else {
+		err = errors.New("no clusterOpMetadata annotation is present in the statefulSet")
+		logger.Error(err, "Cannot perform scaling operation when no scale-to-nodes is provided via the clusterOpMetadata")
+		return time.Second * 10, err
+	}
+}
+
+// handleManagedCloudScaleDown does the logic of a managed and "locked" cloud scale down operation.
+// This will likely take many reconcile loops to complete, as it is moving replicas away from the nodes that will be scaled down.
+func handleManagedCloudScaleDown(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, scaleDownTo int, podList []corev1.Pod, logger logr.Logger) (replicaManagementComplete bool, err error) {
+	// Before doing anything to the pod, make sure that users cannot send requests to the pod anymore.
+	podStoppedReadinessConditions := map[corev1.PodConditionType]podReadinessConditionChange{
+		util.SolrIsNotStoppedReadinessCondition: {
+			reason:  ScaleDown,
+			message: "Pod is being deleted, traffic to the pod must be stopped",
+			status:  false,
+		},
+	}
+
+	if scaleDownTo == 0 {
+		// Delete all collections & data, the user wants no data left if scaling the solrcloud down to 0
+		// This is a much different operation to deleting the SolrCloud/StatefulSet all-together
+		replicaManagementComplete, err = evictAllPods(ctx, r, instance, podList, podStoppedReadinessConditions, logger)
+	} else {
+		// Only evict the last pod, even if we are trying to scale down multiple pods.
+		// Scale down will happen one pod at a time.
+		replicaManagementComplete, err = evictSinglePod(ctx, r, instance, scaleDownTo, podList, podStoppedReadinessConditions, logger)
+	}
+	// TODO: It would be great to support a multi-node scale down when Solr supports evicting many SolrNodes at once.

Review Comment:
   Ahhh yeah, this is more because, right now, even if someone scales down the SolrCloud by 2 (say 6 -> 4), the Solr Operator has to do it one node at a time. So it will first scale down from 6 -> 5, then from 5 -> 4.
   
   I agree that the HPA is rarely going to scale down that aggressively, but it would only take a small improvement to the existing Solr API, which we could then use to make managed multi-node scale-down operations much faster.
   
   I agree it's a small thing.
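
   A tiny sketch of the one-at-a-time behavior, mirroring the decision made in `determineScaleClusterOpLockIfNecessary` in the diff above (the helper name below is illustrative, not part of the PR):

   ```go
   // Sketch only: compute the next scale-down target for a single locked cluster operation.
   func nextScaleDownTarget(configuredPods, desiredPods int) int {
   	if desiredPods <= 0 {
   		// Scaling to zero is handled as one operation that evicts every pod.
   		return 0
   	}
   	// Otherwise only one pod is removed per cluster operation; repeated reconciles
   	// walk the StatefulSet down until configuredPods == desiredPods.
   	return configuredPods - 1
   }
   ```

   So a 6 -> 4 request produces targets 5 and then 4 across two consecutive cluster operations.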





[GitHub] [solr-operator] HoustonPutman commented on a diff in pull request #561: Add managed scale-down for SolrClouds

Posted by "HoustonPutman (via GitHub)" <gi...@apache.org>.
HoustonPutman commented on code in PR #561:
URL: https://github.com/apache/solr-operator/pull/561#discussion_r1196555332


##########
controllers/solr_cluster_ops_util.go:
##########
@@ -0,0 +1,218 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package controllers
+
+import (
+	"context"
+	"errors"
+	solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
+	"github.com/apache/solr-operator/controllers/util"
+	"github.com/go-logr/logr"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/utils/pointer"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"strconv"
+	"time"
+)
+
+func determineScaleClusterOpLockIfNecessary(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (clusterOpLock string, clusterOpMetadata string, retryLaterDuration time.Duration, err error) {
+	desiredPods := int(*instance.Spec.Replicas)
+	configuredPods := int(*statefulSet.Spec.Replicas)
+	if desiredPods != configuredPods {
+		scaleTo := -1
+		// Start a scaling operation
+		if desiredPods < configuredPods {
+			// Scale down!
+			// The option is enabled by default, so treat "nil" like "true"
+			if instance.Spec.Autoscaling.VacatePodsOnScaleDown == nil || *instance.Spec.Autoscaling.VacatePodsOnScaleDown {
+				if desiredPods > 0 {
+					// We only support scaling down one pod at a time if not scaling down to 0 pods
+					scaleTo = configuredPods - 1
+				} else {
+					scaleTo = 0
+				}
+			} else {
+				// The cloud is not setup to use managed scale-down
+				err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
+			}
+		} else if desiredPods > configuredPods {
+			// Scale up!
+			// TODO: replicasScaleUp is not supported, so do not make a clusterOp out of it, just do the patch
+			err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
+		}
+		if scaleTo > -1 {
+			clusterOpLock = util.ScaleLock
+			clusterOpMetadata = strconv.Itoa(scaleTo)
+		}
+	}
+	return
+}
+
+func handleLockedClusterOpScale(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (retryLaterDuration time.Duration, err error) {
+	if scalingToNodes, hasAnn := statefulSet.Annotations[util.ClusterOpsMetadataAnnotation]; hasAnn {
+		if scalingToNodesInt, convErr := strconv.Atoi(scalingToNodes); convErr != nil {
+			logger.Error(convErr, "Could not convert statefulSet annotation to int for scale-down-to information", "annotation", util.ClusterOpsMetadataAnnotation, "value", scalingToNodes)
+			err = convErr
+		} else {
+			replicaManagementComplete := false
+			if scalingToNodesInt < int(*statefulSet.Spec.Replicas) {
+				// Manage scaling down the SolrCloud
+				replicaManagementComplete, err = handleManagedCloudScaleDown(ctx, r, instance, statefulSet, scalingToNodesInt, podList, logger)
+				// } else if scalingToNodesInt > int(*statefulSet.Spec.Replicas) {
+				// TODO: Utilize the scaled-up nodes in the future, however Solr does not currently have APIs for this.
+				// TODO: Think about the order of scale-up and restart when individual nodeService IPs are injected into the pods.
+				// TODO: Will likely want to do a scale-up of the service first, then do the rolling restart of the cluster, then utilize the node.
+			} else {
+				// This shouldn't happen. The ScalingToNodesAnnotation is removed when the statefulSet size changes, through a Patch.
+				// But if it does happen, we should just remove the annotation and move forward.
+				patchedStatefulSet := statefulSet.DeepCopy()
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsLockAnnotation)
+				delete(patchedStatefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
+				if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil {
+					logger.Error(err, "Error while patching StatefulSet to remove unneeded clusterLockOp annotation for scaling to the current amount of nodes")
+				} else {
+					statefulSet = patchedStatefulSet
+				}
+			}
+
+			// Scale down the statefulSet to represent the new number of utilizedPods, if it is lower than the current number of pods
+			// Also remove the "scalingToNodes" annotation, as that acts as a lock on the cluster, so that other operations,
+			// such as scale-up, pod updates and further scale-down cannot happen at the same time.

Review Comment:
   This is left over from previous implementations. Thanks for catching it.


