You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cloudstack.apache.org by da...@apache.org on 2020/03/12 15:59:59 UTC
[cloudstack] branch master updated: [KVM] Rolling maintenance
(#3610)
This is an automated email from the ASF dual-hosted git repository.
dahn pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/cloudstack.git
The following commit(s) were added to refs/heads/master by this push:
new efe00aa [KVM] Rolling maintenance (#3610)
efe00aa is described below
commit efe00aa7e037fb56e00e52c1a3169defbe3fe0d6
Author: Nicolas Vazquez <ni...@gmail.com>
AuthorDate: Thu Mar 12 12:59:46 2020 -0300
[KVM] Rolling maintenance (#3610)
---
agent/bindir/rolling-maintenance.in | 91 +++
agent/conf/agent.properties | 6 +
agent/conf/cloudstack-agent.logrotate.in | 2 +-
.../com/cloud/deploy/DataCenterDeployment.java | 10 +
.../main/java/com/cloud/deploy/DeploymentPlan.java | 2 +
api/src/main/java/com/cloud/event/EventTypes.java | 16 +
.../cloud/resource/RollingMaintenanceManager.java | 146 ++++
.../org/apache/cloudstack/api/ApiConstants.java | 4 +
.../apache/cloudstack/api/ResponseGenerator.java | 9 +-
.../admin/host/PrepareForMaintenanceCmd.java | 4 +
.../admin/resource/StartRollingMaintenanceCmd.java | 178 +++++
.../RollingMaintenanceHostSkippedResponse.java | 61 ++
.../RollingMaintenanceHostUpdatedResponse.java | 85 +++
.../api/response/RollingMaintenanceResponse.java | 79 +++
.../cloud/agent/api/RollingMaintenanceAnswer.java | 56 ++
.../cloud/agent/api/RollingMaintenanceCommand.java | 70 ++
debian/rules | 4 +-
.../java/com/cloud/resource/ResourceManager.java | 2 +
.../java/com/cloud/agent/manager/AgentAttache.java | 3 +-
.../com/cloud/vm/VirtualMachineManagerImpl.java | 1 +
.../src/main/java/com/cloud/host/dao/HostDao.java | 2 +
.../main/java/com/cloud/host/dao/HostDaoImpl.java | 18 +
packaging/centos7/cloud.spec | 5 +
.../cloudstack-rolling-maintenance@.service | 22 +-
.../kvm/resource/LibvirtComputingResource.java | 13 +
.../RollingMaintenanceAgentExecutor.java | 88 +++
.../maintenance/RollingMaintenanceExecutor.java | 31 +
.../RollingMaintenanceExecutorBase.java | 91 +++
.../RollingMaintenanceServiceExecutor.java | 137 ++++
.../LibvirtRollingMaintenanceCommandWrapper.java | 81 +++
.../main/java/com/cloud/api/ApiResponseHelper.java | 31 +
.../deploy/DeploymentPlanningManagerImpl.java | 2 +-
.../com/cloud/resource/ResourceManagerImpl.java | 2 +-
.../resource/RollingMaintenanceManagerImpl.java | 734 +++++++++++++++++++++
.../com/cloud/server/ManagementServerImpl.java | 2 +
.../core/spring-server-core-managers-context.xml | 4 +
.../cloud/resource/MockResourceManagerImpl.java | 5 +
.../RollingMaintenanceManagerImplTest.java | 167 +++++
tools/apidoc/gen_toc.py | 4 +-
ui/css/cloudstack3.css | 8 +
ui/l10n/en.js | 3 +
ui/scripts/system.js | 320 ++++++++-
.../main/java/com/cloud/utils/script/Script.java | 4 +
43 files changed, 2583 insertions(+), 20 deletions(-)
diff --git a/agent/bindir/rolling-maintenance.in b/agent/bindir/rolling-maintenance.in
new file mode 100644
index 0000000..572209c
--- /dev/null
+++ b/agent/bindir/rolling-maintenance.in
@@ -0,0 +1,91 @@
+#!/usr/bin/python
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from subprocess import *
+import sys
+import logging
+
+# File the logging module writes to; opened in append mode by basicConfig below.
+LOG_FILE='/var/log/cloudstack/agent/rolling-maintenance.log'
+# Exit status of a stage script that execute_script() still reports as a success
+# while additionally setting "avoidmaintenance" to True in its result.
+AVOID_MAINTENANCE_EXIT_STATUS=70
+
+# NOTE(review): datefmt contains only the time of day (%H:%M:%S), so log entries
+# carry no calendar date - confirm this is intended for an appended log file.
+logging.basicConfig(filename=LOG_FILE,
+ filemode='a',
+ format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
+ datefmt='%H:%M:%S',
+ level=logging.INFO)
+# Named logger used by every function in this script.
+logger = logging.getLogger('rolling-maintenance')
+
+
+def execute_script(stage, script, payload, timeout):
+ logger.info("Executing script: %s for stage: %s" % (script, stage))
+
+ try:
+ command = "timeout %s %s " % (str(timeout), script)
+ if payload:
+ logger.info("Adding payload: %s" % payload)
+ command += " " + payload
+ pout = Popen(command, shell=True, stdout=PIPE, stderr=PIPE)
+ exitStatus = pout.wait()
+ stdout, stderr = pout.communicate()
+
+ success = True if exitStatus == 0 or exitStatus == AVOID_MAINTENANCE_EXIT_STATUS else False
+ avoid_maintenance = True if exitStatus == AVOID_MAINTENANCE_EXIT_STATUS else False
+ return {"success": success, "message": stdout.strip(), "avoidmaintenance": avoid_maintenance}
+ except Exception as e:
+ logger.error("Error in stage %s: %s" % (script, e))
+ sys.exit(1)
+
+
+if __name__ == '__main__':
+ try:
+ logger.info(sys.argv)
+ if len(sys.argv) < 2:
+ logger.error("Arguments missing")
+ sys.exit(0)
+
+ args = sys.argv[1]
+ params = args.split(',')
+ if len(params) < 5:
+ logger.error("Wrong number of parameters received, STAGE,SCRIPT,TIMEOUT,RESULTS_FILE,OUTPUT_FILE"
+ "[,PAYLOAD] expected")
+ sys.exit(0)
+
+ stage = params[0]
+ script = params[1]
+ timeout = params[2]
+ results_file_path = params[3]
+ output_file_path = params[4]
+ payload = params[5] if len(params) > 5 else None
+ logger.info("Received parameters: stage: %s, script: %s, timeout: %s, results_file: %s, output_file: %s "
+ "and payload: %s" % (stage, script, timeout, results_file_path, output_file_path, payload))
+
+ results = execute_script(stage, script, payload, timeout)
+
+ # Persist results and output on a file
+ output_file = open(output_file_path, "w+")
+ output_file.write(results['message'])
+ output_file.close()
+
+ results_file = open(results_file_path, "w+")
+ results_file.write("%s,%s,%s" % (stage, str(results['success']), str(results['avoidmaintenance'])))
+ results_file.close()
+
+ msg = "Successful execution of %s" if results['success'] else "Script execution failed: %s"
+ logger.info(results['message'])
+ logger.info(msg % script)
+ except Exception as e:
+ logger.error("Unexpected error on systemd service: %s" % e)
+ sys.exit(1)
diff --git a/agent/conf/agent.properties b/agent/conf/agent.properties
index 2459238..bb9bf40 100644
--- a/agent/conf/agent.properties
+++ b/agent/conf/agent.properties
@@ -118,6 +118,12 @@ hypervisor.type=kvm
# This parameter specifies a directory on the host local storage for temporary storing direct download templates
#direct.download.temporary.download.location=/var/lib/libvirt/images
+# set the rolling maintenance hook scripts directory
+#rolling.maintenance.hooks.dir=/etc/cloudstack/agent/hooks.d
+
+# disable the rolling maintenance service execution
+#rolling.maintenance.service.executor.disabled=true
+
# set the hypervisor URI. Usually there is no need for changing this
# For KVM: qemu:///system
# For LXC: lxc:///
diff --git a/agent/conf/cloudstack-agent.logrotate.in b/agent/conf/cloudstack-agent.logrotate.in
index d9a3dfb..2b3dc87 100644
--- a/agent/conf/cloudstack-agent.logrotate.in
+++ b/agent/conf/cloudstack-agent.logrotate.in
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-/var/log/cloudstack/agent/security_group.log /var/log/cloudstack/agent/resizevolume.log {
+/var/log/cloudstack/agent/security_group.log /var/log/cloudstack/agent/resizevolume.log /var/log/cloudstack/agent/rolling-maintenance.log {
copytruncate
daily
rotate 5
diff --git a/api/src/main/java/com/cloud/deploy/DataCenterDeployment.java b/api/src/main/java/com/cloud/deploy/DataCenterDeployment.java
index 76faf25..3ee544c 100644
--- a/api/src/main/java/com/cloud/deploy/DataCenterDeployment.java
+++ b/api/src/main/java/com/cloud/deploy/DataCenterDeployment.java
@@ -33,6 +33,7 @@ public class DataCenterDeployment implements DeploymentPlan {
boolean _recreateDisks;
ReservationContext _context;
List<Long> preferredHostIds = new ArrayList<>();
+ boolean migrationPlan;
public DataCenterDeployment(long dataCenterId) {
this(dataCenterId, null, null, null, null, null);
@@ -107,4 +108,13 @@ public class DataCenterDeployment implements DeploymentPlan {
return this.preferredHostIds;
}
+ /**
+  * Stores the flag that {@link #isMigrationPlan()} reports.
+  *
+  * @param migrationPlan new value of the {@code migrationPlan} field
+  */
+ public void setMigrationPlan(boolean migrationPlan) {
+ this.migrationPlan = migrationPlan;
+ }
+
+ /**
+  * @return the current value of the {@code migrationPlan} field
+  */
+ @Override
+ public boolean isMigrationPlan() {
+ return migrationPlan;
+ }
+
}
diff --git a/api/src/main/java/com/cloud/deploy/DeploymentPlan.java b/api/src/main/java/com/cloud/deploy/DeploymentPlan.java
index b57fec0..c71bf3e 100644
--- a/api/src/main/java/com/cloud/deploy/DeploymentPlan.java
+++ b/api/src/main/java/com/cloud/deploy/DeploymentPlan.java
@@ -71,4 +71,6 @@ public interface DeploymentPlan {
void setPreferredHosts(List<Long> hostIds);
List<Long> getPreferredHosts();
+
+ /**
+  * @return whether this plan is flagged as a migration plan
+  *         (NOTE(review): the exact meaning for planners is not visible here - confirm against callers)
+  */
+ boolean isMigrationPlan();
}
diff --git a/api/src/main/java/com/cloud/event/EventTypes.java b/api/src/main/java/com/cloud/event/EventTypes.java
index c74e9b7..30b6ac0 100644
--- a/api/src/main/java/com/cloud/event/EventTypes.java
+++ b/api/src/main/java/com/cloud/event/EventTypes.java
@@ -76,6 +76,10 @@ import com.cloud.user.User;
import com.cloud.vm.Nic;
import com.cloud.vm.NicSecondaryIp;
import com.cloud.vm.VirtualMachine;
+import org.apache.cloudstack.api.response.ClusterResponse;
+import org.apache.cloudstack.api.response.HostResponse;
+import org.apache.cloudstack.api.response.PodResponse;
+import org.apache.cloudstack.api.response.ZoneResponse;
public class EventTypes {
@@ -591,6 +595,13 @@ public class EventTypes {
// Diagnostics Events
public static final String EVENT_SYSTEM_VM_DIAGNOSTICS = "SYSTEM.VM.DIAGNOSTICS";
+ // Rolling Maintenance
+ public static final String EVENT_START_ROLLING_MAINTENANCE = "SYSTEM.ROLLING.MAINTENANCE";
+ public static final String EVENT_HOST_ROLLING_MAINTENANCE = "HOST.ROLLING.MAINTENANCE";
+ public static final String EVENT_CLUSTER_ROLLING_MAINTENANCE = "CLUSTER.ROLLING.MAINTENANCE";
+ public static final String EVENT_POD_ROLLING_MAINTENANCE = "POD.ROLLING.MAINTENANCE";
+ public static final String EVENT_ZONE_ROLLING_MAINTENANCE = "ZONE.ROLLING.MAINTENANCE";
+
static {
// TODO: need a way to force author adding event types to declare the entity details as well, with out braking
@@ -990,6 +1001,11 @@ public class EventTypes {
entityEventDetails.put(EVENT_TEMPLATE_DIRECT_DOWNLOAD_FAILURE, VirtualMachineTemplate.class);
entityEventDetails.put(EVENT_ISO_DIRECT_DOWNLOAD_FAILURE, "Iso");
entityEventDetails.put(EVENT_SYSTEM_VM_DIAGNOSTICS, VirtualMachine.class);
+
+ entityEventDetails.put(EVENT_ZONE_ROLLING_MAINTENANCE, ZoneResponse.class);
+ entityEventDetails.put(EVENT_POD_ROLLING_MAINTENANCE, PodResponse.class);
+ entityEventDetails.put(EVENT_CLUSTER_ROLLING_MAINTENANCE, ClusterResponse.class);
+ entityEventDetails.put(EVENT_HOST_ROLLING_MAINTENANCE, HostResponse.class);
}
public static String getEntityForEvent(String eventName) {
diff --git a/api/src/main/java/com/cloud/resource/RollingMaintenanceManager.java b/api/src/main/java/com/cloud/resource/RollingMaintenanceManager.java
new file mode 100644
index 0000000..2399980
--- /dev/null
+++ b/api/src/main/java/com/cloud/resource/RollingMaintenanceManager.java
@@ -0,0 +1,146 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package com.cloud.resource;
+
+import com.cloud.host.Host;
+import com.cloud.utils.Pair;
+import com.cloud.utils.Ternary;
+import com.cloud.utils.exception.CloudRuntimeException;
+import org.apache.cloudstack.api.command.admin.resource.StartRollingMaintenanceCmd;
+import org.apache.cloudstack.framework.config.ConfigKey;
+import org.apache.cloudstack.framework.config.Configurable;
+
+import java.util.Date;
+import java.util.List;
+
+/**
+ * Contract for running rolling maintenance, together with the configuration keys,
+ * result holders and enums it uses.
+ */
+public interface RollingMaintenanceManager extends Configurable {
+
+    ConfigKey<Integer> KvmRollingMaintenanceStageTimeout = new ConfigKey<>(
+            "Advanced", Integer.class, "kvm.rolling.maintenance.stage.timeout", "1800",
+            "Wait timeout (in seconds) for a rolling maintenance stage update from hosts",
+            true, ConfigKey.Scope.Global);
+
+    ConfigKey<Integer> KvmRollingMaintenancePingInterval = new ConfigKey<>(
+            "Advanced", Integer.class, "kvm.rolling.maintenance.ping.interval", "10",
+            "Ping interval in seconds between management server and hosts performing stages during rolling maintenance",
+            true, ConfigKey.Scope.Global);
+
+    ConfigKey<Integer> KvmRollingMaintenanceWaitForMaintenanceTimeout = new ConfigKey<>(
+            "Advanced", Integer.class, "kvm.rolling.maintenance.wait.maintenance.timeout", "1800",
+            "Timeout (in seconds) to wait for a host preparing to enter maintenance mode",
+            true, ConfigKey.Scope.Global);
+
+    /**
+     * Pairs a host with the reason it was skipped.
+     */
+    class HostSkipped {
+        private Host host;
+        private String reason;
+
+        public HostSkipped(Host host, String reason) {
+            this.host = host;
+            this.reason = reason;
+        }
+
+        // --- getters ---
+
+        public Host getHost() {
+            return this.host;
+        }
+
+        public String getReason() {
+            return this.reason;
+        }
+
+        // --- setters ---
+
+        public void setHost(Host host) {
+            this.host = host;
+        }
+
+        public void setReason(String reason) {
+            this.reason = reason;
+        }
+    }
+
+    /**
+     * Holds a host together with a start date, an end date and an output message.
+     */
+    class HostUpdated {
+        private Host host;
+        private Date start;
+        private Date end;
+        private String outputMsg;
+
+        public HostUpdated(Host host, Date start, Date end, String outputMsg) {
+            this.host = host;
+            this.start = start;
+            this.end = end;
+            this.outputMsg = outputMsg;
+        }
+
+        // --- getters ---
+
+        public Host getHost() {
+            return this.host;
+        }
+
+        public Date getStart() {
+            return this.start;
+        }
+
+        public Date getEnd() {
+            return this.end;
+        }
+
+        public String getOutputMsg() {
+            return this.outputMsg;
+        }
+
+        // --- setters ---
+
+        public void setHost(Host host) {
+            this.host = host;
+        }
+
+        public void setStart(Date start) {
+            this.start = start;
+        }
+
+        public void setEnd(Date end) {
+            this.end = end;
+        }
+
+        public void setOutputMsg(String outputMsg) {
+            this.outputMsg = outputMsg;
+        }
+    }
+
+    /**
+     * Stages of a rolling maintenance, declared in the order {@link #next()} walks them.
+     */
+    enum Stage {
+        PreFlight, PreMaintenance, Maintenance, PostMaintenance;
+
+        /**
+         * @return the stage following this one, or {@code null} for {@code PostMaintenance}
+         * @throws CloudRuntimeException if this stage is none of the declared constants
+         */
+        public Stage next() {
+            if (this == PreFlight) {
+                return PreMaintenance;
+            } else if (this == PreMaintenance) {
+                return Maintenance;
+            } else if (this == Maintenance) {
+                return PostMaintenance;
+            } else if (this == PostMaintenance) {
+                return null;
+            }
+            throw new CloudRuntimeException("Unexpected stage: " + this);
+        }
+    }
+
+    /**
+     * Kinds of resources a rolling maintenance request can address.
+     */
+    enum ResourceType {
+        Pod, Cluster, Zone, Host
+    }
+
+    /**
+     * Starts rolling maintenance as specified in cmd
+     * @param cmd command
+     * @return tuple: (SUCCESS, DETAILS, (HOSTS_UPDATED, HOSTS_SKIPPED))
+     */
+    Ternary<Boolean, String, Pair<List<HostUpdated>, List<HostSkipped>>> startRollingMaintenance(StartRollingMaintenanceCmd cmd);
+
+    /**
+     * Derives the addressed resource type and the matching IDs from cmd
+     * @param cmd command
+     * @return pair: (RESOURCE_TYPE, IDS)
+     */
+    Pair<ResourceType, List<Long>> getResourceTypeIdPair(StartRollingMaintenanceCmd cmd);
+}
\ No newline at end of file
diff --git a/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java b/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java
index ed7e39e..0482364 100644
--- a/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java
+++ b/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java
@@ -74,6 +74,7 @@ public class ApiConstants {
public static final String CLEANUP = "cleanup";
public static final String MAKEREDUNDANT = "makeredundant";
public static final String CLUSTER_ID = "clusterid";
+ public static final String CLUSTER_IDS = "clusterids";
public static final String CLUSTER_NAME = "clustername";
public static final String CLUSTER_TYPE = "clustertype";
public static final String CN = "cn";
@@ -173,6 +174,7 @@ public class ApiConstants {
public static final String HEALTH = "health";
public static final String HIDE_IP_ADDRESS_USAGE = "hideipaddressusage";
public static final String HOST_ID = "hostid";
+ public static final String HOST_IDS = "hostids";
public static final String HOST_NAME = "hostname";
public static final String HYPERVISOR = "hypervisor";
public static final String INLINE = "inline";
@@ -256,6 +258,7 @@ public class ApiConstants {
public static final String OS_NAME_FOR_HYPERVISOR = "osnameforhypervisor";
public static final String OUTOFBANDMANAGEMENT_POWERSTATE = "outofbandmanagementpowerstate";
public static final String OUTOFBANDMANAGEMENT_ENABLED = "outofbandmanagementenabled";
+ public static final String OUTPUT = "output";
public static final String OVF_PROPERTIES = "ovfproperties";
public static final String PARAMS = "params";
public static final String PARENT_ID = "parentid";
@@ -267,6 +270,7 @@ public class ApiConstants {
public static final String PASSWORD_ENABLED = "passwordenabled";
public static final String SSHKEY_ENABLED = "sshkeyenabled";
public static final String PATH = "path";
+ public static final String PAYLOAD = "payload";
public static final String POD_ID = "podid";
public static final String POD_NAME = "podname";
public static final String POD_IDS = "podids";
diff --git a/api/src/main/java/org/apache/cloudstack/api/ResponseGenerator.java b/api/src/main/java/org/apache/cloudstack/api/ResponseGenerator.java
index 57e03b3..3f0d978 100644
--- a/api/src/main/java/org/apache/cloudstack/api/ResponseGenerator.java
+++ b/api/src/main/java/org/apache/cloudstack/api/ResponseGenerator.java
@@ -22,6 +22,10 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
+import org.apache.cloudstack.api.response.RouterHealthCheckResultResponse;
+import com.cloud.resource.RollingMaintenanceManager;
+import org.apache.cloudstack.api.response.RollingMaintenanceResponse;
+import org.apache.cloudstack.management.ManagementServerHost;
import org.apache.cloudstack.affinity.AffinityGroup;
import org.apache.cloudstack.affinity.AffinityGroupResponse;
import org.apache.cloudstack.api.ApiConstants.HostDetails;
@@ -88,7 +92,6 @@ import org.apache.cloudstack.api.response.RemoteAccessVpnResponse;
import org.apache.cloudstack.api.response.ResourceCountResponse;
import org.apache.cloudstack.api.response.ResourceLimitResponse;
import org.apache.cloudstack.api.response.ResourceTagResponse;
-import org.apache.cloudstack.api.response.RouterHealthCheckResultResponse;
import org.apache.cloudstack.api.response.SSHKeyPairResponse;
import org.apache.cloudstack.api.response.SecurityGroupResponse;
import org.apache.cloudstack.api.response.ServiceOfferingResponse;
@@ -125,7 +128,6 @@ import org.apache.cloudstack.backup.BackupOffering;
import org.apache.cloudstack.backup.Backup;
import org.apache.cloudstack.backup.BackupSchedule;
import org.apache.cloudstack.config.Configuration;
-import org.apache.cloudstack.management.ManagementServerHost;
import org.apache.cloudstack.network.lb.ApplicationLoadBalancerRule;
import org.apache.cloudstack.region.PortableIp;
import org.apache.cloudstack.region.PortableIpRange;
@@ -482,4 +484,7 @@ public interface ResponseGenerator {
ManagementServerResponse createManagementResponse(ManagementServerHost mgmt);
List<RouterHealthCheckResultResponse> createHealthCheckResponse(VirtualMachine router, List<RouterHealthCheckResult> healthCheckResults);
+
+ /**
+  * Builds the API response for a rolling maintenance run.
+  *
+  * @param success overall success flag of the run
+  * @param details details text of the run
+  * @param hostsUpdated hosts reported as updated
+  * @param hostsSkipped hosts reported as skipped
+  * @return the response object created from the given values
+  */
+ RollingMaintenanceResponse createRollingMaintenanceResponse(Boolean success, String details, List<RollingMaintenanceManager.HostUpdated> hostsUpdated, List<RollingMaintenanceManager.HostSkipped> hostsSkipped);
+
}
diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java
index f608128..7083f0d 100644
--- a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java
+++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java
@@ -97,6 +97,10 @@ public class PrepareForMaintenanceCmd extends BaseAsyncCmd {
return getId();
}
+ /**
+  * Sets the {@code id} field of this command directly.
+  * NOTE(review): presumably lets internal callers build this command without going
+  * through API parameter parsing - confirm against the callers.
+  *
+  * @param id new value of the {@code id} field
+  */
+ public void setId(Long id) {
+ this.id = id;
+ }
+
@Override
public void execute() {
try {
diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/resource/StartRollingMaintenanceCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/resource/StartRollingMaintenanceCmd.java
new file mode 100644
index 0000000..b5a9128
--- /dev/null
+++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/resource/StartRollingMaintenanceCmd.java
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.cloudstack.api.command.admin.resource;
+
+import com.cloud.event.EventTypes;
+import com.cloud.exception.ConcurrentOperationException;
+import com.cloud.exception.InsufficientCapacityException;
+import com.cloud.exception.NetworkRuleConflictException;
+import com.cloud.exception.ResourceAllocationException;
+import com.cloud.exception.ResourceUnavailableException;
+import com.cloud.resource.RollingMaintenanceManager;
+import com.cloud.utils.Pair;
+import com.cloud.utils.Ternary;
+import org.apache.cloudstack.acl.RoleType;
+import org.apache.cloudstack.api.APICommand;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.BaseAsyncCmd;
+import org.apache.cloudstack.api.BaseCmd;
+import org.apache.cloudstack.api.Parameter;
+import org.apache.cloudstack.api.ServerApiException;
+import org.apache.cloudstack.api.response.ClusterResponse;
+import org.apache.cloudstack.api.response.HostResponse;
+import org.apache.cloudstack.api.response.PodResponse;
+import org.apache.cloudstack.api.response.RollingMaintenanceResponse;
+import org.apache.cloudstack.api.response.ZoneResponse;
+import org.apache.cloudstack.context.CallContext;
+import org.apache.log4j.Logger;
+
+import javax.inject.Inject;
+import java.util.List;
+
+/**
+ * Asynchronous admin API command that delegates to {@link RollingMaintenanceManager}
+ * to start rolling maintenance on the given zones, pods, clusters or hosts.
+ */
+@APICommand(name = StartRollingMaintenanceCmd.APINAME, description = "Start rolling maintenance",
+        responseObject = RollingMaintenanceResponse.class,
+        requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
+        authorized = {RoleType.Admin})
+public class StartRollingMaintenanceCmd extends BaseAsyncCmd {
+
+    @Inject
+    RollingMaintenanceManager manager;
+
+    public static final Logger s_logger = Logger.getLogger(StartRollingMaintenanceCmd.class.getName());
+
+    public static final String APINAME = "startRollingMaintenance";
+
+    /////////////////////////////////////////////////////
+    //////////////// API parameters /////////////////////
+    /////////////////////////////////////////////////////
+    @Parameter(name = ApiConstants.POD_IDS, type = CommandType.LIST, collectionType = CommandType.UUID,
+            entityType = PodResponse.class, description = "the IDs of the pods to start maintenance on")
+    private List<Long> podIds;
+
+    @Parameter(name = ApiConstants.CLUSTER_IDS, type = CommandType.LIST, collectionType = CommandType.UUID,
+            entityType = ClusterResponse.class, description = "the IDs of the clusters to start maintenance on")
+    private List<Long> clusterIds;
+
+    @Parameter(name = ApiConstants.ZONE_ID_LIST, type = CommandType.LIST, collectionType = CommandType.UUID,
+            entityType = ZoneResponse.class, description = "the IDs of the zones to start maintenance on")
+    private List<Long> zoneIds;
+
+    @Parameter(name = ApiConstants.HOST_IDS, type = CommandType.LIST, collectionType = CommandType.UUID,
+            entityType = HostResponse.class, description = "the IDs of the hosts to start maintenance on")
+    private List<Long> hostIds;
+
+    @Parameter(name = ApiConstants.FORCED, type = CommandType.BOOLEAN,
+            description = "if rolling mechanism should continue in case of an error")
+    private Boolean forced;
+
+    @Parameter(name = ApiConstants.PAYLOAD, type = CommandType.STRING,
+            description = "the command to execute while hosts are on maintenance")
+    private String payload;
+
+    @Parameter(name = ApiConstants.TIMEOUT, type = CommandType.INTEGER,
+            description = "optional operation timeout (in seconds) that overrides the global timeout setting")
+    private Integer timeout;
+
+    /////////////////////////////////////////////////////
+    /////////////////// Accessors ///////////////////////
+    /////////////////////////////////////////////////////
+
+    public List<Long> getPodIds() {
+        return this.podIds;
+    }
+
+    public List<Long> getClusterIds() {
+        return this.clusterIds;
+    }
+
+    public List<Long> getZoneIds() {
+        return this.zoneIds;
+    }
+
+    public List<Long> getHostIds() {
+        return this.hostIds;
+    }
+
+    /**
+     * @return {@code true} only when the forced parameter was supplied and is true
+     */
+    public Boolean getForced() {
+        return Boolean.TRUE.equals(this.forced);
+    }
+
+    public String getPayload() {
+        return this.payload;
+    }
+
+    public Integer getTimeout() {
+        return this.timeout;
+    }
+
+    /////////////////////////////////////////////////////
+    /////////////// API Implementation///////////////////
+    /////////////////////////////////////////////////////
+
+    /**
+     * Runs the rolling maintenance through the manager and publishes the outcome
+     * as this command's response object.
+     */
+    @Override
+    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
+        Ternary<Boolean, String, Pair<List<RollingMaintenanceManager.HostUpdated>, List<RollingMaintenanceManager.HostSkipped>>>
+                outcome = manager.startRollingMaintenance(this);
+        Pair<List<RollingMaintenanceManager.HostUpdated>, List<RollingMaintenanceManager.HostSkipped>> hosts = outcome.third();
+
+        RollingMaintenanceResponse response = _responseGenerator.createRollingMaintenanceResponse(
+                outcome.first(), outcome.second(), hosts.first(), hosts.second());
+        response.setResponseName(getCommandName());
+        this.setResponseObject(response);
+    }
+
+    @Override
+    public String getCommandName() {
+        return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;
+    }
+
+    @Override
+    public long getEntityOwnerId() {
+        return CallContext.current().getCallingAccountId();
+    }
+
+    /**
+     * @return the event type constant matching the resource type resolved by the manager,
+     *         or an empty string when no case matches
+     */
+    @Override
+    public String getEventType() {
+        RollingMaintenanceManager.ResourceType type = manager.getResourceTypeIdPair(this).first();
+        switch (type) {
+            case Zone:
+                return EventTypes.EVENT_ZONE_ROLLING_MAINTENANCE;
+            case Pod:
+                return EventTypes.EVENT_POD_ROLLING_MAINTENANCE;
+            case Cluster:
+                return EventTypes.EVENT_CLUSTER_ROLLING_MAINTENANCE;
+            case Host:
+                return EventTypes.EVENT_HOST_ROLLING_MAINTENANCE;
+            default:
+                return "";
+        }
+    }
+
+    @Override
+    public String getEventDescription() {
+        Pair<RollingMaintenanceManager.ResourceType, List<Long>> typeAndIds = manager.getResourceTypeIdPair(this);
+        RollingMaintenanceManager.ResourceType type = typeAndIds.first();
+        List<Long> ids = typeAndIds.second();
+        return "Starting rolling maintenance on entity: " + type + " with IDs: " + ids;
+    }
+}
\ No newline at end of file
diff --git a/api/src/main/java/org/apache/cloudstack/api/response/RollingMaintenanceHostSkippedResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/RollingMaintenanceHostSkippedResponse.java
new file mode 100644
index 0000000..8d30454
--- /dev/null
+++ b/api/src/main/java/org/apache/cloudstack/api/response/RollingMaintenanceHostSkippedResponse.java
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package org.apache.cloudstack.api.response;
+
+import com.cloud.serializer.Param;
+import com.google.gson.annotations.SerializedName;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.BaseResponse;
+
+/**
+ * API response element describing one host that was skipped, with the reason for skipping it.
+ */
+public class RollingMaintenanceHostSkippedResponse extends BaseResponse {
+
+    @SerializedName(ApiConstants.HOST_ID)
+    @Param(description = "the ID of the skipped host")
+    private String hostId;
+
+    @SerializedName(ApiConstants.HOST_NAME)
+    @Param(description = "the name of the skipped host")
+    private String hostName;
+
+    @SerializedName(ApiConstants.ACL_REASON)
+    @Param(description = "the reason to skip the host")
+    private String reason;
+
+    // --- getters ---
+
+    public String getHostId() {
+        return this.hostId;
+    }
+
+    public String getHostName() {
+        return this.hostName;
+    }
+
+    public String getReason() {
+        return this.reason;
+    }
+
+    // --- setters ---
+
+    public void setHostId(String hostId) {
+        this.hostId = hostId;
+    }
+
+    public void setHostName(String hostName) {
+        this.hostName = hostName;
+    }
+
+    public void setReason(String reason) {
+        this.reason = reason;
+    }
+}
diff --git a/api/src/main/java/org/apache/cloudstack/api/response/RollingMaintenanceHostUpdatedResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/RollingMaintenanceHostUpdatedResponse.java
new file mode 100644
index 0000000..821257d
--- /dev/null
+++ b/api/src/main/java/org/apache/cloudstack/api/response/RollingMaintenanceHostUpdatedResponse.java
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package org.apache.cloudstack.api.response;
+
+import com.cloud.serializer.Param;
+import com.google.gson.annotations.SerializedName;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.BaseResponse;
+
+public class RollingMaintenanceHostUpdatedResponse extends BaseResponse { // API response element for one updated host: identity, start/end dates of the update and the maintenance script output
+
+ @SerializedName(ApiConstants.HOST_ID)
+ @Param(description = "the ID of the updated host")
+ private String hostId;
+
+ @SerializedName(ApiConstants.HOST_NAME)
+ @Param(description = "the name of the updated host")
+ private String hostName;
+
+ @SerializedName(ApiConstants.START_DATE)
+ @Param(description = "start date of the update on the host")
+ private String startDate; // held as a String; this class does no date parsing or formatting
+
+ @SerializedName(ApiConstants.END_DATE)
+ @Param(description = "end date of the update on the host")
+ private String endDate; // held as a String; this class does no date parsing or formatting
+
+ @SerializedName(ApiConstants.OUTPUT)
+ @Param(description = "output of the maintenance script on the host")
+ private String output;
+
+ public String getHostId() { // plain accessors from here on: values are stored and returned unchanged, with no validation
+ return hostId;
+ }
+
+ public void setHostId(String hostId) {
+ this.hostId = hostId;
+ }
+
+ public String getHostName() {
+ return hostName;
+ }
+
+ public void setHostName(String hostName) {
+ this.hostName = hostName;
+ }
+
+ public String getStartDate() {
+ return startDate;
+ }
+
+ public void setStartDate(String startDate) {
+ this.startDate = startDate;
+ }
+
+ public String getEndDate() {
+ return endDate;
+ }
+
+ public void setEndDate(String endDate) {
+ this.endDate = endDate;
+ }
+
+ public String getOutput() {
+ return output;
+ }
+
+ public void setOutput(String output) {
+ this.output = output;
+ }
+}
diff --git a/api/src/main/java/org/apache/cloudstack/api/response/RollingMaintenanceResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/RollingMaintenanceResponse.java
new file mode 100644
index 0000000..bfd4d9f
--- /dev/null
+++ b/api/src/main/java/org/apache/cloudstack/api/response/RollingMaintenanceResponse.java
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package org.apache.cloudstack.api.response;
+
+import com.cloud.serializer.Param;
+import com.google.gson.annotations.SerializedName;
+import org.apache.cloudstack.api.BaseResponse;
+
+import java.util.List;
+
+public class RollingMaintenanceResponse extends BaseResponse { // top-level API response of a rolling maintenance run: overall result, failure details and per-host lists of updated and skipped hosts
+
+ @SerializedName("success")
+ @Param(description = "indicates if the rolling maintenance operation was successful")
+ private Boolean success;
+
+ @SerializedName("details")
+ @Param(description = "in case of failure, details are displayed")
+ private String details;
+
+ @SerializedName("hostsupdated")
+ @Param(description = "the hosts updated", responseObject = RollingMaintenanceHostUpdatedResponse.class)
+ private List<RollingMaintenanceHostUpdatedResponse> updatedHosts; // not set by the constructor: stays null until setUpdatedHosts is called
+
+ @SerializedName("hostsskipped")
+ @Param(description = "the hosts skipped", responseObject = RollingMaintenanceHostSkippedResponse.class)
+ private List<RollingMaintenanceHostSkippedResponse> skippedHosts; // not set by the constructor: stays null until setSkippedHosts is called
+
+ public RollingMaintenanceResponse(Boolean success, String details) { // only the overall result and details are required up front; host lists are attached through the setters
+ this.success = success;
+ this.details = details;
+ }
+
+ public Boolean getSuccess() {
+ return success;
+ }
+
+ public void setSuccess(Boolean success) {
+ this.success = success;
+ }
+
+ public String getDetails() {
+ return details;
+ }
+
+ public void setDetails(String details) {
+ this.details = details;
+ }
+
+ public List<RollingMaintenanceHostUpdatedResponse> getUpdatedHosts() { // returns the stored list reference itself (no copy), possibly null
+ return updatedHosts;
+ }
+
+ public void setUpdatedHosts(List<RollingMaintenanceHostUpdatedResponse> updatedHosts) {
+ this.updatedHosts = updatedHosts;
+ }
+
+ public List<RollingMaintenanceHostSkippedResponse> getSkippedHosts() { // returns the stored list reference itself (no copy), possibly null
+ return skippedHosts;
+ }
+
+ public void setSkippedHosts(List<RollingMaintenanceHostSkippedResponse> skippedHosts) {
+ this.skippedHosts = skippedHosts;
+ }
+}
\ No newline at end of file
diff --git a/core/src/main/java/com/cloud/agent/api/RollingMaintenanceAnswer.java b/core/src/main/java/com/cloud/agent/api/RollingMaintenanceAnswer.java
new file mode 100644
index 0000000..de7b1ba
--- /dev/null
+++ b/core/src/main/java/com/cloud/agent/api/RollingMaintenanceAnswer.java
@@ -0,0 +1,56 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+package com.cloud.agent.api;
+
+public class RollingMaintenanceAnswer extends Answer { // answer to a RollingMaintenanceCommand; carries three extra flags on top of the base Answer result and details
+
+ private boolean finished;
+ private boolean avoidMaintenance;
+ private boolean maintenaceScriptDefined; // spelling ("maintenace") is mirrored in the public accessor names below, so renaming it would break callers
+
+ public RollingMaintenanceAnswer(Command command, boolean success, String details, boolean finished) { // result of a stage: sets finished, leaves avoidMaintenance and maintenaceScriptDefined at false
+ super(command, success, details);
+ this.finished = finished;
+ }
+
+ public RollingMaintenanceAnswer(Command command, boolean isMaintenanceScript) { // script-check variant: always built as successful with empty details; finished and avoidMaintenance stay false
+ super(command, true, "");
+ this.maintenaceScriptDefined = isMaintenanceScript;
+ }
+
+ public boolean isFinished() {
+ return finished;
+ }
+
+ public boolean isAvoidMaintenance() {
+ return avoidMaintenance;
+ }
+
+ public void setAvoidMaintenance(boolean avoidMaintenance) { // neither constructor sets this flag, so this setter is the only way to turn it on
+ this.avoidMaintenance = avoidMaintenance;
+ }
+
+ public boolean isMaintenaceScriptDefined() {
+ return maintenaceScriptDefined;
+ }
+
+ public void setMaintenaceScriptDefined(boolean maintenaceScriptDefined) {
+ this.maintenaceScriptDefined = maintenaceScriptDefined;
+ }
+}
diff --git a/core/src/main/java/com/cloud/agent/api/RollingMaintenanceCommand.java b/core/src/main/java/com/cloud/agent/api/RollingMaintenanceCommand.java
new file mode 100644
index 0000000..ae1f493
--- /dev/null
+++ b/core/src/main/java/com/cloud/agent/api/RollingMaintenanceCommand.java
@@ -0,0 +1,70 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+
+package com.cloud.agent.api;
+
+public class RollingMaintenanceCommand extends Command { // command for rolling maintenance; built either as a maintenance-script check or as a request for a named stage
+
+ private String stage;
+ private String payload;
+ private boolean started;
+ private boolean checkMaintenanceScript;
+
+ public RollingMaintenanceCommand(boolean checkMaintenanceScript) { // script-check form: stage and payload stay null, started stays false
+ this.checkMaintenanceScript = checkMaintenanceScript;
+ }
+
+ public RollingMaintenanceCommand(String stage) { // stage form: checkMaintenanceScript stays false; payload and started are set through the setters
+ this.stage = stage;
+ }
+
+ public void setStage(String stage) {
+ this.stage = stage;
+ }
+
+ public String getStage() {
+ return this.stage;
+ }
+
+ public String getPayload() {
+ return payload;
+ }
+
+ public void setPayload(String payload) {
+ this.payload = payload;
+ }
+
+ public boolean isStarted() {
+ return started;
+ }
+
+ public void setStarted(boolean started) {
+ this.started = started;
+ }
+
+ public boolean isCheckMaintenanceScript() { // read-only: no setter exists, the value is fixed by the constructor used
+ return checkMaintenanceScript;
+ }
+
+ @Override
+ public boolean executeInSequence() { // always false; NOTE(review): presumably means the command need not be serialized with other commands for the host - confirm against Command
+ return false;
+ }
+
+}
diff --git a/debian/rules b/debian/rules
index 4220a83..9055ee1 100755
--- a/debian/rules
+++ b/debian/rules
@@ -45,6 +45,7 @@ override_dh_auto_install:
install -d -m0755 debian/$(PACKAGE)-agent/lib/systemd/system
install -m0644 packaging/systemd/$(PACKAGE)-agent.service debian/$(PACKAGE)-agent/lib/systemd/system/$(PACKAGE)-agent.service
install -m0644 packaging/systemd/$(PACKAGE)-agent.default $(DESTDIR)/$(SYSCONFDIR)/default/$(PACKAGE)-agent
+ install -m0644 packaging/systemd/$(PACKAGE)-rolling-maintenance@.service debian/$(PACKAGE)-agent/lib/systemd/system/$(PACKAGE)-rolling-maintenance@.service
install -D -m0644 agent/target/transformed/cloudstack-agent.logrotate $(DESTDIR)/$(SYSCONFDIR)/logrotate.d/cloudstack-agent
@@ -54,6 +55,7 @@ override_dh_auto_install:
install -D agent/target/transformed/cloudstack-agent-upgrade $(DESTDIR)/usr/bin/cloudstack-agent-upgrade
install -D agent/target/transformed/cloud-guest-tool $(DESTDIR)/usr/bin/cloudstack-guest-tool
install -D agent/target/transformed/libvirtqemuhook $(DESTDIR)/usr/share/$(PACKAGE)-agent/lib/
+ install -D agent/target/transformed/rolling-maintenance $(DESTDIR)/usr/share/$(PACKAGE)-agent/lib/
install -D agent/target/transformed/* $(DESTDIR)/$(SYSCONFDIR)/$(PACKAGE)/agent
# cloudstack-management
@@ -139,7 +141,7 @@ override_dh_auto_install:
cp -r test/integration/* $(DESTDIR)/usr/share/$(PACKAGE)-integration-tests/
override_dh_systemd_enable:
- dh_systemd_enable -pcloudstack-management -pcloudstack-agent -pcloudstack-usage
+ dh_systemd_enable -pcloudstack-management -pcloudstack-agent -pcloudstack-usage -pcloudstack-rolling-maintenance@
override_dh_strip_nondeterminism:
# Disable dh_strip_nondeterminism to speed up the build
diff --git a/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java b/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java
index 387fa7f..db7a27f 100755
--- a/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java
+++ b/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java
@@ -205,4 +205,6 @@ public interface ResourceManager extends ResourceService, Configurable {
HashMap<String, HashMap<String, VgpuTypesInfo>> getGPUStatistics(HostVO host);
HostVO findOneRandomRunningHostByHypervisor(HypervisorType type);
+
+ boolean cancelMaintenance(final long hostId);
}
diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentAttache.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentAttache.java
index e96181b..45df231 100644
--- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentAttache.java
+++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentAttache.java
@@ -34,6 +34,7 @@ import java.util.concurrent.TimeUnit;
import com.cloud.agent.api.ModifySshKeysCommand;
import com.cloud.agent.api.ModifyStoragePoolCommand;
import org.apache.cloudstack.agent.lb.SetupMSListCommand;
+import com.cloud.agent.api.RollingMaintenanceCommand;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.log4j.Logger;
@@ -117,7 +118,7 @@ public abstract class AgentAttache {
StopCommand.class.toString(), CheckVirtualMachineCommand.class.toString(), PingTestCommand.class.toString(), CheckHealthCommand.class.toString(),
ReadyCommand.class.toString(), ShutdownCommand.class.toString(), SetupCommand.class.toString(),
CleanupNetworkRulesCmd.class.toString(), CheckNetworkCommand.class.toString(), PvlanSetupCommand.class.toString(), CheckOnHostCommand.class.toString(),
- ModifyTargetsCommand.class.toString(), ModifySshKeysCommand.class.toString(), ModifyStoragePoolCommand.class.toString(), SetupMSListCommand.class.toString()};
+ ModifyTargetsCommand.class.toString(), ModifySshKeysCommand.class.toString(), ModifyStoragePoolCommand.class.toString(), SetupMSListCommand.class.toString(), RollingMaintenanceCommand.class.toString()};
protected final static String[] s_commandsNotAllowedInConnectingMode = new String[] { StartCommand.class.toString(), CreateCommand.class.toString() };
static {
Arrays.sort(s_commandsAllowedInMaintenanceMode);
diff --git a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java
index 8e52c38..7765611 100755
--- a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java
+++ b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java
@@ -2997,6 +2997,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
while (true) {
try {
+ plan.setMigrationPlan(true);
dest = _dpMgr.planDeployment(profile, plan, excludes, planner);
} catch (final AffinityConflictException e2) {
s_logger.warn("Unable to create deployment, affinity rules associted to the VM conflict", e2);
diff --git a/engine/schema/src/main/java/com/cloud/host/dao/HostDao.java b/engine/schema/src/main/java/com/cloud/host/dao/HostDao.java
index 781f82f..34b8963 100644
--- a/engine/schema/src/main/java/com/cloud/host/dao/HostDao.java
+++ b/engine/schema/src/main/java/com/cloud/host/dao/HostDao.java
@@ -111,4 +111,6 @@ public interface HostDao extends GenericDao<HostVO, Long>, StateDao<Status, Stat
HostVO findHostInZoneToExecuteCommand(long zoneId, HypervisorType hypervisorType);
List<HostVO> listAllHostsUpByZoneAndHypervisor(long zoneId, HypervisorType hypervisorType);
+
+ List<HostVO> listByClusterAndHypervisorType(long clusterId, HypervisorType hypervisorType);
}
diff --git a/engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java b/engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java
index 2b2a80b..20d817c 100644
--- a/engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java
+++ b/engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java
@@ -109,6 +109,7 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
protected SearchBuilder<HostVO> ClusterStatusSearch;
protected SearchBuilder<HostVO> TypeNameZoneSearch;
protected SearchBuilder<HostVO> AvailHypevisorInZone;
+ protected SearchBuilder<HostVO> ClusterHypervisorSearch;
protected SearchBuilder<HostVO> DirectConnectSearch;
protected SearchBuilder<HostVO> ManagedDirectConnectSearch;
@@ -293,6 +294,13 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
DirectlyConnectedSearch.and("resourceState", DirectlyConnectedSearch.entity().getResourceState(), SearchCriteria.Op.NOTIN);
DirectlyConnectedSearch.done();
+ ClusterHypervisorSearch = createSearchBuilder();
+ ClusterHypervisorSearch.and("clusterId", ClusterHypervisorSearch.entity().getClusterId(), SearchCriteria.Op.EQ);
+ ClusterHypervisorSearch.and("hypervisor", ClusterHypervisorSearch.entity().getHypervisorType(), SearchCriteria.Op.EQ);
+ ClusterHypervisorSearch.and("type", ClusterHypervisorSearch.entity().getType(), SearchCriteria.Op.EQ);
+ ClusterHypervisorSearch.and("status", ClusterHypervisorSearch.entity().getStatus(), SearchCriteria.Op.EQ);
+ ClusterHypervisorSearch.done();
+
UnmanagedDirectConnectSearch = createSearchBuilder();
UnmanagedDirectConnectSearch.and("resource", UnmanagedDirectConnectSearch.entity().getResource(), SearchCriteria.Op.NNULL);
UnmanagedDirectConnectSearch.and("server", UnmanagedDirectConnectSearch.entity().getManagementServerId(), SearchCriteria.Op.NULL);
@@ -1213,6 +1221,16 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
.collect(Collectors.toList());
}
+ @Override
+ public List<HostVO> listByClusterAndHypervisorType(long clusterId, HypervisorType hypervisorType) { // lists hosts of the cluster with the given hypervisor; despite the name it also restricts to Type.Routing hosts whose status is Up
+ SearchCriteria<HostVO> sc = ClusterHypervisorSearch.create();
+ sc.setParameters("clusterId", clusterId);
+ sc.setParameters("hypervisor", hypervisorType);
+ sc.setParameters("type", Type.Routing); // fixed filter, not caller-controlled
+ sc.setParameters("status", Status.Up); // fixed filter, not caller-controlled
+ return listBy(sc);
+ }
+
private ResultSet executeSqlGetResultsetForMethodFindHostInZoneToExecuteCommand(HypervisorType hypervisorType, long zoneId, TransactionLegacy tx, String sql) throws SQLException {
PreparedStatement pstmt = tx.prepareAutoCloseStatement(sql);
pstmt.setString(1, Objects.toString(hypervisorType));
diff --git a/packaging/centos7/cloud.spec b/packaging/centos7/cloud.spec
index 3a08289..6a8a160 100644
--- a/packaging/centos7/cloud.spec
+++ b/packaging/centos7/cloud.spec
@@ -292,6 +292,7 @@ mkdir -p ${RPM_BUILD_ROOT}%{_localstatedir}/log/%{name}/agent
mkdir -p ${RPM_BUILD_ROOT}%{_datadir}/%{name}-agent/lib
mkdir -p ${RPM_BUILD_ROOT}%{_datadir}/%{name}-agent/plugins
install -D packaging/systemd/cloudstack-agent.service ${RPM_BUILD_ROOT}%{_unitdir}/%{name}-agent.service
+install -D packaging/systemd/cloudstack-rolling-maintenance@.service ${RPM_BUILD_ROOT}%{_unitdir}/%{name}-rolling-maintenance@.service
install -D packaging/systemd/cloudstack-agent.default ${RPM_BUILD_ROOT}%{_sysconfdir}/default/%{name}-agent
install -D agent/target/transformed/agent.properties ${RPM_BUILD_ROOT}%{_sysconfdir}/%{name}/agent/agent.properties
install -D agent/target/transformed/environment.properties ${RPM_BUILD_ROOT}%{_sysconfdir}/%{name}/agent/environment.properties
@@ -300,6 +301,7 @@ install -D agent/target/transformed/cloud-setup-agent ${RPM_BUILD_ROOT}%{_bindir
install -D agent/target/transformed/cloudstack-agent-upgrade ${RPM_BUILD_ROOT}%{_bindir}/%{name}-agent-upgrade
install -D agent/target/transformed/cloud-guest-tool ${RPM_BUILD_ROOT}%{_bindir}/%{name}-guest-tool
install -D agent/target/transformed/libvirtqemuhook ${RPM_BUILD_ROOT}%{_datadir}/%{name}-agent/lib/libvirtqemuhook
+install -D agent/target/transformed/rolling-maintenance ${RPM_BUILD_ROOT}%{_datadir}/%{name}-agent/lib/rolling-maintenance
install -D agent/target/transformed/cloud-ssh ${RPM_BUILD_ROOT}%{_bindir}/%{name}-ssh
install -D agent/target/transformed/cloudstack-agent-profile.sh ${RPM_BUILD_ROOT}%{_sysconfdir}/profile.d/%{name}-agent-profile.sh
install -D agent/target/transformed/cloudstack-agent.logrotate ${RPM_BUILD_ROOT}%{_sysconfdir}/logrotate.d/%{name}-agent
@@ -428,6 +430,7 @@ cp -a ${RPM_BUILD_ROOT}%{_datadir}/%{name}-agent/lib/libvirtqemuhook %{_sysconfd
mkdir -m 0755 -p /usr/share/cloudstack-agent/tmp
/sbin/service libvirtd restart
/sbin/systemctl enable cloudstack-agent > /dev/null 2>&1 || true
+/sbin/systemctl enable cloudstack-rolling-maintenance@p > /dev/null 2>&1 || true
# if saved configs from upgrade exist, copy them over
if [ -f "%{_sysconfdir}/cloud.rpmsave/agent/agent.properties" ]; then
@@ -519,6 +522,7 @@ pip install --upgrade /usr/share/cloudstack-marvin/Marvin-*.tar.gz
%attr(0755,root,root) %{_bindir}/%{name}-guest-tool
%attr(0755,root,root) %{_bindir}/%{name}-ssh
%attr(0644,root,root) %{_unitdir}/%{name}-agent.service
+%attr(0644,root,root) %{_unitdir}/%{name}-rolling-maintenance@.service
%config(noreplace) %{_sysconfdir}/default/%{name}-agent
%attr(0644,root,root) %{_sysconfdir}/profile.d/%{name}-agent-profile.sh
%config(noreplace) %attr(0644,root,root) %{_sysconfdir}/logrotate.d/%{name}-agent
@@ -527,6 +531,7 @@ pip install --upgrade /usr/share/cloudstack-marvin/Marvin-*.tar.gz
%dir %{_localstatedir}/log/%{name}/agent
%attr(0644,root,root) %{_datadir}/%{name}-agent/lib/*.jar
%attr(0755,root,root) %{_datadir}/%{name}-agent/lib/libvirtqemuhook
+%attr(0755,root,root) %{_datadir}/%{name}-agent/lib/rolling-maintenance
%dir %{_datadir}/%{name}-agent/plugins
%{_defaultdocdir}/%{name}-agent-%{version}/LICENSE
%{_defaultdocdir}/%{name}-agent-%{version}/NOTICE
diff --git a/agent/conf/cloudstack-agent.logrotate.in b/packaging/systemd/cloudstack-rolling-maintenance@.service
similarity index 70%
copy from agent/conf/cloudstack-agent.logrotate.in
copy to packaging/systemd/cloudstack-rolling-maintenance@.service
index d9a3dfb..8c793a7 100644
--- a/agent/conf/cloudstack-agent.logrotate.in
+++ b/packaging/systemd/cloudstack-rolling-maintenance@.service
@@ -15,11 +15,17 @@
# specific language governing permissions and limitations
# under the License.
-/var/log/cloudstack/agent/security_group.log /var/log/cloudstack/agent/resizevolume.log {
- copytruncate
- daily
- rotate 5
- compress
- missingok
- size 10M
-}
+# Systemd unit file for CloudStack Rolling Maintenance
+
+[Unit]
+Description=Rolling maintenance executor %I
+After=network.target local-fs.target
+
+[Install]
+WantedBy=multi-user.target
+
+[Service]
+Type=simple
+WorkingDirectory=/usr/share/cloudstack-agent/lib/
+ExecStart=/usr/share/cloudstack-agent/lib/rolling-maintenance %I
+Restart=no
diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
index 9404be2..1be6785 100644
--- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
+++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
@@ -46,6 +46,9 @@ import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
+import com.cloud.hypervisor.kvm.resource.rolling.maintenance.RollingMaintenanceAgentExecutor;
+import com.cloud.hypervisor.kvm.resource.rolling.maintenance.RollingMaintenanceExecutor;
+import com.cloud.hypervisor.kvm.resource.rolling.maintenance.RollingMaintenanceServiceExecutor;
import org.apache.cloudstack.storage.to.PrimaryDataStoreTO;
import org.apache.cloudstack.storage.to.TemplateObjectTO;
import org.apache.cloudstack.storage.to.VolumeObjectTO;
@@ -276,6 +279,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
protected int _migrateDowntime;
protected int _migratePauseAfter;
protected boolean _diskActivityCheckEnabled;
+ protected RollingMaintenanceExecutor rollingMaintenanceExecutor;
protected long _diskActivityCheckFileSizeMin = 10485760; // 10MB
protected int _diskActivityCheckTimeoutSeconds = 120; // 120s
protected long _diskActivityInactiveThresholdMilliseconds = 30000; // 30s
@@ -426,6 +430,10 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
return _migrateSpeed;
}
+ public RollingMaintenanceExecutor getRollingMaintenanceExecutor() { // returns the executor field as-is; it has no initializer, so it is null until assigned from the "rolling.maintenance.service.executor.disabled" parameter
+ return rollingMaintenanceExecutor;
+ }
+
public String getPingTestPath() {
return _pingTestPath;
}
@@ -790,6 +798,11 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
_hypervisorType = HypervisorType.KVM;
}
+ String hooksDir = (String)params.get("rolling.maintenance.hooks.dir");
+ value = (String) params.get("rolling.maintenance.service.executor.disabled");
+ rollingMaintenanceExecutor = Boolean.parseBoolean(value) ? new RollingMaintenanceAgentExecutor(hooksDir) :
+ new RollingMaintenanceServiceExecutor(hooksDir);
+
_hypervisorURI = (String)params.get("hypervisor.uri");
if (_hypervisorURI == null) {
_hypervisorURI = LibvirtConnection.getHypervisorURI(_hypervisorType.toString());
diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/rolling/maintenance/RollingMaintenanceAgentExecutor.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/rolling/maintenance/RollingMaintenanceAgentExecutor.java
new file mode 100644
index 0000000..110c4a8
--- /dev/null
+++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/rolling/maintenance/RollingMaintenanceAgentExecutor.java
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package com.cloud.hypervisor.kvm.resource.rolling.maintenance;
+
+import com.cloud.utils.Pair;
+import com.cloud.utils.exception.CloudRuntimeException;
+import com.cloud.utils.script.OutputInterpreter;
+import com.cloud.utils.script.Script;
+import com.google.common.base.Strings;
+import org.apache.log4j.Logger;
+import org.joda.time.Duration;
+
+import java.io.File;
+
+/**
+ * Rolling maintenance executor that runs each stage script from within the agent and keeps
+ * the output and result of the most recent run in memory, where the getters read them back.
+ */
+public class RollingMaintenanceAgentExecutor extends RollingMaintenanceExecutorBase implements RollingMaintenanceExecutor {
+
+    private static final Logger s_logger = Logger.getLogger(RollingMaintenanceAgentExecutor.class);
+
+    // Output and success flag of the last stage executed by this instance
+    private String output;
+    private boolean success;
+
+    public RollingMaintenanceAgentExecutor(String hooksDir) {
+        super(hooksDir);
+    }
+
+    /**
+     * Runs the stage script and records its output, success flag and avoid-maintenance flag.
+     *
+     * @param stage name of the stage, used for logging and in the returned message
+     * @param scriptFile script to execute
+     * @param timeout maximum execution time, in seconds
+     * @param payload optional argument passed to the script; not passed when null or empty
+     * @return (false, message) when the script timed out; otherwise (true, message), in which
+     *         case the real outcome is read through {@link #getStageExecutionSuccess}
+     * @throws CloudRuntimeException when the script exits with the terminated-signal exit value
+     */
+    @Override
+    public Pair<Boolean, String> startStageExecution(String stage, File scriptFile, int timeout, String payload) {
+        // Start from a clean slate so that a timeout or an exception below can never leave
+        // the result of a previously executed stage visible through the getters
+        success = false;
+        setAvoidMaintenance(false);
+
+        checkHooksDirectory();
+        Duration duration = Duration.standardSeconds(timeout);
+        final Script script = new Script(scriptFile.getAbsolutePath(), duration, s_logger);
+        final OutputInterpreter.AllLinesParser parser = new OutputInterpreter.AllLinesParser();
+        if (!Strings.isNullOrEmpty(payload)) {
+            script.add(payload);
+        }
+        s_logger.info("Executing stage: " + stage + " script: " + script);
+        output = joinOutput(script.execute(parser), parser.getLines());
+
+        if (script.isTimeout()) {
+            String msg = "Script " + scriptFile + " timed out";
+            s_logger.error(msg);
+            return new Pair<>(false, msg);
+        }
+
+        int exitValue = script.getExitValue();
+        if (exitValue == exitValueTerminatedSignal) {
+            throw new CloudRuntimeException("Script " + scriptFile + " terminated");
+        }
+        // The avoid-maintenance exit value counts as a successful run of the stage
+        success = exitValue == 0 || exitValue == exitValueAvoidMaintenance;
+        setAvoidMaintenance(exitValue == exitValueAvoidMaintenance);
+        s_logger.info("Execution finished for stage: " + stage + " script: " + script + ": " + exitValue);
+        if (s_logger.isDebugEnabled()) {
+            s_logger.debug(output);
+            s_logger.debug("Stage " + stage + " execution finished: " + exitValue);
+        }
+        return new Pair<>(true, "Stage " + stage + " finished");
+    }
+
+    /**
+     * Joins the execution result and the captured script lines with a single space, leaving
+     * out whichever part is null so the literal text "null" never ends up in the output.
+     * NOTE(review): Script.execute is understood to return null when the script succeeds -
+     * confirm against Script; when both parts are non-null the result is unchanged.
+     */
+    private static String joinOutput(String result, String lines) {
+        if (result == null) {
+            return lines == null ? "" : lines;
+        }
+        return lines == null ? result : result + " " + lines;
+    }
+
+    /** Returns the output recorded by the last executed stage; the arguments are ignored. */
+    @Override
+    public String getStageExecutionOutput(String stage, File scriptFile) {
+        return output;
+    }
+
+    @Override
+    public boolean isStageRunning(String stage, File scriptFile, String payload) {
+        // In case of reconnection, it is assumed that the stage is finished
+        return false;
+    }
+
+    /** Returns the success flag recorded by the last executed stage; the arguments are ignored. */
+    @Override
+    public boolean getStageExecutionSuccess(String stage, File scriptFile) {
+        return success;
+    }
+}
diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/rolling/maintenance/RollingMaintenanceExecutor.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/rolling/maintenance/RollingMaintenanceExecutor.java
new file mode 100644
index 0000000..fe72765
--- /dev/null
+++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/rolling/maintenance/RollingMaintenanceExecutor.java
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package com.cloud.hypervisor.kvm.resource.rolling.maintenance;
+
+import com.cloud.utils.Pair;
+
+import java.io.File;
+
+public interface RollingMaintenanceExecutor { // contract for running rolling maintenance stage scripts on a host and querying their outcome
+
+ File getStageScriptFile(String stage); // NOTE(review): presumably resolves the script file for the given stage - confirm against the implementation
+ Pair<Boolean, String> startStageExecution(String stage, File scriptFile, int timeout, String payload); // returns a (flag, message) pair; RollingMaintenanceAgentExecutor interprets timeout as seconds
+ String getStageExecutionOutput(String stage, File scriptFile); // output produced by the stage script
+ boolean isStageRunning(String stage, File scriptFile, String payload);
+ boolean getStageExecutionSuccess(String stage, File scriptFile);
+ boolean getStageAvoidMaintenance(String stage, File scriptFile);
+}
diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/rolling/maintenance/RollingMaintenanceExecutorBase.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/rolling/maintenance/RollingMaintenanceExecutorBase.java
new file mode 100644
index 0000000..140b588
--- /dev/null
+++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/rolling/maintenance/RollingMaintenanceExecutorBase.java
@@ -0,0 +1,91 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package com.cloud.hypervisor.kvm.resource.rolling.maintenance;
+
+import com.cloud.utils.exception.CloudRuntimeException;
+import org.apache.commons.lang.StringUtils;
+import org.apache.log4j.Logger;
+
+import java.io.File;
+
+/**
+ * Common state and helpers for rolling maintenance executors: the hooks directory, the stage
+ * timeout, the "avoid maintenance" flag and the lookup of a stage script inside the hooks directory.
+ */
+public abstract class RollingMaintenanceExecutorBase implements RollingMaintenanceExecutor {
+
+    // Exit values consumed outside this class; their meaning is inferred from the names only.
+    static final int exitValueAvoidMaintenance = 70;
+    static final int exitValueTerminatedSignal = 143;
+    private static final Logger s_logger = Logger.getLogger(RollingMaintenanceExecutorBase.class);
+
+    // Suffixes tried, in order, when looking for the script of a stage.
+    private static final String[] SCRIPT_SUFFIXES = {"", ".sh", ".py"};
+
+    private final String hooksDir;
+    private int timeout;
+    private boolean avoidMaintenance = false;
+
+    /**
+     * @param hooksDir directory holding the stage scripts; a trailing "/" is appended when missing.
+     *                 Blank values are kept as they are and rejected later by {@link #checkHooksDirectory()}.
+     */
+    RollingMaintenanceExecutorBase(String hooksDir) {
+        this.hooksDir = withTrailingSlash(hooksDir);
+    }
+
+    /**
+     * Returns the directory with a trailing "/" so that file names can be appended directly.
+     */
+    private static String withTrailingSlash(String dir) {
+        if (StringUtils.isNotBlank(dir) && !dir.endsWith("/")) {
+            return dir + "/";
+        }
+        return dir;
+    }
+
+    void setTimeout(int timeout) {
+        this.timeout = timeout;
+    }
+
+    long getTimeout() {
+        return timeout;
+    }
+
+    /**
+     * @return true if the path exists and denotes a regular file
+     */
+    protected boolean existsAndIsFile(String filepath) {
+        File file = new File(filepath);
+        return file.exists() && file.isFile();
+    }
+
+    /**
+     * Looks for the stage script in the hooks directory, trying the bare stage name first and then
+     * the ".sh" and ".py" variants.
+     *
+     * @param stage name of the stage
+     * @return the first matching file, or {@code null} (after logging a warning) if none exists
+     */
+    @Override
+    public File getStageScriptFile(String stage) {
+        String scriptPath = hooksDir + stage;
+        for (String suffix : SCRIPT_SUFFIXES) {
+            String candidate = scriptPath + suffix;
+            if (existsAndIsFile(candidate)) {
+                return new File(candidate);
+            }
+        }
+        s_logger.warn("Unable to locate script for stage: " + stage + " in directory: " + hooksDir);
+        return null;
+    }
+
+    /**
+     * @throws CloudRuntimeException if no hooks directory has been configured
+     */
+    void checkHooksDirectory() {
+        if (StringUtils.isBlank(hooksDir)) {
+            throw new CloudRuntimeException("Hooks directory is empty, please specify it on agent.properties and restart the agent");
+        }
+    }
+
+    String getHooksDir() {
+        return hooksDir;
+    }
+
+    public void setAvoidMaintenance(boolean avoidMaintenance) {
+        this.avoidMaintenance = avoidMaintenance;
+    }
+
+    /**
+     * @return the last value stored through {@link #setAvoidMaintenance(boolean)}; the arguments are not used
+     */
+    @Override
+    public boolean getStageAvoidMaintenance(String stage, File scriptFile) {
+        return avoidMaintenance;
+    }
+}
diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/rolling/maintenance/RollingMaintenanceServiceExecutor.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/rolling/maintenance/RollingMaintenanceServiceExecutor.java
new file mode 100644
index 0000000..6659bf4
--- /dev/null
+++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/rolling/maintenance/RollingMaintenanceServiceExecutor.java
@@ -0,0 +1,137 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package com.cloud.hypervisor.kvm.resource.rolling.maintenance;
+
+import com.cloud.utils.Pair;
+import com.cloud.utils.exception.CloudRuntimeException;
+import com.cloud.utils.script.OutputInterpreter;
+import com.cloud.utils.script.Script;
+import org.apache.commons.lang.StringUtils;
+import org.apache.log4j.Logger;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.stream.Stream;
+
+/**
+ * Rolling maintenance executor that runs each stage through an instance of the
+ * {@code cloudstack-rolling-maintenance@} systemd template unit and reads the outcome of the stage
+ * from the results and output files kept in the hooks directory.
+ */
+public class RollingMaintenanceServiceExecutor extends RollingMaintenanceExecutorBase implements RollingMaintenanceExecutor {
+
+    private static final String servicePrefix = "cloudstack-rolling-maintenance";
+    private static final String resultsFileSuffix = "rolling-maintenance-results";
+    private static final String outputFileSuffix = "rolling-maintenance-output";
+
+    private static final Logger s_logger = Logger.getLogger(RollingMaintenanceServiceExecutor.class);
+
+    public RollingMaintenanceServiceExecutor(String hooksDir) {
+        super(hooksDir);
+    }
+
+    /**
+     * Generate and return the escaped instance name to use on the systemd service invocation.
+     * The instance name is "stage,file,timeout,resultsFile,outputFile[,payload]".
+     */
+    private String generateInstanceName(String stage, String file, String payload) {
+        String instanceName = String.format("%s,%s,%s,%s,%s", stage, file, getTimeout(),
+                getResultsFilePath(), getOutputFilePath());
+        if (StringUtils.isNotBlank(payload)) {
+            instanceName += "," + payload;
+        }
+        // The name is embedded between single quotes in a bash command line: close the quote, emit an
+        // escaped quote and reopen it, so that a quote inside the payload cannot terminate the argument.
+        String shellQuoted = instanceName.replace("'", "'\\''");
+        return Script.runSimpleBashScript(String.format("systemd-escape '%s'", shellQuoted));
+    }
+
+    /**
+     * Runs "systemctl ACTION" on the service instance of the given stage.
+     *
+     * @return the result of the script execution when it is not blank, otherwise the collected
+     *         output lines joined with spaces (empty string if no output was collected)
+     */
+    private String invokeService(String action, String stage, String file, String payload) {
+        s_logger.debug("Invoking rolling maintenance service for stage: " + stage + " and file " + file + " with action: " + action);
+        final OutputInterpreter.AllLinesParser parser = new OutputInterpreter.AllLinesParser();
+        Script command = new Script("/bin/systemctl", s_logger);
+        command.add(action);
+        String service = servicePrefix + "@" + generateInstanceName(stage, file, payload);
+        command.add(service);
+        String result = command.execute(parser);
+        int exitValue = command.getExitValue();
+        String lines = parser.getLines();
+        s_logger.trace("Execution: " + command.toString() + " - exit code: " + exitValue +
+                ": " + result + (StringUtils.isNotBlank(lines) ? lines : ""));
+        if (StringUtils.isNotBlank(result)) {
+            return result;
+        }
+        // Guard against a parser that collected nothing instead of failing with a NullPointerException.
+        return lines == null ? "" : lines.replace("\n", " ");
+    }
+
+    /**
+     * Starts the service instance for the stage.
+     *
+     * @return (true, "OK") when the start action produced no output
+     * @throws CloudRuntimeException if the hooks directory is not set or the start action returned any text
+     */
+    @Override
+    public Pair<Boolean, String> startStageExecution(String stage, File scriptFile, int timeout, String payload) {
+        checkHooksDirectory();
+        setTimeout(timeout);
+        String result = invokeService("start", stage, scriptFile.getAbsolutePath(), payload);
+        if (StringUtils.isNotBlank(result)) {
+            throw new CloudRuntimeException("Error starting stage: " + stage + " execution: " + result);
+        }
+        s_logger.trace("Stage " + stage + " execution started");
+        return new Pair<>(true, "OK");
+    }
+
+    private String getResultsFilePath() {
+        return getHooksDir() + resultsFileSuffix;
+    }
+
+    private String getOutputFilePath() {
+        return getHooksDir() + outputFileSuffix;
+    }
+
+    /**
+     * Reads a UTF-8 text file, terminating every line with "\n".
+     *
+     * @return the content read; on an I/O error the error is logged and whatever was read so far is returned
+     */
+    private String readFromFile(String filePath) {
+        StringBuilder contentBuilder = new StringBuilder();
+
+        try (Stream<String> stream = Files.lines(Paths.get(filePath), StandardCharsets.UTF_8)) {
+            stream.forEach(s -> contentBuilder.append(s).append("\n"));
+        } catch (IOException e) {
+            s_logger.error("Unable to read file " + filePath + ": " + e.getMessage(), e);
+        }
+
+        return contentBuilder.toString();
+    }
+
+    @Override
+    public String getStageExecutionOutput(String stage, File scriptFile) {
+        return readFromFile(getOutputFilePath());
+    }
+
+    /**
+     * Queries "systemctl is-active" for the service instance of the stage.
+     *
+     * @return true only if the reported state is "active"
+     * @throws CloudRuntimeException if the reported state is "failed"; the message carries the service status
+     */
+    @Override
+    public boolean isStageRunning(String stage, File scriptFile, String payload) {
+        // invokeService turns newlines into spaces, so the state may carry surrounding whitespace.
+        // NOTE(review): assumes the parser keeps the trailing newline of the command output - confirm.
+        String state = StringUtils.trim(invokeService("is-active", stage, scriptFile.getAbsolutePath(), payload));
+        if ("failed".equals(state)) {
+            String status = invokeService("status", stage, scriptFile.getAbsolutePath(), payload);
+            String errorMsg = "Stage " + stage + " execution failed, status: " + status;
+            s_logger.error(errorMsg);
+            throw new CloudRuntimeException(errorMsg);
+        }
+        return "active".equals(state);
+    }
+
+    /**
+     * Parses the results file, expected to hold "stage,success,avoidMaintenance", and stores the
+     * avoid-maintenance flag as a side effect.
+     *
+     * @return the success value found in the results file
+     * @throws CloudRuntimeException if the file is empty, has fewer than three comma-separated fields
+     *                               or belongs to a different stage
+     */
+    @Override
+    public boolean getStageExecutionSuccess(String stage, File scriptFile) {
+        String fileContent = readFromFile(getResultsFilePath());
+        if (StringUtils.isBlank(fileContent)) {
+            throw new CloudRuntimeException("Empty content in file " + getResultsFilePath());
+        }
+        fileContent = fileContent.replace("\n", "");
+        String[] parts = fileContent.split(",");
+        if (parts.length < 3) {
+            throw new CloudRuntimeException("Results file " + getResultsFilePath() + " unexpected content: " + fileContent);
+        }
+        if (!parts[0].equalsIgnoreCase(stage)) {
+            throw new CloudRuntimeException("Expected stage " + stage + " results but got stage " + parts[0]);
+        }
+        setAvoidMaintenance(Boolean.parseBoolean(parts[2]));
+        return Boolean.parseBoolean(parts[1]);
+    }
+}
diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtRollingMaintenanceCommandWrapper.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtRollingMaintenanceCommandWrapper.java
new file mode 100644
index 0000000..a1b1af6
--- /dev/null
+++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtRollingMaintenanceCommandWrapper.java
@@ -0,0 +1,81 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+
+package com.cloud.hypervisor.kvm.resource.wrapper;
+
+import com.cloud.agent.api.RollingMaintenanceAnswer;
+import com.cloud.agent.api.RollingMaintenanceCommand;
+import com.cloud.hypervisor.kvm.resource.LibvirtComputingResource;
+import com.cloud.hypervisor.kvm.resource.rolling.maintenance.RollingMaintenanceAgentExecutor;
+import com.cloud.hypervisor.kvm.resource.rolling.maintenance.RollingMaintenanceExecutor;
+import com.cloud.resource.CommandWrapper;
+import com.cloud.resource.ResourceWrapper;
+import com.cloud.resource.RollingMaintenanceManager;
+import com.cloud.utils.exception.CloudRuntimeException;
+import org.apache.log4j.Logger;
+
+import java.io.File;
+
+/**
+ * Handles {@link RollingMaintenanceCommand} on a KVM host by driving the rolling maintenance
+ * executor of the resource.
+ */
+@ResourceWrapper(handles = RollingMaintenanceCommand.class)
+public class LibvirtRollingMaintenanceCommandWrapper extends CommandWrapper<RollingMaintenanceCommand, RollingMaintenanceAnswer, LibvirtComputingResource> {
+
+    private static final Logger s_logger = Logger.getLogger(LibvirtRollingMaintenanceCommandWrapper.class);
+
+    /**
+     * Either checks that a Maintenance script exists (check mode) or starts / polls the requested stage.
+     * Any {@link CloudRuntimeException} is turned into a failed, unfinished answer carrying its message.
+     */
+    @Override
+    public RollingMaintenanceAnswer execute(RollingMaintenanceCommand command, LibvirtComputingResource resource) {
+        RollingMaintenanceExecutor executor = resource.getRollingMaintenanceExecutor();
+        String stage = command.isCheckMaintenanceScript() ? RollingMaintenanceManager.Stage.Maintenance.toString() : command.getStage();
+        int timeout = command.getWait();
+        String payload = command.getPayload();
+
+        try {
+            File script = executor.getStageScriptFile(stage);
+            if (command.isCheckMaintenanceScript()) {
+                boolean scriptDefined = script != null;
+                return new RollingMaintenanceAnswer(command, scriptDefined);
+            }
+            if (script == null) {
+                s_logger.info("No script file defined for stage " + stage + ". Skipping stage...");
+                return new RollingMaintenanceAnswer(command, true, "Skipped stage " + stage, true);
+            }
+
+            boolean alreadyStarted = command.isStarted();
+            if (alreadyStarted && executor instanceof RollingMaintenanceAgentExecutor) {
+                String msg = "Stage has been started previously and the agent restarted, setting stage as finished";
+                s_logger.info(msg);
+                return new RollingMaintenanceAnswer(command, true, msg, true);
+            }
+
+            s_logger.info("Processing stage " + stage);
+            if (!alreadyStarted) {
+                executor.startStageExecution(stage, script, timeout, payload);
+            }
+            if (executor.isStageRunning(stage, script, payload)) {
+                return new RollingMaintenanceAnswer(command, true, "Stage " + stage + " still running", false);
+            }
+            return buildFinishedAnswer(command, executor, stage, script);
+        } catch (CloudRuntimeException e) {
+            return new RollingMaintenanceAnswer(command, false, e.getMessage(), false);
+        }
+    }
+
+    /**
+     * Builds the answer of a stage that is no longer running: success flag, output and, when the
+     * executor reports it, the avoid-maintenance flag.
+     */
+    private RollingMaintenanceAnswer buildFinishedAnswer(RollingMaintenanceCommand command, RollingMaintenanceExecutor executor,
+                                                         String stage, File script) {
+        boolean succeeded = executor.getStageExecutionSuccess(stage, script);
+        String stageOutput = executor.getStageExecutionOutput(stage, script);
+        RollingMaintenanceAnswer finished = new RollingMaintenanceAnswer(command, succeeded, stageOutput, true);
+        if (executor.getStageAvoidMaintenance(stage, script)) {
+            s_logger.info("Avoid maintenance flag added to the answer for the stage " + stage);
+            finished.setAvoidMaintenance(true);
+        }
+        s_logger.info("Finished processing stage " + stage);
+        return finished;
+    }
+}
diff --git a/server/src/main/java/com/cloud/api/ApiResponseHelper.java b/server/src/main/java/com/cloud/api/ApiResponseHelper.java
index 9da3ae4..9bec408 100644
--- a/server/src/main/java/com/cloud/api/ApiResponseHelper.java
+++ b/server/src/main/java/com/cloud/api/ApiResponseHelper.java
@@ -31,6 +31,7 @@ import java.util.stream.Collectors;
import javax.inject.Inject;
+import com.cloud.resource.RollingMaintenanceManager;
import org.apache.cloudstack.acl.ControlledEntity;
import org.apache.cloudstack.acl.ControlledEntity.ACLType;
import org.apache.cloudstack.affinity.AffinityGroup;
@@ -44,6 +45,9 @@ import org.apache.cloudstack.api.command.user.job.QueryAsyncJobResultCmd;
import org.apache.cloudstack.api.response.AccountResponse;
import org.apache.cloudstack.api.response.ApplicationLoadBalancerInstanceResponse;
import org.apache.cloudstack.api.response.ApplicationLoadBalancerResponse;
+import org.apache.cloudstack.api.response.RollingMaintenanceHostSkippedResponse;
+import org.apache.cloudstack.api.response.RollingMaintenanceHostUpdatedResponse;
+import org.apache.cloudstack.api.response.RollingMaintenanceResponse;
import org.apache.cloudstack.api.response.ApplicationLoadBalancerRuleResponse;
import org.apache.cloudstack.api.response.AsyncJobResponse;
import org.apache.cloudstack.api.response.AutoScalePolicyResponse;
@@ -4281,4 +4285,31 @@ public class ApiResponseHelper implements ResponseGenerator {
}
return responses;
}
+
+ /**
+  * Builds the API response of a rolling maintenance run: overall result plus one entry per
+  * updated host and one entry per skipped host.
+  */
+ @Override
+ public RollingMaintenanceResponse createRollingMaintenanceResponse(Boolean success, String details, List<RollingMaintenanceManager.HostUpdated> hostsUpdated, List<RollingMaintenanceManager.HostSkipped> hostsSkipped) {
+     RollingMaintenanceResponse rollingMaintenanceResponse = new RollingMaintenanceResponse(success, details);
+
+     List<RollingMaintenanceHostUpdatedResponse> updatedResponses = hostsUpdated.stream().map(updatedHost -> {
+         RollingMaintenanceHostUpdatedResponse entry = new RollingMaintenanceHostUpdatedResponse();
+         entry.setHostId(updatedHost.getHost().getUuid());
+         entry.setHostName(updatedHost.getHost().getName());
+         entry.setStartDate(getDateStringInternal(updatedHost.getStart()));
+         entry.setEndDate(getDateStringInternal(updatedHost.getEnd()));
+         entry.setOutput(updatedHost.getOutputMsg());
+         return entry;
+     }).collect(Collectors.toCollection(ArrayList::new));
+
+     List<RollingMaintenanceHostSkippedResponse> skippedResponses = hostsSkipped.stream().map(skippedHost -> {
+         RollingMaintenanceHostSkippedResponse entry = new RollingMaintenanceHostSkippedResponse();
+         entry.setHostId(skippedHost.getHost().getUuid());
+         entry.setHostName(skippedHost.getHost().getName());
+         entry.setReason(skippedHost.getReason());
+         return entry;
+     }).collect(Collectors.toCollection(ArrayList::new));
+
+     rollingMaintenanceResponse.setUpdatedHosts(updatedResponses);
+     rollingMaintenanceResponse.setSkippedHosts(skippedResponses);
+     rollingMaintenanceResponse.setObjectName("rollingmaintenance");
+     return rollingMaintenanceResponse;
+ }
}
diff --git a/server/src/main/java/com/cloud/deploy/DeploymentPlanningManagerImpl.java b/server/src/main/java/com/cloud/deploy/DeploymentPlanningManagerImpl.java
index a95f4ef..1315cdf 100644
--- a/server/src/main/java/com/cloud/deploy/DeploymentPlanningManagerImpl.java
+++ b/server/src/main/java/com/cloud/deploy/DeploymentPlanningManagerImpl.java
@@ -1041,7 +1041,7 @@ StateListener<State, VirtualMachine.Event, VirtualMachine> {
for (Long clusterId : clusterList) {
ClusterVO clusterVO = _clusterDao.findById(clusterId);
- if (clusterVO.getAllocationState() == Grouping.AllocationState.Disabled) {
+ if (clusterVO.getAllocationState() == Grouping.AllocationState.Disabled && !plan.isMigrationPlan()) {
s_logger.debug("Cannot deploy in disabled cluster " + clusterId + ", skipping this cluster");
avoid.addCluster(clusterVO.getId());
}
diff --git a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java
index 29f7e68..c1c221b 100755
--- a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java
+++ b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java
@@ -2485,7 +2485,7 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
}
}
- private boolean cancelMaintenance(final long hostId) {
+ public boolean cancelMaintenance(final long hostId) {
try {
final Boolean result = propagateResourceEvent(hostId, ResourceState.Event.AdminCancelMaintenance);
diff --git a/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java b/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java
new file mode 100644
index 0000000..62bb30e
--- /dev/null
+++ b/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java
@@ -0,0 +1,734 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package com.cloud.resource;
+
+import com.cloud.agent.AgentManager;
+import com.cloud.agent.api.Answer;
+import com.cloud.agent.api.RollingMaintenanceAnswer;
+import com.cloud.agent.api.RollingMaintenanceCommand;
+import com.cloud.alert.AlertManager;
+import com.cloud.capacity.CapacityManager;
+import com.cloud.dc.ClusterDetailsDao;
+import com.cloud.dc.ClusterDetailsVO;
+import com.cloud.deploy.DeployDestination;
+import com.cloud.event.ActionEventUtils;
+import com.cloud.event.EventVO;
+import com.cloud.exception.AgentUnavailableException;
+import com.cloud.exception.InvalidParameterValueException;
+import com.cloud.exception.OperationTimedoutException;
+import com.cloud.host.Host;
+import com.cloud.host.HostVO;
+import com.cloud.host.Status;
+import com.cloud.host.dao.HostDao;
+import com.cloud.host.dao.HostTagsDao;
+import com.cloud.hypervisor.Hypervisor;
+import com.cloud.org.Cluster;
+import com.cloud.org.Grouping;
+import com.cloud.service.ServiceOfferingVO;
+import com.cloud.service.dao.ServiceOfferingDao;
+import com.cloud.utils.Pair;
+import com.cloud.utils.Ternary;
+import com.cloud.utils.component.ManagerBase;
+import com.cloud.utils.exception.CloudRuntimeException;
+import com.cloud.vm.VMInstanceVO;
+import com.cloud.vm.VirtualMachine.State;
+import com.cloud.vm.VirtualMachineProfileImpl;
+import com.cloud.vm.dao.VMInstanceDao;
+import org.apache.cloudstack.affinity.AffinityGroupProcessor;
+import org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd;
+import org.apache.cloudstack.api.command.admin.resource.StartRollingMaintenanceCmd;
+import org.apache.cloudstack.context.CallContext;
+import org.apache.cloudstack.framework.config.ConfigKey;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.log4j.Logger;
+
+import javax.inject.Inject;
+import javax.naming.ConfigurationException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+public class RollingMaintenanceManagerImpl extends ManagerBase implements RollingMaintenanceManager {
+
+ @Inject
+ private HostDao hostDao;
+ @Inject
+ private AgentManager agentManager;
+ @Inject
+ private ResourceManager resourceManager;
+ @Inject
+ private CapacityManager capacityManager;
+ @Inject
+ private VMInstanceDao vmInstanceDao;
+ @Inject
+ private ServiceOfferingDao serviceOfferingDao;
+ @Inject
+ private ClusterDetailsDao clusterDetailsDao;
+ @Inject
+ private HostTagsDao hostTagsDao;
+ @Inject
+ private AlertManager alertManager;
+
+ protected List<AffinityGroupProcessor> _affinityProcessors;
+
+ /**
+ * Stores the affinity group processors in {@code _affinityProcessors}.
+ *
+ * @param affinityProcessors processors to keep
+ */
+ public void setAffinityGroupProcessors(List<AffinityGroupProcessor> affinityProcessors) {
+ _affinityProcessors = affinityProcessors;
+ }
+
+ public static final Logger s_logger = Logger.getLogger(RollingMaintenanceManagerImpl.class.getName());
+
+ /**
+  * Selects the scope of the rolling maintenance: the first non-empty list wins, checked in the
+  * order pods, clusters, zones, hosts.
+  *
+  * @return pair of (resource type, IDs of that type)
+  * @throws CloudRuntimeException if all four lists are null or empty
+  */
+ private Pair<ResourceType, List<Long>> getResourceTypeAndIdPair(List<Long> podIds, List<Long> clusterIds, List<Long> zoneIds, List<Long> hostIds) {
+     if (CollectionUtils.isNotEmpty(podIds)) {
+         return new Pair<>(ResourceType.Pod, podIds);
+     }
+     if (CollectionUtils.isNotEmpty(clusterIds)) {
+         return new Pair<>(ResourceType.Cluster, clusterIds);
+     }
+     if (CollectionUtils.isNotEmpty(zoneIds)) {
+         return new Pair<>(ResourceType.Zone, zoneIds);
+     }
+     if (CollectionUtils.isNotEmpty(hostIds)) {
+         return new Pair<>(ResourceType.Host, hostIds);
+     }
+     throw new CloudRuntimeException("Parameters podId, clusterId, zoneId, hostId are mutually exclusive, " +
+             "please set only one of them");
+ }
+
+ /**
+ * Performs no configuration work and always returns true.
+ */
+ @Override
+ public boolean configure(String name, Map<String, Object> params) throws ConfigurationException {
+ return true;
+ }
+
+ /**
+  * Updates the allocation state of a cluster through the resource manager, passing empty strings
+  * for every other attribute.
+  *
+  * @param clusterId ID of the cluster
+  * @param state new allocation state
+  * @throws InvalidParameterValueException if the cluster cannot be found
+  */
+ private void updateCluster(long clusterId, String state) {
+     Cluster target = resourceManager.getCluster(clusterId);
+     if (target != null) {
+         resourceManager.updateCluster(target, "", "", state, "");
+         return;
+     }
+     throw new InvalidParameterValueException("Unable to find the cluster by id=" + clusterId);
+ }
+
+ /**
+  * Publishes the "completed" action event of a rolling maintenance run, summarising the result,
+  * the details and the number of updated and skipped hosts.
+  */
+ private void generateReportAndFinishingEvent(StartRollingMaintenanceCmd cmd, boolean success, String details,
+         List<HostUpdated> hostsUpdated, List<HostSkipped> hostsSkipped) {
+     Pair<ResourceType, List<Long>> scope = getResourceTypeIdPair(cmd);
+
+     String summary = String.format("Success: %s, details: %s, hosts updated: %s, hosts skipped: %s", success, details,
+             generateReportHostsUpdated(hostsUpdated), generateReportHostsSkipped(hostsSkipped));
+     long callingUserId = CallContext.current().getCallingUserId();
+     long callingAccountId = CallContext.current().getCallingAccountId();
+     String eventType = cmd.getEventType();
+     String eventDescription = "Completed rolling maintenance for entity " + scope.first() + " with IDs: " + scope.second() + " - " + summary;
+     ActionEventUtils.onCompletedActionEvent(callingUserId, callingAccountId, EventVO.LEVEL_INFO, eventType, eventDescription, 0);
+ }
+
+ /**
+  * @return the number of updated hosts, as text
+  */
+ private String generateReportHostsUpdated(List<HostUpdated> hostsUpdated) {
+     return String.valueOf(hostsUpdated.size());
+ }
+
+ /**
+  * @return the number of skipped hosts, as text
+  */
+ private String generateReportHostsSkipped(List<HostSkipped> hostsSkipped) {
+     return String.valueOf(hostsSkipped.size());
+ }
+
+ /**
+  * Runs the rolling maintenance over the hosts selected by the command, cluster by cluster:
+  * VM state check, cluster disabling, host state / capacity / pre-flight checks and then the
+  * per-host stages. Clusters disabled on the way are enabled again and a finishing event is
+  * published whatever the outcome.
+  *
+  * @param cmd API command carrying the scope (pods, clusters, zones or hosts), timeout, payload and forced flag
+  * @return (success, details, (hosts updated, hosts skipped))
+  */
+ @Override
+ public Ternary<Boolean, String, Pair<List<HostUpdated>, List<HostSkipped>>> startRollingMaintenance(StartRollingMaintenanceCmd cmd) {
+     Pair<ResourceType, List<Long>> pair = getResourceTypeAndIdPair(cmd.getPodIds(), cmd.getClusterIds(), cmd.getZoneIds(), cmd.getHostIds());
+     ResourceType type = pair.first();
+     List<Long> ids = pair.second();
+     int timeout = cmd.getTimeout() == null ? KvmRollingMaintenanceStageTimeout.value() : cmd.getTimeout();
+     String payload = cmd.getPayload();
+     Boolean forced = cmd.getForced();
+
+     Set<Long> disabledClusters = new HashSet<>();
+     Map<Long, String> hostsToAvoidMaintenance = new HashMap<>();
+
+     boolean success = false;
+     String details = null;
+     List<HostUpdated> hostsUpdated = new ArrayList<>();
+     List<HostSkipped> hostsSkipped = new ArrayList<>();
+
+     // A timeout equal to the ping interval is rejected as well, hence "greater than".
+     if (timeout <= KvmRollingMaintenancePingInterval.value()) {
+         return new Ternary<>(success, "The timeout value provided must be greater than the ping interval " +
+                 "defined in '" + KvmRollingMaintenancePingInterval.key() + "'", new Pair<>(hostsUpdated, hostsSkipped));
+     }
+
+     try {
+         Map<Long, List<Host>> hostsByCluster = getHostsByClusterForRollingMaintenance(type, ids);
+
+         for (Long clusterId : hostsByCluster.keySet()) {
+             Cluster cluster = resourceManager.getCluster(clusterId);
+             List<Host> hosts = hostsByCluster.get(clusterId);
+
+             if (!isMaintenanceAllowedByVMStates(cluster, hosts, hostsSkipped)) {
+                 if (forced) {
+                     continue;
+                 }
+                 success = false;
+                 details = "VMs in invalid states in cluster: " + cluster.getUuid();
+                 return new Ternary<>(success, details, new Pair<>(hostsUpdated, hostsSkipped));
+             }
+             disableClusterIfEnabled(cluster, disabledClusters);
+
+             s_logger.debug("State checks on the hosts in the cluster");
+             performStateChecks(cluster, hosts, forced, hostsSkipped);
+             s_logger.debug("Checking hosts capacity before attempting rolling maintenance");
+             performCapacityChecks(cluster, hosts, forced);
+             s_logger.debug("Attempting pre-flight stages on each host before starting rolling maintenance");
+             performPreFlightChecks(hosts, timeout, payload, forced, hostsToAvoidMaintenance);
+
+             for (Host host: hosts) {
+                 Ternary<Boolean, Boolean, String> hostResult = startRollingMaintenanceHostInCluster(cluster, host,
+                         timeout, payload, forced, hostsToAvoidMaintenance, hostsUpdated, hostsSkipped);
+                 // SKIP takes precedence over FAIL: a skipped host never aborts the run.
+                 if (hostResult.second()) {
+                     continue;
+                 }
+                 if (hostResult.first()) {
+                     success = false;
+                     details = hostResult.third();
+                     return new Ternary<>(success, details, new Pair<>(hostsUpdated, hostsSkipped));
+                 }
+             }
+             enableClusterIfDisabled(cluster, disabledClusters);
+         }
+         // Must be set before the finally block runs, otherwise the finishing event of a
+         // successful run is published with "Success: false, details: null".
+         success = true;
+         details = "OK";
+     } catch (AgentUnavailableException | InterruptedException | CloudRuntimeException e) {
+         String err = "Error starting rolling maintenance: " + e.getMessage();
+         s_logger.error(err, e);
+         success = false;
+         details = err;
+         return new Ternary<>(success, details, new Pair<>(hostsUpdated, hostsSkipped));
+     } finally {
+         // Enable back disabled clusters
+         for (Long clusterId : disabledClusters) {
+             Cluster cluster = resourceManager.getCluster(clusterId);
+             if (cluster.getAllocationState() == Grouping.AllocationState.Disabled) {
+                 updateCluster(clusterId, "Enabled");
+             }
+         }
+         generateReportAndFinishingEvent(cmd, success, details, hostsUpdated, hostsSkipped);
+     }
+     return new Ternary<>(success, details, new Pair<>(hostsUpdated, hostsSkipped));
+ }
+
+ /**
+  * Verifies that every host is connected (status Up) and enabled. With {@code forced} the
+  * offending hosts are recorded as skipped and removed from {@code hosts}; without it the first
+  * offending host aborts the check.
+  *
+  * @throws CloudRuntimeException if a host is not Up or not Enabled and {@code forced} is false
+  */
+ protected void performStateChecks(Cluster cluster, List<Host> hosts, Boolean forced, List<HostSkipped> hostsSkipped) {
+     List<Host> unusableHosts = new ArrayList<>();
+     for (Host host : hosts) {
+         String problem = null;
+         if (host.getStatus() != Status.Up) {
+             problem = "Host " + host.getUuid() + " is not connected, state = " + host.getStatus().toString();
+         } else if (host.getResourceState() != ResourceState.Enabled) {
+             problem = "Host " + host.getUuid() + " is not enabled, state = " + host.getResourceState().toString();
+         }
+         if (problem == null) {
+             continue;
+         }
+         if (!forced) {
+             throw new CloudRuntimeException(problem);
+         }
+         hostsSkipped.add(new HostSkipped(host, problem));
+         unusableHosts.add(host);
+     }
+     if (!unusableHosts.isEmpty()) {
+         hosts.removeAll(unusableHosts);
+     }
+ }
+
+ /**
+  * Rolling maintenance is not allowed while any host holds VMs in
+  * Starting/Stopping/Migrating/Error/Unknown state. The first such host is recorded as skipped.
+  *
+  * @return false as soon as one host has VMs in those states, true otherwise
+  */
+ private boolean isMaintenanceAllowedByVMStates(Cluster cluster, List<Host> hosts, List<HostSkipped> hostsSkipped) {
+     for (Host candidate : hosts) {
+         List<VMInstanceVO> blockingVms = vmInstanceDao.findByHostInStates(candidate.getId(), State.Starting, State.Stopping,
+                 State.Migrating, State.Error, State.Unknown);
+         if (blockingVms.isEmpty()) {
+             continue;
+         }
+         String reason = "There are VMs in starting/stopping/migrating/error/unknown state, not allowing rolling maintenance in the cluster";
+         hostsSkipped.add(new HostSkipped(candidate, reason));
+         return false;
+     }
+     return true;
+ }
+
+ /**
+  * Runs the rolling maintenance sequence on one host: maintenance script check, pre-maintenance
+  * stage, avoid-maintenance check, capacity re-check, maintenance stage (with the host in
+  * maintenance mode) and post-maintenance stage.
+  * @return tuple: (FAIL, SKIP, DETAILS), where:
+  *   - FAIL: True if rolling maintenance must fail
+  *   - SKIP: True if host must be skipped
+  *   - DETAILS: Information retrieved by the host
+  */
+ private Ternary<Boolean, Boolean, String> startRollingMaintenanceHostInCluster(Cluster cluster, Host host, int timeout,
+         String payload, Boolean forced,
+         Map<Long, String> hostsToAvoidMaintenance,
+         List<HostUpdated> hostsUpdated,
+         List<HostSkipped> hostsSkipped) throws InterruptedException, AgentUnavailableException {
+     if (!isMaintenanceScriptDefinedOnHost(host, hostsSkipped)) {
+         String noScriptMsg = "There is no maintenance script on the host";
+         hostsSkipped.add(new HostSkipped(host, noScriptMsg));
+         return new Ternary<>(false, true, noScriptMsg);
+     }
+
+     Ternary<Boolean, Boolean, String> preMaintenance = performPreMaintenanceStageOnHost(host, timeout, payload, forced, hostsToAvoidMaintenance, hostsSkipped);
+     if (preMaintenance.first() || preMaintenance.second()) {
+         return preMaintenance;
+     }
+
+     if (isMaintenanceStageAvoided(host, hostsToAvoidMaintenance, hostsSkipped)) {
+         return new Ternary<>(false, true, "Maintenance stage must be avoided");
+     }
+
+     s_logger.debug("Updating capacity before re-checking capacity");
+     alertManager.recalculateCapacity();
+     Ternary<Boolean, Boolean, String> capacityCheck = reCheckCapacityBeforeMaintenanceOnHost(cluster, host, forced, hostsSkipped);
+     if (capacityCheck.first() || capacityCheck.second()) {
+         return capacityCheck;
+     }
+
+     Date maintenanceStart = new Date();
+     putHostIntoMaintenance(host);
+     Ternary<Boolean, Boolean, String> maintenance = performMaintenanceStageOnHost(host, timeout, payload, forced, hostsToAvoidMaintenance, hostsSkipped);
+     // The host leaves maintenance mode whether the stage succeeded or not.
+     cancelHostMaintenance(host);
+     if (maintenance.first() || maintenance.second()) {
+         return maintenance;
+     }
+     Date maintenanceEnd = new Date();
+
+     hostsUpdated.add(new HostUpdated(host, maintenanceStart, maintenanceEnd, maintenance.third()));
+
+     Ternary<Boolean, Boolean, String> postMaintenance = performPostMaintenanceStageOnHost(host, timeout, payload, forced, hostsToAvoidMaintenance, hostsSkipped);
+     if (postMaintenance.first() || postMaintenance.second()) {
+         return postMaintenance;
+     }
+     return new Ternary<>(false, false, "Completed rolling maintenance on host " + host.getUuid());
+ }
+
+ /**
+  * Executes the Post-Maintenance stage on the host and maps its outcome.
+  * @return tuple: (FAIL, SKIP, DETAILS), where:
+  *   - FAIL: True if the stage did not succeed
+  *   - SKIP: True if the stage did not succeed and {@code forced} is set (host recorded as skipped)
+  *   - DETAILS: Information retrieved by the host after executing the stage
+  * @throws InterruptedException
+  */
+ private Ternary<Boolean, Boolean, String> performPostMaintenanceStageOnHost(Host host, int timeout, String payload, Boolean forced, Map<Long, String> hostsToAvoidMaintenance, List<HostSkipped> hostsSkipped) throws InterruptedException {
+     Ternary<Boolean, String, Boolean> stageResult = performStageOnHost(host, Stage.PostMaintenance, timeout, payload, forced);
+     if (stageResult.first()) {
+         return new Ternary<>(false, false, stageResult.second());
+     }
+     if (!forced) {
+         return new Ternary<>(true, false, stageResult.second());
+     }
+     String failure = "Post-maintenance script failed: " + stageResult.second();
+     hostsSkipped.add(new HostSkipped(host, failure));
+     return new Ternary<>(true, true, failure);
+ }
+
+ /**
+ * Cancel maintenance mode on host
+ * @param host host
+ */
+ private void cancelHostMaintenance(Host host) {
+ if (!resourceManager.cancelMaintenance(host.getId())) {
+ String message = "Could not cancel maintenance on host " + host.getUuid();
+ s_logger.error(message);
+ throw new CloudRuntimeException(message);
+ }
+ }
+
+ /**
+  * Runs the Maintenance stage on the given host and maps the outcome to a rolling maintenance decision.
+  * A failed stage on a forced run records the host in {@code hostsSkipped}.
+  * @return tuple: (FAIL, SKIP, DETAILS), where:
+  * - FAIL: True if the stage did not succeed
+  * - SKIP: True if the host has been recorded as skipped (failed stage on a forced run)
+  * - DETAILS: Details reported for the stage, prefixed with a failure note when the host is skipped
+  * @throws InterruptedException if interrupted while the stage is being executed
+  */
+ private Ternary<Boolean, Boolean, String> performMaintenanceStageOnHost(Host host, int timeout, String payload, Boolean forced, Map<Long, String> hostsToAvoidMaintenance, List<HostSkipped> hostsSkipped) throws InterruptedException {
+     Ternary<Boolean, String, Boolean> stageOutcome = performStageOnHost(host, Stage.Maintenance, timeout, payload, forced);
+     String details = stageOutcome.second();
+     if (stageOutcome.first()) {
+         return new Ternary<>(false, false, details);
+     }
+     if (!forced) {
+         return new Ternary<>(true, false, details);
+     }
+     // Forced run: remember why this host is skipped and report the same reason to the caller
+     String skipReason = "Maintenance script failed: " + details;
+     hostsSkipped.add(new HostSkipped(host, skipReason));
+     return new Ternary<>(true, true, skipReason);
+ }
+
+ /**
+ * Puts host into maintenance and waits for its completion
+ * @param host host
+ * @throws InterruptedException
+ * @throws AgentUnavailableException
+ */
+ private void putHostIntoMaintenance(Host host) throws InterruptedException, AgentUnavailableException {
+ s_logger.debug("Trying to set the host " + host.getId() + " into maintenance");
+ PrepareForMaintenanceCmd cmd = new PrepareForMaintenanceCmd();
+ cmd.setId(host.getId());
+ resourceManager.maintain(cmd);
+ waitForHostInMaintenance(host.getId());
+ }
+
+ /**
+  * Re-enables a cluster, but only when it is currently disabled and is listed in {@code disabledClusters}.
+  * @param cluster cluster to enable back
+  * @param disabledClusters ids of the clusters tracked as disabled
+  */
+ private void enableClusterIfDisabled(Cluster cluster, Set<Long> disabledClusters) {
+     boolean currentlyDisabled = cluster.getAllocationState() == Grouping.AllocationState.Disabled;
+     if (!currentlyDisabled || !disabledClusters.contains(cluster.getId())) {
+         return;
+     }
+     updateCluster(cluster.getId(), "Enabled");
+ }
+
+ /**
+  * Runs the capacity check for the host again, right before it is put into maintenance.
+  * A failed check on a forced run records the host in {@code hostsSkipped}.
+  * @return tuple: (FAIL, SKIP, DETAILS), where:
+  * - FAIL: True if the capacity check did not pass
+  * - SKIP: True if the host has been recorded as skipped (failed check on a forced run)
+  * - DETAILS: Message returned by the capacity check
+  */
+ private Ternary<Boolean, Boolean, String> reCheckCapacityBeforeMaintenanceOnHost(Cluster cluster, Host host, Boolean forced, List<HostSkipped> hostsSkipped) {
+     Pair<Boolean, String> capacityCheck = performCapacityChecksBeforeHostInMaintenance(host, cluster);
+     String details = capacityCheck.second();
+     if (capacityCheck.first()) {
+         return new Ternary<>(false, false, details);
+     }
+     if (!forced) {
+         return new Ternary<>(true, false, details);
+     }
+     String errorMsg = "Capacity check failed for host " + host.getUuid() + ": " + details;
+     s_logger.info("Skipping host " + host.getUuid() + " as: " + errorMsg);
+     hostsSkipped.add(new HostSkipped(host, errorMsg));
+     return new Ternary<>(true, true, details);
+ }
+
+ /**
+  * Tells whether the maintenance stage must not run on the host.
+  * When the host is listed in {@code hostsToAvoidMaintenance} it is also added to {@code hostsSkipped},
+  * using the reason stored in the map.
+  * @return true if the host is in the avoid-maintenance map, false otherwise
+  */
+ private boolean isMaintenanceStageAvoided(Host host, Map<Long, String> hostsToAvoidMaintenance, List<HostSkipped> hostsSkipped) {
+     if (!hostsToAvoidMaintenance.containsKey(host.getId())) {
+         return false;
+     }
+     s_logger.debug("Host " + host.getId() + " is not being put into maintenance, skipping it");
+     String reason = hostsToAvoidMaintenance.get(host.getId());
+     hostsSkipped.add(new HostSkipped(host, reason));
+     return true;
+ }
+
+ /**
+  * Runs the Pre-Maintenance stage on the given host and maps the outcome to a rolling maintenance decision.
+  * A failed stage on a forced run records the host in {@code hostsSkipped}. A successful stage that asks
+  * to avoid maintenance registers the host in {@code hostsToAvoidMaintenance} (if not already present).
+  * @return tuple: (FAIL, SKIP, DETAILS), where:
+  * - FAIL: True if the stage did not succeed
+  * - SKIP: True if the host has been recorded as skipped (failed stage on a forced run)
+  * - DETAILS: Details reported for the stage, prefixed with a failure note when the host is skipped
+  * @throws InterruptedException if interrupted while the stage is being executed
+  */
+ private Ternary<Boolean, Boolean, String> performPreMaintenanceStageOnHost(Host host, int timeout, String payload, Boolean forced,
+         Map<Long, String> hostsToAvoidMaintenance,
+         List<HostSkipped> hostsSkipped) throws InterruptedException {
+     Ternary<Boolean, String, Boolean> result = performStageOnHost(host, Stage.PreMaintenance, timeout, payload, forced);
+     if (!result.first()) {
+         if (forced) {
+             String msg = "Pre-maintenance script failed: " + result.second();
+             hostsSkipped.add(new HostSkipped(host, msg));
+             // Return the same message that was recorded for the skipped host, as the
+             // Maintenance and Post-Maintenance stage handlers do
+             return new Ternary<>(true, true, msg);
+         }
+         return new Ternary<>(true, false, result.second());
+     }
+     if (result.third() && !hostsToAvoidMaintenance.containsKey(host.getId())) {
+         s_logger.debug("Host " + host.getId() + " added to the avoid maintenance set");
+         hostsToAvoidMaintenance.put(host.getId(), "Pre-maintenance stage set to avoid maintenance");
+     }
+     return new Ternary<>(false, false, result.second());
+ }
+
+ /**
+  * Disables a cluster that is currently enabled and not yet tracked as disabled, then tracks it.
+  * @param cluster cluster to disable
+  * @param disabledClusters ids of the clusters already disabled; the cluster id is added when it gets disabled here
+  */
+ private void disableClusterIfEnabled(Cluster cluster, Set<Long> disabledClusters) {
+     boolean currentlyEnabled = cluster.getAllocationState() == Grouping.AllocationState.Enabled;
+     if (!currentlyEnabled || disabledClusters.contains(cluster.getId())) {
+         return;
+     }
+     updateCluster(cluster.getId(), "Disabled");
+     disabledClusters.add(cluster.getId());
+ }
+
+ private boolean isMaintenanceScriptDefinedOnHost(Host host, List<HostSkipped> hostsSkipped) {
+ try {
+ RollingMaintenanceAnswer answer = (RollingMaintenanceAnswer) agentManager.send(host.getId(), new RollingMaintenanceCommand(true));
+ return answer.isMaintenaceScriptDefined();
+ } catch (AgentUnavailableException | OperationTimedoutException e) {
+ String msg = "Could not check for maintenance script on host " + host.getId() + " due to: " + e.getMessage();
+ s_logger.error(msg, e);
+ return false;
+ }
+ }
+
+ /**
+  * Executes a stage on a host. A failed stage aborts with an exception unless the run is forced,
+  * in which case the failed result is handed back to the caller.
+  * @return tuple: (SUCCESS, DETAILS, AVOID_MAINTENANCE) where:
+  * - SUCCESS: True if stage is successful
+  * - DETAILS: Information retrieved by the host after executing the stage
+  * - AVOID_MAINTENANCE: True if maintenance stage must be avoided
+  * @throws CloudRuntimeException if the stage failed and the run is not forced
+  */
+ private Ternary<Boolean, String, Boolean> performStageOnHost(Host host, Stage stage, int timeout,
+         String payload, Boolean forced) throws InterruptedException {
+     Ternary<Boolean, String, Boolean> outcome = sendRollingMaintenanceCommandToHost(host, stage, timeout, payload);
+     if (outcome.first() || forced) {
+         return outcome;
+     }
+     throw new CloudRuntimeException("Stage: " + stage.toString() + " failed on host " + host.getUuid() + ": " + outcome.second());
+ }
+
+ /**
+ * Send rolling maintenance command to a host to perform a certain stage specified in cmd
+ * @return tuple: (SUCCESS, DETAILS, AVOID_MAINTENANCE) where:
+ * - SUCCESS: True if stage is successfull
+ * - DETAILS: Information retrieved by the host after executing the stage
+ * - AVOID_MAINTENANCE: True if maintenance stage must be avoided
+ */
+ private Ternary<Boolean, String, Boolean> sendRollingMaintenanceCommandToHost(Host host, Stage stage,
+ int timeout, String payload) throws InterruptedException {
+ boolean completed = false;
+ Answer answer = null;
+ long timeSpent = 0L;
+ long pingInterval = KvmRollingMaintenancePingInterval.value() * 1000L;
+ boolean avoidMaintenance = false;
+
+ RollingMaintenanceCommand cmd = new RollingMaintenanceCommand(stage.toString());
+ cmd.setWait(timeout);
+ cmd.setPayload(payload);
+
+ while (!completed && timeSpent < timeout * 1000L) {
+ try {
+ answer = agentManager.send(host.getId(), cmd);
+ } catch (AgentUnavailableException | OperationTimedoutException e) {
+ // Agent may be restarted on the scripts - continue polling until it is up
+ String msg = "Cannot send command to host: " + host.getId() + ", waiting " + pingInterval + "ms - " + e.getMessage();
+ s_logger.warn(msg);
+ cmd.setStarted(true);
+ Thread.sleep(pingInterval);
+ timeSpent += pingInterval;
+ continue;
+ }
+ cmd.setStarted(true);
+
+ RollingMaintenanceAnswer rollingMaintenanceAnswer = (RollingMaintenanceAnswer) answer;
+ completed = rollingMaintenanceAnswer.isFinished();
+ if (!completed) {
+ Thread.sleep(pingInterval);
+ timeSpent += pingInterval;
+ } else {
+ avoidMaintenance = rollingMaintenanceAnswer.isAvoidMaintenance();
+ }
+ }
+ if (timeSpent >= timeout * 1000L) {
+ return new Ternary<>(false,
+ "Timeout exceeded for rolling maintenance on host " + host.getUuid() + " and stage " + stage.toString(),
+ avoidMaintenance);
+ }
+ return new Ternary<>(answer.getResult(), answer.getDetails(), avoidMaintenance);
+ }
+
+ /**
+  * Runs the Pre-Flight stage on every host. Hosts whose stage asks to avoid maintenance are
+  * registered in {@code hostsToAvoidMaintenance} (if not already present).
+  * @throws InterruptedException if interrupted while a stage is being executed
+  */
+ private void performPreFlightChecks(List<Host> hosts, int timeout, String payload, Boolean forced,
+         Map<Long, String> hostsToAvoidMaintenance) throws InterruptedException {
+     for (Host host : hosts) {
+         Ternary<Boolean, String, Boolean> preFlight = performStageOnHost(host, Stage.PreFlight, timeout, payload, forced);
+         if (!preFlight.third() || hostsToAvoidMaintenance.containsKey(host.getId())) {
+             continue;
+         }
+         s_logger.debug("Host " + host.getId() + " added to the avoid maintenance set");
+         hostsToAvoidMaintenance.put(host.getId(), "Pre-flight stage set to avoid maintenance");
+     }
+ }
+
+ /**
+  * Runs the capacity check for every host. On a non-forced run the first failing host aborts
+  * the operation; on a forced run failures are ignored here.
+  * @throws CloudRuntimeException if a capacity check fails and the run is not forced
+  */
+ private void performCapacityChecks(Cluster cluster, List<Host> hosts, Boolean forced) {
+     for (Host host : hosts) {
+         Pair<Boolean, String> check = performCapacityChecksBeforeHostInMaintenance(host, cluster);
+         if (check.first() || forced) {
+             continue;
+         }
+         throw new CloudRuntimeException("Capacity check failed for host " + host.getUuid() + ": " + check.second());
+     }
+ }
+
+ /**
+  * Check if there is enough capacity for host to enter maintenance: every VM listed on the host
+  * must have at least one other host in the cluster (Enabled and Up) that passes the host tag,
+  * affinity, guest limit and CPU/RAM capacity checks.
+  * @param host host that is about to enter maintenance
+  * @param cluster cluster of the host
+  * @return pair (OK, DETAILS): true and "OK" if all VMs can be accommodated, false and the reason otherwise
+  * @throws CloudRuntimeException if there is no other Enabled and Up host in the cluster
+  */
+ private Pair<Boolean, String> performCapacityChecksBeforeHostInMaintenance(Host host, Cluster cluster) {
+     List<HostVO> hosts = hostDao.findByClusterId(cluster.getId());
+     List<Host> hostsInCluster = hosts.stream()
+             .filter(x -> x.getId() != host.getId() &&
+                     x.getClusterId().equals(cluster.getId()) &&
+                     x.getResourceState() == ResourceState.Enabled &&
+                     x.getStatus() == Status.Up)
+             .collect(Collectors.toList());
+     if (CollectionUtils.isEmpty(hostsInCluster)) {
+         throw new CloudRuntimeException("No host available in cluster " + cluster.getUuid() + " (" + cluster.getName() + ") to support host " +
+                 host.getUuid() + " (" + host.getName() + ") in maintenance");
+     }
+     List<VMInstanceVO> vmsRunning = vmInstanceDao.listByHostId(host.getId());
+     if (CollectionUtils.isEmpty(vmsRunning)) {
+         return new Pair<>(true, "OK");
+     }
+     List<String> hostTags = hostTagsDao.gethostTags(host.getId());
+
+     for (VMInstanceVO runningVM : vmsRunning) {
+         boolean canMigrateVm = false;
+         ServiceOfferingVO serviceOffering = serviceOfferingDao.findById(runningVM.getServiceOfferingId());
+         VirtualMachineProfileImpl vmProfile = new VirtualMachineProfileImpl(runningVM);
+         for (Host hostInCluster : hostsInCluster) {
+             if (!checkHostTags(hostTags, hostTagsDao.gethostTags(hostInCluster.getId()), serviceOffering.getHostTag())) {
+                 s_logger.debug("Host tags mismatch between host " + host.getUuid() + " and host " + hostInCluster.getUuid() +
+                         ". Skipping it from the capacity check");
+                 continue;
+             }
+             // The destination being evaluated is the candidate host, not the host leaving the cluster
+             DeployDestination deployDestination = new DeployDestination(null, null, null, hostInCluster);
+             boolean affinityChecks = true;
+             for (AffinityGroupProcessor affinityProcessor : _affinityProcessors) {
+                 if (!affinityProcessor.check(vmProfile, deployDestination)) {
+                     affinityChecks = false;
+                     break;
+                 }
+             }
+             if (!affinityChecks) {
+                 s_logger.debug("Affinity check failed between host " + host.getUuid() + " and host " + hostInCluster.getUuid() +
+                         ". Skipping it from the capacity check");
+                 continue;
+             }
+             // The guest limit that matters is the one of the host that would receive the VM
+             boolean maxGuestLimit = capacityManager.checkIfHostReachMaxGuestLimit(hostInCluster);
+             boolean hostHasCPUCapacity = capacityManager.checkIfHostHasCpuCapability(hostInCluster.getId(), serviceOffering.getCpu(), serviceOffering.getSpeed());
+             int cpuRequested = serviceOffering.getCpu() * serviceOffering.getSpeed();
+             long ramRequested = serviceOffering.getRamSize() * 1024L * 1024L;
+             ClusterDetailsVO clusterDetailsCpuOvercommit = clusterDetailsDao.findDetail(cluster.getId(), "cpuOvercommitRatio");
+             ClusterDetailsVO clusterDetailsRamOvercommmt = clusterDetailsDao.findDetail(cluster.getId(), "memoryOvercommitRatio");
+             Float cpuOvercommitRatio = Float.parseFloat(clusterDetailsCpuOvercommit.getValue());
+             Float memoryOvercommitRatio = Float.parseFloat(clusterDetailsRamOvercommmt.getValue());
+             boolean hostHasCapacity = capacityManager.checkIfHostHasCapacity(hostInCluster.getId(), cpuRequested, ramRequested, false,
+                     cpuOvercommitRatio, memoryOvercommitRatio, false);
+             if (!maxGuestLimit && hostHasCPUCapacity && hostHasCapacity) {
+                 canMigrateVm = true;
+                 break;
+             }
+         }
+         if (!canMigrateVm) {
+             String msg = "VM " + runningVM.getUuid() + " cannot be migrated away from host " + host.getUuid() +
+                     " to any other host in the cluster";
+             s_logger.error(msg);
+             return new Pair<>(false, msg);
+         }
+     }
+     // Reaching this point means every VM found a suitable destination host
+     return new Pair<>(true, "OK");
+ }
+
+ /**
+  * Compares the tags of the host going into maintenance with the tags of a candidate host.
+  * @param hostTags tags of the host going into maintenance
+  * @param hostInClusterTags tags of the candidate host
+  * @param offeringTag host tag of the service offering
+  * @return true if neither host has tags; false if only one of them has tags; when both have tags,
+  *         whether the candidate host tags contain the offering tag
+  */
+ private boolean checkHostTags(List<String> hostTags, List<String> hostInClusterTags, String offeringTag) {
+     boolean sourceUntagged = CollectionUtils.isEmpty(hostTags);
+     boolean candidateUntagged = CollectionUtils.isEmpty(hostInClusterTags);
+     if (sourceUntagged != candidateUntagged) {
+         return false;
+     }
+     if (sourceUntagged) {
+         return true;
+     }
+     return hostInClusterTags.contains(offeringTag);
+ }
+
+ /**
+  * Retrieve the KVM hosts within the scope for starting rolling maintenance, grouped by cluster id.
+  * Only the hypervisor type is filtered here; the host state is not checked by this method.
+  * Ids that do not resolve to a host, and resource types not handled below, contribute no hosts.
+  * @param type scope of the given ids (host, cluster, pod or zone)
+  * @param ids ids of the resources in scope
+  * @return map from cluster id to the KVM hosts of that cluster found in the scope
+  */
+ protected Map<Long, List<Host>> getHostsByClusterForRollingMaintenance(ResourceType type, List<Long> ids) {
+     Set<Host> hosts = new HashSet<>();
+     for (Long id : ids) {
+         List<HostVO> hostsInScope = null;
+         if (type == ResourceType.Host) {
+             hostsInScope = Collections.singletonList(hostDao.findById(id));
+         } else if (type == ResourceType.Cluster) {
+             hostsInScope = hostDao.findByClusterId(id);
+         } else if (type == ResourceType.Pod) {
+             hostsInScope = hostDao.findByPodId(id);
+         } else if (type == ResourceType.Zone) {
+             hostsInScope = hostDao.findByDataCenterId(id);
+         }
+         if (hostsInScope == null) {
+             continue;
+         }
+         // The host lookup by id may yield null, so null entries are dropped before filtering
+         List<HostVO> kvmHosts = hostsInScope.stream()
+                 .filter(x -> x != null && x.getHypervisorType() == Hypervisor.HypervisorType.KVM)
+                 .collect(Collectors.toList());
+         hosts.addAll(kvmHosts);
+     }
+     return hosts.stream().collect(Collectors.groupingBy(Host::getClusterId));
+ }
+
+ /**
+  * Resolves the resource type and the list of ids for a start rolling maintenance command by
+  * delegating to getResourceTypeAndIdPair with the pod, cluster, zone and host ids of the command.
+  */
+ @Override
+ public Pair<ResourceType, List<Long>> getResourceTypeIdPair(StartRollingMaintenanceCmd cmd) {
+ return getResourceTypeAndIdPair(cmd.getPodIds(), cmd.getClusterIds(), cmd.getZoneIds(), cmd.getHostIds());
+ }
+
+ /**
+  * Wait for the host to be in maintenance mode, re-reading it from the database every 30 seconds
+  * (or less, when the remaining time is shorter) until the configured timeout is reached.
+  * @param hostId id of the host to wait for
+  * @throws CloudRuntimeException if the host cannot be found or is not in Maintenance state after the timeout
+  * @throws InterruptedException if interrupted while waiting
+  */
+ private void waitForHostInMaintenance(long hostId) throws CloudRuntimeException, InterruptedException {
+     long timeout = KvmRollingMaintenanceWaitForMaintenanceTimeout.value() * 1000L;
+     long timeSpent = 0;
+     long step = 30 * 1000L;
+     HostVO host = hostDao.findById(hostId);
+     while (host != null && timeSpent < timeout && host.getResourceState() != ResourceState.Maintenance) {
+         // Never sleep past the configured timeout
+         long sleepTime = Math.min(step, timeout - timeSpent);
+         Thread.sleep(sleepTime);
+         timeSpent += sleepTime;
+         host = hostDao.findById(hostId);
+     }
+
+     if (host == null) {
+         String errorMsg = "Host " + hostId + " could not be found while waiting for it to be in Maintenance state";
+         s_logger.error(errorMsg);
+         throw new CloudRuntimeException(errorMsg);
+     }
+     if (host.getResourceState() != ResourceState.Maintenance) {
+         String errorMsg = "Timeout: waited " + timeout + "ms for host " + host.getUuid() + "(" + host.getName() + ")" +
+                 " to be in Maintenance state, but after timeout it is in " + host.getResourceState().toString() + " state";
+         s_logger.error(errorMsg);
+         throw new CloudRuntimeException(errorMsg);
+     }
+     s_logger.debug("Host " + host.getUuid() + "(" + host.getName() + ") is in maintenance");
+ }
+
+ /**
+  * @return the simple class name of this manager, used as its configuration component name
+  */
+ @Override
+ public String getConfigComponentName() {
+     final String componentName = RollingMaintenanceManagerImpl.class.getSimpleName();
+     return componentName;
+ }
+
+ @Override
+ public ConfigKey<?>[] getConfigKeys() {
+ return new ConfigKey<?>[] {KvmRollingMaintenanceStageTimeout, KvmRollingMaintenancePingInterval, KvmRollingMaintenanceWaitForMaintenanceTimeout};
+ }
+}
\ No newline at end of file
diff --git a/server/src/main/java/com/cloud/server/ManagementServerImpl.java b/server/src/main/java/com/cloud/server/ManagementServerImpl.java
index e02f016..ff29f1d 100644
--- a/server/src/main/java/com/cloud/server/ManagementServerImpl.java
+++ b/server/src/main/java/com/cloud/server/ManagementServerImpl.java
@@ -166,6 +166,7 @@ import org.apache.cloudstack.api.command.admin.resource.CleanVMReservationsCmd;
import org.apache.cloudstack.api.command.admin.resource.DeleteAlertsCmd;
import org.apache.cloudstack.api.command.admin.resource.ListAlertsCmd;
import org.apache.cloudstack.api.command.admin.resource.ListCapacityCmd;
+import org.apache.cloudstack.api.command.admin.resource.StartRollingMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.resource.UploadCustomCertificateCmd;
import org.apache.cloudstack.api.command.admin.router.ConfigureOvsElementCmd;
import org.apache.cloudstack.api.command.admin.router.ConfigureVirtualRouterElementCmd;
@@ -3128,6 +3129,7 @@ public class ManagementServerImpl extends ManagerBase implements ManagementServe
cmdList.add(GetUploadParamsForIsoCmd.class);
cmdList.add(ListTemplateOVFProperties.class);
cmdList.add(GetRouterHealthCheckResultsCmd.class);
+ cmdList.add(StartRollingMaintenanceCmd.class);
// Out-of-band management APIs for admins
cmdList.add(EnableOutOfBandManagementForHostCmd.class);
diff --git a/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml b/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml
index 17a9b94..1c90a97 100644
--- a/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml
+++ b/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml
@@ -313,4 +313,8 @@
<constructor-arg name="timeout" value="10000" />
</bean>
+ <bean id="rollingMaintenanceManager" class="com.cloud.resource.RollingMaintenanceManagerImpl">
+ <property name="affinityGroupProcessors"
+ value="#{affinityProcessorsRegistry.registered}" />
+ </bean>
</beans>
diff --git a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java
index 26cd820..8ce60df 100755
--- a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java
+++ b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java
@@ -622,6 +622,11 @@ public class MockResourceManagerImpl extends ManagerBase implements ResourceMana
}
@Override
+ public boolean cancelMaintenance(long hostId) {
+ return false;
+ }
+
+ @Override
public boolean isHostGpuEnabled(final long hostId) {
// TODO Auto-generated method stub
return false;
diff --git a/server/src/test/java/com/cloud/resource/RollingMaintenanceManagerImplTest.java b/server/src/test/java/com/cloud/resource/RollingMaintenanceManagerImplTest.java
new file mode 100644
index 0000000..ef0277f
--- /dev/null
+++ b/server/src/test/java/com/cloud/resource/RollingMaintenanceManagerImplTest.java
@@ -0,0 +1,167 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package com.cloud.resource;
+
+import com.cloud.host.Host;
+import com.cloud.host.HostVO;
+import com.cloud.host.Status;
+import com.cloud.host.dao.HostDao;
+import com.cloud.hypervisor.Hypervisor;
+import com.cloud.org.Cluster;
+import com.cloud.utils.exception.CloudRuntimeException;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.InjectMocks;
+import org.mockito.Mock;
+import org.mockito.Mockito;
+import org.mockito.MockitoAnnotations;
+import org.mockito.Spy;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+ /**
+  * Unit tests for RollingMaintenanceManagerImpl covering the grouping of hosts by cluster for each
+  * resource scope (zone, pod, cluster, host) and the host state checks with and without force.
+  */
+ public class RollingMaintenanceManagerImplTest {
+
+ @Mock
+ HostDao hostDao;
+ @Mock
+ HostVO host1;
+ @Mock
+ HostVO host2;
+ @Mock
+ HostVO host3;
+ @Mock
+ HostVO host4;
+ @Mock
+ Cluster cluster;
+
+ // Manager under test: a spy on a real instance with the mocks above injected
+ @Spy
+ @InjectMocks
+ private RollingMaintenanceManagerImpl manager = new RollingMaintenanceManagerImpl();
+
+ // Hosts in cluster 1
+ private static final long hostId1 = 1L;
+ private static final long hostId2 = 2L;
+
+ // Hosts in cluster 2
+ private static final long hostId3 = 3L;
+ private static final long hostId4 = 4L;
+
+ private static final long clusterId1 = 1L;
+ private static final long clusterId2 = 2L;
+
+ // All four hosts are returned for this single pod and this single zone (see setup)
+ private static final long podId = 1L;
+ private static final long zoneId = 1L;
+
+ /**
+  * Builds a topology of two clusters with two KVM hosts each, all reachable through the same
+  * pod and zone, and stubs the DAO lookups by cluster, pod, zone and host id.
+  */
+ @Before
+ public void setup() throws Exception {
+ MockitoAnnotations.initMocks(this);
+ Mockito.when(hostDao.findByClusterId(clusterId1)).thenReturn(Arrays.asList(host1, host2));
+ Mockito.when(hostDao.findByClusterId(clusterId2)).thenReturn(Arrays.asList(host3, host4));
+ List<HostVO> hosts = Arrays.asList(host1, host2, host3, host4);
+ Mockito.when(hostDao.findByPodId(podId)).thenReturn(hosts);
+ Mockito.when(hostDao.findByDataCenterId(zoneId)).thenReturn(hosts);
+ // Every host is a KVM host that is up and not in a maintenance state
+ for (HostVO host : hosts) {
+ Mockito.when(host.getHypervisorType()).thenReturn(Hypervisor.HypervisorType.KVM);
+ Mockito.when(host.getState()).thenReturn(Status.Up);
+ Mockito.when(host.isInMaintenanceStates()).thenReturn(false);
+ }
+ Mockito.when(host1.getClusterId()).thenReturn(clusterId1);
+ Mockito.when(host2.getClusterId()).thenReturn(clusterId1);
+
+ Mockito.when(host3.getClusterId()).thenReturn(clusterId2);
+ Mockito.when(host4.getClusterId()).thenReturn(clusterId2);
+
+ Mockito.when(hostDao.findById(hostId1)).thenReturn(host1);
+ Mockito.when(hostDao.findById(hostId2)).thenReturn(host2);
+ Mockito.when(hostDao.findById(hostId3)).thenReturn(host3);
+ Mockito.when(hostDao.findById(hostId4)).thenReturn(host4);
+
+ // Status and resource state are only stubbed for the cluster 1 hosts, which are the ones
+ // passed to the state check tests below
+ Mockito.when(host1.getStatus()).thenReturn(Status.Up);
+ Mockito.when(host2.getStatus()).thenReturn(Status.Up);
+ Mockito.when(host1.getResourceState()).thenReturn(ResourceState.Enabled);
+ Mockito.when(host2.getResourceState()).thenReturn(ResourceState.Enabled);
+ }
+
+ /**
+  * Asserts that the result holds exactly both clusters, each mapped to its own two hosts.
+  */
+ private void checkResults(Map<Long, List<Host>> result) {
+ Assert.assertEquals(2, result.size());
+ Assert.assertTrue(result.containsKey(clusterId1));
+ Assert.assertTrue(result.containsKey(clusterId2));
+ List<Host> cluster1Hosts = result.get(clusterId1);
+ List<Host> cluster2Hosts = result.get(clusterId2);
+ Assert.assertEquals(2, cluster1Hosts.size());
+ Assert.assertTrue(cluster1Hosts.contains(host1));
+ Assert.assertTrue(cluster1Hosts.contains(host2));
+ Assert.assertEquals(2, cluster2Hosts.size());
+ Assert.assertTrue(cluster2Hosts.contains(host3));
+ Assert.assertTrue(cluster2Hosts.contains(host4));
+ }
+
+ // Zone scope: a single zone id must resolve to all hosts grouped by cluster
+ @Test
+ public void testGetHostsByClusterForRollingMaintenanceZoneScope() {
+ Map<Long, List<Host>> result = manager.getHostsByClusterForRollingMaintenance(RollingMaintenanceManager.ResourceType.Zone, Collections.singletonList(zoneId));
+ checkResults(result);
+ }
+
+ // Pod scope: a single pod id must resolve to all hosts grouped by cluster
+ @Test
+ public void testGetHostsByClusterForRollingMaintenancePodScope() {
+ Map<Long, List<Host>> result = manager.getHostsByClusterForRollingMaintenance(RollingMaintenanceManager.ResourceType.Pod, Collections.singletonList(podId));
+ checkResults(result);
+ }
+
+ // Cluster scope: both cluster ids must resolve to their own hosts
+ @Test
+ public void testGetHostsByClusterForRollingMaintenanceClusterScope() {
+ List<Long> clusterIds = Arrays.asList(clusterId1, clusterId2);
+ Map<Long, List<Host>> result = manager.getHostsByClusterForRollingMaintenance(RollingMaintenanceManager.ResourceType.Cluster, clusterIds);
+ checkResults(result);
+ }
+
+ // Host scope: individual host ids must be grouped under the cluster of each host
+ @Test
+ public void testGetHostsByClusterForRollingMaintenanceHostScope() {
+ List<Long> hostIds = Arrays.asList(hostId1, hostId2, hostId3, hostId4);
+ Map<Long, List<Host>> result = manager.getHostsByClusterForRollingMaintenance(RollingMaintenanceManager.ResourceType.Host, hostIds);
+ checkResults(result);
+ }
+
+ // Without force, a host in Error status is expected to make the state checks throw
+ @Test(expected = CloudRuntimeException.class)
+ public void testPerformStateChecksNotForce() {
+ List<Host> hosts = Arrays.asList(host1, host2);
+ Mockito.when(host1.getStatus()).thenReturn(Status.Error);
+ manager.performStateChecks(cluster, hosts, false, new ArrayList<>());
+ }
+
+ // With force, the host in Error status is expected to be reported as skipped and to no
+ // longer be part of the host list passed in (hence the mutable list)
+ @Test
+ public void testPerformStateChecksForce() {
+ List<Host> hosts = new ArrayList<>();
+ hosts.add(host1);
+ hosts.add(host2);
+ Mockito.when(host1.getStatus()).thenReturn(Status.Error);
+ List<RollingMaintenanceManager.HostSkipped> skipped = new ArrayList<>();
+ manager.performStateChecks(cluster, hosts, true, skipped);
+
+ Assert.assertFalse(skipped.isEmpty());
+ Assert.assertEquals(1, skipped.size());
+ Assert.assertEquals(host1, skipped.get(0).getHost());
+
+ Assert.assertEquals(1, hosts.size());
+ }
+ }
diff --git a/tools/apidoc/gen_toc.py b/tools/apidoc/gen_toc.py
index 068b685..ef98b13 100644
--- a/tools/apidoc/gen_toc.py
+++ b/tools/apidoc/gen_toc.py
@@ -193,7 +193,9 @@ known_categories = {
'Restore' : 'Backup and Recovery',
'UnmanagedInstance': 'Virtual Machine',
'KubernetesSupportedVersion': 'Kubernetes Service',
- 'KubernetesCluster': 'Kubernetes Service'
+ 'KubernetesCluster': 'Kubernetes Service',
+ 'UnmanagedInstance': 'Virtual Machine',
+ 'Rolling': 'Rolling Maintenance'
}
diff --git a/ui/css/cloudstack3.css b/ui/css/cloudstack3.css
index cc69a39..a2da335 100644
--- a/ui/css/cloudstack3.css
+++ b/ui/css/cloudstack3.css
@@ -12669,6 +12669,14 @@ div.ui-dialog div.autoscaler div.field-group div.form-container form div.form-it
background-position: -100px -614px;
}
+.startRollingMaintenance .icon {
+ background-position: -138px -65px;
+}
+
+.startRollingMaintenance:hover .icon {
+ background-position: -138px -65px;
+}
+
.addVlanRange .icon,
.addVmwareDc .icon {
background-position: -37px -62px;
diff --git a/ui/l10n/en.js b/ui/l10n/en.js
index 36f6d6d..d022e02 100644
--- a/ui/l10n/en.js
+++ b/ui/l10n/en.js
@@ -1669,6 +1669,9 @@ var dictionary = {
"label.start.lb.vm":"Start LB VM",
"label.start.port":"Start Port",
"label.start.reserved.system.IP":"Start Reserved system IP",
+"label.start.rolling.maintenance":"Start Rolling Maintenance",
+"label.start.rolling.maintenance.force":"Force",
+"label.start.rolling.maintenance.payload":"Payload",
"label.start.vlan":"Start VLAN",
"label.start.vxlan":"Start VXLAN",
"label.state":"State",
diff --git a/ui/scripts/system.js b/ui/scripts/system.js
index ea180fd..1e29500 100755
--- a/ui/scripts/system.js
+++ b/ui/scripts/system.js
@@ -258,6 +258,100 @@
return allowedActions;
};
+ var rollingMaintenanceAction = function(args) {
+ var isCluster = args.entity === 'clusters';
+ var isZone = args.entity === 'zones';
+ var isPod = args.entity === 'pods';
+ var isHost = args.entity === 'hosts';
+ var action = {
+ messages: {
+ notification: function(args) {
+ return 'label.start.rolling.maintenance';
+ }
+ },
+ label: 'label.start.rolling.maintenance',
+ addRow: 'false',
+ createForm: {
+ title: 'label.start.rolling.maintenance',
+ fields: {
+ timeout: {
+ label: 'label.timeout',
+ },
+ force: {
+ isBoolean: true,
+ label: 'label.start.rolling.maintenance.force'
+ },
+ payload: {
+ label: 'label.start.rolling.maintenance.payload'
+ }
+ }
+ },
+ action: function(args) {
+ var selectedIds;
+ if (isCluster) {
+ selectedIds = args.context.clusters.map(x => x.id);
+ } else if (isZone) {
+ selectedIds = args.context.physicalResources.map(x => x.id);
+ } else if (isPod) {
+ selectedIds = args.context.pods.map(x => x.id);
+ } else if (isHost) {
+ selectedIds = args.context.hosts.map(x => x.id);
+ }
+ var ids = selectedIds.join(',');
+ var data = {
+ force: args.data.force,
+ timeout: args.data.timeout,
+ payload: args.data.payload
+ };
+ if (isCluster) {
+ $.extend(data, {
+ clusterids : ids
+ });
+ } else if (isZone) {
+ $.extend(data, {
+ zoneids : ids
+ });
+ } else if (isPod) {
+ $.extend(data, {
+ podids : ids
+ });
+ } else if (isHost) {
+ $.extend(data, {
+ hostids : ids
+ });
+ }
+
+ $.ajax({
+ url: createURL("startRollingMaintenance"),
+ dataType: "json",
+ data: data,
+ async: true,
+ success: function (json) {
+ var item = json.startrollingmaintenanceresponse;
+ var jid = item.jobid;
+ args.response.success({
+ _custom: {
+ jobId: jid
+ }
+ });
+ }
+ });
+ },
+ notification: {
+ poll: pollAsyncJobResult
+ }
+ };
+
+ if (args && args.listView) {
+ $.extend(action, {
+ isHeader: true,
+ isMultiSelectAction: true
+ });
+ }
+
+ return action;
+ };
+
cloudStack.sections.system = {
title: 'label.menu.infrastructure',
id: 'system',
@@ -7666,6 +7760,7 @@
zones: {
id: 'physicalResources',
label: 'label.menu.physical.resources',
+ multiSelect: true,
fields: {
name: {
label: 'label.zone'
@@ -7755,12 +7850,65 @@
return 'label.metrics';
}
}
- }
+ },
+ startRollingMaintenance: rollingMaintenanceAction({ listView: true, entity: 'zones' })
},
detailView: {
isMaximized: true,
actions: {
+
+ startRollingMaintenance: {
+ label: 'label.start.rolling.maintenance',
+ textLabel: 'label.start.rolling.maintenance',
+ messages: {
+ notification: function (args) {
+ return 'label.start.rolling.maintenance';
+ }
+ },
+ createForm: {
+ title: 'label.start.rolling.maintenance',
+ fields: {
+ timeout: {
+ label: 'label.timeout',
+ },
+ force: {
+ isBoolean: true,
+ label: 'label.start.rolling.maintenance.force'
+ },
+ payload: {
+ label: 'label.start.rolling.maintenance.payload'
+ }
+ }
+ },
+ action: function (args) {
+ var data = {
+ zoneids: args.context.physicalResources[0].id,
+ force: args.data.force,
+ timeout: args.data.timeout,
+ payload: args.data.payload
+ };
+ $.ajax({
+ url: createURL("startRollingMaintenance"),
+ dataType: "json",
+ data: data,
+ async: true,
+ success: function (json) {
+ var item = json.rollingmaintenance;
+ args.response.success({
+ actionFilter: zoneActionfilter,
+ data: item
+ });
+ }
+ });
+ },
+ notification: {
+ poll: function (args) {
+ args.complete();
+ }
+ }
+ },
+
addVmwareDc: {
label: 'label.add.vmware.datacenter',
textLabel: 'label.add.vmware.datacenter',
@@ -13792,6 +13940,7 @@
listView: {
id: 'pods',
section: 'pods',
+ multiSelect: true,
fields: {
name: {
label: 'label.name'
@@ -14053,7 +14202,8 @@
return 'label.add.pod';
}
}
- }
+ },
+ startRollingMaintenance: rollingMaintenanceAction({ listView: true, entity: 'pods' })
},
detailView: {
@@ -14075,6 +14225,57 @@
return hiddenTabs;
},
actions: {
+ startRollingMaintenance: {
+ label: 'label.start.rolling.maintenance',
+ textLabel: 'label.start.rolling.maintenance',
+ messages: {
+ notification: function (args) {
+ return 'label.start.rolling.maintenance';
+ }
+ },
+ createForm: {
+ title: 'label.start.rolling.maintenance',
+ fields: {
+ timeout: {
+ label: 'label.timeout',
+ },
+ force: {
+ isBoolean: true,
+ label: 'label.start.rolling.maintenance.force'
+ },
+ payload: {
+ label: 'label.start.rolling.maintenance.payload'
+ }
+ }
+ },
+ action: function (args) {
+ var data = {
+ podids: args.context.pods[0].id,
+ force: args.data.force,
+ timeout: args.data.timeout,
+ payload: args.data.payload
+ };
+ $.ajax({
+ url: createURL("startRollingMaintenance"),
+ dataType: "json",
+ data: data,
+ async: true,
+ success: function (json) {
+ var item = json.rollingmaintenance;
+ args.response.success({
+ actionFilter: zoneActionfilter,
+ data: item
+ });
+ }
+ });
+ },
+ notification: {
+ poll: function (args) {
+ args.complete();
+ }
+ }
+ },
+
edit: {
label: 'label.edit',
action: function (args) {
@@ -14446,6 +14647,7 @@
listView: {
id: 'clusters',
section: 'clusters',
+ multiSelect: true,
fields: {
name: {
label: 'label.name'
@@ -15184,7 +15386,8 @@
return 'label.metrics';
}
}
- }
+ },
+ startRollingMaintenance: rollingMaintenanceAction({ listView: true, entity: 'clusters' })
},
detailView: {
@@ -15215,6 +15418,56 @@
actions: {
+ startRollingMaintenance: {
+ label: 'label.start.rolling.maintenance',
+ textLabel: 'label.start.rolling.maintenance',
+ messages: {
+ notification: function (args) {
+ return 'label.start.rolling.maintenance';
+ }
+ },
+ createForm: {
+ title: 'label.start.rolling.maintenance',
+ fields: {
+ timeout: {
+ label: 'label.timeout',
+ },
+ force: {
+ isBoolean: true,
+ label: 'label.start.rolling.maintenance.force'
+ },
+ payload: {
+ label: 'label.start.rolling.maintenance.payload'
+ }
+ }
+ },
+ action: function (args) {
+ var data = {
+ clusterids: args.context.clusters[0].id,
+ force: args.data.force,
+ timeout: args.data.timeout,
+ payload: args.data.payload
+ };
+ $.ajax({
+ url: createURL("startRollingMaintenance"),
+ dataType: "json",
+ data: data,
+ async: true,
+ success: function (json) {
+ var item = json.rollingmaintenance;
+ args.response.success({
+ actionFilter: zoneActionfilter,
+ data: item
+ });
+ }
+ });
+ },
+ notification: {
+ poll: function (args) {
+ args.complete();
+ }
+ }
+ },
edit: {
label: 'label.edit',
action: function (args) {
@@ -16002,6 +16255,7 @@
listView: {
section: 'hosts',
id: 'hosts',
+ multiSelect: true,
fields: {
name: {
label: 'label.name'
@@ -16697,7 +16951,8 @@
return 'label.metrics';
}
}
- }
+ },
+ startRollingMaintenance: rollingMaintenanceAction({ listView: true, entity: 'hosts' })
},
detailView: {
name: "Host details",
@@ -16706,6 +16961,56 @@
path: 'instances'
},
actions: {
+ startRollingMaintenance: {
+ label: 'label.start.rolling.maintenance',
+ textLabel: 'label.start.rolling.maintenance',
+ messages: {
+ notification: function (args) {
+ return 'label.start.rolling.maintenance';
+ }
+ },
+ createForm: {
+ title: 'label.start.rolling.maintenance',
+ fields: {
+ timeout: {
+ label: 'label.timeout',
+ },
+ force: {
+ isBoolean: true,
+ label: 'label.start.rolling.maintenance.force'
+ },
+ payload: {
+ label: 'label.start.rolling.maintenance.payload'
+ }
+ }
+ },
+ action: function (args) {
+ var data = {
+ hostids: args.context.hosts[0].id,
+ force: args.data.force,
+ timeout: args.data.timeout,
+ payload: args.data.payload
+ };
+ $.ajax({
+ url: createURL("startRollingMaintenance"),
+ dataType: "json",
+ data: data,
+ async: true,
+ success: function (json) {
+ var item = json.rollingmaintenance;
+ args.response.success({
+ actionFilter: zoneActionfilter,
+ data: item
+ });
+ }
+ });
+ },
+ notification: {
+ poll: function (args) {
+ args.complete();
+ }
+ }
+ },
edit: {
label: 'label.edit',
action: function (args) {
@@ -22173,6 +22478,7 @@
allowedActions.push("disableHA");
}
+ allowedActions.push("startRollingMaintenance");
return allowedActions;
}
@@ -22224,6 +22530,7 @@
//$("#tab_ipallocation, #add_iprange_button, #tab_network_device, #add_network_device_button").hide();
}
+ allowedActions.push("startRollingMaintenance");
return allowedActions;
}
@@ -22270,6 +22577,7 @@
allowedActions.push("disableHA");
}
+ allowedActions.push("startRollingMaintenance");
return allowedActions;
}
@@ -22292,12 +22600,16 @@
if (jsonObj.hypervisor == "KVM") {
allowedActions.push("secureKVMHost");
+ allowedActions.push("startRollingMaintenance");
}
} else if (jsonObj.resourcestate == "ErrorInMaintenance") {
allowedActions.push("edit");
allowedActions.push("enableMaintenanceMode");
allowedActions.push("cancelMaintenanceMode");
+ if (jsonObj.hypervisor == "KVM") {
+ allowedActions.push("startRollingMaintenance");
+ }
} else if (jsonObj.resourcestate == "PrepareForMaintenance" || jsonObj.resourcestate == 'ErrorInPrepareForMaintenance') {
allowedActions.push("edit");
allowedActions.push("cancelMaintenanceMode");
diff --git a/utils/src/main/java/com/cloud/utils/script/Script.java b/utils/src/main/java/com/cloud/utils/script/Script.java
index 35aa24b..13845cd 100644
--- a/utils/src/main/java/com/cloud/utils/script/Script.java
+++ b/utils/src/main/java/com/cloud/utils/script/Script.java
@@ -66,6 +66,10 @@ public class Script implements Callable<String> {
Process _process;
Thread _thread;
+ public boolean isTimeout() { // Accessor exposing the current value of the _isTimeOut field to callers.
+ return _isTimeOut;
+ }
+
public int getExitValue() {
return _process.exitValue();
}