You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cloudstack.apache.org by nv...@apache.org on 2021/09/15 15:38:45 UTC

[cloudstack] branch main updated: kvm: honor migrate.wait and abort vm migration job (#5388)

This is an automated email from the ASF dual-hosted git repository.

nvazquez pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudstack.git


The following commit(s) were added to refs/heads/main by this push:
     new 95ef292  kvm: honor migrate.wait and abort vm migration job (#5388)
95ef292 is described below

commit 95ef292860457c1cbf08548b9e16cafcaa742b5c
Author: Wei Zhou <we...@apache.org>
AuthorDate: Wed Sep 15 17:38:16 2021 +0200

    kvm: honor migrate.wait and abort vm migration job (#5388)
    
    * kvm: honor migrate.wait and abort vm migration job
    
    * kvm: propagate migratewait to all cloudstack agents on kvm hosts
    
    * update #5388
    
    * update #5388: display error msg
---
 agent/conf/agent.properties                        |  6 +++++
 .../com/cloud/agent/manager/AgentManagerImpl.java  |  4 +++-
 .../kvm/resource/LibvirtComputingResource.java     | 16 +++++++++++++
 .../wrapper/LibvirtMigrateCommandWrapper.java      | 28 +++++++++++++++++++++-
 .../configuration/ConfigurationManagerImpl.java    |  4 +++-
 5 files changed, 55 insertions(+), 3 deletions(-)

diff --git a/agent/conf/agent.properties b/agent/conf/agent.properties
index f4ffd4b..dafe708 100644
--- a/agent/conf/agent.properties
+++ b/agent/conf/agent.properties
@@ -97,6 +97,12 @@ domr.scripts.dir=scripts/network/domr/kvm
 # migration will finish quickly.  Less than 1 means disabled.
 #vm.migrate.pauseafter=0
 
+# Time (in seconds) to wait for a VM migration to finish. Less than 1 means disabled.
+# If the VM migration has not finished within this time, the migration job is aborted through libvirt.
+# This value is set by the CloudStack management server when the CloudStack agent connects.
+# Change the global setting 'migratewait' if needed (default value: 3600).
+#vm.migrate.wait=0
+
 # Agent hooks is the way to override default agent behavior to extend the functionality without excessive coding
 # for a custom deployment. The first hook promoted is libvirt-vm-xml-transformer which allows provider to modify
 # VM XML specification before send to libvirt. Hooks are implemented in Groovy and must be implemented in the way
diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java
index f69f54c..c15edcf 100644
--- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java
+++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java
@@ -38,6 +38,7 @@ import java.util.concurrent.locks.ReentrantLock;
 import javax.inject.Inject;
 import javax.naming.ConfigurationException;
 
+import com.cloud.configuration.Config;
 import com.cloud.utils.NumbersUtil;
 import org.apache.cloudstack.agent.lb.IndirectAgentLB;
 import org.apache.cloudstack.ca.CAManager;
@@ -1758,7 +1759,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
             if (cmd instanceof StartupRoutingCommand) {
                 if (((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.LXC) {
                     Map<String, String> params = new HashMap<String, String>();
-                    params.put("router.aggregation.command.each.timeout", _configDao.getValue("router.aggregation.command.each.timeout"));
+                    params.put(Config.RouterAggregationCommandEachTimeout.toString(), _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString()));
+                    params.put(Config.MigrateWait.toString(), _configDao.getValue(Config.MigrateWait.toString()));
 
                     try {
                         SetHostParamsCommand cmds = new SetHostParamsCommand(params);
diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
index 2394307..6620fc2 100644
--- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
+++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
@@ -46,6 +46,7 @@ import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.parsers.ParserConfigurationException;
 
+import com.cloud.configuration.Config;
 import org.apache.cloudstack.storage.configdrive.ConfigDrive;
 import org.apache.cloudstack.storage.to.PrimaryDataStoreTO;
 import org.apache.cloudstack.storage.to.TemplateObjectTO;
@@ -356,6 +357,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
     protected int _migrateSpeed;
     protected int _migrateDowntime;
     protected int _migratePauseAfter;
+    protected int _migrateWait;
     protected boolean _diskActivityCheckEnabled;
     protected RollingMaintenanceExecutor rollingMaintenanceExecutor;
     protected long _diskActivityCheckFileSizeMin = 10485760; // 10MB
@@ -540,6 +542,10 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
         return _migratePauseAfter;
     }
 
+    public int getMigrateWait() {
+        return _migrateWait;
+    }
+
     public int getMigrateSpeed() {
         return _migrateSpeed;
     }
@@ -1228,6 +1234,9 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
         value = (String) params.get("vm.migrate.pauseafter");
         _migratePauseAfter = NumbersUtil.parseInt(value, -1);
 
+        value = (String) params.get("vm.migrate.wait");
+        _migrateWait = NumbersUtil.parseInt(value, -1);
+
         configureAgentHooks(params);
 
         value = (String)params.get("vm.migrate.speed");
@@ -1291,6 +1300,13 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
             storage.persist("router.aggregation.command.each.timeout", String.valueOf(longValue));
         }
 
+        if (params.get(Config.MigrateWait.toString()) != null) {
+            String value = (String)params.get(Config.MigrateWait.toString());
+            Integer intValue = NumbersUtil.parseInt(value, -1);
+            storage.persist("vm.migrate.wait", String.valueOf(intValue));
+            _migrateWait = intValue;
+        }
+
         return true;
     }
 
diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapper.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapper.java
index a72d584..1ad1802 100644
--- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapper.java
+++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapper.java
@@ -51,6 +51,7 @@ import org.apache.commons.lang.StringUtils;
 import org.apache.log4j.Logger;
 import org.libvirt.Connect;
 import org.libvirt.Domain;
+import org.libvirt.DomainJobInfo;
 import org.libvirt.DomainInfo.DomainState;
 import org.libvirt.LibvirtException;
 import org.libvirt.StorageVol;
@@ -219,6 +220,29 @@ public final class LibvirtMigrateCommandWrapper extends CommandWrapper<MigrateCo
                     s_logger.info("Waiting for migration of " + vmName + " to complete, waited " + sleeptime + "ms");
                 }
 
+                // abort the vm migration if the job has been running for longer than vm.migrate.wait seconds
+                final int migrateWait = libvirtComputingResource.getMigrateWait();
+                if (migrateWait > 0 && sleeptime > migrateWait * 1000) {
+                    DomainState state = null;
+                    try {
+                        state = dm.getInfo().state;
+                    } catch (final LibvirtException e) {
+                        s_logger.info("Couldn't get VM domain state after " + sleeptime + "ms: " + e.getMessage());
+                    }
+                    if (state != null && state == DomainState.VIR_DOMAIN_RUNNING) {
+                        try {
+                            DomainJobInfo job = dm.getJobInfo();
+                            s_logger.info("Aborting " + vmName + " domain job: " + job);
+                            dm.abortJob();
+                            result = String.format("Migration of VM %s was cancelled by cloudstack due to time out after %d seconds", vmName, migrateWait);
+                            s_logger.debug(result);
+                            break;
+                        } catch (final LibvirtException e) {
+                            s_logger.info("Failed to abort the vm migration job of vm " + vmName + " : " + e.getMessage());
+                        }
+                    }
+                }
+
                 // pause vm if we meet the vm.migrate.pauseafter threshold and not already paused
                 final int migratePauseAfter = libvirtComputingResource.getMigratePauseAfter();
                 if (migratePauseAfter > 0 && sleeptime > migratePauseAfter) {
@@ -262,7 +286,9 @@ public final class LibvirtMigrateCommandWrapper extends CommandWrapper<MigrateCo
             | TransformerException
             | URISyntaxException e) {
             s_logger.debug(String.format("%s : %s", e.getClass().getSimpleName(), e.getMessage()));
-            result = "Exception during migrate: " + e.getMessage();
+            if (result == null) {
+                result = "Exception during migrate: " + e.getMessage();
+            }
         } finally {
             try {
                 if (dm != null && result != null) {
diff --git a/server/src/main/java/com/cloud/configuration/ConfigurationManagerImpl.java b/server/src/main/java/com/cloud/configuration/ConfigurationManagerImpl.java
index 4ccdf7e..773f168 100755
--- a/server/src/main/java/com/cloud/configuration/ConfigurationManagerImpl.java
+++ b/server/src/main/java/com/cloud/configuration/ConfigurationManagerImpl.java
@@ -543,9 +543,11 @@ public class ConfigurationManagerImpl extends ManagerBase implements Configurati
                 if (globalSettingUpdated.equals(ApiServiceConfiguration.ManagementServerAddresses.key()) ||
                         globalSettingUpdated.equals(IndirectAgentLBServiceImpl.IndirectAgentLBAlgorithm.key())) {
                     _indirectAgentLB.propagateMSListToAgents();
-                } else if (globalSettingUpdated.equals(Config.RouterAggregationCommandEachTimeout.toString())) {
+                } else if (globalSettingUpdated.equals(Config.RouterAggregationCommandEachTimeout.toString())
+                        ||  globalSettingUpdated.equals(Config.MigrateWait.toString())) {
                     Map<String, String> params = new HashMap<String, String>();
                     params.put(Config.RouterAggregationCommandEachTimeout.toString(), _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString()));
+                    params.put(Config.MigrateWait.toString(), _configDao.getValue(Config.MigrateWait.toString()));
                     _agentManager.propagateChangeToAgents(params);
                 }
             }