You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cloudstack.apache.org by ml...@apache.org on 2014/03/05 20:25:29 UTC

git commit: updated refs/heads/master to e5449e2

Repository: cloudstack
Updated Branches:
  refs/heads/master 63e3eea79 -> e5449e29c


CLOUDSTACK-6203: KVM Migration fixes. Moved migration to a thread
so we can monitor it and potentially take action to make migration
complete if admin has defined such.


Project: http://git-wip-us.apache.org/repos/asf/cloudstack/repo
Commit: http://git-wip-us.apache.org/repos/asf/cloudstack/commit/e5449e29
Tree: http://git-wip-us.apache.org/repos/asf/cloudstack/tree/e5449e29
Diff: http://git-wip-us.apache.org/repos/asf/cloudstack/diff/e5449e29

Branch: refs/heads/master
Commit: e5449e29c9bdc03c93fe8eb361f32878c9a1b980
Parents: 63e3eea
Author: Marcus Sorensen <ma...@betterservers.com>
Authored: Wed Mar 5 12:24:04 2014 -0700
Committer: Marcus Sorensen <ma...@betterservers.com>
Committed: Wed Mar 5 12:24:04 2014 -0700

----------------------------------------------------------------------
 agent/conf/agent.properties                     | 11 +++
 .../kvm/resource/LibvirtComputingResource.java  | 97 ++++++++++++++++++--
 2 files changed, 101 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cloudstack/blob/e5449e29/agent/conf/agent.properties
----------------------------------------------------------------------
diff --git a/agent/conf/agent.properties b/agent/conf/agent.properties
index a7376b6..e7e3b9c 100644
--- a/agent/conf/agent.properties
+++ b/agent/conf/agent.properties
@@ -70,6 +70,17 @@ domr.scripts.dir=scripts/network/domr/kvm
 # In MegaBytes per second
 #vm.migrate.speed=0
 
+# set target downtime at end of livemigration, the 'hiccup' for final copy. Higher numbers
+# make livemigration easier, lower numbers may cause migration to never complete. Less than 1
+# means hypervisor default (20ms).
+#vm.migrate.downtime=0
+
+# Busy VMs may never finish migrating, depending on environment. When its available, we will
+# want to add support for autoconvergence migration flag which should fix this. Set an upper
+# limit in seconds for how long live migration should wait, at which point VM is paused and
+# migration will finish quickly.  Less than 1 means disabled.
+#vm.migrate.pauseafter=0
+
 # set the type of bridge used on the hypervisor, this defines what commands the resource 
 # will use to setup networking. Currently supported NATIVE, OPENVSWITCH
 #network.bridge.type=native

http://git-wip-us.apache.org/repos/asf/cloudstack/blob/e5449e29/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
----------------------------------------------------------------------
diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
index 1056bcf..3f6abfc 100755
--- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
+++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
@@ -254,11 +254,14 @@ import java.util.Map;
 import java.util.Properties;
 import java.util.Set;
 import java.util.UUID;
+import java.util.concurrent.Callable;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.TimeUnit;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -306,6 +309,8 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
     private String _pod;
     private String _clusterId;
     private int _migrateSpeed;
+    private int _migrateDowntime;
+    private int _migratePauseAfter;
 
     private long _hvVersion;
     private long _kernelVersion;
@@ -889,6 +894,12 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
             _mountPoint = "/mnt";
         }
 
+        value = (String) params.get("vm.migrate.downtime");
+        _migrateDowntime = NumbersUtil.parseInt(value, -1);
+
+        value = (String) params.get("vm.migrate.pauseafter");
+        _migratePauseAfter = NumbersUtil.parseInt(value, -1);
+
         value = (String)params.get("vm.migrate.speed");
         _migrateSpeed = NumbersUtil.parseInt(value, -1);
         if (_migrateSpeed == -1) {
@@ -2986,7 +2997,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
         Connect conn = null;
         String xmlDesc = null;
         try {
-            conn = LibvirtConnection.getConnectionByVmName(cmd.getVmName());
+            conn = LibvirtConnection.getConnectionByVmName(vmName);
             ifaces = getInterfaces(conn, vmName);
             disks = getDisks(conn, vmName);
             dm = conn.domainLookupByName(vmName);
@@ -3006,17 +3017,65 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
             xmlDesc = dm.getXMLDesc(0).replace(_privateIp, cmd.getDestinationIp());
 
             dconn = new Connect("qemu+tcp://" + cmd.getDestinationIp() + "/system");
-            /*
-             * Hard code lm flag: VIR_MIGRATE_LIVE(1<<0)
-             */
-            destDomain = dm.migrate(dconn, (1 << 0), xmlDesc, vmName, "tcp:" + cmd.getDestinationIp(), _migrateSpeed);
 
-            for (DiskDef disk : disks) {
-                cleanupDisk(disk);
+            //run migration in thread so we can monitor it
+            s_logger.info("Live migration of instance " + vmName + " initiated");
+            ExecutorService executor = Executors.newFixedThreadPool(1);
+            Callable<Domain> worker = new MigrateKVMAsync(dm, dconn, vmName, cmd.getDestinationIp());
+            Future<Domain> migrateThread = executor.submit(worker);
+            executor.shutdown();
+            long sleeptime = 0;
+            while (!executor.isTerminated()) {
+                Thread.sleep(100);
+                sleeptime += 100;
+                if (sleeptime == 1000) { // wait 1s before attempting to set downtime on migration, since I don't know of a VIR_DOMAIN_MIGRATING state
+                    if (_migrateDowntime > 0 ) {
+                        try {
+                            int setDowntime = dm.migrateSetMaxDowntime(_migrateDowntime);
+                            if (setDowntime == 0 ) {
+                                s_logger.debug("Set max downtime for migration of " + vmName + " to " + String.valueOf(_migrateDowntime) + "ms");
+                            }
+                        } catch (LibvirtException e) {
+                            s_logger.debug("Failed to set max downtime for migration, perhaps migration completed? Error: " + e.getMessage());
+                        }
+                    }
+                }
+                if ((sleeptime % 1000) == 0) {
+                    s_logger.info("Waiting for migration of " + vmName + " to complete, waited " + sleeptime + "ms");
+                }
+
+                // pause vm if we meet the vm.migrate.pauseafter threshold and not already paused
+                if (_migratePauseAfter > 0 && sleeptime > _migratePauseAfter && dm.getInfo().state == DomainInfo.DomainState.VIR_DOMAIN_RUNNING ) {
+                    s_logger.info("Pausing VM " + vmName + " due to property vm.migrate.pauseafter setting to " + _migratePauseAfter+ "ms to complete migration");
+                    try {
+                        dm.suspend();
+                    } catch (LibvirtException e) {
+                        // pause could be racy if it attempts to pause right when vm is finished, simply warn
+                        s_logger.info("Failed to pause vm " + vmName + " : " + e.getMessage());
+                    }
+                }
+            }
+            s_logger.info("Migration thread for " + vmName + " is done");
+
+            destDomain = migrateThread.get(10, TimeUnit.SECONDS);
+
+            if (destDomain != null) {
+                for (DiskDef disk : disks) {
+                    cleanupDisk(disk);
+                }
             }
         } catch (LibvirtException e) {
             s_logger.debug("Can't migrate domain: " + e.getMessage());
             result = e.getMessage();
+        } catch (InterruptedException e) {
+            s_logger.debug("Interrupted while migrating domain: " + e.getMessage());
+            result = e.getMessage();
+        } catch (ExecutionException e) {
+            s_logger.debug("Failed to execute while migrating domain: " + e.getMessage());
+            result = e.getMessage();
+        } catch (TimeoutException e) {
+            s_logger.debug("Timed out while migrating domain: " + e.getMessage());
+            result = e.getMessage();
         } finally {
             try {
                 if (dm != null) {
@@ -3054,6 +3113,30 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
         return new MigrateAnswer(cmd, result == null, result, null);
     }
 
+    private class MigrateKVMAsync implements Callable<Domain> {
+        Domain dm = null;
+        Connect dconn = null;
+        String vmName = "";
+        String destIp = "";
+
+        MigrateKVMAsync(Domain dm, Connect dconn, String vmName, String destIp) {
+            this.dm = dm;
+            this.dconn = dconn;
+            this.vmName = vmName;
+            this.destIp = destIp;
+        }
+
+        @Override
+        public Domain call() throws LibvirtException {
+            // set compression flag for migration if libvirt version supports it
+            if (dconn.getLibVirVersion() < 1003000) {
+                return dm.migrate(dconn, (1 << 0), vmName, "tcp:" + destIp, _migrateSpeed);
+            } else {
+                return dm.migrate(dconn, (1 << 0)|(1 << 11), vmName, "tcp:" + destIp, _migrateSpeed);
+            }
+        }
+    }
+
     private synchronized Answer execute(PrepareForMigrationCommand cmd) {
 
         VirtualMachineTO vm = cmd.getVirtualMachine();