You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cloudstack.apache.org by GitBox <gi...@apache.org> on 2021/07/23 13:52:27 UTC

[GitHub] [cloudstack] nvazquez commented on a change in pull request #5239: Externalize KVM Agent storage's timeout configuration

nvazquez commented on a change in pull request #5239:
URL: https://github.com/apache/cloudstack/pull/5239#discussion_r675579319



##########
File path: plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java
##########
@@ -32,153 +33,143 @@
 import java.util.concurrent.ConcurrentHashMap;
 
 public class KVMHAMonitor extends KVMHABase implements Runnable {
+
     private static final Logger s_logger = Logger.getLogger(KVMHAMonitor.class);
-    private final Map<String, NfsStoragePool> _storagePool = new ConcurrentHashMap<String, NfsStoragePool>();
+    private final Map<String, NfsStoragePool> storagePool = new ConcurrentHashMap<>();
 
-    private final String _hostIP; /* private ip address */
+    private final String hostPrivateIp;
 
     public KVMHAMonitor(NfsStoragePool pool, String host, String scriptPath) {
         if (pool != null) {
-            _storagePool.put(pool._poolUUID, pool);
+            storagePool.put(pool._poolUUID, pool);
         }
-        _hostIP = host;
+        hostPrivateIp = host;
         configureHeartBeatPath(scriptPath);
+
+        _heartBeatUpdateTimeout = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.HEARTBEAT_UPDATE_TIMEOUT);
     }
 
     private static synchronized void configureHeartBeatPath(String scriptPath) {
         KVMHABase.s_heartBeatPath = scriptPath;
     }
 
     public void addStoragePool(NfsStoragePool pool) {
-        synchronized (_storagePool) {
-            _storagePool.put(pool._poolUUID, pool);
+        synchronized (storagePool) {
+            storagePool.put(pool._poolUUID, pool);
         }
     }
 
     public void removeStoragePool(String uuid) {
-        synchronized (_storagePool) {
-            NfsStoragePool pool = _storagePool.get(uuid);
+        synchronized (storagePool) {
+            NfsStoragePool pool = storagePool.get(uuid);
             if (pool != null) {
                 Script.runSimpleBashScript("umount " + pool._mountDestPath);
-                _storagePool.remove(uuid);
+                storagePool.remove(uuid);
             }
         }
     }
 
     public List<NfsStoragePool> getStoragePools() {
-        synchronized (_storagePool) {
-            return new ArrayList<NfsStoragePool>(_storagePool.values());
+        synchronized (storagePool) {
+            return new ArrayList<>(storagePool.values());
         }
     }
 
     public NfsStoragePool getStoragePool(String uuid) {
-        synchronized (_storagePool) {
-            return _storagePool.get(uuid);
+        synchronized (storagePool) {
+            return storagePool.get(uuid);
         }
     }
 
-    private class Monitor extends ManagedContextRunnable {
+    protected void runHeartBeat() {
+        synchronized (storagePool) {
+            Set<String> removedPools = new HashSet<>();
+            for (String uuid : storagePool.keySet()) {
+                NfsStoragePool primaryStoragePool = storagePool.get(uuid);
+                StoragePool storage;
+                try {
+                    Connect conn = LibvirtConnection.getConnection();
+                    storage = conn.storagePoolLookupByUUIDString(uuid);
+                    if (storage == null || storage.getInfo().state != StoragePoolState.VIR_STORAGE_POOL_RUNNING) {
+                        if (storage == null) {
+                            s_logger.debug(String.format("Libvirt storage pool [%s] not found, removing from HA list.", uuid));
+                        } else {
+                            s_logger.debug(String.format("Libvirt storage pool [%s] found, but not running, removing from HA list.", uuid));
+                        }
 
-        @Override
-        protected void runInContext() {
-            synchronized (_storagePool) {
-                Set<String> removedPools = new HashSet<String>();
-                for (String uuid : _storagePool.keySet()) {
-                    NfsStoragePool primaryStoragePool = _storagePool.get(uuid);
+                        removedPools.add(uuid);
+                        continue;
+                    }
 
-                    // check for any that have been deregistered with libvirt and
-                    // skip,remove them
+                    s_logger.debug(String.format("Found NFS storage pool [%s] in libvirt, continuing.", uuid));
 
-                    StoragePool storage = null;
-                    try {
-                        Connect conn = LibvirtConnection.getConnection();
-                        storage = conn.storagePoolLookupByUUIDString(uuid);
-                        if (storage == null) {
-                            s_logger.debug("Libvirt storage pool " + uuid + " not found, removing from HA list");
-                            removedPools.add(uuid);
-                            continue;
+                } catch (LibvirtException e) {
+                    s_logger.debug(String.format("Failed to lookup libvirt storage pool [%s].", uuid), e);
 
-                        } else if (storage.getInfo().state != StoragePoolState.VIR_STORAGE_POOL_RUNNING) {
-                            s_logger.debug("Libvirt storage pool " + uuid + " found, but not running, removing from HA list");
+                    if (e.toString().contains("pool not found")) {
+                        s_logger.debug(String.format("Removing pool [%s] from HA monitor since it was deleted.", uuid));
+                        removedPools.add(uuid);
+                        continue;
+                    }
 
-                            removedPools.add(uuid);
-                            continue;
-                        }
-                        s_logger.debug("Found NFS storage pool " + uuid + " in libvirt, continuing");
+                }
 
-                    } catch (LibvirtException e) {
-                        s_logger.debug("Failed to lookup libvirt storage pool " + uuid + " due to: " + e);
+                String result = null;
+                for (int i = 1; i <= _heartBeatUpdateMaxTries; i++) {
+                    Script cmd = createHeartBeatCommand(primaryStoragePool);
+                    cmd.add("-h", hostPrivateIp);
+                    result = cmd.execute();
 
-                        // we only want to remove pool if it's not found, not if libvirt
-                        // connection fails
-                        if (e.toString().contains("pool not found")) {
-                            s_logger.debug("removing pool from HA monitor since it was deleted");
-                            removedPools.add(uuid);
-                            continue;
-                        }
-                    }
+                    s_logger.debug(String.format("The command (%s), to the pool [%s], has the result [%s].", cmd.toString(), uuid, result));
 
-                    String result = null;
-                    // Try multiple times, but sleep in between tries to ensure it isn't a short lived transient error
-                    for (int i = 1; i <= _heartBeatUpdateMaxTries; i++) {
-                        Script cmd = new Script(s_heartBeatPath, _heartBeatUpdateTimeout, s_logger);
-                        cmd.add("-i", primaryStoragePool._poolIp);
-                        cmd.add("-p", primaryStoragePool._poolMountSourcePath);
-                        cmd.add("-m", primaryStoragePool._mountDestPath);
-                        cmd.add("-h", _hostIP);
-                        result = cmd.execute();
-                        if (result != null) {
-                            s_logger.warn("write heartbeat failed: " + result + ", try: " + i + " of " + _heartBeatUpdateMaxTries);
-                            try {
-                                Thread.sleep(_heartBeatUpdateRetrySleep);
-                            } catch (InterruptedException e) {
-                                s_logger.debug("[ignored] interupted between heartbeat retries.");
-                            }
-                        } else {
-                            break;
+                    if (result != null) {
+                        s_logger.warn(String.format("Write heartbeat for pool [%s] failed: %s; try: %s of %s.", uuid, result, i, _heartBeatUpdateMaxTries));
+                        try {
+                            Thread.sleep(_heartBeatUpdateRetrySleep);
+                        } catch (InterruptedException e) {
+                            s_logger.debug("[IGNORED] Interrupted between heartbeat retries.", e);
                         }
+                    } else {
+                        break;
                     }
 
-                    if (result != null) {
-                        // Stop cloudstack-agent if can't write to heartbeat file.
-                        // This will raise an alert on the mgmt server
-                        s_logger.warn("write heartbeat failed: " + result + "; stopping cloudstack-agent");
-                        Script cmd = new Script(s_heartBeatPath, _heartBeatUpdateTimeout, s_logger);
-                        cmd.add("-i", primaryStoragePool._poolIp);
-                        cmd.add("-p", primaryStoragePool._poolMountSourcePath);
-                        cmd.add("-m", primaryStoragePool._mountDestPath);
-                        cmd.add("-c");
-                        result = cmd.execute();
-                    }
                 }
 
-                if (!removedPools.isEmpty()) {
-                    for (String uuid : removedPools) {
-                        removeStoragePool(uuid);
-                    }
+                if (result != null) {
+                    s_logger.warn(String.format("Write heartbeat for pool [%s] failed: %s; stopping cloudstack-agent.", uuid, result));
+                    Script cmd = createHeartBeatCommand(primaryStoragePool);
+                    cmd.add("-c");
+                    result = cmd.execute();
                 }
             }
 
+            if (!removedPools.isEmpty()) {
+                for (String uuid : removedPools) {
+                    removeStoragePool(uuid);
+                }
+            }
         }
+
+    }
+
+    private Script createHeartBeatCommand(NfsStoragePool primaryStoragePool) {

Review comment:
       Minor suggestion: what about adding the host IP and a boolean parameter, and add them to `cmd` in case the host IP is not null or the boolean is true?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@cloudstack.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org