You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@fluo.apache.org by ar...@apache.org on 2021/08/16 17:03:16 UTC

[fluo-muchos] branch main updated: Add support for Azure Spot VM scale sets (#406)

This is an automated email from the ASF dual-hosted git repository.

arvindsh pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fluo-muchos.git


The following commit(s) were added to refs/heads/main by this push:
     new 1305721  Add support for Azure Spot VM scale sets (#406)
1305721 is described below

commit 13057215ac76027e544530daac1b3bf0ce2cb226
Author: Arvind Shyamsundar <ar...@apache.org>
AuthorDate: Mon Aug 16 10:03:11 2021 -0700

    Add support for Azure Spot VM scale sets (#406)
    
    * Add VMSS priority settings in example configuration files. By setting
      priority to `Low`, Azure Spot instances can be used for the cluster,
      thereby leading to potentially significant cost savings.
    * Update Azure VMSS documentation for the multiple VMSS scenario.
    * Update doc for running Muchos (for Azure clusters) on Ubuntu clients
      to clarify Python package installation.
    * Add additional validations to ensure only Azure Spot capable VM SKUs
      are used when the cluster is configured to use Azure Spot.
---
 ansible/roles/azure/tasks/create_multiple_vmss.yml |  1 +
 ansible/roles/azure/tasks/create_vmss.yml          |  1 +
 conf/azure_multiple_vmss_vars.yml.example          |  3 +++
 conf/muchos.props.example                          |  6 +++++
 docs/azure-multiple-vmss.md                        |  1 +
 docs/azure-ubuntu-1804.md                          | 11 +++++----
 lib/muchos/config/azure.py                         | 25 +++++++++++++++++++++
 lib/muchos/config/azurevalidations.py              | 26 ++++++++++++++++++++++
 8 files changed, 70 insertions(+), 4 deletions(-)

diff --git a/ansible/roles/azure/tasks/create_multiple_vmss.yml b/ansible/roles/azure/tasks/create_multiple_vmss.yml
index 906c43a..dd17caf 100644
--- a/ansible/roles/azure/tasks/create_multiple_vmss.yml
+++ b/ansible/roles/azure/tasks/create_multiple_vmss.yml
@@ -36,6 +36,7 @@
     location: "{{ location }}"
     name: "{{ vmss_name }}-{{ item.name_suffix }}"
     vm_size: "{{ item.sku }}"
+    priority: "{{ item.vmss_priority | default('Regular') }}"
     admin_username: "{{ cluster_user }}"
     ssh_password_enabled: false
     ssh_public_keys:
diff --git a/ansible/roles/azure/tasks/create_vmss.yml b/ansible/roles/azure/tasks/create_vmss.yml
index 2812f58..b520f18 100644
--- a/ansible/roles/azure/tasks/create_vmss.yml
+++ b/ansible/roles/azure/tasks/create_vmss.yml
@@ -45,6 +45,7 @@
     location: "{{ location }}"
     name: "{{ vmss_name }}"
     vm_size: "{{ vm_sku }}"
+    priority: "{{ vmss_priority | default('Regular') }}"
     admin_username: "{{ cluster_user }}"
     ssh_password_enabled: false
     ssh_public_keys:
diff --git a/conf/azure_multiple_vmss_vars.yml.example b/conf/azure_multiple_vmss_vars.yml.example
index 176c770..5d2f964 100644
--- a/conf/azure_multiple_vmss_vars.yml.example
+++ b/conf/azure_multiple_vmss_vars.yml.example
@@ -70,8 +70,11 @@ vars_list:
     roles:
       metrics: 1
       client: 1
+
+  # As an example, the below is also configured as an Azure Spot (Low-Priority) VMSS
   - name_suffix: vmss6
     sku: Standard_D8s_v3
+    vmss_priority: Low
     perf_profile: azd8s
     data_disk_count: 8
     data_disk_sku: Standard_LRS
diff --git a/conf/muchos.props.example b/conf/muchos.props.example
index 982e1f2..56a427f 100644
--- a/conf/muchos.props.example
+++ b/conf/muchos.props.example
@@ -116,6 +116,12 @@ subnet = subnet1
 subnet_cidr = 10.1.0.0/16
 #Optional. If set to True, will create multiple VMSS based on multiple_vmss_vars.yml
 use_multiple_vmss = False
+# The below "vmss_priority" controls whether Azure Spot is used (or not).
+# When vmss_priority is set to Low, the cluster uses Azure Spot VM Scale Sets.
+# When vmss_priority is set to Regular (which is the default), Azure Spot is NOT used.
+# Note that for multiple-VMSS deployments, this setting can be overridden at a 
+# per-VMSS level by specifying vmss_priority in the conf/azure_multiple_vmss_vars.yml file.
+vmss_priority = Regular
 # Azure image reference defined as a pipe-delimited string in the format offer|publisher|sku|version|
 # Please refer 'Launching an Azure cluster' section of the README before making changes
 azure_image_reference = CentOS|OpenLogic|7.5|latest|
diff --git a/docs/azure-multiple-vmss.md b/docs/azure-multiple-vmss.md
index 100febf..18d6818 100644
--- a/docs/azure-multiple-vmss.md
+++ b/docs/azure-multiple-vmss.md
@@ -22,6 +22,7 @@ Muchos provides a [sample file](../conf/azure_multiple_vmss_vars.yml.example) wh
 |-----------|------------------------|---------|-------------|
 | `name_suffix` | Required | - | The name of each VMSS is constructed by concatenating the Muchos cluster name with this string. As an example, if your Muchos cluster is called `test`, and this field has a value of `ldr`, then the VMSS is created with a name `test-ldr`|
 | `sku` | Required | - | A string identifier specifying the Azure VM size. Refer to the [Azure documentation](https://docs.microsoft.com/en-us/azure/virtual-machines/dv3-dsv3-series) to lookup these strings. An example VM size is `Standard_D32s_v3` for a 32-vCPU [Dsv3](https://docs.microsoft.com/en-us/azure/virtual-machines/dv3-dsv3-series#dsv3-series) VM|
+| `vmss_priority` | Optional | `Regular`. If this is not specified at the per-VMSS level, the value for `vmss_priority` from the `azure` section in [muchos.props](../conf/muchos.props.example) is used | This can be set to `Regular`, for regular VMs, or `Low` for [Spot VMs](https://docs.microsoft.com/en-us/azure/virtual-machines/windows/spot-vms).|
 | `perf_profile` | Required | - | A string identifying a corresponding performance profile configuration section in muchos.props which contains perf profile parameters |
 | `azure_disk_device_path`| Optional | If not specified, the corresponding `azure_disk_device_path` value from the `azure` section in [muchos.props](../conf/muchos.props.example) is used | This is a device path used to enumerate attached SCSI or NVME disks to use for persistent local storage |
 | `azure_disk_device_pattern`| Optional | If not specified, the corresponding `azure_disk_device_pattern` value from the `azure` section in [muchos.props](../conf/muchos.props.example) is used | This is a device name wildcard pattern used (internally) in conjunction with `azure_disk_device_path` to enumerate attached SCSI or NVME disks to use for persistent local storage |
diff --git a/docs/azure-ubuntu-1804.md b/docs/azure-ubuntu-1804.md
index 1cdac95..d3573ea 100644
--- a/docs/azure-ubuntu-1804.md
+++ b/docs/azure-ubuntu-1804.md
@@ -10,14 +10,17 @@ the following steps can get you on your way.
 # https://docs.microsoft.com/en-us/cli/azure/install-azure-cli-apt?view=azure-cli-latest
 
 # Install Ansible Azure for Python 3.  The main reason these tips were written
-# was to save you time on the following steps.  Muchos uses Python 3.  If
-# python-pip and pip were installed and used, those would go against Python 2
+# was to save you time on the following steps.  Muchos needs Python 3.6 and above.
+# If python-pip and pip were installed and used, those would go against Python 2
 # and would not work.
 sudo apt install python3-pip
-sudo pip3 install ansible[azure]==2.9.13
+sudo pip3 install -r lib/requirements.txt
+# Current versions of Ansible separate out the Azure-specific modules into a
+# separate "collection". To install that, and associated pre-requisites, a helper
+# script has been provided. Please be sure to execute this script:
+./scripts/install-ansible-for-azure
 ```
 
 A virtual python environment is not needed in Ubuntu.  The instructions that
 mention that are targeted for Centos 7.  The version of Python 3 and pip3 that
 ship with Ubuntu 18.04 suffice.
-
diff --git a/lib/muchos/config/azure.py b/lib/muchos/config/azure.py
index 06e131e..817350a 100644
--- a/lib/muchos/config/azure.py
+++ b/lib/muchos/config/azure.py
@@ -308,6 +308,11 @@ class AzureDeployConfig(BaseConfig):
         return self.get("azure", "azure_proxy_host_vm_sku")
 
     @ansible_host_var
+    @is_valid(is_in(["Regular", "Low"]))
+    def vmss_priority(self):
+        return self.get("azure", "vmss_priority")
+
+    @ansible_host_var
     @default("Standard_LRS")
     @is_valid(is_in(["Standard_LRS", "Premium_LRS", "StandardSSD_LRS"]))
     def data_disk_sku(self):
@@ -356,6 +361,26 @@ class AzureDeployConfig(BaseConfig):
             )
         )
 
+    def spot_capable_skus(self):
+        return list(
+            map(
+                lambda r: r.name,
+                filter(
+                    lambda s: len(
+                        list(
+                            filter(
+                                lambda c: c.name == "LowPriorityCapable"
+                                and c.value == "True",
+                                s.capabilities,
+                            )
+                        )
+                    )
+                    > 0,
+                    self.vm_skus_for_location,
+                ),
+            )
+        )
+
     def max_data_disks_for_skus(self):
         n = list(map(lambda r: r.name, self.vm_skus_for_location))
         d = list(
diff --git a/lib/muchos/config/azurevalidations.py b/lib/muchos/config/azurevalidations.py
index 7fb69dd..cdbdc39 100644
--- a/lib/muchos/config/azurevalidations.py
+++ b/lib/muchos/config/azurevalidations.py
@@ -93,6 +93,32 @@ AZURE_VALIDATIONS = {
             "when use_multiple_vmss == True, any VMSS with sku "
             "must be a valid VM SKU for the selected location",
         ),
+        # Cannot use Spot (Low priority) unless the VMSS SKU is Spot-capable
+        ConfigValidator(
+            lambda config, client: config.getboolean(
+                "azure", "use_multiple_vmss"
+            )
+            or not config.vmss_priority() == "Low"
+            or config.vm_sku() in config.spot_capable_skus(),
+            "azure.vm_sku must be an Azure Spot (low priority) capable VM SKU",
+        ),
+        ConfigValidator(
+            lambda config, client: not config.getboolean(
+                "azure", "use_multiple_vmss"
+            )
+            or all(
+                [
+                    vmss.get("sku") in config.spot_capable_skus()
+                    if vmss.get("vmss_priority") == "Low"
+                    else True
+                    for vmss in config.azure_multiple_vmss_vars.get(
+                        "vars_list", []
+                    )
+                ]
+            ),
+            "when use_multiple_vmss == True, any VMSS set to use Azure Spot "
+            "(low priority) must use an Azure Spot-capable VM SKU",
+        ),
         # data_disk_sku in
         # ['Standard_LRS', 'StandardSSD_LRS', Premium_LRS']
         ConfigValidator(