You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@fluo.apache.org by ar...@apache.org on 2020/01/21 18:32:39 UTC

[fluo-muchos] branch master updated: Implement terminate and wipe actions for Azure clusters (#306)

This is an automated email from the ASF dual-hosted git repository.

arvindsh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/fluo-muchos.git


The following commit(s) were added to refs/heads/master by this push:
     new 337a80c  Implement terminate and wipe actions for Azure clusters (#306)
337a80c is described below

commit 337a80c8257214cc04b765d5e454260aded982aa
Author: Shan <sh...@users.noreply.github.com>
AuthorDate: Tue Jan 21 13:32:29 2020 -0500

    Implement terminate and wipe actions for Azure clusters (#306)
    
    * Implement terminate and wipe actions for Azure clusters
    Also adds wipe for Hub, Maven and Fluo YARN
    
    * Added validation for hosts file.
    
    * Moved azure specific code to azure.py
    Organize Azure files to azure specific folder
---
 README.md                                       |  3 -
 ansible/roles/azure/tasks/create_vmss.yml       |  2 +
 ansible/roles/azure/tasks/terminate_cluster.yml | 87 +++++++++++++++++++++++++
 ansible/roles/azure/tasks/wipe_adlsg2.yml       | 42 ++++++++++++
 ansible/wipe.yml                                | 22 +++++++
 lib/muchos/azure.py                             | 35 ++++++++++
 lib/muchos/ec2.py                               |  2 +
 lib/muchos/existing.py                          | 11 ++--
 8 files changed, 197 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 4dd1fcc..dc7a778 100644
--- a/README.md
+++ b/README.md
@@ -308,9 +308,6 @@ data on your cluster will be lost:
 
     ./bin/muchos terminate
 
-Note: The terminate command is currently unsupported for Azure based clusters. Instead, you should delete
-underlying Azure VMSS resources when you need to terminate the cluster.
-
 ## Automatic shutdown of clusters
 
 With the default configuration, clusters will not shutdown automatically after a delay and the default
diff --git a/ansible/roles/azure/tasks/create_vmss.yml b/ansible/roles/azure/tasks/create_vmss.yml
index 9a6eb21..d7f5d08 100644
--- a/ansible/roles/azure/tasks/create_vmss.yml
+++ b/ansible/roles/azure/tasks/create_vmss.yml
@@ -95,6 +95,7 @@
       - path: /home/{{admin_username}}/.ssh/authorized_keys
         key_data: "{{ lookup('file', '~/.ssh/id_rsa.pub') }}"
     os_disk_caching: ReadWrite
+    managed_disk_type: Standard_LRS
     image:
       offer: CentOS
       publisher: OpenLogic
@@ -103,6 +104,7 @@
     data_disks:
      - lun: 0
        disk_size_gb: 64
+       managed_disk_type: Standard_LRS
   when: azure_proxy_host is defined and azure_proxy_host != '' and azure_proxy_host != None
 
 # SECTION 3: Create the Azure VMSS for the nodes used by Muchos
diff --git a/ansible/roles/azure/tasks/terminate_cluster.yml b/ansible/roles/azure/tasks/terminate_cluster.yml
new file mode 100644
index 0000000..822656d
--- /dev/null
+++ b/ansible/roles/azure/tasks/terminate_cluster.yml
@@ -0,0 +1,87 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+---
+
+- hosts: localhost
+  tasks:
+    - name: Delete User Assigned Identity
+      azure_rm_resource:
+         resource_group: "{{ resource_group }}"
+         provider: ManagedIdentity
+         resource_type: userAssignedIdentities
+         resource_name:  "{{ user_assigned_identity if user_assigned_identity !='' else vmss_name + '-ua-msi' }}"
+         api_version: '2018-11-30'
+         state: absent
+      when: use_adlsg2 == True
+
+    - name: Delete ADLS Gen2 storage Account
+      azure_rm_storageaccount:
+         resource_group: "{{ resource_group }}"
+         name:  "{{ item.split('@')[1].split('.')[0] }}"
+         force_delete_nonempty: yes
+         state: absent
+      loop:
+         "{{ instance_volumes_preferred.split(',') }}"
+      when: use_adlsg2 == True
+
+    - name: Delete VM Scale Set
+      azure_rm_virtualmachinescaleset:
+        resource_group: "{{ resource_group }}"
+        name: "{{ vmss_name }}"
+        remove_on_absent: all
+        state: absent
+
+    - name: Delete azure proxy virtual machine if one was created
+      azure_rm_virtualmachine:
+        resource_group: "{{ resource_group }}"
+        name: "{{ azure_proxy_host }}"
+        remove_on_absent: all
+        state: absent
+      when: azure_proxy_host is defined and azure_proxy_host|length
+
+    - name: Delete corresponding network security group
+      azure_rm_securitygroup:
+        resource_group: "{{ resource_group }}"
+        name: "{{ azure_proxy_host }}-nsg"
+        state: absent
+      when: azure_proxy_host is defined and azure_proxy_host|length
+
+    - name: Get facts for virtual network
+      azure_rm_virtualnetwork_facts:
+        resource_group: "{{ resource_group }}"
+        name: "{{ vnet }}"
+      register: vnet_facts
+
+    - name: Delete corresponding virtual network if one was created
+      azure_rm_virtualnetwork:
+        resource_group: "{{ resource_group }}"
+        name: "{{ vnet }}"
+        state: absent
+      when: >
+        vnet_facts['ansible_facts']['azure_virtualnetworks']|length and
+        'deployment_type' in vnet_facts['ansible_facts']['azure_virtualnetworks'][0]['tags'] and
+        vnet_facts['ansible_facts']['azure_virtualnetworks'][0]['tags']['deployment_type'] == 'muchos'
+
+    - name: Delete the resource group if it is empty
+      azure_rm_resourcegroup:
+        name: "{{ resource_group }}"
+        state: absent
+      retries: 30
+      delay: 15
+      register: resourcegroupresult
+      until: resourcegroupresult is not failed
diff --git a/ansible/roles/azure/tasks/wipe_adlsg2.yml b/ansible/roles/azure/tasks/wipe_adlsg2.yml
new file mode 100644
index 0000000..6278adc
--- /dev/null
+++ b/ansible/roles/azure/tasks/wipe_adlsg2.yml
@@ -0,0 +1,42 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+- hosts: localhost
+  tasks:
+  - name: Delete container/Filesystem on ADLS Gen2
+    azure_rm_storageblob:
+      resource_group: "{{ resource_group }}"
+      storage_account_name:  "{{ item.split('@')[1].split('.')[0] }}"
+      container: "{{ item.split('@')[0].split('://')[1] }}"
+      state: absent
+      force: yes
+    loop:
+        "{{ instance_volumes_preferred.split(',') }}"
+    when: cluster_type == 'azure' and use_adlsg2 == True
+
+  - name: Create container/Filesystem on ADLS Gen2
+    azure_rm_storageblob:
+      resource_group: "{{ resource_group }}"
+      storage_account_name:  "{{ item.split('@')[1].split('.')[0] }}"
+      container: "{{ item.split('@')[0].split('://')[1] }}"
+    retries: 20
+    delay: 30
+    register: result
+    until: result is succeeded and (result.changed == False or (result.changed == True and result.container|length > 0))
+    loop:
+      "{{ instance_volumes_preferred.split(',')  }}"
+    when: cluster_type == 'azure' and use_adlsg2 == True
diff --git a/ansible/wipe.yml b/ansible/wipe.yml
index 186f443..2c048b1 100644
--- a/ansible/wipe.yml
+++ b/ansible/wipe.yml
@@ -27,7 +27,9 @@
     file: path={{ default_data_dirs[0] }}/influxdb state=absent
   - name: "wipe grafana db"
     file: path=/var/lib/grafana/grafana.db state=absent
+
 - import_playbook: kill.yml
+
 - hosts: all
   tasks:
   - name: "wipe software installation dirs"
@@ -38,20 +40,40 @@
       - "{{ accumulo_home }}"
       - "{{ fluo_home }}"
       - "{{ spark_home }}"
+      - "{{ maven_home }}"
+      - "{{ hub_home }}"
+      - "{{ fluo_yarn_home }}"
+
 - hosts: hadoop
   tasks:
   - name: "wipe hadoop data"
     file: path={{item}}/hadoop state=absent
     with_items: "{{ worker_data_dirs }}"
+    when: cluster_type != 'azure'
   - name: "remove hadoop logs"
     shell: rm -rf {{ hadoop_home }}/logs/*
+    when: cluster_type != 'azure'
+
 - hosts: zookeepers
   tasks:
   - name: "wipe zookeeper data"
     file: path={{ default_data_dirs[0] }}/zookeeper state=absent
+    when: cluster_type != 'azure'
   - name: "remove zookeeper logs"
     file: path={{ zookeeper_home }}/zookeeper.out state=absent
+    when: cluster_type != 'azure'
+
 - hosts: accumulo
   tasks:
   - name: "remove accumulo logs"
     shell: rm -rf {{ accumulo_home }}/logs/*
+    when: cluster_type != 'azure'
+
+# Use rm -rf rather than the Ansible file module so everything beneath each data dir, including nested subfolders, is removed
+- hosts: all
+  tasks:
+  - name: "wipe data and log directories for Azure"
+    shell: rm -rf {{ item }}/*
+    loop:
+      "{{ worker_data_dirs }}"
+    when: cluster_type == 'azure'
diff --git a/lib/muchos/azure.py b/lib/muchos/azure.py
index c5d1cb6..075778b 100644
--- a/lib/muchos/azure.py
+++ b/lib/muchos/azure.py
@@ -54,6 +54,41 @@ class VmssCluster(ExistingCluster):
         print('name:', vmss_status.name,
               '\nprovisioning_state:', vmss_status.provisioning_state)
 
+    def terminate(self):
+        config = self.config
+        azure_config = dict(config.items("azure"))
+        azure_config["vmss_name"] = config.cluster_name
+        azure_config["deploy_path"] = config.deploy_path
+        azure_config = {k:  VmssCluster._parse_config_value(v)
+                        for k, v in azure_config.items()}
+        print("All of the Muchos resources provisioned in resource group '{0}'"
+              " will be deleted!".format(azure_config['resource_group']))
+
+        response = input("Do you want to continue? (y/n) ")
+        if response == "y":
+            subprocess.call(["ansible-playbook",
+                             join(config.deploy_path,
+                                  "ansible/roles/azure/tasks/terminate_cluster.yml"),
+                             "--extra-vars", json.dumps(azure_config)])
+        else:
+            print("Aborted termination")
+
+    def wipe(self):
+        self.execute_playbook("wipe.yml")
+        # Also wipe the ADLS Gen2 containers (filesystems) when ADLS Gen2 is in use
+        config = self.config
+        azure_config = dict(config.items("azure"))
+        azure_config["vmss_name"] = config.cluster_name
+        azure_config["cluster_type"] = config.get("general", "cluster_type")
+        azure_config["deploy_path"] = config.deploy_path
+        azure_config = {k:  VmssCluster._parse_config_value(v)
+                        for k, v in azure_config.items()}
+        retcode = subprocess.call(["ansible-playbook",
+                         join(config.deploy_path, "ansible/roles/azure/tasks/wipe_adlsg2.yml"),
+                         "--extra-vars", json.dumps(azure_config)])
+        if retcode != 0:
+            exit("ERROR - Command failed with return code of {0}".format(retcode))
+
     def _parse_config_value(v):
         if v.isdigit():
             return int(v)
diff --git a/lib/muchos/ec2.py b/lib/muchos/ec2.py
index 2973d87..e7fc78c 100644
--- a/lib/muchos/ec2.py
+++ b/lib/muchos/ec2.py
@@ -239,6 +239,8 @@ class Ec2Cluster(ExistingCluster):
         else:
             print("Aborted termination")
 
+    def wipe(self):
+        self.execute_playbook("wipe.yml")
 
 class Ec2ClusterTemplate(Ec2Cluster):
 
diff --git a/lib/muchos/existing.py b/lib/muchos/existing.py
index 523b2a7..c886531 100644
--- a/lib/muchos/existing.py
+++ b/lib/muchos/existing.py
@@ -230,12 +230,15 @@ class ExistingCluster:
             self.setup()
         elif action == 'ssh':
             self.ssh()
-        elif action in ('wipe', 'kill', 'cancel_shutdown'):
+        elif action == 'wipe':
+            if not isfile(self.config.hosts_path):
+                exit("Hosts file does not exist for cluster: " + self.config.hosts_path)
+            print("Killing all processes started by Muchos and wiping Muchos data from {0} cluster"
+                    .format(self.config.cluster_name))
+            self.wipe()
+        elif action in ('kill', 'cancel_shutdown'):
             if not isfile(self.config.hosts_path):
                 exit("Hosts file does not exist for cluster: " + self.config.hosts_path)
-            if action == 'wipe':
-                print("Killing all processes started by Muchos and wiping Muchos data from {0} cluster"
-                      .format(self.config.cluster_name))
             elif action == 'kill':
                 print("Killing all processes started by Muchos on {0} cluster".format(self.config.cluster_name))
             elif action == 'cancel_shutdown':