You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@fluo.apache.org by ar...@apache.org on 2020/01/21 18:32:39 UTC
[fluo-muchos] branch master updated: Implement terminate and wipe actions for Azure clusters (#306)
This is an automated email from the ASF dual-hosted git repository.
arvindsh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/fluo-muchos.git
The following commit(s) were added to refs/heads/master by this push:
new 337a80c Implement terminate and wipe actions for Azure clusters (#306)
337a80c is described below
commit 337a80c8257214cc04b765d5e454260aded982aa
Author: Shan <sh...@users.noreply.github.com>
AuthorDate: Tue Jan 21 13:32:29 2020 -0500
Implement terminate and wipe actions for Azure clusters (#306)
* Implement terminate and wipe actions for Azure clusters
Also adds wipe for Hub, Maven and Fluo YARN
* Added validation for hosts file.
* Moved azure specific code to azure.py
Organize Azure files to azure specific folder
---
README.md | 3 -
ansible/roles/azure/tasks/create_vmss.yml | 2 +
ansible/roles/azure/tasks/terminate_cluster.yml | 87 +++++++++++++++++++++++++
ansible/roles/azure/tasks/wipe_adlsg2.yml | 42 ++++++++++++
ansible/wipe.yml | 22 +++++++
lib/muchos/azure.py | 35 ++++++++++
lib/muchos/ec2.py | 2 +
lib/muchos/existing.py | 11 ++--
8 files changed, 197 insertions(+), 7 deletions(-)
diff --git a/README.md b/README.md
index 4dd1fcc..dc7a778 100644
--- a/README.md
+++ b/README.md
@@ -308,9 +308,6 @@ data on your cluster will be lost:
./bin/muchos terminate
-Note: The terminate command is currently unsupported for Azure based clusters. Instead, you should delete
-underlying Azure VMSS resources when you need to terminate the cluster.
-
## Automatic shutdown of clusters
With the default configuration, clusters will not shutdown automatically after a delay and the default
diff --git a/ansible/roles/azure/tasks/create_vmss.yml b/ansible/roles/azure/tasks/create_vmss.yml
index 9a6eb21..d7f5d08 100644
--- a/ansible/roles/azure/tasks/create_vmss.yml
+++ b/ansible/roles/azure/tasks/create_vmss.yml
@@ -95,6 +95,7 @@
- path: /home/{{admin_username}}/.ssh/authorized_keys
key_data: "{{ lookup('file', '~/.ssh/id_rsa.pub') }}"
os_disk_caching: ReadWrite
+ managed_disk_type: Standard_LRS
image:
offer: CentOS
publisher: OpenLogic
@@ -103,6 +104,7 @@
data_disks:
- lun: 0
disk_size_gb: 64
+ managed_disk_type: Standard_LRS
when: azure_proxy_host is defined and azure_proxy_host != '' and azure_proxy_host != None
# SECTION 3: Create the Azure VMSS for the nodes used by Muchos
diff --git a/ansible/roles/azure/tasks/terminate_cluster.yml b/ansible/roles/azure/tasks/terminate_cluster.yml
new file mode 100644
index 0000000..822656d
--- /dev/null
+++ b/ansible/roles/azure/tasks/terminate_cluster.yml
@@ -0,0 +1,87 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+---
+
+- hosts: localhost
+ tasks:
+ - name: Delete User Assigned Identity
+ azure_rm_resource:
+ resource_group: "{{ resource_group }}"
+ provider: ManagedIdentity
+ resource_type: userAssignedIdentities
+ resource_name: "{{ user_assigned_identity if user_assigned_identity !='' else vmss_name + '-ua-msi' }}"
+ api_version: '2018-11-30'
+ state: absent
+ when: use_adlsg2 == True
+
+ - name: Delete ADLS Gen2 storage Account
+ azure_rm_storageaccount:
+ resource_group: "{{ resource_group }}"
+ name: "{{ item.split('@')[1].split('.')[0] }}"
+ force_delete_nonempty: yes
+ state: absent
+ loop:
+ "{{ instance_volumes_preferred.split(',') }}"
+ when: use_adlsg2 == True
+
+ - name: Delete VM Scale Set
+ azure_rm_virtualmachinescaleset:
+ resource_group: "{{ resource_group }}"
+ name: "{{ vmss_name }}"
+ remove_on_absent: all
+ state: absent
+
+ - name: Delete azure proxy virtual machine if one was created
+ azure_rm_virtualmachine:
+ resource_group: "{{ resource_group }}"
+ name: "{{ azure_proxy_host }}"
+ remove_on_absent: all
+ state: absent
+ when: azure_proxy_host is defined and azure_proxy_host|length
+
+ - name: Delete corresponding network security group
+ azure_rm_securitygroup:
+ resource_group: "{{ resource_group }}"
+ name: "{{ azure_proxy_host }}-nsg"
+ state: absent
+ when: azure_proxy_host is defined and azure_proxy_host|length
+
+ - name: Get facts for virtual network
+ azure_rm_virtualnetwork_facts:
+ resource_group: "{{ resource_group }}"
+ name: "{{ vnet }}"
+ register: vnet_facts
+
+ - name: Delete corresponding virtual network if one was created
+ azure_rm_virtualnetwork:
+ resource_group: "{{ resource_group }}"
+ name: "{{ vnet }}"
+ state: absent
+ when: >
+ vnet_facts['ansible_facts']['azure_virtualnetworks']|length and
+ 'deployment_type' in vnet_facts['ansible_facts']['azure_virtualnetworks'][0]['tags'] and
+ vnet_facts['ansible_facts']['azure_virtualnetworks'][0]['tags']['deployment_type'] == 'muchos'
+
+ - name: Delete the resource group if it is empty
+ azure_rm_resourcegroup:
+ name: "{{ resource_group }}"
+ state: absent
+ retries: 30
+ delay: 15
+ register: resourcegroupresult
+ until: resourcegroupresult is not failed
diff --git a/ansible/roles/azure/tasks/wipe_adlsg2.yml b/ansible/roles/azure/tasks/wipe_adlsg2.yml
new file mode 100644
index 0000000..6278adc
--- /dev/null
+++ b/ansible/roles/azure/tasks/wipe_adlsg2.yml
@@ -0,0 +1,42 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+- hosts: localhost
+ tasks:
+ - name: Delete container/Filesystem on ADLS Gen2
+ azure_rm_storageblob:
+ resource_group: "{{ resource_group }}"
+ storage_account_name: "{{ item.split('@')[1].split('.')[0] }}"
+ container: "{{ item.split('@')[0].split('://')[1] }}"
+ state: absent
+ force: yes
+ loop:
+ "{{ instance_volumes_preferred.split(',') }}"
+ when: cluster_type == 'azure' and use_adlsg2 == True
+
+ - name: Create container/Filesystem on ADLS Gen2
+ azure_rm_storageblob:
+ resource_group: "{{ resource_group }}"
+ storage_account_name: "{{ item.split('@')[1].split('.')[0] }}"
+ container: "{{ item.split('@')[0].split('://')[1] }}"
+ retries: 20
+ delay: 30
+ register: result
+ until: result is succeeded and (result.changed == False or (result.changed == True and result.container|length > 0))
+ loop:
+ "{{ instance_volumes_preferred.split(',') }}"
+ when: cluster_type == 'azure' and use_adlsg2 == True
diff --git a/ansible/wipe.yml b/ansible/wipe.yml
index 186f443..2c048b1 100644
--- a/ansible/wipe.yml
+++ b/ansible/wipe.yml
@@ -27,7 +27,9 @@
file: path={{ default_data_dirs[0] }}/influxdb state=absent
- name: "wipe grafana db"
file: path=/var/lib/grafana/grafana.db state=absent
+
- import_playbook: kill.yml
+
- hosts: all
tasks:
- name: "wipe software installation dirs"
@@ -38,20 +40,40 @@
- "{{ accumulo_home }}"
- "{{ fluo_home }}"
- "{{ spark_home }}"
+ - "{{ maven_home }}"
+ - "{{ hub_home }}"
+ - "{{ fluo_yarn_home }}"
+
- hosts: hadoop
tasks:
- name: "wipe hadoop data"
file: path={{item}}/hadoop state=absent
with_items: "{{ worker_data_dirs }}"
+ when: cluster_type != 'azure'
- name: "remove hadoop logs"
shell: rm -rf {{ hadoop_home }}/logs/*
+ when: cluster_type != 'azure'
+
- hosts: zookeepers
tasks:
- name: "wipe zookeeper data"
file: path={{ default_data_dirs[0] }}/zookeeper state=absent
+ when: cluster_type != 'azure'
- name: "remove zookeeper logs"
file: path={{ zookeeper_home }}/zookeeper.out state=absent
+ when: cluster_type != 'azure'
+
- hosts: accumulo
tasks:
- name: "remove accumulo logs"
shell: rm -rf {{ accumulo_home }}/logs/*
+ when: cluster_type != 'azure'
+
+# Use rm -rf instead of the Ansible file module so that everything under each data directory (including nested subfolders) is removed while the directory itself is kept
+- hosts: all
+ tasks:
+ - name: "wipe data and log directories for Azure"
+ shell: rm -rf {{ item }}/*
+ loop:
+ "{{ worker_data_dirs }}"
+ when: cluster_type == 'azure'
diff --git a/lib/muchos/azure.py b/lib/muchos/azure.py
index c5d1cb6..075778b 100644
--- a/lib/muchos/azure.py
+++ b/lib/muchos/azure.py
@@ -54,6 +54,41 @@ class VmssCluster(ExistingCluster):
print('name:', vmss_status.name,
'\nprovisioning_state:', vmss_status.provisioning_state)
+ def terminate(self):
+ config = self.config
+ azure_config = dict(config.items("azure"))
+ azure_config["vmss_name"] = config.cluster_name
+ azure_config["deploy_path"] = config.deploy_path
+ azure_config = {k: VmssCluster._parse_config_value(v)
+ for k, v in azure_config.items()}
+ print("All of the Muchos resources provisioned in resource group '{0}'"
+ " will be deleted!".format(azure_config['resource_group']))
+
+ response = input("Do you want to continue? (y/n) ")
+ if response == "y":
+ subprocess.call(["ansible-playbook",
+ join(config.deploy_path,
+ "ansible/roles/azure/tasks/terminate_cluster.yml"),
+ "--extra-vars", json.dumps(azure_config)])
+ else:
+ print("Aborted termination")
+
+ def wipe(self):
+ self.execute_playbook("wipe.yml")
+ # Also wipe the ADLS Gen2 containers (deleted and recreated); the playbook's tasks only run when use_adlsg2 is enabled
+ config = self.config
+ azure_config = dict(config.items("azure"))
+ azure_config["vmss_name"] = config.cluster_name
+ azure_config["cluster_type"] = config.get("general", "cluster_type")
+ azure_config["deploy_path"] = config.deploy_path
+ azure_config = {k: VmssCluster._parse_config_value(v)
+ for k, v in azure_config.items()}
+ retcode = subprocess.call(["ansible-playbook",
+ join(config.deploy_path, "ansible/roles/azure/tasks/wipe_adlsg2.yml"),
+ "--extra-vars", json.dumps(azure_config)])
+ if retcode != 0:
+ exit("ERROR - Command failed with return code of {0}".format(retcode))
+
def _parse_config_value(v):
if v.isdigit():
return int(v)
diff --git a/lib/muchos/ec2.py b/lib/muchos/ec2.py
index 2973d87..e7fc78c 100644
--- a/lib/muchos/ec2.py
+++ b/lib/muchos/ec2.py
@@ -239,6 +239,8 @@ class Ec2Cluster(ExistingCluster):
else:
print("Aborted termination")
+ def wipe(self):
+ self.execute_playbook("wipe.yml")
class Ec2ClusterTemplate(Ec2Cluster):
diff --git a/lib/muchos/existing.py b/lib/muchos/existing.py
index 523b2a7..c886531 100644
--- a/lib/muchos/existing.py
+++ b/lib/muchos/existing.py
@@ -230,12 +230,15 @@ class ExistingCluster:
self.setup()
elif action == 'ssh':
self.ssh()
- elif action in ('wipe', 'kill', 'cancel_shutdown'):
+ elif action == 'wipe':
+ if not isfile(self.config.hosts_path):
+ exit("Hosts file does not exist for cluster: " + self.config.hosts_path)
+ print("Killing all processes started by Muchos and wiping Muchos data from {0} cluster"
+ .format(self.config.cluster_name))
+ self.wipe()
+ elif action in ('kill', 'cancel_shutdown'):
if not isfile(self.config.hosts_path):
exit("Hosts file does not exist for cluster: " + self.config.hosts_path)
- if action == 'wipe':
- print("Killing all processes started by Muchos and wiping Muchos data from {0} cluster"
- .format(self.config.cluster_name))
elif action == 'kill':
print("Killing all processes started by Muchos on {0} cluster".format(self.config.cluster_name))
elif action == 'cancel_shutdown':