You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@fluo.apache.org by mw...@apache.org on 2019/02/04 19:25:19 UTC
[fluo-muchos] branch master updated: More updates (#252)
This is an automated email from the ASF dual-hosted git repository.
mwalch pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/fluo-muchos.git
The following commit(s) were added to refs/heads/master by this push:
new 035a656 More updates (#252)
035a656 is described below
commit 035a656e5919ad19dff659f797ec8448e4e04b17
Author: Mike Walch <mw...@apache.org>
AuthorDate: Mon Feb 4 14:25:15 2019 -0500
More updates (#252)
* Add cluster_group to muchos.props
* Separate ec2 & existing cluster tasks into ec2.yml and existing.yml
* Make the installation dir configurable (install_dir)
* Make installing Hub configurable (install_hub)
---
ansible/common.yml | 5 ++++-
ansible/roles/common/tasks/{drives.yml => ec2.yml} | 21 ++++++++-----------
.../tasks/main.yml => common/tasks/existing.yml} | 10 +++------
ansible/roles/common/tasks/main.yml | 24 +++++++++++-----------
ansible/roles/common/tasks/os.yml | 2 +-
ansible/roles/common/tasks/ssh.yml | 6 +++---
ansible/roles/fluo/tasks/main.yml | 2 +-
ansible/roles/fluo_yarn/tasks/main.yml | 2 +-
ansible/roles/hadoop/tasks/main.yml | 2 +-
ansible/roles/spark/tasks/main.yml | 2 +-
conf/muchos.props.example | 11 +++++++---
lib/muchos/config.py | 11 +++++++---
lib/muchos/existing.py | 13 +++++-------
lib/tests/test_config.py | 3 ++-
14 files changed, 59 insertions(+), 55 deletions(-)
diff --git a/ansible/common.yml b/ansible/common.yml
index 95f7d85..ef26613 100644
--- a/ansible/common.yml
+++ b/ansible/common.yml
@@ -32,4 +32,7 @@
tasks:
- import_tasks: roles/common/tasks/ssh.yml
- import_tasks: roles/common/tasks/os.yml
- - import_tasks: roles/common/tasks/drives.yml
+ - import_tasks: roles/common/tasks/ec2.yml
+ when: cluster_type == 'ec2'
+ - import_tasks: roles/common/tasks/existing.yml
+ when: cluster_type == 'existing'
diff --git a/ansible/roles/common/tasks/drives.yml b/ansible/roles/common/tasks/ec2.yml
similarity index 79%
rename from ansible/roles/common/tasks/drives.yml
rename to ansible/roles/common/tasks/ec2.yml
index 4f3c315..63142c6 100644
--- a/ansible/roles/common/tasks/drives.yml
+++ b/ansible/roles/common/tasks/ec2.yml
@@ -15,26 +15,23 @@
# limitations under the License.
#
-- name: "unmount default drive at /mnt"
+- name: "ec2 - unmount default drive at /mnt"
mount: name=/mnt src=/dev/xvdb fstype=auto state=unmounted
- when: cluster_type == 'ec2'
-- name: "unmount all ephemeral"
+- name: "ec2 - unmount all ephemeral"
mount: name={{ item.0 }} src={{ item.1 }} fstype=auto state=unmounted
- when: cluster_type == 'ec2' and force_format == 'yes'
- with_together:
+ when: force_format == 'yes'
+ with_together:
- "{{ node_type_map[node_type].mounts }}"
- "{{ node_type_map[node_type].devices }}"
-- name: "format drives"
+- name: "ec2 - format drives"
filesystem: fstype={{ fstype }} dev={{ item }} force={{ force_format }}
- when: cluster_type == 'ec2'
with_items: "{{ node_type_map[node_type].devices }}"
-- name: "mount drives"
+- name: "ec2 - mount drives"
mount: name={{ item.0 }} src={{ item.1 }} fstype=auto state=mounted
opts=defaults,nofail,noatime,nodiratime,comment=cloudconfig passno=2
- when: cluster_type == 'ec2'
- with_together:
+ with_together:
- "{{ node_type_map[node_type].mounts }}"
- "{{ node_type_map[node_type].devices }}"
-- name: "ensure drives are owned by cluster user"
- file: path={{ item }} state=directory owner={{ cluster_user }} group={{ cluster_user }}
+- name: "ec2 - ensure drives are owned by cluster user"
+ file: path={{ item }} state=directory owner={{ cluster_user }} group={{ cluster_group }}
with_items: "{{ node_type_map[node_type].mounts }}"
diff --git a/ansible/roles/spark/tasks/main.yml b/ansible/roles/common/tasks/existing.yml
similarity index 69%
copy from ansible/roles/spark/tasks/main.yml
copy to ansible/roles/common/tasks/existing.yml
index c5e9272..15a882c 100644
--- a/ansible/roles/spark/tasks/main.yml
+++ b/ansible/roles/common/tasks/existing.yml
@@ -15,10 +15,6 @@
# limitations under the License.
#
-- name: "install spark tarball"
- unarchive: src={{ tarballs_dir }}/{{ spark_tarball }} dest={{ install_dir }} creates={{ spark_home }} copy=yes
-- name: "configure spark"
- template: src={{ item }} dest={{ spark_home }}/conf/{{ item }} owner={{ cluster_user }} group={{ cluster_user }}
- with_items:
- - spark-defaults.conf
- - spark-env.sh
+- name: "existing - verify data dirs exist and are owned by cluster user"
+ file: path={{ item }} state=directory owner={{ cluster_user }} group={{ cluster_group }}
+ with_items: "{{ default_data_dirs }}"
diff --git a/ansible/roles/common/tasks/main.yml b/ansible/roles/common/tasks/main.yml
index bea44ad..d409dd4 100644
--- a/ansible/roles/common/tasks/main.yml
+++ b/ansible/roles/common/tasks/main.yml
@@ -30,20 +30,20 @@
- java-1.8.0-openjdk-devel
state: present
- name: "configure node shutdown"
- shell: shutdown +{{ shutdown_delay_minutes }} &> {{ cluster_basedir }}/.shutdown creates={{ cluster_basedir}}/.shutdown
+ shell: shutdown +{{ shutdown_delay_minutes }} &> {{ user_home }}/.shutdown creates={{ user_home }}/.shutdown
when: shutdown_delay_minutes > 0
- name: "create install directory on all hosts"
- file: path={{ install_dir }} state=directory owner={{ cluster_user }} group={{ cluster_user }}
-- name: "install maven & hub"
- unarchive: src={{ tarballs_dir }}/{{ item.tarball }} dest={{ install_dir }} creates={{ item.install }}
- with_items:
- - { tarball: "{{ maven_tarball }}", install: "{{ maven_home }}" }
- - { tarball: "{{ hub_tarball }}", install: "{{ hub_home }}" }
-- name: "set correct owner & group for maven & hub home"
- file: path={{ item }} recurse=yes owner={{ cluster_user }} group={{ cluster_user}}
- with_items:
- - "{{ maven_home }}"
- - "{{ hub_home }}"
+ file: path={{ install_dir }} state=directory owner={{ cluster_user }} group={{ cluster_group }}
+- name: "install maven"
+ unarchive: src={{ tarballs_dir }}/{{ maven_tarball }} dest={{ install_dir }} creates={{ maven_home }}
+- name: "chown maven home"
+ file: path={{ maven_home }} recurse=yes owner={{ cluster_user }} group={{ cluster_group}}
+- name: "install hub"
+ unarchive: src={{ tarballs_dir }}/{{ hub_tarball }} dest={{ install_dir }} creates={{ hub_home }}
+ when: install_hub
+- name: "chown hub home"
+ file: path={{ hub_home }} recurse=yes owner={{ cluster_user }} group={{ cluster_group}}
+ when: install_hub
- name: "configure collectd"
template: src=collectd.conf dest=/etc/collectd.conf
when: "'metrics' in groups"
diff --git a/ansible/roles/common/tasks/os.yml b/ansible/roles/common/tasks/os.yml
index 0b6b183..8e6f02b 100644
--- a/ansible/roles/common/tasks/os.yml
+++ b/ansible/roles/common/tasks/os.yml
@@ -22,7 +22,7 @@
- name: "copy new limits.conf"
copy: src=roles/common/files/limits.conf dest=/etc/security/limits.conf
- name: "configure user shell"
- template: src=roles/common/templates/{{ item }} dest=/home/{{ cluster_user }}/.{{ item }} owner={{ cluster_user }} group={{ cluster_user }} mode=0644
+ template: src=roles/common/templates/{{ item }} dest=/home/{{ cluster_user }}/.{{ item }} owner={{ cluster_user }} group={{ cluster_group }} mode=0644
with_items:
- bashrc
- bash_profile
diff --git a/ansible/roles/common/tasks/ssh.yml b/ansible/roles/common/tasks/ssh.yml
index 61e1a59..0e2e2af 100644
--- a/ansible/roles/common/tasks/ssh.yml
+++ b/ansible/roles/common/tasks/ssh.yml
@@ -18,12 +18,12 @@
- name: "ensure cluster user exists but don't generate ssh key"
user: name={{ cluster_user }} generate_ssh_key=no state=present
- name: "copy cluster private key to all nodes"
- copy: src=/home/{{ cluster_user }}/.ssh/id_rsa dest=/home/{{ cluster_user }}/.ssh/id_rsa owner={{ cluster_user }} group={{ cluster_user }} mode=0600
+ copy: src=/home/{{ cluster_user }}/.ssh/id_rsa dest=/home/{{ cluster_user }}/.ssh/id_rsa owner={{ cluster_user }} group={{ cluster_group }} mode=0600
- name: "copy cluster public key to all nodes"
- copy: src=/home/{{ cluster_user }}/.ssh/id_rsa.pub dest=/home/{{ cluster_user }}/.ssh/id_rsa.pub owner={{ cluster_user }} group={{ cluster_user }} mode=0644
+ copy: src=/home/{{ cluster_user }}/.ssh/id_rsa.pub dest=/home/{{ cluster_user }}/.ssh/id_rsa.pub owner={{ cluster_user }} group={{ cluster_group }} mode=0644
- name: "add cluster user to ~/.ssh/authorized_keys"
authorized_key: user={{ cluster_user }} key="{{ lookup('file', '/home/' + cluster_user + '/.ssh/id_rsa.pub') }}"
- name: "add conf/keys to ~/.ssh/authorized_keys"
authorized_key: user={{ cluster_user }} key="{{ lookup('file', 'conf/keys') }}"
- name: "set ssh config"
- copy: src=roles/common/files/ssh_config dest=/home/{{ cluster_user}}/.ssh/config owner={{ cluster_user }} group={{ cluster_user }} mode=0600
+ copy: src=roles/common/files/ssh_config dest=/home/{{ cluster_user}}/.ssh/config owner={{ cluster_user }} group={{ cluster_group }} mode=0600
diff --git a/ansible/roles/fluo/tasks/main.yml b/ansible/roles/fluo/tasks/main.yml
index aa8062c..103b69e 100644
--- a/ansible/roles/fluo/tasks/main.yml
+++ b/ansible/roles/fluo/tasks/main.yml
@@ -38,4 +38,4 @@
- name: "fetch extra fluo dependencies"
command: "{{ fluo_home }}/lib/fetch.sh extra"
- name: "set correct owner & group"
- file: path={{ fluo_home }} recurse=yes owner={{ cluster_user }} group={{ cluster_user}}
+ file: path={{ fluo_home }} recurse=yes owner={{ cluster_user }} group={{ cluster_group}}
diff --git a/ansible/roles/fluo_yarn/tasks/main.yml b/ansible/roles/fluo_yarn/tasks/main.yml
index c0b956a..06543b5 100644
--- a/ansible/roles/fluo_yarn/tasks/main.yml
+++ b/ansible/roles/fluo_yarn/tasks/main.yml
@@ -22,4 +22,4 @@
- name: "fetch extra fluo-yarn dependencies"
command: "{{ fluo_yarn_home }}/lib/fetch.sh extra"
- name: "set correct owner & group"
- file: path={{ fluo_yarn_home }} recurse=yes owner={{ cluster_user }} group={{ cluster_user}}
+ file: path={{ fluo_yarn_home }} recurse=yes owner={{ cluster_user }} group={{ cluster_group }}
diff --git a/ansible/roles/hadoop/tasks/main.yml b/ansible/roles/hadoop/tasks/main.yml
index 310dab0..24214e7 100644
--- a/ansible/roles/hadoop/tasks/main.yml
+++ b/ansible/roles/hadoop/tasks/main.yml
@@ -38,7 +38,7 @@
command: cp {{ spark_home }}/yarn/spark-{{ spark_version }}-yarn-shuffle.jar {{ hadoop_home }}/share/hadoop/yarn/lib/ creates={{ hadoop_home }}/share/hadoop/yarn/lib/spark-{{ spark_version }}-yarn-shuffle.jar
when: "'spark' in groups"
- name: "setup hadoop short circuit socket dir"
- file: path=/var/lib/hadoop-hdfs state=directory owner={{ cluster_user }} group={{ cluster_user }} mode=0755
+ file: path=/var/lib/hadoop-hdfs state=directory owner={{ cluster_user }} group={{ cluster_group }} mode=0755
become: yes
- name: "Configure hadoop log dir"
replace:
diff --git a/ansible/roles/spark/tasks/main.yml b/ansible/roles/spark/tasks/main.yml
index c5e9272..4310754 100644
--- a/ansible/roles/spark/tasks/main.yml
+++ b/ansible/roles/spark/tasks/main.yml
@@ -18,7 +18,7 @@
- name: "install spark tarball"
unarchive: src={{ tarballs_dir }}/{{ spark_tarball }} dest={{ install_dir }} creates={{ spark_home }} copy=yes
- name: "configure spark"
- template: src={{ item }} dest={{ spark_home }}/conf/{{ item }} owner={{ cluster_user }} group={{ cluster_user }}
+ template: src={{ item }} dest={{ spark_home }}/conf/{{ item }} owner={{ cluster_user }} group={{ cluster_group }}
with_items:
- spark-defaults.conf
- spark-env.sh
diff --git a/conf/muchos.props.example b/conf/muchos.props.example
index 899beef..626cf1d 100644
--- a/conf/muchos.props.example
+++ b/conf/muchos.props.example
@@ -19,9 +19,12 @@ cluster_type = ec2
# Cluster user name (install command will SSH to cluster using this user)
# Leave default below if launching cluster in AWS
cluster_user = centos
-# Cluster base directory where install/ & tarballs/ directories are created
-# Leave default below if launching cluster in AWS
-cluster_basedir = /home/centos
+# Cluster user group
+cluster_group = %(cluster_user)s
+# Cluster user home directory
+user_home = /home/%(cluster_user)s
+# Install directory where Hadoop, Accumulo, etc will be installed
+install_dir = %(user_home)s/install
# Hostname of proxy node that Muchos will use to direct installation of cluster. Will be given
# public IP if launching in EC2. If not launching in EC2, node must have public IP that can be reached
# from your machine. Hostname can be chosen from "nodes" section below.
@@ -41,6 +44,8 @@ fluo_yarn_version = 1.0.0
accumulo_version = 2.0.0-alpha-1
# Specifies if software should be downloaded. If 'False', tarballs of the software above should be in conf/upload/
download_software = True
+# Install Hub (for GitHub)
+install_hub = True
[ec2]
# AWS machine image to use. The default below is for a CentOS 7 image (in us-east-1).
diff --git a/lib/muchos/config.py b/lib/muchos/config.py
index ace1532..8578dc8 100644
--- a/lib/muchos/config.py
+++ b/lib/muchos/config.py
@@ -99,6 +99,9 @@ class DeployConfig(ConfigParser):
return 'worker'
return 'default'
+ def user_home(self):
+ return self.get('general', 'user_home')
+
def mounts(self, num_ephemeral):
mounts = []
for i in range(0, num_ephemeral):
@@ -335,7 +338,7 @@ HOST_VAR_DEFAULTS = {
'accumulo_tarball': 'accumulo-{{ accumulo_version }}-bin.tar.gz',
'accumulo_version': None,
'cluster_type': None,
- 'cluster_basedir': None,
+ 'cluster_group': None,
'cluster_user': None,
'default_data_dirs': None,
'download_software': None,
@@ -350,7 +353,8 @@ HOST_VAR_DEFAULTS = {
'hadoop_version': None,
'hadoop_major_version': '"{{ hadoop_version.split(\'.\')[0] }}"',
'hdfs_root': 'hdfs://{{ groups[\'namenode\'][0] }}:8020',
- 'install_dir': '"{{ cluster_basedir }}/install"',
+ 'install_dir': None,
+ 'install_hub': None,
'java_home': '"/usr/lib/jvm/java-1.8.0-openjdk"',
'maven_home': '"{{ install_dir }}/apache-maven-{{ maven_version }}"',
'maven_tarball': 'apache-maven-{{ maven_version }}-bin.tar.gz',
@@ -358,7 +362,8 @@ HOST_VAR_DEFAULTS = {
'spark_home': '"{{ install_dir }}/spark-{{ spark_version }}-bin-without-hadoop"',
'spark_tarball': 'spark-{{ spark_version }}-bin-without-hadoop.tgz',
'spark_version': None,
- 'tarballs_dir': '"{{ cluster_basedir }}/tarballs"',
+ 'tarballs_dir': '"{{ user_home }}/tarballs"',
+ 'user_home': None,
'worker_data_dirs': None,
'zookeeper_connect': '"{{ groups[\'zookeepers\']|join(\',\') }}"',
'zookeeper_client_port': '"2181"',
diff --git a/lib/muchos/existing.py b/lib/muchos/existing.py
index 6f79a4e..6b9247a 100644
--- a/lib/muchos/existing.py
+++ b/lib/muchos/existing.py
@@ -140,14 +140,13 @@ class ExistingCluster:
else:
open(ansible_keys, 'w').close()
- basedir = config.get('general', 'cluster_basedir')
cmd = "rsync -az --delete -e \"ssh -o 'StrictHostKeyChecking no'\""
subprocess.call("{cmd} {src} {usr}@{ldr}:{tdir}".format(cmd=cmd, src=join(config.deploy_path, "ansible"),
usr=config.get('general', 'cluster_user'),
- ldr=config.get_proxy_ip(), tdir=basedir),
+ ldr=config.get_proxy_ip(), tdir=config.user_home()),
shell=True)
- self.exec_on_proxy_verified("{0}/ansible/scripts/install_ansible.sh".format(basedir), opts='-t')
+ self.exec_on_proxy_verified("{0}/ansible/scripts/install_ansible.sh".format(config.user_home()), opts='-t')
def setup(self):
config = self.config
@@ -156,8 +155,7 @@ class ExistingCluster:
self.sync()
conf_upload = join(config.deploy_path, "conf/upload")
- basedir = config.get('general', 'cluster_basedir')
- cluster_tarballs = "{0}/tarballs".format(basedir)
+ cluster_tarballs = "{0}/tarballs".format(config.user_home())
self.exec_on_proxy_verified("mkdir -p {0}".format(cluster_tarballs))
for f in listdir(conf_upload):
tarball_path = join(conf_upload, f)
@@ -212,15 +210,14 @@ class ExistingCluster:
def execute_playbook(self, playbook):
print("Executing '{0}' playbook".format(playbook))
- basedir = self.config.get('general', 'cluster_basedir')
self.exec_on_proxy_verified("time -p ansible-playbook {base}/ansible/{playbook}"
- .format(base=basedir, playbook=playbook), opts='-t')
+ .format(base=self.config.user_home(), playbook=playbook), opts='-t')
def send_to_proxy(self, path, target, skip_if_exists=True):
print("Copying to proxy: ", path)
cmd = "scp -o 'StrictHostKeyChecking no'"
if skip_if_exists:
- cmd = "rsync --update --progress -e \"ssh -o 'StrictHostKeyChecking no'\""
+ cmd = "rsync --update --progress -e \"ssh -q -o 'StrictHostKeyChecking no'\""
subprocess.call("{cmd} {src} {usr}@{ldr}:{tdir}".format(
cmd=cmd, src=path, usr=self.config.get('general', 'cluster_user'), ldr=self.config.get_proxy_ip(),
tdir=target), shell=True)
diff --git a/lib/tests/test_config.py b/lib/tests/test_config.py
index c69fc9a..d2a9fe4 100644
--- a/lib/tests/test_config.py
+++ b/lib/tests/test_config.py
@@ -26,6 +26,7 @@ def test_ec2_cluster():
assert c.get('ec2', 'default_instance_type') == 'm5d.large'
assert c.get('ec2', 'worker_instance_type') == 'm5d.large'
assert c.get('ec2', 'aws_ami') == 'ami-9887c6e7'
+ assert c.user_home() == '/home/centos'
assert c.max_ephemeral() == 1
assert c.mount_root() == '/media/ephemeral'
assert c.fstype() == 'ext3'
@@ -67,8 +68,8 @@ def test_ec2_cluster():
assert c.get('general', 'proxy_hostname') == "leader1"
assert c.proxy_public_ip() == "23.0.0.0"
assert c.proxy_private_ip() == "10.0.0.0"
- assert c.get('general', 'cluster_basedir') == "/home/centos"
assert c.get('general', 'cluster_user') == "centos"
+ assert c.get('general', 'cluster_group') == "centos"
assert c.get_non_proxy() == [('10.0.0.1', 'leader2'), ('10.0.0.2', 'worker1'), ('10.0.0.3', 'worker2'),
('10.0.0.4', 'worker3'), ('10.0.0.5', 'worker4')]
assert c.get_host_services() == [('leader1', 'namenode resourcemanager accumulomaster zookeeper'),