You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@fluo.apache.org by mw...@apache.org on 2019/02/04 19:25:19 UTC

[fluo-muchos] branch master updated: More updates (#252)

This is an automated email from the ASF dual-hosted git repository.

mwalch pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/fluo-muchos.git


The following commit(s) were added to refs/heads/master by this push:
     new 035a656  More updates (#252)
035a656 is described below

commit 035a656e5919ad19dff659f797ec8448e4e04b17
Author: Mike Walch <mw...@apache.org>
AuthorDate: Mon Feb 4 14:25:15 2019 -0500

    More updates (#252)
    
    * Add cluster_group to muchos.props
    * Separated ec2 & existing commands
    * Installation dir is configurable
    * Installing Hub is configurable
---
 ansible/common.yml                                 |  5 ++++-
 ansible/roles/common/tasks/{drives.yml => ec2.yml} | 21 ++++++++-----------
 .../tasks/main.yml => common/tasks/existing.yml}   | 10 +++------
 ansible/roles/common/tasks/main.yml                | 24 +++++++++++-----------
 ansible/roles/common/tasks/os.yml                  |  2 +-
 ansible/roles/common/tasks/ssh.yml                 |  6 +++---
 ansible/roles/fluo/tasks/main.yml                  |  2 +-
 ansible/roles/fluo_yarn/tasks/main.yml             |  2 +-
 ansible/roles/hadoop/tasks/main.yml                |  2 +-
 ansible/roles/spark/tasks/main.yml                 |  2 +-
 conf/muchos.props.example                          | 11 +++++++---
 lib/muchos/config.py                               | 11 +++++++---
 lib/muchos/existing.py                             | 13 +++++-------
 lib/tests/test_config.py                           |  3 ++-
 14 files changed, 59 insertions(+), 55 deletions(-)

diff --git a/ansible/common.yml b/ansible/common.yml
index 95f7d85..ef26613 100644
--- a/ansible/common.yml
+++ b/ansible/common.yml
@@ -32,4 +32,7 @@
   tasks:
     - import_tasks: roles/common/tasks/ssh.yml
     - import_tasks: roles/common/tasks/os.yml
-    - import_tasks: roles/common/tasks/drives.yml
+    - import_tasks: roles/common/tasks/ec2.yml
+      when: cluster_type == 'ec2'
+    - import_tasks: roles/common/tasks/existing.yml
+      when: cluster_type == 'existing'
diff --git a/ansible/roles/common/tasks/drives.yml b/ansible/roles/common/tasks/ec2.yml
similarity index 79%
rename from ansible/roles/common/tasks/drives.yml
rename to ansible/roles/common/tasks/ec2.yml
index 4f3c315..63142c6 100644
--- a/ansible/roles/common/tasks/drives.yml
+++ b/ansible/roles/common/tasks/ec2.yml
@@ -15,26 +15,23 @@
 # limitations under the License.
 #
 
-- name: "unmount default drive at /mnt"
+- name: "ec2 - unmount default drive at /mnt"
   mount: name=/mnt src=/dev/xvdb fstype=auto state=unmounted
-  when: cluster_type == 'ec2'
-- name: "unmount all ephemeral"
+- name: "ec2 - unmount all ephemeral"
   mount: name={{ item.0 }} src={{ item.1 }} fstype=auto state=unmounted
-  when: cluster_type == 'ec2' and force_format == 'yes'
-  with_together: 
+  when: force_format == 'yes'
+  with_together:
     - "{{ node_type_map[node_type].mounts }}"
     - "{{ node_type_map[node_type].devices }}"
-- name: "format drives"
+- name: "ec2 - format drives"
   filesystem: fstype={{ fstype }} dev={{ item }} force={{ force_format }}
-  when: cluster_type == 'ec2'
   with_items: "{{ node_type_map[node_type].devices }}"
-- name: "mount drives"
+- name: "ec2 - mount drives"
   mount: name={{ item.0 }} src={{ item.1 }} fstype=auto state=mounted
     opts=defaults,nofail,noatime,nodiratime,comment=cloudconfig passno=2
-  when: cluster_type == 'ec2'
-  with_together: 
+  with_together:
     - "{{ node_type_map[node_type].mounts }}"
     - "{{ node_type_map[node_type].devices }}"
-- name: "ensure drives are owned by cluster user"
-  file: path={{ item }} state=directory owner={{ cluster_user }} group={{ cluster_user }} 
+- name: "ec2 - ensure drives are owned by cluster user"
+  file: path={{ item }} state=directory owner={{ cluster_user }} group={{ cluster_group }}
   with_items: "{{ node_type_map[node_type].mounts }}"
diff --git a/ansible/roles/spark/tasks/main.yml b/ansible/roles/common/tasks/existing.yml
similarity index 69%
copy from ansible/roles/spark/tasks/main.yml
copy to ansible/roles/common/tasks/existing.yml
index c5e9272..15a882c 100644
--- a/ansible/roles/spark/tasks/main.yml
+++ b/ansible/roles/common/tasks/existing.yml
@@ -15,10 +15,6 @@
 # limitations under the License.
 #
 
-- name: "install spark tarball"
-  unarchive: src={{ tarballs_dir }}/{{ spark_tarball }} dest={{ install_dir }} creates={{ spark_home }} copy=yes
-- name: "configure spark"
-  template: src={{ item }} dest={{ spark_home }}/conf/{{ item }} owner={{ cluster_user }} group={{ cluster_user }}
-  with_items:
-    - spark-defaults.conf
-    - spark-env.sh
+- name: "existing - verify data dirs exist and are owned by cluster user"
+  file: path={{ item }} state=directory owner={{ cluster_user }} group={{ cluster_group }}
+  with_items: "{{ default_data_dirs }}"
diff --git a/ansible/roles/common/tasks/main.yml b/ansible/roles/common/tasks/main.yml
index bea44ad..d409dd4 100644
--- a/ansible/roles/common/tasks/main.yml
+++ b/ansible/roles/common/tasks/main.yml
@@ -30,20 +30,20 @@
       - java-1.8.0-openjdk-devel
     state: present
 - name: "configure node shutdown"
-  shell: shutdown +{{ shutdown_delay_minutes }} &> {{ cluster_basedir }}/.shutdown creates={{ cluster_basedir}}/.shutdown
+  shell: shutdown +{{ shutdown_delay_minutes }} &> {{ user_home }}/.shutdown creates={{ user_home }}/.shutdown
   when: shutdown_delay_minutes > 0
 - name: "create install directory on all hosts"
-  file: path={{ install_dir }} state=directory owner={{ cluster_user }} group={{ cluster_user }}
-- name: "install maven & hub"
-  unarchive: src={{ tarballs_dir }}/{{ item.tarball }} dest={{ install_dir }} creates={{ item.install }}
-  with_items:
-    - { tarball: "{{ maven_tarball }}", install: "{{ maven_home }}" } 
-    - { tarball: "{{ hub_tarball }}", install: "{{ hub_home }}" } 
-- name: "set correct owner & group for maven & hub home"
-  file: path={{ item }} recurse=yes owner={{ cluster_user }} group={{ cluster_user}}
-  with_items:
-    - "{{ maven_home }}"
-    - "{{ hub_home }}"
+  file: path={{ install_dir }} state=directory owner={{ cluster_user }} group={{ cluster_group }}
+- name: "install maven"
+  unarchive: src={{ tarballs_dir }}/{{ maven_tarball }} dest={{ install_dir }} creates={{ maven_home }}
+- name: "chown maven home"
+  file: path={{ maven_home }} recurse=yes owner={{ cluster_user }} group={{ cluster_group}}
+- name: "install hub"
+  unarchive: src={{ tarballs_dir }}/{{ hub_tarball }} dest={{ install_dir }} creates={{ hub_home }}
+  when: install_hub
+- name: "chown hub home"
+  file: path={{ hub_home }} recurse=yes owner={{ cluster_user }} group={{ cluster_group}}
+  when: install_hub
 - name: "configure collectd"
   template: src=collectd.conf dest=/etc/collectd.conf
   when: "'metrics' in groups"
diff --git a/ansible/roles/common/tasks/os.yml b/ansible/roles/common/tasks/os.yml
index 0b6b183..8e6f02b 100644
--- a/ansible/roles/common/tasks/os.yml
+++ b/ansible/roles/common/tasks/os.yml
@@ -22,7 +22,7 @@
 - name: "copy new limits.conf"
   copy: src=roles/common/files/limits.conf dest=/etc/security/limits.conf
 - name: "configure user shell"
-  template: src=roles/common/templates/{{ item }} dest=/home/{{ cluster_user }}/.{{ item }} owner={{ cluster_user }} group={{ cluster_user }} mode=0644
+  template: src=roles/common/templates/{{ item }} dest=/home/{{ cluster_user }}/.{{ item }} owner={{ cluster_user }} group={{ cluster_group }} mode=0644
   with_items:
     - bashrc
     - bash_profile
diff --git a/ansible/roles/common/tasks/ssh.yml b/ansible/roles/common/tasks/ssh.yml
index 61e1a59..0e2e2af 100644
--- a/ansible/roles/common/tasks/ssh.yml
+++ b/ansible/roles/common/tasks/ssh.yml
@@ -18,12 +18,12 @@
 - name: "ensure cluster user exists but don't generate ssh key" 
   user: name={{ cluster_user }} generate_ssh_key=no state=present
 - name: "copy cluster private key to all nodes"
-  copy: src=/home/{{ cluster_user }}/.ssh/id_rsa dest=/home/{{ cluster_user }}/.ssh/id_rsa owner={{ cluster_user }} group={{ cluster_user }} mode=0600
+  copy: src=/home/{{ cluster_user }}/.ssh/id_rsa dest=/home/{{ cluster_user }}/.ssh/id_rsa owner={{ cluster_user }} group={{ cluster_group }} mode=0600
 - name: "copy cluster public key to all nodes"
-  copy: src=/home/{{ cluster_user }}/.ssh/id_rsa.pub dest=/home/{{ cluster_user }}/.ssh/id_rsa.pub owner={{ cluster_user }} group={{ cluster_user }} mode=0644
+  copy: src=/home/{{ cluster_user }}/.ssh/id_rsa.pub dest=/home/{{ cluster_user }}/.ssh/id_rsa.pub owner={{ cluster_user }} group={{ cluster_group }} mode=0644
 - name: "add cluster user to ~/.ssh/authorized_keys"
   authorized_key: user={{ cluster_user }} key="{{ lookup('file', '/home/' + cluster_user + '/.ssh/id_rsa.pub') }}"
 - name: "add conf/keys to ~/.ssh/authorized_keys"
   authorized_key: user={{ cluster_user }} key="{{ lookup('file', 'conf/keys') }}"
 - name: "set ssh config"
-  copy: src=roles/common/files/ssh_config dest=/home/{{ cluster_user}}/.ssh/config owner={{ cluster_user }} group={{ cluster_user }} mode=0600
+  copy: src=roles/common/files/ssh_config dest=/home/{{ cluster_user}}/.ssh/config owner={{ cluster_user }} group={{ cluster_group }} mode=0600
diff --git a/ansible/roles/fluo/tasks/main.yml b/ansible/roles/fluo/tasks/main.yml
index aa8062c..103b69e 100644
--- a/ansible/roles/fluo/tasks/main.yml
+++ b/ansible/roles/fluo/tasks/main.yml
@@ -38,4 +38,4 @@
 - name: "fetch extra fluo dependencies"
   command: "{{ fluo_home }}/lib/fetch.sh extra"
 - name: "set correct owner & group"
-  file: path={{ fluo_home }} recurse=yes owner={{ cluster_user }} group={{ cluster_user}}
+  file: path={{ fluo_home }} recurse=yes owner={{ cluster_user }} group={{ cluster_group}}
diff --git a/ansible/roles/fluo_yarn/tasks/main.yml b/ansible/roles/fluo_yarn/tasks/main.yml
index c0b956a..06543b5 100644
--- a/ansible/roles/fluo_yarn/tasks/main.yml
+++ b/ansible/roles/fluo_yarn/tasks/main.yml
@@ -22,4 +22,4 @@
 - name: "fetch extra fluo-yarn dependencies"
   command: "{{ fluo_yarn_home }}/lib/fetch.sh extra"
 - name: "set correct owner & group"
-  file: path={{ fluo_yarn_home }} recurse=yes owner={{ cluster_user }} group={{ cluster_user}}
+  file: path={{ fluo_yarn_home }} recurse=yes owner={{ cluster_user }} group={{ cluster_group }}
diff --git a/ansible/roles/hadoop/tasks/main.yml b/ansible/roles/hadoop/tasks/main.yml
index 310dab0..24214e7 100644
--- a/ansible/roles/hadoop/tasks/main.yml
+++ b/ansible/roles/hadoop/tasks/main.yml
@@ -38,7 +38,7 @@
   command: cp {{ spark_home }}/yarn/spark-{{ spark_version }}-yarn-shuffle.jar {{ hadoop_home }}/share/hadoop/yarn/lib/ creates={{ hadoop_home }}/share/hadoop/yarn/lib/spark-{{ spark_version }}-yarn-shuffle.jar
   when: "'spark' in groups"
 - name: "setup hadoop short circuit socket dir"
-  file: path=/var/lib/hadoop-hdfs state=directory owner={{ cluster_user }} group={{ cluster_user }} mode=0755
+  file: path=/var/lib/hadoop-hdfs state=directory owner={{ cluster_user }} group={{ cluster_group }} mode=0755
   become: yes
 - name: "Configure hadoop log dir"
   replace:
diff --git a/ansible/roles/spark/tasks/main.yml b/ansible/roles/spark/tasks/main.yml
index c5e9272..4310754 100644
--- a/ansible/roles/spark/tasks/main.yml
+++ b/ansible/roles/spark/tasks/main.yml
@@ -18,7 +18,7 @@
 - name: "install spark tarball"
   unarchive: src={{ tarballs_dir }}/{{ spark_tarball }} dest={{ install_dir }} creates={{ spark_home }} copy=yes
 - name: "configure spark"
-  template: src={{ item }} dest={{ spark_home }}/conf/{{ item }} owner={{ cluster_user }} group={{ cluster_user }}
+  template: src={{ item }} dest={{ spark_home }}/conf/{{ item }} owner={{ cluster_user }} group={{ cluster_group }}
   with_items:
     - spark-defaults.conf
     - spark-env.sh
diff --git a/conf/muchos.props.example b/conf/muchos.props.example
index 899beef..626cf1d 100644
--- a/conf/muchos.props.example
+++ b/conf/muchos.props.example
@@ -19,9 +19,12 @@ cluster_type = ec2
 # Cluster user name (install command will SSH to cluster using this user)
 # Leave default below if launching cluster in AWS
 cluster_user = centos
-# Cluster base directory where install/ & tarballs/ directories are created
-# Leave default below if launching cluster in AWS
-cluster_basedir = /home/centos
+# Cluster user group
+cluster_group = %(cluster_user)s
+# Cluster user home directory
+user_home = /home/%(cluster_user)s
+# Install directory where Hadoop, Accumulo, etc will be installed
+install_dir = %(user_home)s/install
 # Hostname of proxy node that Muchos will use to direct installation of cluster.  Will be given
 # public IP if launching in EC2.  If not launching in EC2, node must have public IP that can be reached
 # from your machine. Hostname can be chosen from "nodes" section below.
@@ -41,6 +44,8 @@ fluo_yarn_version = 1.0.0
 accumulo_version = 2.0.0-alpha-1
 # Specifies if software should be downloaded. If 'False', tarballs of the software above should be in conf/upload/
 download_software = True
+# Install Hub (for GitHub)
+install_hub = True
 
 [ec2]
 # AWS machine image to use. The default below is for a CentOS 7 image (in us-east-1).
diff --git a/lib/muchos/config.py b/lib/muchos/config.py
index ace1532..8578dc8 100644
--- a/lib/muchos/config.py
+++ b/lib/muchos/config.py
@@ -99,6 +99,9 @@ class DeployConfig(ConfigParser):
             return 'worker'
         return 'default'
 
+    def user_home(self):
+        return self.get('general', 'user_home')
+
     def mounts(self, num_ephemeral):
         mounts = []
         for i in range(0, num_ephemeral):
@@ -335,7 +338,7 @@ HOST_VAR_DEFAULTS = {
   'accumulo_tarball': 'accumulo-{{ accumulo_version }}-bin.tar.gz',
   'accumulo_version': None,
   'cluster_type': None,
-  'cluster_basedir': None,
+  'cluster_group': None,
   'cluster_user': None,
   'default_data_dirs': None,
   'download_software': None,
@@ -350,7 +353,8 @@ HOST_VAR_DEFAULTS = {
   'hadoop_version': None,
   'hadoop_major_version': '"{{ hadoop_version.split(\'.\')[0] }}"',
   'hdfs_root': 'hdfs://{{ groups[\'namenode\'][0] }}:8020',
-  'install_dir': '"{{ cluster_basedir }}/install"',
+  'install_dir': None,
+  'install_hub': None,
   'java_home': '"/usr/lib/jvm/java-1.8.0-openjdk"',
   'maven_home': '"{{ install_dir }}/apache-maven-{{ maven_version }}"',
   'maven_tarball': 'apache-maven-{{ maven_version }}-bin.tar.gz',
@@ -358,7 +362,8 @@ HOST_VAR_DEFAULTS = {
   'spark_home': '"{{ install_dir }}/spark-{{ spark_version }}-bin-without-hadoop"',
   'spark_tarball': 'spark-{{ spark_version }}-bin-without-hadoop.tgz',
   'spark_version': None,
-  'tarballs_dir': '"{{ cluster_basedir }}/tarballs"',
+  'tarballs_dir': '"{{ user_home }}/tarballs"',
+  'user_home': None,
   'worker_data_dirs': None,
   'zookeeper_connect': '"{{ groups[\'zookeepers\']|join(\',\') }}"',
   'zookeeper_client_port': '"2181"',
diff --git a/lib/muchos/existing.py b/lib/muchos/existing.py
index 6f79a4e..6b9247a 100644
--- a/lib/muchos/existing.py
+++ b/lib/muchos/existing.py
@@ -140,14 +140,13 @@ class ExistingCluster:
         else:
             open(ansible_keys, 'w').close()
 
-        basedir = config.get('general', 'cluster_basedir')
         cmd = "rsync -az --delete -e \"ssh -o 'StrictHostKeyChecking no'\""
         subprocess.call("{cmd} {src} {usr}@{ldr}:{tdir}".format(cmd=cmd, src=join(config.deploy_path, "ansible"),
                                                                 usr=config.get('general', 'cluster_user'),
-                                                                ldr=config.get_proxy_ip(), tdir=basedir),
+                                                                ldr=config.get_proxy_ip(), tdir=config.user_home()),
                         shell=True)
 
-        self.exec_on_proxy_verified("{0}/ansible/scripts/install_ansible.sh".format(basedir), opts='-t')
+        self.exec_on_proxy_verified("{0}/ansible/scripts/install_ansible.sh".format(config.user_home()), opts='-t')
 
     def setup(self):
         config = self.config
@@ -156,8 +155,7 @@ class ExistingCluster:
         self.sync()
 
         conf_upload = join(config.deploy_path, "conf/upload")
-        basedir = config.get('general', 'cluster_basedir')
-        cluster_tarballs = "{0}/tarballs".format(basedir)
+        cluster_tarballs = "{0}/tarballs".format(config.user_home())
         self.exec_on_proxy_verified("mkdir -p {0}".format(cluster_tarballs))
         for f in listdir(conf_upload):
             tarball_path = join(conf_upload, f)
@@ -212,15 +210,14 @@ class ExistingCluster:
 
     def execute_playbook(self, playbook):
         print("Executing '{0}' playbook".format(playbook))
-        basedir = self.config.get('general', 'cluster_basedir')
         self.exec_on_proxy_verified("time -p ansible-playbook {base}/ansible/{playbook}"
-                                    .format(base=basedir, playbook=playbook), opts='-t')
+                                    .format(base=self.config.user_home(), playbook=playbook), opts='-t')
 
     def send_to_proxy(self, path, target, skip_if_exists=True):
         print("Copying to proxy: ", path)
         cmd = "scp -o 'StrictHostKeyChecking no'"
         if skip_if_exists:
-            cmd = "rsync --update --progress -e \"ssh -o 'StrictHostKeyChecking no'\""
+            cmd = "rsync --update --progress -e \"ssh -q -o 'StrictHostKeyChecking no'\""
         subprocess.call("{cmd} {src} {usr}@{ldr}:{tdir}".format(
             cmd=cmd, src=path, usr=self.config.get('general', 'cluster_user'), ldr=self.config.get_proxy_ip(),
             tdir=target), shell=True)
diff --git a/lib/tests/test_config.py b/lib/tests/test_config.py
index c69fc9a..d2a9fe4 100644
--- a/lib/tests/test_config.py
+++ b/lib/tests/test_config.py
@@ -26,6 +26,7 @@ def test_ec2_cluster():
     assert c.get('ec2', 'default_instance_type') == 'm5d.large'
     assert c.get('ec2', 'worker_instance_type') == 'm5d.large'
     assert c.get('ec2', 'aws_ami') == 'ami-9887c6e7'
+    assert c.user_home() == '/home/centos'
     assert c.max_ephemeral() == 1
     assert c.mount_root() == '/media/ephemeral'
     assert c.fstype() == 'ext3'
@@ -67,8 +68,8 @@ def test_ec2_cluster():
     assert c.get('general', 'proxy_hostname') == "leader1"
     assert c.proxy_public_ip() == "23.0.0.0"
     assert c.proxy_private_ip() == "10.0.0.0"
-    assert c.get('general', 'cluster_basedir') == "/home/centos"
     assert c.get('general', 'cluster_user') == "centos"
+    assert c.get('general', 'cluster_group') == "centos"
     assert c.get_non_proxy() == [('10.0.0.1', 'leader2'), ('10.0.0.2', 'worker1'), ('10.0.0.3', 'worker2'),
                                  ('10.0.0.4', 'worker3'), ('10.0.0.5', 'worker4')]
     assert c.get_host_services() == [('leader1', 'namenode resourcemanager accumulomaster zookeeper'),