You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@fluo.apache.org by ar...@apache.org on 2019/11/26 23:09:49 UTC

[fluo-muchos] branch master updated: Add support for various hash algorithms for specifying checksums (#300)

This is an automated email from the ASF dual-hosted git repository.

arvindsh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/fluo-muchos.git


The following commit(s) were added to refs/heads/master by this push:
     new cb1838f  Add support for various hash algorithms for specifying checksums (#300)
cb1838f is described below

commit cb1838f5ec629d410456e9bcaea20480679334f5
Author: Arvind Shyamsundar <ar...@apache.org>
AuthorDate: Tue Nov 26 15:09:41 2019 -0800

    Add support for various hash algorithms for specifying checksums (#300)
    
    This PR allows the user to optionally specify the hash algorithm as a
    prefix to the checksum within conf/checksums. It also expands the set
    of algorithms which can be used to compute and specify checksums.
    Supported algorithms are sha512, sha384, sha256, sha224, sha1 and md5.
    If the algorithm is not specified in the checksums file, it is
    inferred based on the length of the hash.
---
 README.md                                  |  7 +++--
 ansible/roles/accumulo/tasks/download.yml  |  4 +--
 ansible/roles/fluo/tasks/download.yml      |  4 +--
 ansible/roles/fluo_yarn/tasks/download.yml |  4 +--
 ansible/roles/grafana/defaults/main.yml    |  2 +-
 ansible/roles/grafana/tasks/main.yml       |  2 +-
 ansible/roles/influxdb/defaults/main.yml   |  2 +-
 ansible/roles/influxdb/tasks/main.yml      |  2 +-
 ansible/roles/proxy/tasks/download.yml     | 10 +++----
 ansible/roles/spark/tasks/download.yml     |  4 +--
 conf/muchos.props.example                  |  2 +-
 lib/muchos/config/base.py                  | 46 +++++++++++++++++++++++-------
 lib/tests/azure/test_config.py             |  4 +--
 lib/tests/ec2/test_config.py               |  4 +--
 14 files changed, 63 insertions(+), 34 deletions(-)

diff --git a/README.md b/README.md
index d5a8178..4339ae8 100644
--- a/README.md
+++ b/README.md
@@ -182,8 +182,11 @@ versions of these tarballs are specified in [muchos.props] and can be changed if
 Optionally, Muchos can setup the cluster using an Accumulo or Fluo tarball that is placed in the
 `conf/upload` directory of Muchos. This option is only necessary if you want to use an unreleased
 version of Fluo or Accumulo. Before running the `muchos setup` command, you should confirm that the
-version and SHA-256 hash of your tarball matches what is set in [conf/checksums][checksums]. Run the command
-`shasum -a 256 /path/to/tarball` on your tarball to determine its hash.
+hash (typically SHA-512 or SHA-256) of your tarball matches what is set in [conf/checksums][checksums].
+Run the command `shasum -a 512 /path/to/tarball` on your tarball to determine its hash.
+The entry in [conf/checksums][checksums] can optionally include the algorithm as a prefix. If the algorithm
+is not specified then Muchos will infer the algorithm based on the length of the hash. Currently Muchos
+supports using sha512 / sha384 / sha256 / sha224 / sha1 / md5 hashes for the checksum.
 
 The `muchos setup` command will install and start Accumulo, Hadoop, and Zookeeper.  The optional
 services below will only be set up if configured in the `[nodes]` section of [muchos.props]:
diff --git a/ansible/roles/accumulo/tasks/download.yml b/ansible/roles/accumulo/tasks/download.yml
index eb51ff3..2d52f18 100644
--- a/ansible/roles/accumulo/tasks/download.yml
+++ b/ansible/roles/accumulo/tasks/download.yml
@@ -26,10 +26,10 @@
   stat: path={{ tarballs_dir }}/{{ accumulo_tarball }}
   register: accumulo
 - name: "download Accumulo tarball to proxy"
-  get_url: url={{ item.urlp }}/{{ item.fn }} dest={{ tarballs_dir }}/{{ item.fn }} sha256sum={{ item.sum }} force=no
+  get_url: url={{ item.urlp }}/{{ item.fn }} dest={{ tarballs_dir }}/{{ item.fn }} checksum="{{ item.sum }}" force=no
   register: gresult
   until: "'OK' in gresult.msg or 'file already exists' in gresult.msg"
   retries: 3
   with_items:
-    - { urlp: "{{ apache_mirror.stdout }}/accumulo/{{ accumulo_version }}", fn: "{{ accumulo_tarball }}", sum: "{{ accumulo_sha256 }}" }
+    - { urlp: "{{ apache_mirror.stdout }}/accumulo/{{ accumulo_version }}", fn: "{{ accumulo_tarball }}", sum: "{{ accumulo_checksum }}" }
   when: accumulo.stat.exists == False
diff --git a/ansible/roles/fluo/tasks/download.yml b/ansible/roles/fluo/tasks/download.yml
index fc0e2b4..f4fe2a2 100644
--- a/ansible/roles/fluo/tasks/download.yml
+++ b/ansible/roles/fluo/tasks/download.yml
@@ -26,10 +26,10 @@
   stat: path={{ tarballs_dir }}/{{ fluo_tarball }}
   register: fluo
 - name: "download Fluo tarball to proxy"
-  get_url: url={{ item.urlp }}/{{ item.fn }} dest={{ tarballs_dir }}/{{ item.fn }} sha256sum={{ item.sum }} force=no
+  get_url: url={{ item.urlp }}/{{ item.fn }} dest={{ tarballs_dir }}/{{ item.fn }} checksum="{{ item.sum }}" force=no
   register: gresult
   until: "'OK' in gresult.msg or 'file already exists' in gresult.msg"
   retries: 3
   with_items:
-    - { urlp: "{{ apache_mirror.stdout }}/fluo/fluo/{{ fluo_version }}", fn: "{{ fluo_tarball }}", sum: "{{ fluo_sha256 }}" }
+    - { urlp: "{{ apache_mirror.stdout }}/fluo/fluo/{{ fluo_version }}", fn: "{{ fluo_tarball }}", sum: "{{ fluo_checksum }}" }
   when: fluo.stat.exists == False
diff --git a/ansible/roles/fluo_yarn/tasks/download.yml b/ansible/roles/fluo_yarn/tasks/download.yml
index 0873473..19c43c7 100644
--- a/ansible/roles/fluo_yarn/tasks/download.yml
+++ b/ansible/roles/fluo_yarn/tasks/download.yml
@@ -26,10 +26,10 @@
   stat: path={{ tarballs_dir }}/{{ fluo_yarn_tarball }}
   register: fluo_yarn
 - name: "download Fluo YARN tarball to proxy"
-  get_url: url={{ item.urlp }}/{{ item.fn }} dest={{ tarballs_dir }}/{{ item.fn }} sha256sum={{ item.sum }} force=no
+  get_url: url={{ item.urlp }}/{{ item.fn }} dest={{ tarballs_dir }}/{{ item.fn }} checksum="{{ item.sum }}" force=no
   register: gresult
   until: "'OK' in gresult.msg or 'file already exists' in gresult.msg"
   retries: 3
   with_items:
-    - { urlp: "{{ apache_mirror.stdout }}/fluo/fluo-yarn/{{ fluo_yarn_version }}", fn: "{{ fluo_yarn_tarball }}", sum: "{{ fluo_yarn_sha256 }}" }
+    - { urlp: "{{ apache_mirror.stdout }}/fluo/fluo-yarn/{{ fluo_yarn_version }}", fn: "{{ fluo_yarn_tarball }}", sum: "{{ fluo_yarn_checksum }}" }
   when: fluo_yarn.stat.exists == False
diff --git a/ansible/roles/grafana/defaults/main.yml b/ansible/roles/grafana/defaults/main.yml
index cbd5302..e576c0d 100644
--- a/ansible/roles/grafana/defaults/main.yml
+++ b/ansible/roles/grafana/defaults/main.yml
@@ -16,4 +16,4 @@
 #
 
 grafana_rpm: grafana-2.5.0-1.x86_64.rpm
-grafana_sha256: ada02f1d4cdaa6397ef1c2e013200d0e8f30675205092a817e81d47ea17f47ad
+grafana_checksum: "sha256:ada02f1d4cdaa6397ef1c2e013200d0e8f30675205092a817e81d47ea17f47ad"
diff --git a/ansible/roles/grafana/tasks/main.yml b/ansible/roles/grafana/tasks/main.yml
index e66d219..cfce62c 100644
--- a/ansible/roles/grafana/tasks/main.yml
+++ b/ansible/roles/grafana/tasks/main.yml
@@ -20,7 +20,7 @@
   args:
     url: https://grafanarel.s3.amazonaws.com/builds/{{ grafana_rpm }}
     dest: /tmp/{{ grafana_rpm }}
-    sha256sum: "{{ grafana_sha256 }}"
+    checksum: "{{ grafana_checksum }}"
     force: no
 - name: "ensure grafana is installed"
   yum: name=/tmp/{{ grafana_rpm }} state=present
diff --git a/ansible/roles/influxdb/defaults/main.yml b/ansible/roles/influxdb/defaults/main.yml
index c18bdde..d3ca1d6 100644
--- a/ansible/roles/influxdb/defaults/main.yml
+++ b/ansible/roles/influxdb/defaults/main.yml
@@ -16,4 +16,4 @@
 #
 
 influxdb_rpm: influxdb-0.9.4.2-1.x86_64.rpm
-influxdb_sha256: 9a2b6cfdf70e0050a46d07cc0f4135a7538eaac1a3c1745d860d454179f771fb
+influxdb_checksum: "sha256:9a2b6cfdf70e0050a46d07cc0f4135a7538eaac1a3c1745d860d454179f771fb"
diff --git a/ansible/roles/influxdb/tasks/main.yml b/ansible/roles/influxdb/tasks/main.yml
index 1f2897d..0cc4eac 100644
--- a/ansible/roles/influxdb/tasks/main.yml
+++ b/ansible/roles/influxdb/tasks/main.yml
@@ -20,7 +20,7 @@
   args:
     url: https://influxdb.s3.amazonaws.com/{{ influxdb_rpm }}
     dest: /tmp/{{ influxdb_rpm }}
-    sha256sum: "{{ influxdb_sha256 }}"
+    checksum: "{{ influxdb_checksum }}"
     force: no
 - name: "ensure influxdb is installed"
   yum: name=/tmp/{{ influxdb_rpm }} state=present
diff --git a/ansible/roles/proxy/tasks/download.yml b/ansible/roles/proxy/tasks/download.yml
index 62e30b2..66fc525 100644
--- a/ansible/roles/proxy/tasks/download.yml
+++ b/ansible/roles/proxy/tasks/download.yml
@@ -23,15 +23,15 @@
   failed_when: "'http' not in apache_mirror.stdout"
   changed_when: False
 - name: "download common tarballs to proxy"
-  get_url: url={{ item.urlp }}/{{ item.fn }} dest={{ tarballs_dir }}/{{ item.fn }} sha256sum={{ item.sum }} force=no
+  get_url: url={{ item.urlp }}/{{ item.fn }} dest={{ tarballs_dir }}/{{ item.fn }} checksum="{{ item.sum }}" force=no
   register: gresult
   until: "'OK' in gresult.msg or 'file already exists' in gresult.msg"
   retries: 3
   with_items:
-    - { urlp: "{{ apache_mirror.stdout }}/zookeeper/zookeeper-{{ zookeeper_version }}", fn: "{{ zookeeper_tarball }}", sum: "{{ zookeeper_sha256 }}" }
-    - { urlp: "{{ apache_mirror.stdout }}/hadoop/common/hadoop-{{ hadoop_version }}", fn: "{{ hadoop_tarball }}", sum: "{{ hadoop_sha256 }}" }
-    - { urlp: "{{ apache_mirror.stdout }}/maven/maven-3/{{ maven_version }}/binaries", fn: "{{ maven_tarball }}", sum: "{{ maven_sha256 }}" }
-    - { urlp: "https://github.com/github/hub/releases/download/v{{ hub_version }}", fn: "{{ hub_tarball }}", sum: "{{ hub_sha256 }}" }
+    - { urlp: "{{ apache_mirror.stdout }}/zookeeper/zookeeper-{{ zookeeper_version }}", fn: "{{ zookeeper_tarball }}", sum: "{{ zookeeper_checksum }}" }
+    - { urlp: "{{ apache_mirror.stdout }}/hadoop/common/hadoop-{{ hadoop_version }}", fn: "{{ hadoop_tarball }}", sum: "{{ hadoop_checksum }}" }
+    - { urlp: "{{ apache_mirror.stdout }}/maven/maven-3/{{ maven_version }}/binaries", fn: "{{ maven_tarball }}", sum: "{{ maven_checksum }}" }
+    - { urlp: "https://github.com/github/hub/releases/download/v{{ hub_version }}", fn: "{{ hub_tarball }}", sum: "{{ hub_checksum }}" }
 
 # This is currently needed to run hadoop with Java 11 (see https://github.com/apache/fluo-muchos/issues/266)      
 - name: "Download javax.activation-api for Hadoop 3 when Java 11 is used"
diff --git a/ansible/roles/spark/tasks/download.yml b/ansible/roles/spark/tasks/download.yml
index 728dce1..d28f030 100644
--- a/ansible/roles/spark/tasks/download.yml
+++ b/ansible/roles/spark/tasks/download.yml
@@ -26,10 +26,10 @@
   stat: path={{ tarballs_dir }}/{{ spark_tarball }}
   register: spark
 - name: "download Spark tarball to proxy"
-  get_url: url={{ item.urlp }}/{{ item.fn }} dest={{ tarballs_dir }}/{{ item.fn }} sha256sum={{ item.sum }} force=no
+  get_url: url={{ item.urlp }}/{{ item.fn }} dest={{ tarballs_dir }}/{{ item.fn }} checksum="{{ item.sum }}" force=no
   register: gresult
   until: "'OK' in gresult.msg or 'file already exists' in gresult.msg"
   retries: 3
   with_items:
-    - { urlp: "{{ apache_mirror.stdout }}/spark/spark-{{ spark_version }}", fn: "{{ spark_tarball }}", sum: "{{ spark_sha256 }}" }
+    - { urlp: "{{ apache_mirror.stdout }}/spark/spark-{{ spark_version }}", fn: "{{ spark_tarball }}", sum: "{{ spark_checksum }}" }
   when: spark.stat.exists == False
diff --git a/conf/muchos.props.example b/conf/muchos.props.example
index 172945b..b34a437 100644
--- a/conf/muchos.props.example
+++ b/conf/muchos.props.example
@@ -35,7 +35,7 @@ proxy_hostname = leader1
 accumulo_instance = muchos
 # Accumluo Password
 accumulo_password = secret
-# Software versions (set sha-256 in conf/checksums)
+# Software versions (make sure you have a corresponding entry for the checksum in conf/checksums)
 hadoop_version = 3.2.1
 zookeeper_version = 3.4.14
 spark_version = 2.3.2
diff --git a/lib/muchos/config/base.py b/lib/muchos/config/base.py
index 0f01aff..37458ed 100644
--- a/lib/muchos/config/base.py
+++ b/lib/muchos/config/base.py
@@ -82,31 +82,39 @@ _PLAY_VAR_DEFAULTS = {
   'accumulo_dcache_size': None,
   'accumulo_icache_size': None,
   'accumulo_imap_size': None,
-  'accumulo_sha256': None,
+  'accumulo_checksum': None,
   'accumulo_tserv_mem': None,
-  'fluo_sha256': None,
+  'fluo_checksum': None,
   'fluo_worker_instances_multiplier': None,
   'fluo_worker_mem_mb': None,
   'fluo_worker_threads': None,
-  'fluo_yarn_sha256': None,
-  'hadoop_sha256': None,
+  'fluo_yarn_checksum': None,
+  'hadoop_checksum': None,
   'hub_version': '2.2.3',
   'hub_home': '"{{ install_dir }}/hub-linux-amd64-{{ hub_version }}"',
   'hub_tarball': 'hub-linux-amd64-{{ hub_version }}.tgz',
-  'hub_sha256': '54c35a459a4241b7ae4c28bcfea0ceef849dd2f8a9dd2b82ba2ba964a743e6bc',
-  'maven_sha256': '2528c35a99c30f8940cc599ba15d34359d58bec57af58c1075519b8cd33b69e7',
+  'hub_checksum': 'sha256:54c35a459a4241b7ae4c28bcfea0ceef849dd2f8a9dd2b82ba2ba964a743e6bc',
+  'maven_checksum': 'sha256:2528c35a99c30f8940cc599ba15d34359d58bec57af58c1075519b8cd33b69e7',
   'metrics_drive_ids': None,
   'mount_root': None,
   'node_type_map': None,
-  'spark_sha256': None,
+  'spark_checksum': None,
   'shutdown_delay_minutes': None,
   'twill_reserve_mem_mb': None,
   'yarn_nm_mem_mb': None,
-  'zookeeper_sha256': None
+  'zookeeper_checksum': None
 }
 
 _EXTRA_VAR_DEFAULTS = {}
 
+HASHLEN_ALGO_MAP = {
+        32: "md5",
+        40: "sha1",
+        56: "sha224",
+        64: "sha256",
+        96: "sha384",
+        128: "sha512"
+}
 
 class BaseConfig(ConfigParser, metaclass=ABCMeta):
     def __init__(self, deploy_path, config_path, hosts_path, checksums_path, templates_path, cluster_name):
@@ -131,7 +139,7 @@ class BaseConfig(ConfigParser, metaclass=ABCMeta):
 
     def ansible_play_vars(self):
         software_checksums = {
-            '{}_sha256'.format(k): self.checksum(k) for
+            '{}_checksum'.format(k): self.checksum(k) for
             k in ['accumulo', 'fluo', 'fluo_yarn', 'hadoop', 'spark', 'zookeeper']
         }
         return dict(
@@ -260,6 +268,15 @@ class BaseConfig(ConfigParser, metaclass=ABCMeta):
     def checksum(self, software):
         return self.checksum_ver(software, self.version(software))
 
+    def infer_hash_algo(self, hashstring):
+        # assign the algorithm based on length. These are the default supported algorithms for Ansible
+        hashlen = len(hashstring)
+
+        if hashlen in HASHLEN_ALGO_MAP:
+            return HASHLEN_ALGO_MAP[hashlen]
+        else:
+            return None
+
     def checksum_ver(self, software, version):
         if not os.path.isfile(self.checksums_path):
             exit('ERROR - A checksums file does not exist at %s' % self.hosts_path)
@@ -276,7 +293,16 @@ class BaseConfig(ConfigParser, metaclass=ABCMeta):
                         continue
                     args = line.split(':')
                     if len(args) == 3:
-                        self.checksums_d["{0}:{1}".format(args[0], args[1])] = args[2]
+                        inferred_algo = self.infer_hash_algo(args[2])
+                        if inferred_algo is not None:
+                            self.checksums_d["{0}:{1}".format(args[0], args[1])] = "{0}:{1}".format(self.infer_hash_algo(args[2]), args[2])
+                        else:
+                            exit('ERROR - Bad line %s in checksums %s' % (line, self.checksums_path))
+
+                    elif len(args) == 4:
+                        if args[2] not in HASHLEN_ALGO_MAP.values():
+                            exit('ERROR - Unsupported hash algorithm %s in checksums %s' % (line, self.checksums_path))
+                        self.checksums_d["{0}:{1}".format(args[0], args[1])] = "{0}:{1}".format(args[2], args[3])
                     else:
                         exit('ERROR - Bad line %s in checksums %s' % (line, self.checksums_path))
 
diff --git a/lib/tests/azure/test_config.py b/lib/tests/azure/test_config.py
index e9dcfd2..1b7e4c7 100644
--- a/lib/tests/azure/test_config.py
+++ b/lib/tests/azure/test_config.py
@@ -27,8 +27,8 @@ def test_azure_cluster():
 
     c.cluster_type = 'azure'
 
-    assert c.checksum_ver('accumulo', '1.9.0') == 'f68a6145029a9ea843b0305c90a7f5f0334d8a8ceeea94734267ec36421fe7fe'
-    assert c.checksum('accumulo') == 'df172111698c7a73aa031de09bd5589263a6b824482fbb9b4f0440a16602ed47'
+    assert c.checksum_ver('accumulo', '1.9.0') == 'sha256:f68a6145029a9ea843b0305c90a7f5f0334d8a8ceeea94734267ec36421fe7fe'
+    assert c.checksum('accumulo') == 'sha256:df172111698c7a73aa031de09bd5589263a6b824482fbb9b4f0440a16602ed47'
     assert c.get('azure', 'vm_sku') == 'Standard_D8s_v3'
     assert c.get('azure', 'managed_disk_type') == 'Standard_LRS'
     assert c.user_home() == '/home/centos'
diff --git a/lib/tests/ec2/test_config.py b/lib/tests/ec2/test_config.py
index fe06fe5..640f1ff 100644
--- a/lib/tests/ec2/test_config.py
+++ b/lib/tests/ec2/test_config.py
@@ -21,8 +21,8 @@ from muchos.config import Ec2DeployConfig
 def test_ec2_cluster():
     c = Ec2DeployConfig("muchos", '../conf/muchos.props.example', '../conf/hosts/example/example_cluster',
                      '../conf/checksums', '../conf/templates', 'mycluster')
-    assert c.checksum_ver('accumulo', '1.9.0') == 'f68a6145029a9ea843b0305c90a7f5f0334d8a8ceeea94734267ec36421fe7fe'
-    assert c.checksum('accumulo') == 'df172111698c7a73aa031de09bd5589263a6b824482fbb9b4f0440a16602ed47'
+    assert c.checksum_ver('accumulo', '1.9.0') == 'sha256:f68a6145029a9ea843b0305c90a7f5f0334d8a8ceeea94734267ec36421fe7fe'
+    assert c.checksum('accumulo') == 'sha256:df172111698c7a73aa031de09bd5589263a6b824482fbb9b4f0440a16602ed47'
     assert c.get('ec2', 'default_instance_type') == 'm5d.large'
     assert c.get('ec2', 'worker_instance_type') == 'm5d.large'
     assert c.get('ec2', 'aws_ami') == 'ami-9887c6e7'