Posted to commits@datalab.apache.org by my...@apache.org on 2021/08/09 08:57:17 UTC
[incubator-datalab] branch DATALAB-2409 updated: [DATALAB-2409] -
fab.py refactored
This is an automated email from the ASF dual-hosted git repository.
mykolabodnar pushed a commit to branch DATALAB-2409
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git
The following commit(s) were added to refs/heads/DATALAB-2409 by this push:
new 8ecd2ce [DATALAB-2409] - fab.py refactored
8ecd2ce is described below
commit 8ecd2ce476caa4de0125f2b2cbef85608c5fdeea
Author: bodnarmykola <bo...@gmail.com>
AuthorDate: Mon Aug 9 11:56:59 2021 +0300
[DATALAB-2409] - fab.py refactored
---
.../src/base/scripts/install_prerequisites.py | 11 +-
.../src/general/conf/datalab.ini | 12 +
.../src/general/lib/os/fab.py | 1616 ++++++++++----------
.../general/scripts/aws/common_create_instance.py | 13 +-
.../scripts/aws/common_create_role_policy.py | 15 +-
.../scripts/aws/common_create_security_group.py | 18 +-
.../general/scripts/aws/common_create_subnet.py | 31 +-
.../scripts/aws/ssn_associate_elastic_ip.py | 12 +-
.../src/general/scripts/aws/ssn_configure.py | 2 +-
.../src/general/scripts/aws/ssn_create_endpoint.py | 19 +-
.../src/general/scripts/aws/ssn_create_vpc.py | 2 +-
.../scripts/aws/ssn_terminate_aws_resources.py | 8 +-
12 files changed, 916 insertions(+), 843 deletions(-)
diff --git a/infrastructure-provisioning/src/base/scripts/install_prerequisites.py b/infrastructure-provisioning/src/base/scripts/install_prerequisites.py
index b75ae24..354ad28 100644
--- a/infrastructure-provisioning/src/base/scripts/install_prerequisites.py
+++ b/infrastructure-provisioning/src/base/scripts/install_prerequisites.py
@@ -26,6 +26,7 @@ import json
import os
from datalab.common_lib import *
from datalab.fab import *
+from datalab.logger import logging
from fabric import *
from patchwork.files import exists
from patchwork import files
@@ -44,25 +45,25 @@ args = parser.parse_args()
if __name__ == "__main__":
- print("Configure connections")
+ logging.info("Configure connections")
global conn
conn = init_datalab_connection(args.hostname, args.user, args.keyfile)
deeper_config = json.loads(args.additional_config)
- print("Updating hosts file")
+ logging.info("Updating hosts file")
update_hosts_file(args.user)
- print("Updating repositories and installing requested tools.")
+ logging.info("Updating repositories and installing requested tools.")
try:
ensure_pkg(args.user)
except:
traceback.print_exc()
sys.exit(1)
- print("Installing python packages: {}".format(args.pip_packages))
+ logging.info("Installing python packages: {}".format(args.pip_packages))
ensure_pip(args.pip_packages)
- print("Installing NTPd")
+ logging.info("Installing NTPd")
ensure_ntpd(args.user, args.edge_private_ip)
conn.close()
diff --git a/infrastructure-provisioning/src/general/conf/datalab.ini b/infrastructure-provisioning/src/general/conf/datalab.ini
index 7a21340..5133f5c 100644
--- a/infrastructure-provisioning/src/general/conf/datalab.ini
+++ b/infrastructure-provisioning/src/general/conf/datalab.ini
@@ -106,6 +106,18 @@ awscli = 1.20.8
pymongo = 3.12.0
pyyaml = 5.4.1
jinja2 = 3.0.1
+ipython = 7.26.0
+ipykernel = 6.0.3
+numpy = 1.21.1
+scipy = 1.7.1
+matplotlib = 3.4.2
+pandas = 1.3.1
+sympy = 1.8
+pillow = 8.3.1
+scikit-learn = 0.24.2
+
+
+
#--- [aws] section contains all common parameters related to Amazon ---#
[aws]
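
The pinned versions added above are read later in fab.py as environment variables such as 'pip_packages_ipython' and 'pip_packages_numpy' (see the ensure_python_venv hunk below). A minimal sketch of that mapping, assuming the keys sit in a pip_packages-style ini section and are exported with the usual section_key naming; the actual configuration loader is outside this diff:

    import configparser
    import os

    def export_pip_package_versions(ini_path='datalab.ini', section='pip_packages'):
        # Assumption: the section name and the 'pip_packages_' prefix are inferred
        # from how fab.py reads os.environ; the real loader is not shown in this commit.
        config = configparser.ConfigParser()
        config.read(ini_path)
        for key, value in config.items(section):
            os.environ['pip_packages_{}'.format(key)] = value

    # ensure_python_venv() can then pin each install, e.g.
    # 'pip install numpy=={}'.format(os.environ['pip_packages_numpy'])
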
diff --git a/infrastructure-provisioning/src/general/lib/os/fab.py b/infrastructure-provisioning/src/general/lib/os/fab.py
index cf33332..c360095 100644
--- a/infrastructure-provisioning/src/general/lib/os/fab.py
+++ b/infrastructure-provisioning/src/general/lib/os/fab.py
@@ -33,10 +33,171 @@ import subprocess
from datalab.actions_lib import *
from datalab.common_lib import *
from datalab.meta_lib import *
+from datalab.logger import logging
from fabric import *
from patchwork.files import exists
from patchwork import files
+
+# general functions for all resources
+def init_datalab_connection(hostname, username, keyfile):
+ try:
+ global conn
+ attempt = 0
+ while attempt < 15:
+ logging.info('connection attempt {}'.format(attempt))
+ conn = Connection(host=hostname, user=username, connect_kwargs={'banner_timeout': 200,
+ 'key_filename': keyfile})
+ conn.config.run.echo = True
+ try:
+ conn.run('ls')
+ conn.config.run.echo = True
+ return conn
+ except:
+ attempt += 1
+ time.sleep(10)
+ except Exception as err:
+ logging.error('Function init_datalab_connection error:', str(err))
+ traceback.print_exc()
+ sys.exit(1)
+
+
+def ensure_pip(requisites):
+ try:
+ if not exists(conn, '/home/{}/.ensure_dir/pip_path_added'.format(os.environ['conf_os_user'])):
+ conn.sudo('bash -l -c "echo PATH=$PATH:/usr/local/bin/:/opt/spark/bin/ >> /etc/profile"')
+ conn.sudo('bash -l -c "echo export PATH >> /etc/profile"')
+ conn.sudo('pip3 install -UI pip=={} --no-cache-dir'.format(os.environ['conf_pip_version']))
+ conn.sudo('pip3 install -U setuptools=={}'.format(os.environ['notebook_setuptools_version']))
+ conn.sudo('pip3 install -UI {} --no-cache-dir'.format(requisites))
+ conn.sudo('touch /home/{}/.ensure_dir/pip_path_added'.format(os.environ['conf_os_user']))
+ except Exception as err:
+ logging.error('Function ensure_pip error:', str(err))
+ traceback.print_exc()
+ sys.exit(1)
+
+
+def id_generator(size=10, chars=string.digits + string.ascii_letters):
+ return ''.join(random.choice(chars) for _ in range(size))
+
+
+def replace_multi_symbols(string, symbol, symbol_cut=False):
+ try:
+ symbol_amount = 0
+ for i in range(len(string)):
+ if string[i] == symbol:
+ symbol_amount = symbol_amount + 1
+ while symbol_amount > 1:
+ string = string.replace(symbol + symbol, symbol)
+ symbol_amount = symbol_amount - 1
+ if symbol_cut and string[-1] == symbol:
+ string = string[:-1]
+ return string
+ except Exception as err:
+ logging.error('Function replace_multi_symbols error:', str(err))
+ traceback.print_exc()
+ sys.exit(1)
+
+
+def append_result(error, exception=''):
+ try:
+ ts = time.time()
+ st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
+ if exception:
+ error_message = "[Error-{}]: {}. Exception: {}".format(st, error, str(exception))
+ logging.error(error_message)
+ else:
+ error_message = "[Error-{}]: {}.".format(st, error)
+ logging.error(error_message)
+ with open('/root/result.json', 'a+') as f:
+ text = f.read()
+ if len(text) == 0:
+ res = '{"error": ""}'
+ with open('/root/result.json', 'w') as f:
+ f.write(res)
+ with open("/root/result.json") as f:
+ data = json.load(f)
+ data['error'] = data['error'] + error_message
+ with open("/root/result.json", 'w') as f:
+ json.dump(data, f)
+ logging.error(data)
+ except Exception as err:
+ logging.error('Function append_result error:', str(err))
+ traceback.print_exc()
+ sys.exit(1)
+
+
+def put_resource_status(resource, status, datalab_path, os_user, hostname):
+ try:
+ keyfile = os.environ['conf_key_dir'] + os.environ['conf_key_name'] + ".pem"
+ init_datalab_connection(hostname, os_user, keyfile)
+ conn.sudo(
+ 'python3 ' + datalab_path + 'tmp/resource_status.py --resource {} --status {}'.format(resource, status))
+ conn.close()
+ except Exception as err:
+ logging.error('Function put_resource_status error:', str(err))
+ traceback.print_exc()
+ sys.exit(1)
+
+
+def ensure_ciphers():
+ try:
+ conn.sudo(
+ '''bash -c "echo -e '\nKexAlgorithms curve25519-sha256@libssh.org,diffie-hellman-group-exchange-sha256' >> /etc/ssh/sshd_config"''')
+ conn.sudo(
+ '''bash -c "echo -e 'Ciphers aes256-gcm@openssh.com,aes128-gcm@openssh.com,chacha20-poly1305@openssh.com,aes256-ctr,aes192-ctr,aes128-ctr' >> /etc/ssh/sshd_config"''')
+ conn.sudo(
+ '''bash -c "echo -e '\tKexAlgorithms curve25519-sha256@libssh.org,diffie-hellman-group-exchange-sha256' >> /etc/ssh/ssh_config"''')
+ conn.sudo(
+ '''bash -c "echo -e '\tCiphers aes256-gcm@openssh.com,aes128-gcm@openssh.com,chacha20-poly1305@openssh.com,aes256-ctr,aes192-ctr,aes128-ctr' >> /etc/ssh/ssh_config"''')
+ try:
+ conn.sudo('service ssh reload')
+ except:
+ conn.sudo('service sshd reload')
+ except Exception as err:
+ logging.error('Function ensure_ciphers error:', str(err))
+ traceback.print_exc()
+ sys.exit(1)
+
+
+def manage_npm_pkg(command):
+ try:
+ npm_count = 0
+ installed = False
+ npm_registry = ['https://registry.npmjs.org/', 'https://registry.npmjs.com/']
+ while not installed:
+ if npm_count > 60:
+ logging.error("NPM registry is not available, please try later")
+ sys.exit(1)
+ else:
+ try:
+ if npm_count % 2 == 0:
+ conn.sudo('npm config set registry {}'.format(npm_registry[0]))
+ else:
+ conn.sudo('npm config set registry {}'.format(npm_registry[1]))
+ conn.sudo('{}'.format(command))
+ installed = True
+ except:
+ npm_count += 1
+ time.sleep(50)
+ except Exception as err:
+ logging.error('Function manage_npm_pkg error:', str(err))
+ traceback.print_exc()
+ sys.exit(1)
+
+
+def update_hosts_file(os_user):
+ try:
+ if not exists(conn, '/home/{}/.ensure_dir/hosts_file_updated'.format(os_user)):
+ conn.sudo('sed -i "s/^127.0.0.1 localhost/127.0.0.1 localhost localhost.localdomain/g" /etc/hosts')
+ conn.sudo('touch /home/{}/.ensure_dir/hosts_file_updated'.format(os_user))
+ except Exception as err:
+ logging.error('Function update_hosts_file error:', str(err))
+ traceback.print_exc()
+ sys.exit(1)
+
+
+# functions for all computation resources
def ensure_python_venv(python_venv_version):
try:
if not exists(conn, '/opt/python/python{}'.format(python_venv_version)):
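
The helpers grouped above under "# general functions for all resources" all operate on the module-level conn that init_datalab_connection() establishes; the rewritten install_prerequisites.py at the top of this commit shows the intended calling order. A minimal sketch of that flow, with placeholder argument values:

    from datalab.fab import *          # init_datalab_connection, update_hosts_file, ensure_pip, ...
    from datalab.logger import logging

    # Placeholder values; in install_prerequisites.py these come from argparse.
    hostname, user, keyfile = '10.0.1.5', 'datalab-user', '/root/keys/KEY.pem'

    conn = init_datalab_connection(hostname, user, keyfile)  # retries up to 15 times, 10 s apart
    logging.info("Updating hosts file")
    update_hosts_file(user)
    logging.info("Installing python packages")
    ensure_pip('boto3 backoff')                              # space-separated list, as the script passes it
    conn.close()
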
@@ -60,47 +221,54 @@ def ensure_python_venv(python_venv_version):
conn.sudo('''bash -l -c 'virtualenv /opt/python/python{0}' '''.format(python_venv_version))
venv_command = 'source /opt/python/python{}/bin/activate'.format(python_venv_version)
pip_command = '/opt/python/python{0}/bin/pip{1}'.format(python_venv_version, python_venv_version[:3])
- conn.sudo('''bash -l -c '{0} && {1} install -U pip=={2}' '''.format(venv_command, pip_command, os.environ['conf_pip_version']))
- conn.sudo('''bash -l -c '{0} && {1} install ipython ipykernel --no-cache-dir' '''.format(venv_command, pip_command))
- conn.sudo('''bash -l -c '{0} && {1} install NumPy=={2} SciPy Matplotlib pandas Sympy Pillow sklearn --no-cache-dir' '''.format(venv_command, pip_command, os.environ['notebook_numpy_version']))
+ conn.sudo('''bash -l -c '{0} && {1} install -UI pip=={2}' '''.format(venv_command, pip_command,
+ os.environ['conf_pip_version']))
+ conn.sudo('''bash -l -c '{} && {} install -UI ipython=={} ipykernel=={} NumPy=={} SciPy=={} Matplotlib=={}
+ pandas=={} Sympy=={} Pillow=={} scikit-learn=={} --no-cache-dir' '''.format(venv_command, pip_command,
+ os.environ[
+ 'pip_packages_ipython'],
+ os.environ[
'pip_packages_ipykernel'],
+ os.environ[
+ 'pip_packages_numpy'],
+ os.environ[
+ 'pip_packages_scipy'],
+ os.environ[
+ 'pip_packages_matplotlib'],
+ os.environ[
+ 'pip_packages_pandas'],
+ os.environ[
+ 'pip_packages_sympy'],
+ os.environ[
+ 'pip_packages_pillow'],
+ os.environ[
+ 'pip_packages_scikit-learn']))
except Exception as err:
- print('Error:', str(err))
+ logging.error('Function ensure_python_venv error:', str(err))
+ traceback.print_exc()
sys.exit(1)
-def install_venv_pip_pkg(pkg_name, pkg_version = ''):
+
+def install_venv_pip_pkg(pkg_name, pkg_version=''):
try:
venv_install_command = 'source /opt/python/python{0}/bin/activate && /opt/python/python{0}/bin/pip{1}'.format(
os.environ['notebook_python_venv_version'], os.environ['notebook_python_venv_version'][:3])
if pkg_version:
- pip_pkg = '{}=={}'.format(pkg_name,pkg_version)
+ pip_pkg = '{}=={}'.format(pkg_name, pkg_version)
else:
pip_pkg = pkg_name
- conn.sudo('''bash -l -c '{0} install {1} --no-cache-dir' '''.format(venv_install_command, pip_pkg))
+ conn.sudo('''bash -l -c '{0} install -U {1} --no-cache-dir' '''.format(venv_install_command, pip_pkg))
except Exception as err:
- print('Error:', str(err))
- sys.exit(1)
-
-def ensure_pip(requisites):
- try:
- if not exists(conn,'/home/{}/.ensure_dir/pip_path_added'.format(os.environ['conf_os_user'])):
- conn.sudo('bash -l -c "echo PATH=$PATH:/usr/local/bin/:/opt/spark/bin/ >> /etc/profile"')
- conn.sudo('bash -l -c "echo export PATH >> /etc/profile"')
- conn.sudo('pip3 install -UI pip=={} --no-cache-dir'.format(os.environ['conf_pip_version']))
- conn.sudo('pip3 install -U setuptools=={}'.format(os.environ['notebook_setuptools_version']))
- conn.sudo('pip3 install -UI {} --no-cache-dir'.format(requisites))
- conn.sudo('touch /home/{}/.ensure_dir/pip_path_added'.format(os.environ['conf_os_user']))
- except:
+ logging.error('Function install_venv_pip_pkg error:', str(err))
+ traceback.print_exc()
sys.exit(1)
-def dataengine_dir_prepare(cluster_dir):
- subprocess.run('mkdir -p ' + cluster_dir, shell=True, check=True)
-
-
-def install_pip_pkg(requisites, pip_version, lib_group, dataengine_service = False):
+def install_pip_pkg(requisites, pip_version, lib_group, dataengine_service=False):
status = list()
- error_parser = "Could not|No matching|ImportError:|failed|EnvironmentError:|requires|FileNotFoundError:|RuntimeError:|error:"
+ error_parser = "Could not|No matching|ImportError:|failed|EnvironmentError:|requires|FileNotFoundError:|" \
+ "RuntimeError:|error:"
try:
if dataengine_service:
install_command = pip_version
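
For reference, install_venv_pip_pkg() composes the same activate-then-pip command that ensure_python_venv() sets up, so user-requested libraries land in the shared virtualenv rather than the system interpreter. A small sketch of the command it ends up issuing, assuming the default /opt/python layout and an illustrative venv version of 3.8.10:

    # Illustrative values; the real ones come from os.environ['notebook_python_venv_version'].
    python_venv_version = '3.8.10'
    pkg_name, pkg_version = 'backoff', '1.11.1'

    venv_install_command = ('source /opt/python/python{0}/bin/activate && '
                            '/opt/python/python{0}/bin/pip{1}').format(python_venv_version,
                                                                       python_venv_version[:3])
    pip_pkg = '{}=={}'.format(pkg_name, pkg_version) if pkg_version else pkg_name

    # fab.py wraps this in conn.sudo("bash -l -c '...'"):
    print('{0} install -U {1} --no-cache-dir'.format(venv_install_command, pip_pkg))
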
@@ -109,13 +277,6 @@ def install_pip_pkg(requisites, pip_version, lib_group, dataengine_service = Fal
else:
install_command = 'source /opt/python/python{0}/bin/activate && /opt/python/python{0}/bin/pip{1}'.format(
os.environ['notebook_python_venv_version'], os.environ['notebook_python_venv_version'][:3])
- #if pip_version == 'pip3' and not exists(conn, '/bin/pip3'):
- # for v in range(4, 8):
- # if exists(conn, '/bin/pip3.{}'.format(v)):
- # conn.sudo('ln -s /bin/pip3.{} /bin/pip3'.format(v))
- #conn.sudo('{} install -U pip=={} setuptools=={}'.format(pip_version, os.environ['conf_pip_version'], os.environ['notebook_setuptools_version']))
- #conn.sudo('{} install -U pip=={} --no-cache-dir'.format(pip_version, os.environ['conf_pip_version']))
- #conn.sudo('{} install --upgrade pip=={}'.format(pip_version, os.environ['conf_pip_version']))
for pip_pkg in requisites:
name, vers = pip_pkg
if pip_pkg[1] == '' or pip_pkg[1] == 'N/A':
@@ -168,8 +329,8 @@ def install_pip_pkg(requisites, pip_version, lib_group, dataengine_service = Fal
[i for i in ver if pip_pkg.split("==")[0].lower() in i][0].split('==')[1]
status_msg = "installed"
versions = []
- if 'Could not find a version that satisfies the requirement' in err and 'ERROR: No matching distribution found for {}=='.format(
- name) in err:
+ if 'Could not find a version that satisfies the requirement' in err \
+ and 'ERROR: No matching distribution found for {}=='.format(name) in err:
versions = err[err.find("(from versions: ") + 16: err.find(") ")]
if versions != '' and versions != 'none':
versions = versions.split(', ')
@@ -180,7 +341,8 @@ def install_pip_pkg(requisites, pip_version, lib_group, dataengine_service = Fal
conn.sudo('cat /tmp/{0}_install_{1}.tmp | if ! grep -w -i -E "Installing collected packages:" > '
'/tmp/{0}_install_{1}.dep; then echo "" > /tmp/{0}_install_{1}.dep;fi'.format(pip_version, name))
- dep = conn.sudo('cat /tmp/{0}_install_{1}.dep'.format(pip_version, name)).stdout.replace('\n', '').strip()[31:]
+ dep = conn.sudo('cat /tmp/{0}_install_{1}.dep'.format(pip_version, name)).stdout.replace('\n', '').strip()[
+ 31:]
if dep == '':
dep = []
else:
@@ -190,8 +352,10 @@ def install_pip_pkg(requisites, pip_version, lib_group, dataengine_service = Fal
dep[n] = ''
else:
conn.sudo('{0} show {1} 2>&1 | if ! grep Version: > '
- '/tmp/{0}_install_{1}.log; then echo "" > /tmp/{0}_install_{1}.log;fi'.format(pip_version, i))
- dep[n] = conn.sudo('cat /tmp/{0}_install_{1}.log'.format(pip_version, i)).stdout.replace('\n', '').replace('Version: ', '{} v.'.format(i))
+ '/tmp/{0}_install_{1}.log; then echo "" > /tmp/{0}_install_{1}.log;fi'.format(
+ pip_version, i))
+ dep[n] = conn.sudo('cat /tmp/{0}_install_{1}.log'.format(
+ pip_version, i)).stdout.replace('\n', '').replace('Version: ', '{} v.'.format(i))
dep = [i for i in dep if i]
status.append({"group": lib_group, "name": name, "version": version, "status": status_msg,
"error_message": err, "available_versions": versions, "add_pkgs": dep})
@@ -200,489 +364,169 @@ def install_pip_pkg(requisites, pip_version, lib_group, dataengine_service = Fal
except Exception as err:
for pip_pkg in requisites:
name, vers = pip_pkg
- status.append({"group": lib_group, "name": name, "version": vers, "status": 'installation_error', "error_message": err})
- print("Failed to install {} packages: {}".format(pip_version, err))
+ status.append({"group": lib_group, "name": name, "version": vers, "status": 'installation_error',
+ "error_message": err})
+ logging.error("Failed to install {} packages: {}".format(pip_version, err))
+ traceback.print_exc()
return status
-
-def id_generator(size=10, chars=string.digits + string.ascii_letters):
- return ''.join(random.choice(chars) for _ in range(size))
-
-
-def ensure_dataengine_tensorflow_jars(jars_dir):
- subprocess.run('wget https://dl.bintray.com/spark-packages/maven/tapanalyticstoolkit/spark-tensorflow-connector/1.0.0-s_2.11/spark-tensorflow-connector-1.0.0-s_2.11.jar \
- -O {}spark-tensorflow-connector-1.0.0-s_2.11.jar'.format(jars_dir), shell=True, check=True)
-
-
-def prepare(dataengine_service_dir, yarn_dir):
- subprocess.run('mkdir -p ' + dataengine_service_dir, shell=True, check=True)
- subprocess.run('mkdir -p ' + yarn_dir, shell=True, check=True)
- subprocess.run('sudo mkdir -p /opt/python/', shell=True, check=True)
- result = os.path.exists(dataengine_service_dir + 'usr/')
- return result
+def install_r_pkg(requisites):
+ status = list()
+ error_parser = "ERROR:|error:|Cannot|failed|Please run|requires|Error|Skipping|couldn't find"
+ if os.environ['conf_resource'] == 'dataengine-service':
+ ensure_dataengine_service_devtools()
+ try:
+ for r_pkg in requisites:
+ name, vers = r_pkg
+ version = vers
+ if vers == 'N/A':
+ vers = ''
+ else:
+ vers = '"{}"'.format(vers)
+ if name == 'sparklyr':
+ conn.run(
+ 'sudo -i R -e \'devtools::install_version("{0}", version = {1}, repos = "http://cran.us.r-project.org", '
+ 'dependencies = NA)\' 2>&1 | tee /tmp/install_{0}.tmp; if ! grep -w -E "({2})" /tmp/install_{0}.tmp '
+ '> /tmp/install_{0}.log; then echo "" > /tmp/install_{0}.log;fi'.format(name, vers, error_parser))
+ else:
+ conn.sudo(
+ 'R -e \'devtools::install_version("{0}", version = {1}, repos = "https://cloud.r-project.org", '
+ 'dependencies = NA)\' 2>&1 | tee /tmp/install_{0}.tmp; if ! grep -w -E "({2})" /tmp/install_{0}.tmp > '
+ '/tmp/install_{0}.log; then echo "" > /tmp/install_{0}.log;fi'.format(name, vers, error_parser))
+ dep = conn.sudo('grep "(NA.*->". /tmp/install_' + name + '.tmp | awk \'{print $1}\'').stdout.replace('\n',
+ ' ')
+ dep_ver = conn.sudo('grep "(NA.*->". /tmp/install_' + name + '.tmp | awk \'{print $4}\'').stdout.replace(
+ '\n', ' ').replace(')', '').split(' ')
+ if dep == '':
+ dep = []
+ else:
+ dep = dep.split(' ')
+ for n, i in enumerate(dep):
+ if i == name:
+ dep[n] = ''
+ else:
+ dep[n] = '{} v.{}'.format(dep[n], dep_ver[n])
+ dep = [i for i in dep if i]
+ conn.sudo('hostname')
+ err = conn.sudo('cat /tmp/install_{0}.log'.format(name)).stdout.replace('"', "'").replace('\n', '')
+ conn.sudo(
+ 'R -e \'installed.packages()[,c(3:4)]\' | if ! grep -w {0} > /tmp/install_{0}.list; then echo "" > /tmp/install_{0}.list;fi'.format(
+ name))
+ res = conn.sudo('cat /tmp/install_{0}.list'.format(name)).stdout.replace('\n', '')
+ if err:
+ status_msg = 'installation_error'
+ if 'couldn\'t find package \'{}\''.format(name) in err:
+ status_msg = 'invalid_name'
+ elif res:
+ ansi_escape = re.compile(r'\x1b[^m]*m')
+ version = ansi_escape.sub('', res).split("\n")[0].split('"')[1]
+ status_msg = 'installed'
+ if 'Error in download_version_url(package, version, repos, type) :' in err or 'Error in parse_deps(paste(spec,' in err:
+ conn.sudo('R -e \'install.packages("versions", repos="https://cloud.r-project.org", dep=TRUE)\'')
+ versions = conn.sudo('R -e \'library(versions); available.versions("' + name + '")\' 2>&1 | grep -A 50 '
+ '\'date available\' | awk \'{print $2}\'').stdout.strip().replace(
+ '\n', ' ')[5:].split(' ')
+ if versions != ['']:
+ status_msg = 'invalid_version'
+ else:
+ versions = []
+ else:
+ versions = []
+ status.append(
+ {"group": "r_pkg", "name": name, "version": version, "status": status_msg, "error_message": err,
+ "available_versions": versions, "add_pkgs": dep})
+ conn.sudo('rm /tmp/*{}*'.format(name))
+ return status
+ except Exception as err:
+ for r_pkg in requisites:
+ name, vers = r_pkg
+ status.append(
+ {"group": "r_pkg", "name": name, "version": vers, "status": 'installation_error', "error_message": err})
+ logging.error("Failed to install R packages:", err)
+ traceback.print_exc()
+ return status
-def configuring_notebook(dataengine_service_version):
- jars_path = '/opt/' + dataengine_service_version + '/jars/'
- subprocess.run("""sudo bash -c "find """ + jars_path + """ -name '*netty*' | xargs rm -f" """, shell=True, check=True)
+def update_spark_jars(jars_dir='/opt/jars'):
+ try:
+ configs = conn.sudo('find /opt/ /etc/ /usr/lib/ -name spark-defaults.conf -type f').stdout.split('\n')
+ if exists(conn, jars_dir):
+ for conf in filter(None, configs):
+ des_path = ''
+ all_jars = conn.sudo('find {0} -name "*.jar"'.format(jars_dir)).stdout.split('\n')
+ if ('-des-' in conf):
+ des_path = '/'.join(conf.split('/')[:3])
+ all_jars = find_des_jars(all_jars, des_path)
+ conn.sudo('''sed -i '/^# Generated\|^spark.jars/d' {0}'''.format(conf))
+ conn.sudo(''' bash -l -c 'echo "# Generated spark.jars by DataLab from {0}\nspark.jars {1}" >> {2}' '''
+ .format(','.join(filter(None, [jars_dir, des_path])), ','.join(all_jars), conf))
+ # conn.sudo("sed -i 's/^[[:space:]]*//' {0}".format(conf))
+ else:
+ logging.info("Can't find directory {0} with jar files".format(jars_dir))
+ except Exception as err:
+ logging.error('Function update_spark_jars error:', str(err))
+ traceback.print_exc()
+ sys.exit(1)
-def append_result(error, exception=''):
- ts = time.time()
- st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
- if exception:
- error_message = "[Error-{}]: {}. Exception: {}".format(st, error, str(exception))
- print(error_message)
- else:
- error_message = "[Error-{}]: {}.".format(st, error)
- print(error_message)
- with open('/root/result.json', 'a+') as f:
- text = f.read()
- if len(text) == 0:
- res = '{"error": ""}'
- with open('/root/result.json', 'w') as f:
- f.write(res)
- with open("/root/result.json") as f:
- data = json.load(f)
- data['error'] = data['error'] + error_message
- with open("/root/result.json", 'w') as f:
- json.dump(data, f)
- print(data)
+def install_java_pkg(requisites):
+ status = list()
+ error_parser = "ERROR|error|No such|no such|Please run|requires|module not found|Exception"
+ templates_dir = '/root/templates/'
+ ivy_dir = '/opt/ivy'
+ ivy_cache_dir = '{0}/cache/'.format(ivy_dir)
+ ivy_settings = 'ivysettings.xml'
+ dest_dir = '/opt/jars/java'
+ try:
+ ivy_jar = conn.sudo('find /opt /usr -name "*ivy-{0}.jar" | head -n 1'.format(
+ os.environ['notebook_ivy_version'])).stdout.replace('\n', '')
+ conn.sudo('mkdir -p {0} {1}'.format(ivy_dir, dest_dir))
+ conn.put('{0}{1}'.format(templates_dir, ivy_settings), '/tmp/{}'.format(ivy_settings))
+ conn.sudo('cp -f /tmp/{1} {0}/{1}'.format(ivy_dir, ivy_settings))
+ proxy_string = conn.sudo('cat /etc/profile | grep http_proxy | cut -f2 -d"="').stdout.replace('\n', '')
+ proxy_re = '(?P<proto>http.*)://(?P<host>[^:/ ]+):(?P<port>[0-9]*)'
+ proxy_find = re.search(proxy_re, proxy_string)
+ java_proxy = "export _JAVA_OPTIONS='-Dhttp.proxyHost={0} -Dhttp.proxyPort={1} \
+ -Dhttps.proxyHost={0} -Dhttps.proxyPort={1}'".format(proxy_find.group('host'), proxy_find.group('port'))
+ for java_pkg in requisites:
+ conn.sudo('rm -rf {0}'.format(ivy_cache_dir))
+ conn.sudo('mkdir -p {0}'.format(ivy_cache_dir))
+ group, artifact, version, override = java_pkg
+ logging.info("Installing package (override: {3}): {0}:{1}:{2}".format(group, artifact, version, override))
+ conn.sudo(
+ '''bash -c "{8}; java -jar {0} -settings {1}/{2} -cache {3} -dependency {4} {5} {6} 2>&1 | tee /tmp/install_{5}.tmp; if ! grep -w -E \\"({7})\\" /tmp/install_{5}.tmp > /tmp/install_{5}.log; then echo \\"\\" > /tmp/install_{5}.log;fi" '''.format(
+ ivy_jar, ivy_dir, ivy_settings, ivy_cache_dir, group, artifact, version, error_parser, java_proxy))
+ err = conn.sudo('cat /tmp/install_{0}.log'.format(artifact)).stdout.replace('"', "'").strip()
+ conn.sudo('find {0} -name "{1}*.jar" | head -n 1 | rev | cut -f1 -d "/" | rev | \
+ if ! grep -w -i {1} > /tmp/install_{1}.list; then echo "" > /tmp/install_{1}.list;fi'.format(
+ ivy_cache_dir, artifact))
+ res = conn.sudo('cat /tmp/install_{0}.list'.format(artifact)).stdout.replace('\n', '')
+ if res:
+ conn.sudo('cp -f $(find {0} -name "*.jar" | xargs) {1}'.format(ivy_cache_dir, dest_dir))
+ status.append({"group": "java", "name": "{0}:{1}".format(group, artifact), "version": version,
+ "status": "installed"})
+ else:
+ status.append(
+ {"group": "java", "name": "{0}:{1}".format(group, artifact), "status": "installation_error",
+ "error_message": err})
+ update_spark_jars()
+ conn.sudo('rm -rf /tmp/*{}*'.format(artifact))
+ return status
+ except Exception as err:
+ for java_pkg in requisites:
+ group, artifact, version, override = java_pkg
+ status.append({"group": "java", "name": "{0}:{1}".format(group, artifact), "status": "installation_error",
+ "error_message": err})
+ logging.error("Failed to install {} packages".format(requisites))
+ traceback.print_exc()
+ return status
-def put_resource_status(resource, status, datalab_path, os_user, hostname):
- keyfile = os.environ['conf_key_dir'] + os.environ['conf_key_name'] + ".pem"
- init_datalab_connection(hostname, os_user, keyfile)
- conn.sudo('python3 ' + datalab_path + 'tmp/resource_status.py --resource {} --status {}'.format(resource, status))
- conn.close()
-
-
-def configure_jupyter(os_user, jupyter_conf_file, templates_dir, jupyter_version, exploratory_name):
- if not exists(conn,'/home/' + os_user + '/.ensure_dir/jupyter_ensured'):
- try:
- if os.environ['conf_deeplearning_cloud_ami'] == 'false' or os.environ['application'] != 'deeplearning':
- conn.sudo('pip3 install notebook=={} --no-cache-dir'.format(jupyter_version))
- conn.sudo('pip3 install jupyter --no-cache-dir')
- conn.sudo('rm -rf {}'.format(jupyter_conf_file))
- conn.run('jupyter notebook --generate-config --config {}'.format(jupyter_conf_file))
- conn.run('mkdir -p ~/.jupyter/custom/')
- conn.run('echo "#notebook-container { width: auto; }" > ~/.jupyter/custom/custom.css')
- conn.sudo('echo "c.NotebookApp.ip = \'0.0.0.0\'" >> {}'.format(jupyter_conf_file))
- conn.sudo('echo "c.NotebookApp.base_url = \'/{0}/\'" >> {1}'.format(exploratory_name, jupyter_conf_file))
- conn.sudo('echo c.NotebookApp.open_browser = False >> {}'.format(jupyter_conf_file))
- conn.sudo('echo \'c.NotebookApp.cookie_secret = b"{0}"\' >> {1}'.format(id_generator(), jupyter_conf_file))
- conn.sudo('''echo "c.NotebookApp.token = u''" >> {}'''.format(jupyter_conf_file))
- conn.sudo('echo \'c.KernelSpecManager.ensure_native_kernel = False\' >> {}'.format(jupyter_conf_file))
- if os.environ['conf_deeplearning_cloud_ami'] == 'true' and os.environ['application'] == 'deeplearning':
- conn.sudo(
- '''echo "c.NotebookApp.kernel_spec_manager_class = 'environment_kernels.EnvironmentKernelSpecManager'" >> {}'''.format(
- jupyter_conf_file))
- conn.sudo(
- '''echo "c.EnvironmentKernelSpecManager.conda_env_dirs=['/home/ubuntu/anaconda3/envs']" >> {}'''.format(
- jupyter_conf_file))
- conn.put(templates_dir + 'jupyter-notebook.service', '/tmp/jupyter-notebook.service')
- conn.sudo("chmod 644 /tmp/jupyter-notebook.service")
- if os.environ['application'] == 'tensor':
- conn.sudo("sed -i '/ExecStart/s|-c \"|-c \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/cudnn/lib64:/usr/local/cuda/lib64; |g' /tmp/jupyter-notebook.service")
- elif os.environ['application'] == 'deeplearning' and os.environ['conf_deeplearning_cloud_ami'] == 'false':
- conn.sudo("sed -i '/ExecStart/s|-c \"|-c \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/cudnn/lib64:/usr/local/cuda/lib64:/usr/lib64/openmpi/lib: ; export PYTHONPATH=/home/" + os_user +
- "/caffe/python:/home/" + os_user + "/pytorch/build:$PYTHONPATH ; |g' /tmp/jupyter-notebook.service")
- conn.sudo("sed -i 's|CONF_PATH|{}|' /tmp/jupyter-notebook.service".format(jupyter_conf_file))
- conn.sudo("sed -i 's|OS_USR|{}|' /tmp/jupyter-notebook.service".format(os_user))
- http_proxy = conn.run('''bash -l -c 'echo $http_proxy' ''').stdout.replace('\n','')
- https_proxy = conn.run('''bash -l -c 'echo $https_proxy' ''').stdout.replace('\n','')
- #sudo('sed -i \'/\[Service\]/ a\Environment=\"HTTP_PROXY={}\"\' /tmp/jupyter-notebook.service'.format(
- # http_proxy))
- #sudo('sed -i \'/\[Service\]/ a\Environment=\"HTTPS_PROXY={}\"\' /tmp/jupyter-notebook.service'.format(
- # https_proxy))
- java_home = conn.run("update-alternatives --query java | grep -o --color=never \'/.*/java-8.*/jre\'").stdout.splitlines()[0]
- conn.sudo('sed -i \'/\[Service\]/ a\Environment=\"JAVA_HOME={}\"\' /tmp/jupyter-notebook.service'.format(
- java_home))
- conn.sudo('\cp /tmp/jupyter-notebook.service /etc/systemd/system/jupyter-notebook.service')
- conn.sudo('chown -R {0}:{0} /home/{0}/.local'.format(os_user))
- conn.sudo('mkdir -p /mnt/var')
- conn.sudo('chown {0}:{0} /mnt/var'.format(os_user))
- if os.environ['application'] == 'jupyter' or os.environ['application'] == 'deeplearning':
- try:
- conn.sudo('jupyter-kernelspec remove -f python3 || echo "Such kernel doesnt exists"')
- conn.sudo('jupyter-kernelspec remove -f python2 || echo "Such kernel doesnt exists"')
- except Exception as err:
- print('Error:', str(err))
- conn.sudo("systemctl daemon-reload")
- conn.sudo("systemctl enable jupyter-notebook")
- conn.sudo("systemctl start jupyter-notebook")
- conn.sudo('touch /home/{}/.ensure_dir/jupyter_ensured'.format(os_user))
- except:
- sys.exit(1)
- else:
- try:
- conn.sudo(
- 'sed -i "s/c.NotebookApp.base_url =.*/c.NotebookApp.base_url = \'\/{0}\/\'/" {1}'.format(exploratory_name, jupyter_conf_file))
- conn.sudo("systemctl restart jupyter-notebook")
- except Exception as err:
- print('Error:', str(err))
- sys.exit(1)
-
-def remove_unexisting_kernel(os_user):
- if not exists(conn,'/home/{}/.ensure_dir/unexisting_kernel_removed'.format(os_user)):
- try:
- conn.sudo('jupyter-kernelspec remove -f python3')
- conn.sudo('touch /home/{}/.ensure_dir/unexisting_kernel_removed'.format(os_user))
- except Exception as err:
- print('Error:', str(err))
- sys.exit(1)
-
-def configure_docker(os_user):
- try:
- if not exists(conn,'/home/' + os_user + '/.ensure_dir/docker_ensured'):
- docker_version = os.environ['ssn_docker_version']
- conn.sudo('curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -')
- conn.sudo('add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) \
- stable"')
- #datalab.common_lib.manage_pkg('update', 'remote', '')
- conn.sudo('apt-get update')
- conn.sudo('apt-cache policy docker-ce')
- #datalab.common_lib.manage_pkg('-y install', 'remote', 'docker-ce=5:{}~3-0~ubuntu-focal'.format(docker_version))
- conn.sudo('apt-get install -y docker-ce=5:{}~3-0~ubuntu-focal'.format(docker_version))
- conn.sudo('touch /home/{}/.ensure_dir/docker_ensured'.format(os_user))
- except Exception as err:
- print('Failed to configure Docker:', str(err))
- sys.exit(1)
-
-def ensure_jupyterlab_files(os_user, jupyterlab_dir, jupyterlab_image, jupyter_conf_file, jupyterlab_conf_file, exploratory_name, edge_ip):
- if not exists(conn,jupyterlab_dir):
- try:
- conn.sudo('mkdir {}'.format(jupyterlab_dir))
-# conn.put(templates_dir + 'pyspark_local_template.json', '/tmp/pyspark_local_template.json')
-# conn.put(templates_dir + 'py3spark_local_template.json', '/tmp/py3spark_local_template.json')
- conn.put('/root/Dockerfile_jupyterlab', '/tmp/Dockerfile_jupyterlab')
- conn.put('/root/scripts/jupyterlab_run.sh', '/tmp/jupyterlab_run.sh')
- conn.put('/root/scripts/build.sh', '/tmp/build.sh')
- conn.put('/root/scripts/start.sh', '/tmp/start.sh')
-# conn.sudo('\cp /tmp/pyspark_local_template.json ' + jupyterlab_dir + 'pyspark_local_template.json')
-# conn.sudo('\cp /tmp/py3spark_local_template.json ' + jupyterlab_dir + 'py3spark_local_template.json')
-# conn.sudo('sed -i \'s/3.5/3.6/g\' {}py3spark_local_template.json'.format(jupyterlab_dir))
- conn.sudo('mv /tmp/jupyterlab_run.sh {}jupyterlab_run.sh'.format(jupyterlab_dir))
- conn.sudo('mv /tmp/Dockerfile_jupyterlab {}Dockerfile_jupyterlab'.format(jupyterlab_dir))
- conn.sudo('mv /tmp/build.sh {}build.sh'.format(jupyterlab_dir))
- conn.sudo('mv /tmp/start.sh {}start.sh'.format(jupyterlab_dir))
-# conn.sudo('sed -i \'s/nb_user/{}/g\' {}Dockerfile_jupyterlab'.format(os_user, jupyterlab_dir))
- conn.sudo('sed -i \'s/jupyterlab_image/{}/g\' {}Dockerfile_jupyterlab'.format(jupyterlab_image, jupyterlab_dir))
- conn.sudo('sed -i \'s/nb_user/{}/g\' {}start.sh'.format(os_user, jupyterlab_dir))
-# conn.sudo('sed -i \'s/jup_version/{}/g\' {}Dockerfile_jupyterlab'.format(jupyter_version, jupyterlab_dir))
-# conn.sudo('sed -i \'s/hadoop_version/{}/g\' {}Dockerfile_jupyterlab'.format(os.environ['notebook_hadoop_version'], jupyterlab_dir))
-# conn.sudo('sed -i \'s/tornado_version/{}/g\' {}Dockerfile_jupyterlab'.format(os.environ['notebook_tornado_version'], jupyterlab_dir))
-# conn.sudo('sed -i \'s/matplotlib_version/{}/g\' {}Dockerfile_jupyterlab'.format(os.environ['notebook_matplotlib_version'], jupyterlab_dir))
-# conn.sudo('sed -i \'s/numpy_version/{}/g\' {}Dockerfile_jupyterlab'.format(os.environ['notebook_numpy_version'], jupyterlab_dir))
-# conn.sudo('sed -i \'s/spark_version/{}/g\' {}Dockerfile_jupyterlab'.format(os.environ['notebook_spark_version'], jupyterlab_dir))
-# conn.sudo('sed -i \'s/scala_version/{}/g\' {}Dockerfile_jupyterlab'.format(os.environ['notebook_scala_version'], jupyterlab_dir))
- conn.sudo('sed -i \'s/CONF_PATH/{}/g\' {}jupyterlab_run.sh'.format(jupyterlab_conf_file, jupyterlab_dir))
- conn.sudo('touch {}'.format(jupyter_conf_file))
- conn.sudo('''bash -l -c "echo 'c.NotebookApp.ip = \\"0.0.0.0\\" ' >> {}" '''.format(jupyter_conf_file))
- conn.sudo('''bash -l -c "echo 'c.NotebookApp.base_url = \\"/{0}/\\"' >> {1}" '''.format(exploratory_name, jupyter_conf_file))
- conn.sudo('''bash -l -c 'echo "c.NotebookApp.open_browser = False" >> {}' '''.format(jupyter_conf_file))
- conn.sudo('''bash -l -c "echo 'c.NotebookApp.cookie_secret = b\\"{0}\\"' >> {1}" '''.format(id_generator(), jupyter_conf_file))
- conn.sudo('''bash -l -c "echo \\"c.NotebookApp.token = u''\\" >> {}" '''.format(jupyter_conf_file))
- conn.sudo('''bash -l -c 'echo "c.KernelSpecManager.ensure_native_kernel = False" >> {}' '''.format(jupyter_conf_file))
- conn.sudo('chown datalab-user:datalab-user /opt')
- conn.sudo('''bash -l -c 'echo -e "Host git.epam.com\n HostName git.epam.com\n ProxyCommand nc -X connect -x {}:3128 %h %p\n" > /home/{}/.ssh/config' '''.format(
- edge_ip, os_user))
- conn.sudo('''bash -l -c 'echo -e "Host github.com\n HostName github.com\n ProxyCommand nc -X connect -x {}:3128 %h %p" >> /home/{}/.ssh/config' '''.format(edge_ip, os_user))
-# conn.sudo('touch {}'.format(spark_script))
-# conn.sudo('echo "#!/bin/bash" >> {}'.format(spark_script))
-# conn.sudo(
-# 'echo "PYJ=\`find /opt/spark/ -name \'*py4j*.zip\' | tr \'\\n\' \':\' | sed \'s|:$||g\'\`; sed -i \'s|PY4J|\'$PYJ\'|g\' /tmp/pyspark_local_template.json" >> {}'.format(
-# spark_script))
- # conn.sudo(
- # 'echo "sed -i \'14s/:",/:\\/home\\/datalab-user\\/caffe\\/python:\\/home\\/datalab-user\\/pytorch\\/build:",/\' /tmp/pyspark_local_template.json" >> {}'.format(
- # spark_script))
-# conn.sudo('echo \'sed -i "s|SP_VER|{}|g" /tmp/pyspark_local_template.json\' >> {}'.format(os.environ['notebook_spark_version'], spark_script))
-# conn.sudo(
-# 'echo "PYJ=\`find /opt/spark/ -name \'*py4j*.zip\' | tr \'\\n\' \':\' | sed \'s|:$||g\'\`; sed -i \'s|PY4J|\'$PYJ\'|g\' /tmp/py3spark_local_template.json" >> {}'.format(
-# spark_script))
- # conn.sudo(
- # 'echo "sed -i \'14s/:",/:\\/home\\/datalab-user\\/caffe\\/python:\\/home\\/datalab-user\\/pytorch\\/build:",/\' /tmp/py3spark_local_template.json" >> {}'.format(
- # spark_script))
-# conn.sudo('echo \'sed -i "s|SP_VER|{}|g" /tmp/py3spark_local_template.json\' >> {}'.format(os.environ['notebook_spark_version'], spark_script))
-# conn.sudo('echo "cp /tmp/pyspark_local_template.json /home/{}/.local/share/jupyter/kernels/pyspark_local/kernel.json" >> {}'.format(os_user, spark_script))
-# conn.sudo(
-# 'echo "cp /tmp/py3spark_local_template.json /home/{}/.local/share/jupyter/kernels/py3spark_local/kernel.json" >> {}'.format(
-# os_user, spark_script))
-# conn.sudo('git clone https://github.com/legion-platform/legion.git')
-# conn.sudo('cp {}sdk/Pipfile {}sdk_Pipfile'.format(legion_dir, jupyterlab_dir))
-# conn.sudo('cp {}sdk/Pipfile.lock {}sdk_Pipfile.lock'.format(legion_dir, jupyterlab_dir))
-# conn.sudo('cp {}toolchains/python/Pipfile {}toolchains_Pipfile'.format(legion_dir, jupyterlab_dir))
-# conn.sudo('cp {}toolchains/python/Pipfile.lock {}toolchains_Pipfile.lock'.format(legion_dir, jupyterlab_dir))
-# conn.sudo('cp {}cli/Pipfile {}cli_Pipfile'.format(legion_dir, jupyterlab_dir))
-# conn.sudo('cp {}cli/Pipfile.lock {}cli_Pipfile.lock'.format(legion_dir, jupyterlab_dir))
-# conn.sudo('cp -r {}sdk {}sdk'.format(legion_dir, jupyterlab_dir))
-# conn.sudo('cp -r {}toolchains/python {}toolchains_python'.format(legion_dir, jupyterlab_dir))
-# conn.sudo('cp -r {}cli {}cli'.format(legion_dir, jupyterlab_dir))
- except Exception as err:
- print('Error:', str(err))
- sys.exit(1)
- else:
- try:
- conn.sudo(
- 'sed -i "s/c.NotebookApp.base_url =.*/c.NotebookApp.base_url = \'\/{0}\/\'/" {1}'.format(
- exploratory_name, jupyter_conf_file))
- except Exception as err:
- print('Error:', str(err))
- sys.exit(1)
-
-
-def ensure_pyspark_local_kernel(os_user, pyspark_local_path_dir, templates_dir, spark_version):
- if not exists(conn,'/home/' + os_user + '/.ensure_dir/pyspark_local_kernel_ensured'):
- try:
- conn.sudo('mkdir -p ' + pyspark_local_path_dir)
- conn.sudo('touch ' + pyspark_local_path_dir + 'kernel.json')
- conn.put(templates_dir + 'pyspark_local_template.json', '/tmp/pyspark_local_template.json')
- conn.sudo('''bash -l -c "PYJ=`find /opt/spark/ -name '*py4j*.zip' | tr '\\n' ':' | sed 's|:$||g'`; sed -i 's|PY4J|'$PYJ'|g' /tmp/pyspark_local_template.json" ''')
- conn.sudo('sed -i "s|SP_VER|' + spark_version + '|g" /tmp/pyspark_local_template.json')
- conn.sudo('sed -i \'/PYTHONPATH\"\:/s|\(.*\)"|\\1/home/{0}/caffe/python:/home/{0}/pytorch/build:"|\' /tmp/pyspark_local_template.json'.format(os_user))
- conn.sudo('\cp /tmp/pyspark_local_template.json ' + pyspark_local_path_dir + 'kernel.json')
- conn.sudo('touch /home/' + os_user + '/.ensure_dir/pyspark_local_kernel_ensured')
- except:
- sys.exit(1)
-
-
-def ensure_py3spark_local_kernel(os_user, py3spark_local_path_dir, templates_dir, spark_version, python_venv_path, python_venv_version):
- if not exists(conn,'/home/' + os_user + '/.ensure_dir/py3spark_local_kernel_ensured'):
- try:
- conn.sudo('mkdir -p ' + py3spark_local_path_dir)
- conn.sudo('touch ' + py3spark_local_path_dir + 'kernel.json')
- conn.put(templates_dir + 'py3spark_local_template.json', '/tmp/py3spark_local_template.json')
- conn.sudo(
- '''bash -l -c "PYJ=`find /opt/spark/ -name '*py4j*.zip' | tr '\\n' ':' | sed 's|:$||g'`; sed -i 's|PY4J|'$PYJ'|g' /tmp/py3spark_local_template.json" ''')
- conn.sudo('sed -i "s|PYTHON_VENV_PATH|' + python_venv_path + '|g" /tmp/py3spark_local_template.json')
- conn.sudo('sed -i "s|PYTHON_VENV_VERSION|' + python_venv_version + '|g" /tmp/py3spark_local_template.json')
- conn.sudo('sed -i "s|PYTHON_VENV_SHORT_VERSION|' + python_venv_version[:3] + '|g" /tmp/py3spark_local_template.json')
- conn.sudo('sed -i "s|SP_VER|' + spark_version + '|g" /tmp/py3spark_local_template.json')
- conn.sudo('sed -i \'/PYTHONPATH\"\:/s|\(.*\)"|\\1/home/{0}/caffe/python:/home/{0}/pytorch/build:"|\' /tmp/py3spark_local_template.json'.format(os_user))
- conn.sudo('\cp /tmp/py3spark_local_template.json ' + py3spark_local_path_dir + 'kernel.json')
- conn.sudo('touch /home/' + os_user + '/.ensure_dir/py3spark_local_kernel_ensured')
- except:
- sys.exit(1)
-
-
-def pyspark_kernel(kernels_dir, dataengine_service_version, cluster_name, spark_version, bucket, user_name, region, os_user='',
- application='', pip_mirror='', numpy_version='1.14.3'):
- spark_path = '/opt/{0}/{1}/spark/'.format(dataengine_service_version, cluster_name)
- subprocess.run('mkdir -p {0}pyspark_{1}/'.format(kernels_dir, cluster_name), shell=True, check=True)
- kernel_path = '{0}pyspark_{1}/kernel.json'.format(kernels_dir, cluster_name)
- template_file = "/tmp/pyspark_dataengine-service_template.json"
- with open(template_file, 'r') as f:
- text = f.read()
- text = text.replace('CLUSTER_NAME', cluster_name)
- text = text.replace('SPARK_VERSION', 'Spark-' + spark_version)
- text = text.replace('SPARK_PATH', spark_path)
- text = text.replace('PYTHON_SHORT_VERSION', '3.8')
- text = text.replace('PYTHON_FULL_VERSION', '3.8')
- text = text.replace('PYTHON_PATH', '/usr/bin/python3.8')
- text = text.replace('DATAENGINE-SERVICE_VERSION', dataengine_service_version)
- with open(kernel_path, 'w') as f:
- f.write(text)
- subprocess.run('touch /tmp/kernel_var.json', shell=True, check=True)
- subprocess.run('''bash -l -c "PYJ=`find /opt/{0}/{1}/spark/ -name '*py4j*.zip' | tr '\\n' ':' | sed 's|:$||g'`; cat {2} | sed 's|PY4J|'$PYJ'|g' | sed \'/PYTHONPATH\"\:/s|\(.*\)\"|\\1/home/{3}/caffe/python:/home/{3}/pytorch/build:\"|\' > /tmp/kernel_var.json" '''.
- format(dataengine_service_version, cluster_name, kernel_path, os_user), shell=True, check=True)
- subprocess.run('sudo mv /tmp/kernel_var.json ' + kernel_path, shell=True, check=True)
- get_cluster_python_version(region, bucket, user_name, cluster_name)
- with open('/tmp/python_version') as f:
- python_version = f.read()
- if python_version != '\n':
- installing_python(region, bucket, user_name, cluster_name, application, pip_mirror, numpy_version)
- subprocess.run('mkdir -p {0}py3spark_{1}/'.format(kernels_dir, cluster_name), shell=True, check=True)
- kernel_path = '{0}py3spark_{1}/kernel.json'.format(kernels_dir, cluster_name)
- template_file = "/tmp/pyspark_dataengine-service_template.json"
- with open(template_file, 'r') as f:
- text = f.read()
- text = text.replace('CLUSTER_NAME', cluster_name)
- text = text.replace('SPARK_VERSION', 'Spark-' + spark_version)
- text = text.replace('SPARK_PATH', spark_path)
- text = text.replace('PYTHON_SHORT_VERSION', python_version[0:3])
- text = text.replace('PYTHON_FULL_VERSION', python_version[0:3])
- text = text.replace('PYTHON_PATH', '/opt/python/python' + python_version[:5] + '/bin/python' +
- python_version[:3])
- text = text.replace('DATAENGINE-SERVICE_VERSION', dataengine_service_version)
- with open(kernel_path, 'w') as f:
- f.write(text)
- subprocess.run('touch /tmp/kernel_var.json', shell=True, check=True)
- subprocess.run('''bash -l -c "PYJ=`find /opt/{0}/{1}/spark/ -name '*py4j*.zip' | tr '\\n' ':' | sed 's|:$||g'`; cat {2} | sed 's|PY4J|'$PYJ'|g' | sed \'/PYTHONPATH\"\:/s|\(.*\)\"|\\1/home/{3}/caffe/python:/home/{3}/pytorch/build:\"|\' > /tmp/kernel_var.json" '''
- .format(dataengine_service_version, cluster_name, kernel_path, os_user), shell=True, check=True)
- subprocess.run('sudo mv /tmp/kernel_var.json {}'.format(kernel_path), shell=True, check=True)
-
-
-def ensure_ciphers():
- try:
- conn.sudo('''bash -c "echo -e '\nKexAlgorithms curve25519-sha256@libssh.org,diffie-hellman-group-exchange-sha256' >> /etc/ssh/sshd_config"''')
- conn.sudo('''bash -c "echo -e 'Ciphers aes256-gcm@openssh.com,aes128-gcm@openssh.com,chacha20-poly1305@openssh.com,aes256-ctr,aes192-ctr,aes128-ctr' >> /etc/ssh/sshd_config"''')
- conn.sudo('''bash -c "echo -e '\tKexAlgorithms curve25519-sha256@libssh.org,diffie-hellman-group-exchange-sha256' >> /etc/ssh/ssh_config"''')
- conn.sudo('''bash -c "echo -e '\tCiphers aes256-gcm@openssh.com,aes128-gcm@openssh.com,chacha20-poly1305@openssh.com,aes256-ctr,aes192-ctr,aes128-ctr' >> /etc/ssh/ssh_config"''')
- try:
- conn.sudo('service ssh reload')
- except:
- conn.sudo('service sshd reload')
- except Exception as err:
- traceback.print_exc()
- print('Failed to ensure ciphers: ', str(err))
- sys.exit(1)
-
-def ensure_dataengine_service_devtools():
- try:
- if not exists(conn, '/home/{}/dataengine-service-devtools-ensured'.format(os.environ['conf_os_user'])):
- if os.environ['conf_cloud_provider'] in 'aws':
- manage_pkg('-y install', 'remote', 'libcurl libcurl-devel')
- elif (os.environ['conf_cloud_provider'] in 'gcp') and (
- '-w-' in conn.sudo('hostname').stdout.replace('\n', '')):
- # manage_pkg('-y build-dep', 'remote', 'libcurl4-gnutls-dev libxml2-dev')
- manage_pkg('-y install', 'remote', 'libxml2-dev libcurl4-openssl-dev pkg-config')
- conn.sudo('R -e "install.packages(\'devtools\', repos = \'cloud.r-project.org\')"')
- if (os.environ['conf_cloud_provider'] in 'gcp') and (
- "R_LIBS_SITE" not in conn.sudo('cat /opt/conda/miniconda3/lib/R/etc/Renviron').stdout):
- conn.sudo(
- '''bash -l -c 'echo "R_LIBS_SITE=${R_LIBS_SITE-'/usr/local/lib/R/site-library:/usr/lib/R/site-library:/usr/lib/R/library'}" >> /opt/conda/miniconda3/lib/R/etc/Renviron' ''')
- conn.sudo('touch /home/{}/dataengine-service-devtools-ensured'.format(os.environ['conf_os_user']))
- except Exception as err:
- print('Failed to ensure devtools for dataproc with err: {}'.format(err))
- sys.exit(1)
-
-def install_r_pkg(requisites):
- status = list()
- error_parser = "ERROR:|error:|Cannot|failed|Please run|requires|Error|Skipping|couldn't find"
- if os.environ['conf_resource'] == 'dataengine-service':
- ensure_dataengine_service_devtools()
- try:
- for r_pkg in requisites:
- name, vers = r_pkg
- version = vers
- if vers =='N/A':
- vers = ''
- else:
- vers = '"{}"'.format(vers)
- if name == 'sparklyr':
- conn.run('sudo -i R -e \'devtools::install_version("{0}", version = {1}, repos = "http://cran.us.r-project.org", '
- 'dependencies = NA)\' 2>&1 | tee /tmp/install_{0}.tmp; if ! grep -w -E "({2})" /tmp/install_{0}.tmp '
- '> /tmp/install_{0}.log; then echo "" > /tmp/install_{0}.log;fi'.format(name, vers, error_parser))
- else:
- conn.sudo('R -e \'devtools::install_version("{0}", version = {1}, repos = "https://cloud.r-project.org", '
- 'dependencies = NA)\' 2>&1 | tee /tmp/install_{0}.tmp; if ! grep -w -E "({2})" /tmp/install_{0}.tmp > '
- '/tmp/install_{0}.log; then echo "" > /tmp/install_{0}.log;fi'.format(name, vers, error_parser))
- dep = conn.sudo('grep "(NA.*->". /tmp/install_' + name + '.tmp | awk \'{print $1}\'').stdout.replace('\n', ' ')
- dep_ver = conn.sudo('grep "(NA.*->". /tmp/install_' + name + '.tmp | awk \'{print $4}\'').stdout.replace('\n', ' ').replace(')', '').split(' ')
- if dep == '':
- dep = []
- else:
- dep = dep.split(' ')
- for n, i in enumerate(dep):
- if i == name:
- dep[n] = ''
- else:
- dep[n] = '{} v.{}'.format(dep[n], dep_ver[n])
- dep = [i for i in dep if i]
- conn.sudo('hostname')
- err = conn.sudo('cat /tmp/install_{0}.log'.format(name)).stdout.replace('"', "'").replace('\n', '')
- conn.sudo('R -e \'installed.packages()[,c(3:4)]\' | if ! grep -w {0} > /tmp/install_{0}.list; then echo "" > /tmp/install_{0}.list;fi'.format(name))
- res = conn.sudo('cat /tmp/install_{0}.list'.format(name)).stdout.replace('\n', '')
- if err:
- status_msg = 'installation_error'
- if 'couldn\'t find package \'{}\''.format(name) in err:
- status_msg = 'invalid_name'
- elif res:
- ansi_escape = re.compile(r'\x1b[^m]*m')
- version = ansi_escape.sub('', res).split("\n")[0].split('"')[1]
- status_msg = 'installed'
- if 'Error in download_version_url(package, version, repos, type) :' in err or 'Error in parse_deps(paste(spec,' in err:
- conn.sudo('R -e \'install.packages("versions", repos="https://cloud.r-project.org", dep=TRUE)\'')
- versions = conn.sudo('R -e \'library(versions); available.versions("' + name + '")\' 2>&1 | grep -A 50 '
- '\'date available\' | awk \'{print $2}\'').stdout.strip().replace('\n', ' ')[5:].split(' ')
- if versions != ['']:
- status_msg = 'invalid_version'
- else:
- versions = []
- else:
- versions = []
- status.append({"group": "r_pkg", "name": name, "version": version, "status": status_msg, "error_message": err, "available_versions": versions, "add_pkgs": dep})
- conn.sudo('rm /tmp/*{}*'.format(name))
- return status
- except Exception as err:
- for r_pkg in requisites:
- name, vers = r_pkg
- status.append(
- {"group": "r_pkg", "name": name, "version": vers, "status": 'installation_error', "error_message": err})
- print("Failed to install R packages")
- return status
-
-
-def update_spark_jars(jars_dir='/opt/jars'):
- try:
- configs = conn.sudo('find /opt/ /etc/ /usr/lib/ -name spark-defaults.conf -type f').stdout.split('\n')
- if exists(conn, jars_dir):
- for conf in filter(None, configs):
- des_path = ''
- all_jars = conn.sudo('find {0} -name "*.jar"'.format(jars_dir)).stdout.split('\n')
- if ('-des-' in conf):
- des_path = '/'.join(conf.split('/')[:3])
- all_jars = find_des_jars(all_jars, des_path)
- conn.sudo('''sed -i '/^# Generated\|^spark.jars/d' {0}'''.format(conf))
- conn.sudo(''' bash -l -c 'echo "# Generated spark.jars by DataLab from {0}\nspark.jars {1}" >> {2}' '''
- .format(','.join(filter(None, [jars_dir, des_path])), ','.join(all_jars), conf))
- # conn.sudo("sed -i 's/^[[:space:]]*//' {0}".format(conf))
- else:
- print("Can't find directory {0} with jar files".format(jars_dir))
- except Exception as err:
- append_result("Failed to update spark.jars parameter", str(err))
- print("Failed to update spark.jars parameter")
- sys.exit(1)
-
-
-def install_java_pkg(requisites):
- status = list()
- error_parser = "ERROR|error|No such|no such|Please run|requires|module not found|Exception"
- templates_dir = '/root/templates/'
- ivy_dir = '/opt/ivy'
- ivy_cache_dir = '{0}/cache/'.format(ivy_dir)
- ivy_settings = 'ivysettings.xml'
- dest_dir = '/opt/jars/java'
- try:
- ivy_jar = conn.sudo('find /opt /usr -name "*ivy-{0}.jar" | head -n 1'.format(os.environ['notebook_ivy_version'])).stdout.replace('\n','')
- conn.sudo('mkdir -p {0} {1}'.format(ivy_dir, dest_dir))
- conn.put('{0}{1}'.format(templates_dir, ivy_settings), '/tmp/{}'.format(ivy_settings))
- conn.sudo('cp -f /tmp/{1} {0}/{1}'.format(ivy_dir, ivy_settings))
- proxy_string = conn.sudo('cat /etc/profile | grep http_proxy | cut -f2 -d"="').stdout.replace('\n','')
- proxy_re = '(?P<proto>http.*)://(?P<host>[^:/ ]+):(?P<port>[0-9]*)'
- proxy_find = re.search(proxy_re, proxy_string)
- java_proxy = "export _JAVA_OPTIONS='-Dhttp.proxyHost={0} -Dhttp.proxyPort={1} \
- -Dhttps.proxyHost={0} -Dhttps.proxyPort={1}'".format(proxy_find.group('host'), proxy_find.group('port'))
- for java_pkg in requisites:
- conn.sudo('rm -rf {0}'.format(ivy_cache_dir))
- conn.sudo('mkdir -p {0}'.format(ivy_cache_dir))
- group, artifact, version, override = java_pkg
- print("Installing package (override: {3}): {0}:{1}:{2}".format(group, artifact, version, override))
- conn.sudo('''bash -c "{8}; java -jar {0} -settings {1}/{2} -cache {3} -dependency {4} {5} {6} 2>&1 | tee /tmp/install_{5}.tmp; if ! grep -w -E \\"({7})\\" /tmp/install_{5}.tmp > /tmp/install_{5}.log; then echo \\"\\" > /tmp/install_{5}.log;fi" '''.format(ivy_jar, ivy_dir, ivy_settings, ivy_cache_dir, group, artifact, version, error_parser, java_proxy))
- err = conn.sudo('cat /tmp/install_{0}.log'.format(artifact)).stdout.replace('"', "'").strip()
- conn.sudo('find {0} -name "{1}*.jar" | head -n 1 | rev | cut -f1 -d "/" | rev | \
- if ! grep -w -i {1} > /tmp/install_{1}.list; then echo "" > /tmp/install_{1}.list;fi'.format(ivy_cache_dir, artifact))
- res = conn.sudo('cat /tmp/install_{0}.list'.format(artifact)).stdout.replace('\n','')
- if res:
- conn.sudo('cp -f $(find {0} -name "*.jar" | xargs) {1}'.format(ivy_cache_dir, dest_dir))
- status.append({"group": "java", "name": "{0}:{1}".format(group, artifact), "version": version, "status": "installed"})
- else:
- status.append({"group": "java", "name": "{0}:{1}".format(group, artifact), "status": "installation_error", "error_message": err})
- update_spark_jars()
- conn.sudo('rm -rf /tmp/*{}*'.format(artifact))
- return status
- except Exception as err:
- for java_pkg in requisites:
- group, artifact, version, override = java_pkg
- status.append({"group": "java", "name": "{0}:{1}".format(group, artifact), "status": "installation_error",
- "error_message": err})
- print("Failed to install {} packages".format(requisites))
- return status
-
def get_available_r_pkgs():
try:
r_pkgs = dict()
- conn.sudo('R -e \'write.table(available.packages(contriburl="https://cloud.r-project.org/src/contrib"), file="/tmp/r.csv", row.names=F, col.names=F, sep=",")\'')
+ conn.sudo(
+ 'R -e \'write.table(available.packages(contriburl="https://cloud.r-project.org/src/contrib"), file="/tmp/r.csv", row.names=F, col.names=F, sep=",")\'')
conn.get("/tmp/r.csv", "r.csv")
with open('r.csv', 'r') as csvfile:
reader = csv.reader(csvfile, delimiter=',')
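
install_pip_pkg(), install_r_pkg() and install_java_pkg() all return their results through the same list-of-dicts shape, appending one entry per requested package. A representative entry, with illustrative values for the fields populated above:

    # Illustrative only; field values depend on the package and the install outcome.
    status_entry = {
        "group": "r_pkg",                  # lib_group for pip packages, "java" for Ivy artifacts
        "name": "ggplot2",
        "version": "3.3.5",
        "status": "installed",             # or "installation_error", "invalid_name", "invalid_version"
        "error_message": "",
        "available_versions": [],          # suggested versions when the requested one is unavailable
        "add_pkgs": ["rlang v.0.4.11"],    # extra packages pulled in as dependencies
    }
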
@@ -690,34 +534,18 @@ def get_available_r_pkgs():
r_pkgs[row[0]] = row[1]
return r_pkgs
except Exception as err:
- print("Failed to install {} ".format(err))
+ logging.error('Function get_available_r_pkgs error:', str(err))
+ traceback.print_exc()
sys.exit(1)
-def ensure_toree_local_kernel(os_user, toree_link, scala_kernel_path, files_dir, scala_version, spark_version):
- if not exists(conn,'/home/' + os_user + '/.ensure_dir/toree_local_kernel_ensured'):
- try:
- conn.sudo('pip install ' + toree_link + ' --no-cache-dir')
- conn.sudo('ln -s /opt/spark/ /usr/local/spark')
- conn.sudo('jupyter toree install')
- conn.sudo('mv ' + scala_kernel_path + 'lib/* /tmp/')
- conn.put(files_dir + 'toree-assembly-0.5.0.jar', '/tmp/toree-assembly-0.5.0.jar')
- conn.sudo('mv /tmp/toree-assembly-0.5.0.jar ' + scala_kernel_path + 'lib/')
- conn.sudo(
- 'sed -i "s|Apache Toree - Scala|Local Apache Toree - Scala (Scala-' + scala_version +
- ', Spark-' + spark_version + ')|g" ' + scala_kernel_path + 'kernel.json')
- conn.sudo('touch /home/' + os_user + '/.ensure_dir/toree_local_kernel_ensured')
- except:
- sys.exit(1)
-
-
def install_ungit(os_user, notebook_name, edge_ip):
- if not exists(conn,'/home/{}/.ensure_dir/ungit_ensured'.format(os_user)):
+ if not exists(conn, '/home/{}/.ensure_dir/ungit_ensured'.format(os_user)):
try:
manage_npm_pkg('npm -g install ungit@{}'.format(os.environ['notebook_ungit_version']))
conn.put('/root/templates/ungit.service', '/tmp/ungit.service')
conn.sudo("sed -i 's|OS_USR|{}|' /tmp/ungit.service".format(os_user))
- http_proxy = conn.run('''bash -l -c 'echo $http_proxy' ''').stdout.replace('\n','')
+ http_proxy = conn.run('''bash -l -c 'echo $http_proxy' ''').stdout.replace('\n', '')
conn.sudo("sed -i 's|PROXY_HOST|{}|g' /tmp/ungit.service".format(http_proxy))
conn.sudo("sed -i 's|NOTEBOOK_NAME|{}|' /tmp/ungit.service".format(
notebook_name))
@@ -734,37 +562,51 @@ def install_ungit(os_user, notebook_name, edge_ip):
conn.run('echo "spark-warehouse/" >> ~/.gitignore')
conn.run('echo "metastore_db/" >> ~/.gitignore')
conn.run('echo "derby.log" >> ~/.gitignore')
- conn.sudo('''bash -l -c 'echo -e "Host git.epam.com\n HostName git.epam.com\n ProxyCommand nc -X connect -x {}:3128 %h %p\n" > /home/{}/.ssh/config' '''.format(
+ conn.sudo(
+ '''bash -l -c 'echo -e "Host git.epam.com\n HostName git.epam.com\n ProxyCommand nc -X connect -x {}:3128 %h %p\n" > /home/{}/.ssh/config' '''.format(
edge_ip, os_user))
- conn.sudo('''bash -l -c 'echo -e "Host github.com\n HostName github.com\n ProxyCommand nc -X connect -x {}:3128 %h %p" >> /home/{}/.ssh/config' '''.format(
+ conn.sudo(
+ '''bash -l -c 'echo -e "Host github.com\n HostName github.com\n ProxyCommand nc -X connect -x {}:3128 %h %p" >> /home/{}/.ssh/config' '''.format(
edge_ip, os_user))
- conn.sudo('''bash -l -c 'echo -e "Host gitlab.com\n HostName gitlab.com\n ProxyCommand nc -X connect -x {}:3128 %h %p" >> /home/{}/.ssh/config' '''.format(
+ conn.sudo(
+ '''bash -l -c 'echo -e "Host gitlab.com\n HostName gitlab.com\n ProxyCommand nc -X connect -x {}:3128 %h %p" >> /home/{}/.ssh/config' '''.format(
edge_ip, os_user))
conn.sudo('systemctl daemon-reload')
conn.sudo('systemctl enable ungit.service')
conn.sudo('systemctl start ungit.service')
conn.sudo('touch /home/{}/.ensure_dir/ungit_ensured'.format(os_user))
- except:
+ except Exception as err:
+ logging.error('Function install_ungit error:', str(err))
+ traceback.print_exc()
sys.exit(1)
else:
try:
conn.sudo("sed -i 's|--rootPath=/.*-ungit|--rootPath=/{}-ungit|' /etc/systemd/system/ungit.service".format(
notebook_name))
- http_proxy = conn.run('''bash -l -c 'echo $http_proxy' ''').stdout.replace('\n','')
- conn.sudo("sed -i 's|HTTPS_PROXY=.*3128|HTTPS_PROXY={}|g' /etc/systemd/system/ungit.service".format(http_proxy))
- conn.sudo("sed -i 's|HTTP_PROXY=.*3128|HTTP_PROXY={}|g' /etc/systemd/system/ungit.service".format(http_proxy))
+ http_proxy = conn.run('''bash -l -c 'echo $http_proxy' ''').stdout.replace('\n', '')
+ conn.sudo(
+ "sed -i 's|HTTPS_PROXY=.*3128|HTTPS_PROXY={}|g' /etc/systemd/system/ungit.service".format(http_proxy))
+ conn.sudo(
+ "sed -i 's|HTTP_PROXY=.*3128|HTTP_PROXY={}|g' /etc/systemd/system/ungit.service".format(http_proxy))
conn.sudo('systemctl daemon-reload')
conn.sudo('systemctl restart ungit.service')
- except:
+ except Exception as err:
+ logging.error('Function install_ungit error: {}'.format(str(err)))
+ traceback.print_exc()
sys.exit(1)
- conn.run('''bash -l -c 'git config --global http.proxy $http_proxy' ''')
- conn.run('''bash -l -c 'git config --global https.proxy $https_proxy' ''')
+ try:
+ conn.run('''bash -l -c 'git config --global http.proxy $http_proxy' ''')
+ conn.run('''bash -l -c 'git config --global https.proxy $https_proxy' ''')
+ except Exception as err:
+ logging.error('Function install_ungit error: {}'.format(str(err)))
+ traceback.print_exc()
+ sys.exit(1)
def install_inactivity_checker(os_user, ip_address, rstudio=False):
- if not exists(conn,'/home/{}/.ensure_dir/inactivity_ensured'.format(os_user)):
+ if not exists(conn, '/home/{}/.ensure_dir/inactivity_ensured'.format(os_user)):
try:
- if not exists(conn,'/opt/inactivity'):
+ if not exists(conn, '/opt/inactivity'):
conn.sudo('mkdir /opt/inactivity')
conn.put('/root/templates/inactive.service', '/tmp/inactive.service')
conn.sudo('cp /tmp/inactive.service /etc/systemd/system/inactive.service')
@@ -786,35 +628,323 @@ def install_inactivity_checker(os_user, ip_address, rstudio=False):
conn.sudo('systemctl start inactive.timer')
conn.sudo('touch /home/{}/.ensure_dir/inactive_ensured'.format(os_user))
except Exception as err:
- print('Failed to setup inactivity check service!', str(err))
+ logging.error('Function install_inactivity_checker error: {}'.format(str(err)))
+ traceback.print_exc()
+ sys.exit(1)
+
+
+def get_spark_memory(creds=False, os_user='', hostname='', keyfile=''):
+ try:
+ if creds:
+ con = init_datalab_connection(hostname, os_user, keyfile)
+ mem = con.sudo('free -m | grep Mem | tr -s " " ":" | cut -f 2 -d ":"').stdout.replace('\n', '')
+ instance_memory = int(mem)
+ else:
+ mem = conn.sudo('free -m | grep Mem | tr -s " " ":" | cut -f 2 -d ":"').stdout.replace('\n', '')
+ instance_memory = int(mem)
+ spark_memory = round(instance_memory * 90 / 100)
+ return spark_memory
+ except Exception as err:
+ logging.error('Function get_spark_memory error: {}'.format(str(err)))
+ traceback.print_exc()
+ sys.exit(1)
+
+
+# functions for dataengine/dataengine-service resources
+def ensure_dataengine_service_devtools():
+ try:
+ if not exists(conn, '/home/{}/dataengine-service-devtools-ensured'.format(os.environ['conf_os_user'])):
+ if os.environ['conf_cloud_provider'] in 'aws':
+ manage_pkg('-y install', 'remote', 'libcurl libcurl-devel')
+ elif (os.environ['conf_cloud_provider'] in 'gcp') and (
+ '-w-' in conn.sudo('hostname').stdout.replace('\n', '')):
+ # manage_pkg('-y build-dep', 'remote', 'libcurl4-gnutls-dev libxml2-dev')
+ manage_pkg('-y install', 'remote', 'libxml2-dev libcurl4-openssl-dev pkg-config')
+ conn.sudo('R -e "install.packages(\'devtools\', repos = \'cloud.r-project.org\')"')
+ if (os.environ['conf_cloud_provider'] in 'gcp') and (
+ "R_LIBS_SITE" not in conn.sudo('cat /opt/conda/miniconda3/lib/R/etc/Renviron').stdout):
+ conn.sudo(
+ '''bash -l -c 'echo "R_LIBS_SITE=${R_LIBS_SITE-'/usr/local/lib/R/site-library:/usr/lib/R/site-library:/usr/lib/R/library'}" >> /opt/conda/miniconda3/lib/R/etc/Renviron' ''')
+ conn.sudo('touch /home/{}/dataengine-service-devtools-ensured'.format(os.environ['conf_os_user']))
+ except Exception as err:
+ logging.error('Function ensure_dataengine_service_devtools error: {}'.format(str(err)))
+ traceback.print_exc()
+ sys.exit(1)
+
+
+def configure_data_engine_service_livy(hostname, os_user, keyfile):
+ try:
+ init_datalab_connection(hostname, os_user, keyfile)
+ if exists(conn, '/usr/local/lib/livy'):
+ conn.sudo('rm -r /usr/local/lib/livy')
+ conn.sudo('wget -P /tmp/ --user={} --password={} '
+ '{}/repository/packages/livy.tar.gz --no-check-certificate'
+ .format(os.environ['conf_repository_user'],
+ os.environ['conf_repository_pass'], os.environ['conf_repository_address']))
+ conn.sudo('tar -xzvf /tmp/livy.tar.gz -C /usr/local/lib/')
+ conn.sudo('ln -s /usr/local/lib/incubator-livy /usr/local/lib/livy')
+ conn.put('/root/templates/dataengine-service_livy-env.sh', '/usr/local/lib/livy/conf/livy-env.sh')
+ conn.put('/root/templates/dataengine-service_livy.service', '/tmp/livy.service')
+ conn.sudo("sed -i 's|OS_USER|{}|' /tmp/livy.service".format(os_user))
+ conn.sudo('mv /tmp/livy.service /etc/systemd/system/livy.service')
+ conn.sudo('systemctl daemon-reload')
+ conn.sudo('systemctl enable livy.service')
+ conn.sudo('systemctl start livy.service')
+ conn.close()
+ except Exception as err:
+ logging.error('Function configure_data_engine_service_livy error: {}'.format(str(err)))
+ traceback.print_exc()
+ sys.exit(1)
+
+
+def remove_rstudio_dataengines_kernel(cluster_name, os_user):
+ try:
+ cluster_re = ['-{}"'.format(cluster_name),
+ '-{}-'.format(cluster_name),
+ '-{}/'.format(cluster_name)]
+ conn.get('/home/{}/.Rprofile'.format(os_user), 'Rprofile')
+ data = open('Rprofile').read()
+ conf = filter(None, data.split('\n'))
+ # Filter config from any match of cluster_name in line,
+ # separated by defined symbols to avoid partial matches
+ conf = [i for i in conf if not any(x in i for x in cluster_re)]
+ comment_all = lambda x: x if x.startswith('#master') else '#{}'.format(x)
+ uncomment = lambda x: x[1:] if not x.startswith('#master') else x
+ conf = [comment_all(i) for i in conf]
+ conf = [uncomment(i) for i in conf]
+ last_spark = max([conf.index(i) for i in conf if 'master=' in i] or [0])
+ active_cluster = conf[last_spark].split('"')[-2] if last_spark != 0 else None
+ conf = conf[:last_spark] + [conf[l][1:] for l in range(last_spark, len(conf)) if conf[l].startswith("#")] \
+ + [conf[l] for l in range(last_spark, len(conf)) if not conf[l].startswith('#')]
+ with open('.Rprofile', 'w') as f:
+ for line in conf:
+ f.write('{}\n'.format(line))
+ conn.put('.Rprofile', '/home/{}/.Rprofile'.format(os_user))
+ conn.get('/home/{}/.Renviron'.format(os_user), 'Renviron')
+ data = open('Renviron').read()
+ conf = filter(None, data.split('\n'))
+ comment_all = lambda x: x if x.startswith('#') else '#{}'.format(x)
+ conf = [comment_all(i) for i in conf]
+ # Filter config from any match of cluster_name in line,
+ # separated by defined symbols to avoid partial matches
+ conf = [i for i in conf if not any(x in i for x in cluster_re)]
+ if active_cluster:
+ activate_cluster = lambda x: x[1:] if active_cluster in x else x
+ conf = [activate_cluster(i) for i in conf]
+ else:
+ last_spark = max([conf.index(i) for i in conf if 'SPARK_HOME' in i])
+ conf = conf[:last_spark] + [conf[l][1:] for l in range(last_spark, len(conf)) if conf[l].startswith("#")]
+ with open('.Renviron', 'w') as f:
+ for line in conf:
+ f.write('{}\n'.format(line))
+ conn.put('.Renviron', '/home/{}/.Renviron'.format(os_user))
+ if len(conf) == 1:
+ conn.sudo('rm -f /home/{}/.ensure_dir/rstudio_dataengine_ensured'.format(os_user))
+ conn.sudo('rm -f /home/{}/.ensure_dir/rstudio_dataengine-service_ensured'.format(os_user))
+ conn.sudo('''R -e "source('/home/{}/.Rprofile')"'''.format(os_user))
+ except Exception as err:
+ logging.error('Function remove_rstudio_dataengines_kernel error: {}'.format(str(err)))
+ traceback.print_exc()
+ sys.exit(1)
+
+# the following function should be checked whether it is needed
+def configure_data_engine_service_pip(hostname, os_user, keyfile, emr=False):
+ try:
+ init_datalab_connection(hostname, os_user, keyfile)
+ # datalab.common_lib.manage_pkg('-y install', 'remote', 'python3-pip')
+ if not exists(conn, '/usr/bin/pip3') and conn.sudo("python3.9 -V 2>/dev/null | awk '{print $2}'").stdout:
+ conn.sudo('ln -s /usr/bin/pip-3.9 /usr/bin/pip3')
+ elif not exists(conn, '/usr/bin/pip3') and conn.sudo("python3.8 -V 2>/dev/null | awk '{print $2}'").stdout:
+ conn.sudo('ln -s /usr/bin/pip-3.8 /usr/bin/pip3')
+ elif not exists(conn, '/usr/bin/pip3') and conn.sudo("python3.7 -V 2>/dev/null | awk '{print $2}'").stdout:
+ conn.sudo('ln -s /usr/bin/pip-3.7 /usr/bin/pip3')
+ elif not exists(conn, '/usr/bin/pip3') and conn.sudo("python3.6 -V 2>/dev/null | awk '{print $2}'").stdout:
+ conn.sudo('ln -s /usr/bin/pip-3.6 /usr/bin/pip3')
+ elif not exists(conn, '/usr/bin/pip3') and conn.sudo("python3.5 -V 2>/dev/null | awk '{print $2}'").stdout:
+ conn.sudo('ln -s /usr/bin/pip-3.5 /usr/bin/pip3')
+ if emr:
+ conn.sudo('pip3 install -U pip=={}'.format(os.environ['conf_pip_version']))
+ conn.sudo('ln -s /usr/local/bin/pip3.7 /bin/pip3.7')
+ conn.sudo('''bash -c -l 'echo "export PATH=$PATH:/usr/local/bin" >> /etc/profile' ''')
+ conn.sudo('bash -c -l "source /etc/profile"')
+ conn.run('bash -c -l "source /etc/profile"')
+ conn.close()
+ except Exception as err:
+ logging.error('Function configure_data_engine_service_pip error: {}'.format(str(err)))
+ traceback.print_exc()
+ sys.exit(1)
+
+
+## the following function should be removed after configuring kernel connections via Livy for all computation resources:
+def dataengine_dir_prepare(cluster_dir):
+ try:
+ subprocess.run('mkdir -p ' + cluster_dir, shell=True, check=True)
+ except Exception as err:
+ logging.error('Function dataengine_dir_prepare error: {}'.format(str(err)))
+ traceback.print_exc()
+ sys.exit(1)
+
+
+def ensure_dataengine_tensorflow_jars(jars_dir):
+ subprocess.run('wget https://dl.bintray.com/spark-packages/maven/tapanalyticstoolkit/spark-tensorflow-connector/'
+ '1.0.0-s_2.11/spark-tensorflow-connector-1.0.0-s_2.11.jar'
+ ' -O {}spark-tensorflow-connector-1.0.0-s_2.11.jar'.format(jars_dir), shell=True, check=True)
+
+
+def prepare(dataengine_service_dir, yarn_dir):
+ try:
+ subprocess.run('mkdir -p ' + dataengine_service_dir, shell=True, check=True)
+ subprocess.run('mkdir -p ' + yarn_dir, shell=True, check=True)
+ subprocess.run('sudo mkdir -p /opt/python/', shell=True, check=True)
+ result = os.path.exists(dataengine_service_dir + 'usr/')
+ return result
+ except Exception as err:
+ logging.error('Function prepare error: {}'.format(str(err)))
+ traceback.print_exc()
+ sys.exit(1)
+
+
+def configuring_notebook(dataengine_service_version):
+ try:
+ jars_path = '/opt/' + dataengine_service_version + '/jars/'
+ subprocess.run("""sudo bash -c "find """ + jars_path + """ -name '*netty*' | xargs rm -f" """, shell=True,
+ check=True)
+ except Exception as err:
+ logging.error('Function configuring_notebook error: {}'.format(str(err)))
+ traceback.print_exc()
+ sys.exit(1)
+
+
+def find_cluster_kernels():
+ try:
+ de = [i for i in conn.sudo(
+ '''bash -l -c 'find /opt/ -maxdepth 1 -name "*-de-*" -type d | rev | cut -f 1 -d "/" | rev | xargs -r' ''').stdout.replace(
+ '\n', '').split(' ') if i != '']
+ des = [i for i in conn.sudo(
+ '''bash -l -c 'find /opt/ -maxdepth 2 -name "*-des-*" -type d | rev | cut -f 1,2 -d "/" | rev | xargs -r' ''').stdout.replace(
+ '\n', '').split(' ') if i != '']
+ return (de, des)
+ except Exception as err:
+ logging.error('Function find_cluster_kernels error: {}'.format(str(err)))
+ traceback.print_exc()
+ sys.exit(1)
+
+
+# functions for jupyter deeplearning and tensor notebooks
+def configure_jupyter(os_user, jupyter_conf_file, templates_dir, jupyter_version, exploratory_name):
+ if not exists(conn, '/home/' + os_user + '/.ensure_dir/jupyter_ensured'):
+ try:
+ if os.environ['conf_deeplearning_cloud_ami'] == 'false' or os.environ['application'] != 'deeplearning':
+ conn.sudo('pip3 install notebook=={} --no-cache-dir'.format(jupyter_version))
+ conn.sudo('pip3 install jupyter --no-cache-dir')
+ conn.sudo('rm -rf {}'.format(jupyter_conf_file))
+ conn.run('jupyter notebook --generate-config --config {}'.format(jupyter_conf_file))
+ conn.run('mkdir -p ~/.jupyter/custom/')
+ conn.run('echo "#notebook-container { width: auto; }" > ~/.jupyter/custom/custom.css')
+ conn.sudo('echo "c.NotebookApp.ip = \'0.0.0.0\'" >> {}'.format(jupyter_conf_file))
+ conn.sudo('echo "c.NotebookApp.base_url = \'/{0}/\'" >> {1}'.format(exploratory_name, jupyter_conf_file))
+ conn.sudo('echo c.NotebookApp.open_browser = False >> {}'.format(jupyter_conf_file))
+ conn.sudo('echo \'c.NotebookApp.cookie_secret = b"{0}"\' >> {1}'.format(id_generator(), jupyter_conf_file))
+ conn.sudo('''echo "c.NotebookApp.token = u''" >> {}'''.format(jupyter_conf_file))
+ conn.sudo('echo \'c.KernelSpecManager.ensure_native_kernel = False\' >> {}'.format(jupyter_conf_file))
+ if os.environ['conf_deeplearning_cloud_ami'] == 'true' and os.environ['application'] == 'deeplearning':
+ conn.sudo(
+ '''echo "c.NotebookApp.kernel_spec_manager_class = 'environment_kernels.EnvironmentKernelSpecManager'" >> {}'''.format(
+ jupyter_conf_file))
+ conn.sudo(
+ '''echo "c.EnvironmentKernelSpecManager.conda_env_dirs=['/home/ubuntu/anaconda3/envs']" >> {}'''.format(
+ jupyter_conf_file))
+ conn.put(templates_dir + 'jupyter-notebook.service', '/tmp/jupyter-notebook.service')
+ conn.sudo("chmod 644 /tmp/jupyter-notebook.service")
+ if os.environ['application'] == 'tensor':
+ conn.sudo(
+ "sed -i '/ExecStart/s|-c \"|-c \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/cudnn/lib64:/usr/local/cuda/lib64; |g' /tmp/jupyter-notebook.service")
+ elif os.environ['application'] == 'deeplearning' and os.environ['conf_deeplearning_cloud_ami'] == 'false':
+ conn.sudo(
+ "sed -i '/ExecStart/s|-c \"|-c \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/cudnn/lib64:/usr/local/cuda/lib64:/usr/lib64/openmpi/lib: ; export PYTHONPATH=/home/" + os_user +
+ "/caffe/python:/home/" + os_user + "/pytorch/build:$PYTHONPATH ; |g' /tmp/jupyter-notebook.service")
+ conn.sudo("sed -i 's|CONF_PATH|{}|' /tmp/jupyter-notebook.service".format(jupyter_conf_file))
+ conn.sudo("sed -i 's|OS_USR|{}|' /tmp/jupyter-notebook.service".format(os_user))
+ java_home = conn.run(
+ "update-alternatives --query java | grep -o --color=never \'/.*/java-8.*/jre\'").stdout.splitlines()[0]
+ conn.sudo('sed -i \'/\[Service\]/ a\Environment=\"JAVA_HOME={}\"\' /tmp/jupyter-notebook.service'.format(
+ java_home))
+ conn.sudo('\cp /tmp/jupyter-notebook.service /etc/systemd/system/jupyter-notebook.service')
+ conn.sudo('chown -R {0}:{0} /home/{0}/.local'.format(os_user))
+ conn.sudo('mkdir -p /mnt/var')
+ conn.sudo('chown {0}:{0} /mnt/var'.format(os_user))
+ if os.environ['application'] == 'jupyter' or os.environ['application'] == 'deeplearning':
+ try:
+ conn.sudo('jupyter-kernelspec remove -f python3 || echo "Such kernel does not exist"')
+ conn.sudo('jupyter-kernelspec remove -f python2 || echo "Such kernel does not exist"')
+ except Exception as err:
+ logging.error('Error: {}'.format(str(err)))
+ conn.sudo("systemctl daemon-reload")
+ conn.sudo("systemctl enable jupyter-notebook")
+ conn.sudo("systemctl start jupyter-notebook")
+ conn.sudo('touch /home/{}/.ensure_dir/jupyter_ensured'.format(os_user))
+ except Exception as err:
+ logging.error('Function configure_jupyter error: {}'.format(str(err)))
+ traceback.print_exc()
+ sys.exit(1)
+ else:
+ try:
+ conn.sudo(
+ 'sed -i "s/c.NotebookApp.base_url =.*/c.NotebookApp.base_url = \'\/{0}\/\'/" {1}'.format(
+ exploratory_name, jupyter_conf_file))
+ conn.sudo("systemctl restart jupyter-notebook")
+ except Exception as err:
+ logging.error('Function configure_jupyter error: {}'.format(str(err)))
+ traceback.print_exc()
sys.exit(1)
-def set_git_proxy(os_user, hostname, keyfile, proxy_host):
- init_datalab_connection(hostname, os_user, keyfile)
- conn.run('git config --global http.proxy {}'.format(proxy_host))
- conn.run('git config --global https.proxy {}'.format(proxy_host))
- conn.close()
-
-
-def set_mongo_parameters(client, mongo_parameters):
- for i in mongo_parameters:
- client.datalabdb.settings.insert_one({"_id": i, "value": mongo_parameters[i]})
+def ensure_py3spark_local_kernel(os_user, py3spark_local_path_dir, templates_dir, spark_version, python_venv_path,
+ python_venv_version):
+ if not exists(conn, '/home/' + os_user + '/.ensure_dir/py3spark_local_kernel_ensured'):
+ try:
+ conn.sudo('mkdir -p ' + py3spark_local_path_dir)
+ conn.sudo('touch ' + py3spark_local_path_dir + 'kernel.json')
+ conn.put(templates_dir + 'py3spark_local_template.json', '/tmp/py3spark_local_template.json')
+ conn.sudo(
+ '''bash -l -c "PYJ=`find /opt/spark/ -name '*py4j*.zip' | tr '\\n' ':' | sed 's|:$||g'`; sed -i 's|PY4J|'$PYJ'|g' /tmp/py3spark_local_template.json" ''')
+ conn.sudo('sed -i "s|PYTHON_VENV_PATH|' + python_venv_path + '|g" /tmp/py3spark_local_template.json')
+ conn.sudo('sed -i "s|PYTHON_VENV_VERSION|' + python_venv_version + '|g" /tmp/py3spark_local_template.json')
+ conn.sudo('sed -i "s|PYTHON_VENV_SHORT_VERSION|' + python_venv_version[
+ :3] + '|g" /tmp/py3spark_local_template.json')
+ conn.sudo('sed -i "s|SP_VER|' + spark_version + '|g" /tmp/py3spark_local_template.json')
+ conn.sudo(
+ 'sed -i \'/PYTHONPATH\"\:/s|\(.*\)"|\\1/home/{0}/caffe/python:/home/{0}/pytorch/build:"|\' /tmp/py3spark_local_template.json'.format(
+ os_user))
+ conn.sudo('\cp /tmp/py3spark_local_template.json ' + py3spark_local_path_dir + 'kernel.json')
+ conn.sudo('touch /home/' + os_user + '/.ensure_dir/py3spark_local_kernel_ensured')
+ except Exception as err:
+ logging.error('Function ensure_py3spark_local_kernel error: {}'.format(str(err)))
+ traceback.print_exc()
+ sys.exit(1)
-def install_r_packages(os_user):
- if not exists(conn,'/home/' + os_user + '/.ensure_dir/r_packages_ensured'):
- conn.sudo('R -e "install.packages(\'devtools\', repos = \'https://cloud.r-project.org\')"')
- conn.sudo('R -e "install.packages(\'knitr\', repos = \'https://cloud.r-project.org\')"')
- conn.sudo('R -e "install.packages(\'ggplot2\', repos = \'https://cloud.r-project.org\')"')
- conn.sudo('R -e "install.packages(c(\'devtools\',\'mplot\', \'googleVis\'), '
- 'repos = \'https://cloud.r-project.org\'); require(devtools); install_github(\'ramnathv/rCharts\')"')
- conn.sudo('R -e \'install.packages("versions", repos="https://cloud.r-project.org", dep=TRUE)\'')
- conn.sudo('touch /home/' + os_user + '/.ensure_dir/r_packages_ensured')
+def ensure_toree_local_kernel(os_user, toree_link, scala_kernel_path, files_dir, scala_version, spark_version):
+ if not exists(conn, '/home/' + os_user + '/.ensure_dir/toree_local_kernel_ensured'):
+ try:
+ conn.sudo('pip install ' + toree_link + ' --no-cache-dir')
+ conn.sudo('ln -s /opt/spark/ /usr/local/spark')
+ conn.sudo('jupyter toree install')
+ conn.sudo('mv ' + scala_kernel_path + 'lib/* /tmp/')
+ conn.put(files_dir + 'toree-assembly-0.5.0.jar', '/tmp/toree-assembly-0.5.0.jar')
+ conn.sudo('mv /tmp/toree-assembly-0.5.0.jar ' + scala_kernel_path + 'lib/')
+ conn.sudo(
+ 'sed -i "s|Apache Toree - Scala|Local Apache Toree - Scala (Scala-' + scala_version +
+ ', Spark-' + spark_version + ')|g" ' + scala_kernel_path + 'kernel.json')
+ conn.sudo('touch /home/' + os_user + '/.ensure_dir/toree_local_kernel_ensured')
+ except Exception as err:
+ logging.error('Function ensure_toree_local_kernel error: {}'.format(str(err)))
+ traceback.print_exc()
+ sys.exit(1)
def add_breeze_library_local(os_user):
- if not exists(conn,'/home/' + os_user + '/.ensure_dir/breeze_local_ensured'):
+ if not exists(conn, '/home/' + os_user + '/.ensure_dir/breeze_local_ensured'):
try:
breeze_tmp_dir = '/tmp/breeze_tmp_local/'
jars_dir = '/opt/jars/'
@@ -837,156 +967,88 @@ def add_breeze_library_local(os_user):
{1}spark-kernel-brunel-all-{0}.jar'.format('2.3', breeze_tmp_dir))
conn.sudo('mv {0}* {1}'.format(breeze_tmp_dir, jars_dir))
conn.sudo('touch /home/' + os_user + '/.ensure_dir/breeze_local_ensured')
- except:
+ except Exception as err:
+ logging.error('Function add_breeze_library_local error: {}'.format(str(err)))
+ traceback.print_exc()
sys.exit(1)
-def configure_data_engine_service_pip(hostname, os_user, keyfile, emr=False):
- init_datalab_connection(hostname, os_user, keyfile)
- #datalab.common_lib.manage_pkg('-y install', 'remote', 'python3-pip')
- if not exists(conn,'/usr/bin/pip3') and conn.sudo("python3.9 -V 2>/dev/null | awk '{print $2}'").stdout:
- conn.sudo('ln -s /usr/bin/pip-3.9 /usr/bin/pip3')
- elif not exists(conn,'/usr/bin/pip3') and conn.sudo("python3.8 -V 2>/dev/null | awk '{print $2}'").stdout:
- conn.sudo('ln -s /usr/bin/pip-3.8 /usr/bin/pip3')
- elif not exists(conn,'/usr/bin/pip3') and conn.sudo("python3.7 -V 2>/dev/null | awk '{print $2}'").stdout:
- conn.sudo('ln -s /usr/bin/pip-3.7 /usr/bin/pip3')
- elif not exists(conn,'/usr/bin/pip3') and conn.sudo("python3.6 -V 2>/dev/null | awk '{print $2}'").stdout:
- conn.sudo('ln -s /usr/bin/pip-3.6 /usr/bin/pip3')
- elif not exists(conn,'/usr/bin/pip3') and conn.sudo("python3.5 -V 2>/dev/null | awk '{print $2}'").stdout:
- conn.sudo('ln -s /usr/bin/pip-3.5 /usr/bin/pip3')
- if emr:
- conn.sudo('pip3 install -U pip=={}'.format(os.environ['conf_pip_version']))
- conn.sudo('ln -s /usr/local/bin/pip3.7 /bin/pip3.7')
- conn.sudo('''bash -c -l 'echo "export PATH=$PATH:/usr/local/bin" >> /etc/profile' ''')
- conn.sudo('bash -c -l "source /etc/profile"')
- conn.run('bash -c -l "source /etc/profile"')
- conn.close()
-
-def configure_data_engine_service_livy(hostname, os_user, keyfile):
- init_datalab_connection(hostname, os_user, keyfile)
- if exists(conn,'/usr/local/lib/livy'):
- conn.sudo('rm -r /usr/local/lib/livy')
- conn.sudo('wget -P /tmp/ --user={} --password={} '
- '{}/repository/packages/livy.tar.gz --no-check-certificate'
- .format(os.environ['conf_repository_user'],
- os.environ['conf_repository_pass'], os.environ['conf_repository_address']))
- conn.sudo('tar -xzvf /tmp/livy.tar.gz -C /usr/local/lib/')
- conn.sudo('ln -s /usr/local/lib/incubator-livy /usr/local/lib/livy')
- conn.put('/root/templates/dataengine-service_livy-env.sh', '/usr/local/lib/livy/conf/livy-env.sh')
- conn.put('/root/templates/dataengine-service_livy.service', '/tmp/livy.service')
- conn.sudo("sed -i 's|OS_USER|{}|' /tmp/livy.service".format(os_user))
- conn.sudo('mv /tmp/livy.service /etc/systemd/system/livy.service')
- conn.sudo('systemctl daemon-reload')
- conn.sudo('systemctl enable livy.service')
- conn.sudo('systemctl start livy.service')
- conn.close()
-
-def remove_rstudio_dataengines_kernel(cluster_name, os_user):
- try:
- cluster_re = ['-{}"'.format(cluster_name),
- '-{}-'.format(cluster_name),
- '-{}/'.format(cluster_name)]
- conn.get('/home/{}/.Rprofile'.format(os_user), 'Rprofile')
- data = open('Rprofile').read()
- conf = filter(None, data.split('\n'))
- # Filter config from any math of cluster_name in line,
- # separated by defined symbols to avoid partly matches
- conf = [i for i in conf if not any(x in i for x in cluster_re)]
- comment_all = lambda x: x if x.startswith('#master') else '#{}'.format(x)
- uncomment = lambda x: x[1:] if not x.startswith('#master') else x
- conf =[comment_all(i) for i in conf]
- conf =[uncomment(i) for i in conf]
- last_spark = max([conf.index(i) for i in conf if 'master=' in i] or [0])
- active_cluster = conf[last_spark].split('"')[-2] if last_spark != 0 else None
- conf = conf[:last_spark] + [conf[l][1:] for l in range(last_spark, len(conf)) if conf[l].startswith("#")] \
- + [conf[l] for l in range(last_spark, len(conf)) if not conf[l].startswith('#')]
- with open('.Rprofile', 'w') as f:
- for line in conf:
- f.write('{}\n'.format(line))
- conn.put('.Rprofile', '/home/{}/.Rprofile'.format(os_user))
- conn.get('/home/{}/.Renviron'.format(os_user), 'Renviron')
- data = open('Renviron').read()
- conf = filter(None, data.split('\n'))
- comment_all = lambda x: x if x.startswith('#') else '#{}'.format(x)
- conf = [comment_all(i) for i in conf]
- # Filter config from any math of cluster_name in line,
- # separated by defined symbols to avoid partly matches
- conf = [i for i in conf if not any(x in i for x in cluster_re)]
- if active_cluster:
- activate_cluster = lambda x: x[1:] if active_cluster in x else x
- conf = [activate_cluster(i) for i in conf]
- else:
- last_spark = max([conf.index(i) for i in conf if 'SPARK_HOME' in i])
- conf = conf[:last_spark] + [conf[l][1:] for l in range(last_spark, len(conf)) if conf[l].startswith("#")]
- with open('.Renviron', 'w') as f:
- for line in conf:
- f.write('{}\n'.format(line))
- conn.put('.Renviron', '/home/{}/.Renviron'.format(os_user))
- if len(conf) == 1:
- conn.sudo('rm -f /home/{}/.ensure_dir/rstudio_dataengine_ensured'.format(os_user))
- conn.sudo('rm -f /home/{}/.ensure_dir/rstudio_dataengine-service_ensured'.format(os_user))
- conn.sudo('''R -e "source('/home/{}/.Rprofile')"'''.format(os_user))
- except:
- sys.exit(1)
+def remove_unexisting_kernel(os_user):
+ if not exists(conn, '/home/{}/.ensure_dir/unexisting_kernel_removed'.format(os_user)):
+ try:
+ conn.sudo('jupyter-kernelspec remove -f python3')
+ conn.sudo('touch /home/{}/.ensure_dir/unexisting_kernel_removed'.format(os_user))
+ except Exception as err:
+ logging.error('Function remove_unexisting_kernel error: {}'.format(str(err)))
+ traceback.print_exc()
+ sys.exit(1)
-def restart_zeppelin(creds=False, os_user='', hostname='', keyfile=''):
- if creds:
- init_datalab_connection(hostname, os_user, keyfile)
- conn.sudo("systemctl daemon-reload")
- conn.sudo("systemctl restart zeppelin-notebook")
- if creds:
- conn.close()
+# functions for jupyterlab notebook
+def ensure_jupyterlab_files(os_user, jupyterlab_dir, jupyterlab_image, jupyter_conf_file, jupyterlab_conf_file,
+ exploratory_name, edge_ip):
+ if not exists(conn, jupyterlab_dir):
+ try:
+ conn.sudo('mkdir {}'.format(jupyterlab_dir))
+ conn.put('/root/Dockerfile_jupyterlab', '/tmp/Dockerfile_jupyterlab')
+ conn.put('/root/scripts/jupyterlab_run.sh', '/tmp/jupyterlab_run.sh')
+ conn.put('/root/scripts/build.sh', '/tmp/build.sh')
+ conn.put('/root/scripts/start.sh', '/tmp/start.sh')
-def get_spark_memory(creds=False, os_user='', hostname='', keyfile=''):
- if creds:
- con = init_datalab_connection(hostname, os_user, keyfile)
- mem = con.sudo('free -m | grep Mem | tr -s " " ":" | cut -f 2 -d ":"').stdout.replace('\n', '')
- instance_memory = int(mem)
+ conn.sudo('mv /tmp/jupyterlab_run.sh {}jupyterlab_run.sh'.format(jupyterlab_dir))
+ conn.sudo('mv /tmp/Dockerfile_jupyterlab {}Dockerfile_jupyterlab'.format(jupyterlab_dir))
+ conn.sudo('mv /tmp/build.sh {}build.sh'.format(jupyterlab_dir))
+ conn.sudo('mv /tmp/start.sh {}start.sh'.format(jupyterlab_dir))
+ conn.sudo(
+ 'sed -i \'s/jupyterlab_image/{}/g\' {}Dockerfile_jupyterlab'.format(jupyterlab_image, jupyterlab_dir))
+ conn.sudo('sed -i \'s/nb_user/{}/g\' {}start.sh'.format(os_user, jupyterlab_dir))
+ conn.sudo('sed -i \'s/CONF_PATH/{}/g\' {}jupyterlab_run.sh'.format(jupyterlab_conf_file, jupyterlab_dir))
+ conn.sudo('touch {}'.format(jupyter_conf_file))
+ conn.sudo('''bash -l -c "echo 'c.NotebookApp.ip = \\"0.0.0.0\\" ' >> {}" '''.format(jupyter_conf_file))
+ conn.sudo('''bash -l -c "echo 'c.NotebookApp.base_url = \\"/{0}/\\"' >> {1}" '''.format(exploratory_name,
+ jupyter_conf_file))
+ conn.sudo('''bash -l -c 'echo "c.NotebookApp.open_browser = False" >> {}' '''.format(jupyter_conf_file))
+ conn.sudo('''bash -l -c "echo 'c.NotebookApp.cookie_secret = b\\"{0}\\"' >> {1}" '''.format(id_generator(),
+ jupyter_conf_file))
+ conn.sudo('''bash -l -c "echo \\"c.NotebookApp.token = u''\\" >> {}" '''.format(jupyter_conf_file))
+ conn.sudo('''bash -l -c 'echo "c.KernelSpecManager.ensure_native_kernel = False" >> {}' '''.format(
+ jupyter_conf_file))
+ conn.sudo('chown datalab-user:datalab-user /opt')
+ conn.sudo(
+ '''bash -l -c 'echo -e "Host git.epam.com\n HostName git.epam.com\n ProxyCommand nc -X connect -x {}:3128 %h %p\n" > /home/{}/.ssh/config' '''.format(
+ edge_ip, os_user))
+ conn.sudo(
+ '''bash -l -c 'echo -e "Host github.com\n HostName github.com\n ProxyCommand nc -X connect -x {}:3128 %h %p" >> /home/{}/.ssh/config' '''.format(
+ edge_ip, os_user))
+ except Exception as err:
+ logging.error('Function ensure_jupyterlab_files error: {}'.format(str(err)))
+ traceback.print_exc()
+ sys.exit(1)
else:
- mem = conn.sudo('free -m | grep Mem | tr -s " " ":" | cut -f 2 -d ":"').stdout.replace('\n','')
- instance_memory = int(mem)
- spark_memory = round(instance_memory*90/100)
- return spark_memory
-
-
-def replace_multi_symbols(string, symbol, symbol_cut=False):
- try:
- symbol_amount = 0
- for i in range(len(string)):
- if string[i] == symbol:
- symbol_amount = symbol_amount + 1
- while symbol_amount > 1:
- string = string.replace(symbol + symbol, symbol)
- symbol_amount = symbol_amount - 1
- if symbol_cut and string[-1] == symbol:
- string = string[:-1]
- return string
- except Exception as err:
- logging.info("Error with replacing multi symbols: " + str(err) + "\n Traceback: " + traceback.print_exc(
- file=sys.stdout))
- append_result(str({"error": "Error with replacing multi symbols",
- "error_message": str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout)}))
- traceback.print_exc(file=sys.stdout)
-
-
-def update_pyopenssl_lib(os_user):
- if not exists(conn,'/home/{}/.ensure_dir/pyopenssl_updated'.format(os_user)):
try:
- if exists(conn, '/usr/bin/pip3'):
- conn.sudo('pip3 install -U pyopenssl')
- conn.sudo('touch /home/{}/.ensure_dir/pyopenssl_updated'.format(os_user))
- except:
+ conn.sudo(
+ 'sed -i "s/c.NotebookApp.base_url =.*/c.NotebookApp.base_url = \'\/{0}\/\'/" {1}'.format(
+ exploratory_name, jupyter_conf_file))
+ except Exception as err:
+ logging.error('Function ensure_jupyterlab_files error: {}'.format(str(err)))
+ traceback.print_exc()
sys.exit(1)
-def find_cluster_kernels():
+# functions for zeppelin notebook/dataengine
+def install_r_packages(os_user):
try:
- de = [i for i in conn.sudo('''bash -l -c 'find /opt/ -maxdepth 1 -name "*-de-*" -type d | rev | cut -f 1 -d "/" | rev | xargs -r' ''').stdout.replace('\n', '').split(' ') if i != '']
- des = [i for i in conn.sudo('''bash -l -c 'find /opt/ -maxdepth 2 -name "*-des-*" -type d | rev | cut -f 1,2 -d "/" | rev | xargs -r' ''').stdout.replace('\n', '').split(' ') if i != '']
- return (de, des)
+ if not exists(conn, '/home/' + os_user + '/.ensure_dir/r_packages_ensured'):
+ conn.sudo('R -e "install.packages(\'devtools\', repos = \'https://cloud.r-project.org\')"')
+ conn.sudo('R -e "install.packages(\'knitr\', repos = \'https://cloud.r-project.org\')"')
+ conn.sudo('R -e "install.packages(\'ggplot2\', repos = \'https://cloud.r-project.org\')"')
+ conn.sudo('R -e "install.packages(c(\'devtools\',\'mplot\', \'googleVis\'), '
+ 'repos = \'https://cloud.r-project.org\'); require(devtools); install_github(\'ramnathv/rCharts\')"')
+ conn.sudo('R -e \'install.packages("versions", repos="https://cloud.r-project.org", dep=TRUE)\'')
+ conn.sudo('touch /home/' + os_user + '/.ensure_dir/r_packages_ensured')
except Exception as err:
- print('Failed to find cluster kernels.', str(err))
+ logging.error('Function install_r_packages error: {}'.format(str(err)))
+ traceback.print_exc()
sys.exit(1)
@@ -1004,7 +1066,7 @@ def update_zeppelin_interpreters(multiple_clusters, r_enabled, interpreter_mode=
if r_enabled:
groups.append({"class": "org.apache.zeppelin.livy.LivySparkRInterpreter", "name": "sparkr"})
else:
- groups = [{"class": "org.apache.zeppelin.spark.SparkInterpreter","name": "spark"},
+ groups = [{"class": "org.apache.zeppelin.spark.SparkInterpreter", "name": "spark"},
{"class": "org.apache.zeppelin.spark.PySparkInterpreter", "name": "pyspark"},
{"class": "org.apache.zeppelin.spark.SparkSqlInterpreter", "name": "sql"}]
if r_enabled:
@@ -1035,43 +1097,26 @@ def update_zeppelin_interpreters(multiple_clusters, r_enabled, interpreter_mode=
f.write(json.dumps(data, indent=2))
subprocess.run('sudo systemctl restart zeppelin-notebook', shell=True, check=True)
except Exception as err:
- print('Failed to update Zeppelin interpreters', str(err))
- sys.exit(1)
-
-
-def update_hosts_file(os_user):
- try:
- if not exists(conn,'/home/{}/.ensure_dir/hosts_file_updated'.format(os_user)):
- conn.sudo('sed -i "s/^127.0.0.1 localhost/127.0.0.1 localhost localhost.localdomain/g" /etc/hosts')
- conn.sudo('touch /home/{}/.ensure_dir/hosts_file_updated'.format(os_user))
- except Exception as err:
+ logging.error('Function update_zeppelin_interpreters error: {}'.format(str(err)))
traceback.print_exc()
- print('Failed to update hosts file', str(err))
sys.exit(1)
-def ensure_docker_compose(os_user):
- try:
- configure_docker(os_user)
- if not exists(conn,'/home/{}/.ensure_dir/docker_compose_ensured'.format(os_user)):
- docker_compose_version = "1.24.1"
- conn.sudo('curl -L https://github.com/docker/compose/releases/download/{}/docker-compose-`uname -s`-`uname -m` -o /usr/local/bin/docker-compose'.format(docker_compose_version))
- conn.sudo('chmod +x /usr/local/bin/docker-compose')
- conn.sudo('touch /home/{}/.ensure_dir/docker_compose_ensured'.format(os_user))
- conn.sudo('systemctl daemon-reload')
- conn.sudo('systemctl restart docker')
- return True
- except:
- return False
-def configure_superset(os_user, keycloak_auth_server_url, keycloak_realm_name, keycloak_client_id, keycloak_client_secret, edge_instance_private_ip, edge_instance_public_ip, superset_name):
- print('Superset configuring')
+# functions for superset notebook (rewrite required)
+def configure_superset(os_user, keycloak_auth_server_url, keycloak_realm_name, keycloak_client_id,
+ keycloak_client_secret, edge_instance_private_ip, edge_instance_public_ip, superset_name):
+ logging.info('Superset configuring')
try:
- if not exists(conn,'/home/{}/incubator-superset'.format(os_user)):
- conn.sudo('''bash -c 'cd /home/{} && wget https://github.com/apache/incubator-superset/archive/{}.tar.gz' '''.format(
+ if not exists(conn, '/home/{}/incubator-superset'.format(os_user)):
+ conn.sudo(
+ '''bash -c 'cd /home/{} && wget https://github.com/apache/incubator-superset/archive/{}.tar.gz' '''.format(
os_user, os.environ['notebook_superset_version']))
- conn.sudo('''bash -c 'cd /home/{} && tar -xzf {}.tar.gz' '''.format(os_user, os.environ['notebook_superset_version']))
- conn.sudo('''bash -c 'cd /home/{} && ln -sf incubator-superset-{} incubator-superset' '''.format(os_user, os.environ['notebook_superset_version']))
- if not exists(conn,'/tmp/superset-notebook_installed'):
+ conn.sudo('''bash -c 'cd /home/{} && tar -xzf {}.tar.gz' '''.format(os_user, os.environ[
+ 'notebook_superset_version']))
+ conn.sudo('''bash -c 'cd /home/{} && ln -sf incubator-superset-{} incubator-superset' '''.format(os_user,
+ os.environ[
+ 'notebook_superset_version']))
+ if not exists(conn, '/tmp/superset-notebook_installed'):
conn.sudo('mkdir -p /opt/datalab/templates')
conn.local('cd /root/templates; tar -zcvf /tmp/templates.tar.gz *')
conn.put('/tmp/templates.tar.gz', '/tmp/templates.tar.gz')
@@ -1090,64 +1135,81 @@ def configure_superset(os_user, keycloak_auth_server_url, keycloak_realm_name, k
keycloak_auth_server_url))
conn.sudo('sed -i \'s/KEYCLOAK_REALM_NAME/{}/g\' /opt/datalab/templates/superset_config.py'.format(
keycloak_realm_name))
- conn.sudo('sed -i \'s/EDGE_IP/{}/g\' /opt/datalab/templates/superset_config.py'.format(edge_instance_public_ip))
+ conn.sudo(
+ 'sed -i \'s/EDGE_IP/{}/g\' /opt/datalab/templates/superset_config.py'.format(edge_instance_public_ip))
conn.sudo('sed -i \'s/SUPERSET_NAME/{}/g\' /opt/datalab/templates/superset_config.py'.format(superset_name))
conn.sudo('cp -f /opt/datalab/templates/.env /home/{}/incubator-superset/contrib/docker/'.format(os_user))
- conn.sudo('cp -f /opt/datalab/templates/docker-compose.yml /home/{}/incubator-superset/contrib/docker/'.format(
- os_user))
- conn.sudo('cp -f /opt/datalab/templates/id_provider.json /home/{}/incubator-superset/contrib/docker/'.format(
- os_user))
+ conn.sudo(
+ 'cp -f /opt/datalab/templates/docker-compose.yml /home/{}/incubator-superset/contrib/docker/'.format(
+ os_user))
+ conn.sudo(
+ 'cp -f /opt/datalab/templates/id_provider.json /home/{}/incubator-superset/contrib/docker/'.format(
+ os_user))
conn.sudo(
'cp -f /opt/datalab/templates/requirements-extra.txt /home/{}/incubator-superset/contrib/docker/'.format(
os_user))
- conn.sudo('cp -f /opt/datalab/templates/superset_config.py /home/{}/incubator-superset/contrib/docker/'.format(
- os_user))
+ conn.sudo(
+ 'cp -f /opt/datalab/templates/superset_config.py /home/{}/incubator-superset/contrib/docker/'.format(
+ os_user))
conn.sudo('cp -f /opt/datalab/templates/docker-init.sh /home/{}/incubator-superset/contrib/docker/'.format(
os_user))
conn.sudo('touch /tmp/superset-notebook_installed')
except Exception as err:
- print("Failed configure superset: " + str(err))
+ logging.error('Function configure_superset error: {}'.format(str(err)))
+ traceback.print_exc()
sys.exit(1)
-def manage_npm_pkg(command):
+
+# function should be moved to debian lib and checked whether it is needed
+def configure_docker(os_user):
try:
- npm_count = 0
- installed = False
- npm_registry = ['https://registry.npmjs.org/', 'https://registry.npmjs.com/']
- while not installed:
- if npm_count > 60:
- print("NPM registry is not available, please try later")
- sys.exit(1)
- else:
- try:
- if npm_count % 2 == 0:
- conn.sudo('npm config set registry {}'.format(npm_registry[0]))
- else:
- conn.sudo('npm config set registry {}'.format(npm_registry[1]))
- conn.sudo('{}'.format(command))
- installed = True
- except:
- npm_count += 1
- time.sleep(50)
- except:
+ if not exists(conn, '/home/' + os_user + '/.ensure_dir/docker_ensured'):
+ docker_version = os.environ['ssn_docker_version']
+ conn.sudo('curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -')
+ conn.sudo('add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) \
+ stable"')
+ # datalab.common_lib.manage_pkg('update', 'remote', '')
+ conn.sudo('apt-get update')
+ conn.sudo('apt-cache policy docker-ce')
+ # datalab.common_lib.manage_pkg('-y install', 'remote', 'docker-ce=5:{}~3-0~ubuntu-focal'.format(docker_version))
+ conn.sudo('apt-get install -y docker-ce=5:{}~3-0~ubuntu-focal'.format(docker_version))
+ conn.sudo('touch /home/{}/.ensure_dir/docker_ensured'.format(os_user))
+ except Exception as err:
+ logging.error('Failed to configure Docker: {}'.format(str(err)))
sys.exit(1)
-def init_datalab_connection(hostname, username, keyfile):
+def ensure_docker_compose(os_user):
try:
- global conn
- attempt = 0
- while attempt < 15:
- print('connection attempt {}'.format(attempt))
- conn = Connection(host = hostname, user = username, connect_kwargs={'banner_timeout': 200,
- 'key_filename': keyfile})
- conn.config.run.echo = True
- try:
- conn.run('ls')
- conn.config.run.echo = True
- return conn
- except:
- attempt += 1
- time.sleep(10)
+ configure_docker(os_user)
+ if not exists(conn, '/home/{}/.ensure_dir/docker_compose_ensured'.format(os_user)):
+ docker_compose_version = "1.24.1"
+ conn.sudo(
+ 'curl -L https://github.com/docker/compose/releases/download/{}/docker-compose-`uname -s`-`uname -m` -o /usr/local/bin/docker-compose'.format(
+ docker_compose_version))
+ conn.sudo('chmod +x /usr/local/bin/docker-compose')
+ conn.sudo('touch /home/{}/.ensure_dir/docker_compose_ensured'.format(os_user))
+ conn.sudo('systemctl daemon-reload')
+ conn.sudo('systemctl restart docker')
+ return True
except:
- traceback.print_exc()
- sys.exit(1)
+ return False
+
+
+
+# function is only used for azure; check whether it is needed
+def set_git_proxy(os_user, hostname, keyfile, proxy_host):
+ init_datalab_connection(hostname, os_user, keyfile)
+ conn.run('git config --global http.proxy {}'.format(proxy_host))
+ conn.run('git config --global https.proxy {}'.format(proxy_host))
+ conn.close()
+
+
+# function should be checked whether it is needed
+def update_pyopenssl_lib(os_user):
+ if not exists(conn, '/home/{}/.ensure_dir/pyopenssl_updated'.format(os_user)):
+ try:
+ if exists(conn, '/usr/bin/pip3'):
+ conn.sudo('pip3 install -U pyopenssl')
+ conn.sudo('touch /home/{}/.ensure_dir/pyopenssl_updated'.format(os_user))
+ except:
+ sys.exit(1)
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/general/scripts/aws/common_create_instance.py b/infrastructure-provisioning/src/general/scripts/aws/common_create_instance.py
index a626c03..5ca2f1a 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/common_create_instance.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/common_create_instance.py
@@ -26,6 +26,7 @@ import json
import sys
from datalab.actions_lib import *
from datalab.meta_lib import *
+from datalab.logger import logging
parser = argparse.ArgumentParser()
parser.add_argument('--node_name', type=str, default='')
@@ -50,16 +51,16 @@ if __name__ == "__main__":
try:
instance_id = get_instance_by_name(args.infra_tag_name, args.node_name)
if instance_id == '':
- print("Creating instance {0} of type {1} in subnet {2} with tag {3}.".
+ logging.info("Creating instance {0} of type {1} in subnet {2} with tag {3}.".
format(args.node_name, args.instance_type, args.subnet_id, json.dumps(instance_tag)))
instance_id = create_instance(args, instance_tag, args.primary_disk_size)
else:
- print("REQUESTED INSTANCE ALREADY EXISTS AND RUNNING")
- print("Instance_id {}".format(instance_id))
- print("Public_hostname {}".format(get_instance_attr(instance_id, 'public_dns_name')))
- print("Private_hostname {}".format(get_instance_attr(instance_id, 'private_dns_name')))
+ logging.info("REQUESTED INSTANCE ALREADY EXISTS AND RUNNING")
+ logging.info("Instance_id {}".format(instance_id))
+ logging.info("Public_hostname {}".format(get_instance_attr(instance_id, 'public_dns_name')))
+ logging.info("Private_hostname {}".format(get_instance_attr(instance_id, 'private_dns_name')))
except Exception as err:
- print('Error: {0}'.format(err))
+ logging.error('Error: {0}'.format(err))
sys.exit(1)
else:
parser.print_help()
diff --git a/infrastructure-provisioning/src/general/scripts/aws/common_create_role_policy.py b/infrastructure-provisioning/src/general/scripts/aws/common_create_role_policy.py
index 2794e9c..c8135a2 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/common_create_role_policy.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/common_create_role_policy.py
@@ -25,6 +25,7 @@ import argparse
import sys
from datalab.actions_lib import *
from datalab.meta_lib import *
+from datalab.logger import logging
parser = argparse.ArgumentParser()
parser.add_argument('--role_name', type=str, default='')
@@ -46,26 +47,26 @@ if __name__ == "__main__":
if role_name == '':
tag = {"Key": args.infra_tag_name, "Value": args.infra_tag_value}
user_tag = {"Key": "user:tag", "Value": args.user_tag_value}
- print("Creating role {0}, profile name {1}".format(args.role_name, args.role_profile_name))
+ logging.info("Creating role {0}, profile name {1}".format(args.role_name, args.role_profile_name))
create_iam_role(args.role_name, args.role_profile_name, args.region, tag=tag, user_tag=user_tag)
else:
- print("ROLE AND ROLE PROFILE ARE ALREADY CREATED")
- print("ROLE {} created. IAM group {} created".format(args.role_name, args.role_profile_name))
+ logging.info("ROLE AND ROLE PROFILE ARE ALREADY CREATED")
+ logging.info("ROLE {} created. IAM group {} created".format(args.role_name, args.role_profile_name))
- print("ATTACHING POLICIES TO ROLE")
+ logging.info("ATTACHING POLICIES TO ROLE")
if args.policy_file_name != '':
create_attach_policy(args.policy_name, args.role_name, args.policy_file_name)
else:
if args.policy_arn == '':
- print("POLICY ARN is empty, there is nothing to attach.")
+ logging.info("POLICY ARN is empty, there is nothing to attach.")
success = True
else:
policy_arn_bits = eval(args.policy_arn)
for bit in policy_arn_bits:
attach_policy(args.role_name, bit)
- print("POLICY {} created".format(args.policy_name))
+ logging.info("POLICY {} created".format(args.policy_name))
except Exception as err:
- print('Error: {0}'.format(err))
+ logging.error('Error: {0}'.format(err))
else:
parser.print_help()
sys.exit(2)
diff --git a/infrastructure-provisioning/src/general/scripts/aws/common_create_security_group.py b/infrastructure-provisioning/src/general/scripts/aws/common_create_security_group.py
index b15a1b9..7dd1808 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/common_create_security_group.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/common_create_security_group.py
@@ -26,6 +26,7 @@ import json
import sys
from datalab.actions_lib import *
from datalab.meta_lib import *
+from datalab.logger import logging
parser = argparse.ArgumentParser()
parser.add_argument('--name', type=str, default='')
@@ -46,7 +47,7 @@ if __name__ == "__main__":
rules = json.loads(args.security_group_rules)
egress = json.loads(args.egress)
except Exception as err:
- print('Error: {0}'.format(err))
+ logging.error('Error: {0}'.format(err))
sys.exit(1)
tag = {"Key": args.infra_tag_name, "Value": args.infra_tag_value}
nb_sg_id = get_security_group_by_name(args.nb_sg_name + '-sg')
@@ -54,29 +55,26 @@ if __name__ == "__main__":
try:
security_group_id = get_security_group_by_name(args.name)
if security_group_id == '':
- print("Creating security group {0} for vpc {1} with tag {2}.".format(args.name, args.vpc_id,
+ logging.info("Creating security group {0} for vpc {1} with tag {2}.".format(args.name, args.vpc_id,
json.dumps(tag)))
security_group_id = create_security_group(args.name, args.vpc_id, rules, egress, tag)
if nb_sg_id != '' and args.resource == 'edge':
- print("Updating Notebook security group {}".format(nb_sg_id))
+ logging.info("Updating Notebook security group {}".format(nb_sg_id))
rule = {'IpProtocol': '-1', 'FromPort': -1, 'ToPort': -1,
'UserIdGroupPairs': [{'GroupId': security_group_id}]}
add_inbound_sg_rule(nb_sg_id, rule)
add_outbound_sg_rule(nb_sg_id, rule)
else:
if nb_sg_id != '' and args.resource == 'edge':
- print("Updating Notebook security group {}".format(nb_sg_id))
+ logging.info("Updating Notebook security group {}".format(nb_sg_id))
rule = {'IpProtocol': '-1', 'FromPort': -1, 'ToPort': -1,
'UserIdGroupPairs': [{'GroupId': security_group_id}]}
add_inbound_sg_rule(nb_sg_id, rule)
add_outbound_sg_rule(nb_sg_id, rule)
- print("REQUESTED SECURITY GROUP WITH NAME {} ALREADY EXISTS".format(args.name))
- print("SECURITY_GROUP_ID: {}".format(security_group_id))
- if args.ssn:
- with open('/tmp/ssn_sg_id', 'w') as f:
- f.write(security_group_id)
+ logging.info("REQUESTED SECURITY GROUP WITH NAME {} ALREADY EXISTS".format(args.name))
+ logging.info("SECURITY_GROUP_ID: {}".format(security_group_id))
except Exception as err:
- print('Error: {0}'.format(err))
+ logging.error('Error: {0}'.format(err))
else:
parser.print_help()
sys.exit(2)
diff --git a/infrastructure-provisioning/src/general/scripts/aws/common_create_subnet.py b/infrastructure-provisioning/src/general/scripts/aws/common_create_subnet.py
index 80cffb8..739b97c 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/common_create_subnet.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/common_create_subnet.py
@@ -29,6 +29,7 @@ import sys
from botocore import exceptions
from datalab.actions_lib import *
from datalab.meta_lib import *
+from datalab.logger import logging
parser = argparse.ArgumentParser()
parser.add_argument('--vpc_id', type=str, default='')
@@ -46,6 +47,8 @@ args = parser.parse_args()
if __name__ == "__main__":
tag = {"Key": args.infra_tag_name, "Value": args.subnet_name}
tag_name = {"Key": "Name", "Value": args.subnet_name}
+
+ #defining subnet cidr
try:
if args.user_subnets_range == '' or args.ssn:
ec2 = boto3.resource('ec2')
@@ -64,7 +67,6 @@ if __name__ == "__main__":
int(addr.split("/")[0].split(".")[3]),
int(addr.split("/")[1]))
sorted_subnets_cidr = sorted(subnets_cidr, key=sortkey)
-
last_ip = first_vpc_ip
previous_subnet_size = private_subnet_size
for cidr in sorted_subnets_cidr:
@@ -75,7 +77,6 @@ if __name__ == "__main__":
previous_subnet_size = subnet_size
else:
break
-
datalab_subnet_cidr = ''
if previous_subnet_size < private_subnet_size:
while True:
@@ -107,31 +108,37 @@ if __name__ == "__main__":
existed_subnet_list.append(i.get('CidrBlock'))
available_subnets = list(set(pre_defined_subnet_list) - set(existed_subnet_list))
if not available_subnets:
- print("There is no available subnet to create. Aborting...")
+ logging.error("There is no available subnet to create. Aborting...")
sys.exit(1)
else:
datalab_subnet_cidr = available_subnets[0]
+
+ #checking existing subnets
if args.ssn:
subnet_id = get_subnet_by_cidr(datalab_subnet_cidr, args.vpc_id)
subnet_check = get_subnet_by_tag(tag, False, args.vpc_id)
else:
subnet_id = get_subnet_by_cidr(datalab_subnet_cidr, args.vpc_id)
subnet_check = get_subnet_by_tag(tag, args.vpc_id)
+
+ #creating subnet
if not subnet_check:
if subnet_id == '':
- print("Creating subnet {0} in vpc {1} with tag {2}".
+ logging.info("Creating subnet {0} in vpc {1} with tag {2}".
format(datalab_subnet_cidr, args.vpc_id, json.dumps(tag)))
subnet_id = create_subnet(args.vpc_id, datalab_subnet_cidr, tag, args.zone)
create_tag(subnet_id, tag_name)
else:
- print("REQUESTED SUBNET ALREADY EXISTS. USING CIDR {}".format(subnet_check))
+ logging.info("REQUESTED SUBNET ALREADY EXISTS. USING CIDR {}".format(subnet_check))
subnet_id = get_subnet_by_cidr(subnet_check)
- print("SUBNET_ID: {}".format(subnet_id))
+ logging.info("SUBNET_ID: {}".format(subnet_id))
+
+ #associating subnet with route table
if not args.ssn:
if os.environ['edge_is_nat'] == 'true':
- print('Subnet will be associted with route table for NAT')
+ logging.info('Subnet will be associated with route table for NAT')
else:
- print("Associating route_table with the subnet")
+ logging.info("Associating route_table with the subnet")
ec2 = boto3.resource('ec2')
if os.environ['conf_duo_vpc_enable'] == 'true':
rt = get_route_table_by_tag(args.infra_tag_value + '-secondary-tag', args.infra_tag_value)
@@ -144,15 +151,13 @@ if __name__ == "__main__":
create_peer_routes(os.environ['aws_peering_id'], args.infra_tag_value)
except exceptions.ClientError as err:
if 'Resource.AlreadyAssociated' in str(err):
- print('Other route table is already associted with this subnet. Skipping...')
+ logging.info('Other route table is already associated with this subnet. Skipping...')
else:
- print("Associating route_table with the subnet")
+ logging.info("Associating route_table with the subnet")
ec2 = boto3.resource('ec2')
rt = get_route_table_by_tag(args.infra_tag_name, args.infra_tag_value)
route_table = ec2.RouteTable(rt)
route_table.associate_with_subnet(SubnetId=subnet_id)
- with open('/tmp/ssn_subnet_id', 'w') as f:
- f.write(subnet_id)
except Exception as err:
- print('Error: {0}'.format(err))
+ logging.error('Error: {0}'.format(err))
sys.exit(1)
diff --git a/infrastructure-provisioning/src/general/scripts/aws/ssn_associate_elastic_ip.py b/infrastructure-provisioning/src/general/scripts/aws/ssn_associate_elastic_ip.py
index 6615744..6d66440 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/ssn_associate_elastic_ip.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/ssn_associate_elastic_ip.py
@@ -27,6 +27,7 @@ import sys
from datalab.actions_lib import *
from datalab.fab import *
from datalab.meta_lib import *
+from datalab.logger import logging
parser = argparse.ArgumentParser()
parser.add_argument('--elastic_ip', type=str, default='')
@@ -36,14 +37,9 @@ parser.add_argument('--infra_tag_value', type=str, default='')
args = parser.parse_args()
if __name__ == "__main__":
- local_log_filename = "{}_{}.log".format(os.environ['conf_resource'], os.environ['request_id'])
- local_log_filepath = "/logs/" + os.environ['conf_resource'] + "/" + local_log_filename
- logging.basicConfig(format='%(levelname)-8s [%(asctime)s] %(message)s',
- level=logging.DEBUG,
- filename=local_log_filepath)
try:
if args.elastic_ip == 'None':
- print("Allocating Elastic IP")
+ logging.info("Allocating Elastic IP")
allocation_id = allocate_elastic_ip()
tag = {"Key": args.infra_tag_name, "Value": args.infra_tag_value}
tag_name = {"Key": "Name", "Value": args.infra_tag_value}
@@ -52,8 +48,8 @@ if __name__ == "__main__":
else:
allocation_id = get_allocation_id_by_elastic_ip(args.elastic_ip)
- print("Associating Elastic IP to SSN")
+ logging.info("Associating Elastic IP to SSN")
associate_elastic_ip(args.ssn_id, allocation_id)
except Exception as err:
- print('Error: {0}'.format(err))
+ logging.error('Error: {0}'.format(err))
sys.exit(1)
diff --git a/infrastructure-provisioning/src/general/scripts/aws/ssn_configure.py b/infrastructure-provisioning/src/general/scripts/aws/ssn_configure.py
index a8e7ce1..6a22967 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/ssn_configure.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/ssn_configure.py
@@ -602,7 +602,7 @@ if __name__ == "__main__":
f.write(json.dumps(res))
logging.info('Upload response file')
- local_log_filepath = "/logs/{}/{}_{}.log".format(+ os.environ['conf_resource'], os.environ['conf_resource'],
+ local_log_filepath = "/logs/{}/{}_{}.log".format(os.environ['conf_resource'], os.environ['conf_resource'],
os.environ['request_id'])
params = "--instance_name {} --local_log_filepath {} --os_user {} --instance_hostname {}". \
format(ssn_conf['instance_name'], local_log_filepath, ssn_conf['datalab_ssh_user'],
diff --git a/infrastructure-provisioning/src/general/scripts/aws/ssn_create_endpoint.py b/infrastructure-provisioning/src/general/scripts/aws/ssn_create_endpoint.py
index 9a04ac0..d8474df 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/ssn_create_endpoint.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/ssn_create_endpoint.py
@@ -29,6 +29,7 @@ import time
from datalab.actions_lib import *
from datalab.meta_lib import *
from datalab.ssn_lib import *
+from datalab.logger import logging
parser = argparse.ArgumentParser()
parser.add_argument('--vpc_id', type=str, default='')
@@ -41,12 +42,13 @@ if __name__ == "__main__":
tag = {"Key": args.infra_tag_name, "Value": args.infra_tag_value}
waiter = time.sleep(10)
if args.vpc_id:
- print("Creating Endpoint in vpc {}, region {} with tag {}.".format(args.vpc_id, args.region, json.dumps(tag)))
+ logging.info("Creating Endpoint in vpc {}, region {} with tag {}."
+ .format(args.vpc_id, args.region, json.dumps(tag)))
ec2 = boto3.client('ec2')
route_table = []
endpoint = ''
service_name = 'com.amazonaws.{}.s3'.format(args.region)
- print('Vars are: {}, {}, {}'.format(args.vpc_id, service_name, json.dumps(tag)))
+ logging.info('Vars are: {}, {}, {}'.format(args.vpc_id, service_name, json.dumps(tag)))
try:
route_table = get_route_tables(args.vpc_id, json.dumps(tag))
if not route_table:
@@ -59,12 +61,12 @@ if __name__ == "__main__":
waiter
else:
break
- print('Created Route-Table with ID: {}'.format(route_table))
+ logging.info('Created Route-Table with ID: {}'.format(route_table))
create_tag(route_table, json.dumps(tag))
create_tag(route_table, json.dumps({"Key": "Name", "Value": "{}-rt".format(args.infra_tag_value)}))
endpoints = get_vpc_endpoints(args.vpc_id)
if not endpoints:
- print('Creating EP')
+ logging.info('Creating EP')
endpoint = ec2.create_vpc_endpoint(
VpcId=args.vpc_id,
ServiceName=service_name,
@@ -72,7 +74,8 @@ if __name__ == "__main__":
)
endpoint = endpoint['VpcEndpoint']['VpcEndpointId']
else:
- print('For current VPC {} endpoint already exists. ID: {}. Route table list will be modified'.format(args.vpc_id, endpoints[0].get('VpcEndpointId')))
+ logging.info('For current VPC {} endpoint already exists. ID: {}. Route table list will be modified'
+ .format(args.vpc_id, endpoints[0].get('VpcEndpointId')))
endpoint_id = endpoints[0].get('VpcEndpointId')
result = ec2.modify_vpc_endpoint(
VpcEndpointId=endpoint_id,
@@ -80,10 +83,10 @@ if __name__ == "__main__":
)
if result:
endpoint = endpoint_id
- print("ENDPOINT: {}".format(endpoint))
+ logging.info("ENDPOINT: {}".format(endpoint))
except botocore.exceptions.ClientError as err:
- print(err.response['Error']['Message'])
- print('Failed to create endpoint. Removing RT')
+ logging.error(err.response['Error']['Message'])
+ logging.info('Failed to create endpoint. Removing RT')
ec2.delete_route_table(
RouteTableId=route_table[0]
)
diff --git a/infrastructure-provisioning/src/general/scripts/aws/ssn_create_vpc.py b/infrastructure-provisioning/src/general/scripts/aws/ssn_create_vpc.py
index 9d6e77b..9064a27 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/ssn_create_vpc.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/ssn_create_vpc.py
@@ -56,7 +56,7 @@ if __name__ == "__main__":
rt_id = create_rt(vpc_id, args.infra_tag_name, args.infra_tag_value, args.secondary)
create_tag(rt_id, rt_tag_name)
else:
- logging.info("REQUESTED {}VPC ALREADY EXISTS".format(sec_str))
+ logging.info("REQUESTED {} VPC ALREADY EXISTS".format(sec_str))
logging.info("{0}VPC_ID: {1}".format(sec_str, vpc_id))
args.vpc_id = vpc_id
except Exception as err:
diff --git a/infrastructure-provisioning/src/general/scripts/aws/ssn_terminate_aws_resources.py b/infrastructure-provisioning/src/general/scripts/aws/ssn_terminate_aws_resources.py
index a3780bd..a3fbb38 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/ssn_terminate_aws_resources.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/ssn_terminate_aws_resources.py
@@ -25,6 +25,7 @@ import argparse
import boto3
import logging
import datalab.ssn_lib
+from datalab.logger import logging
import os
import sys
@@ -43,13 +44,6 @@ tag2 = args.service_base_name + '-secondary-tag'
##############
if __name__ == "__main__":
- # configuring logs
- local_log_filename = "{}_{}.log".format(os.environ['conf_resource'], os.environ['request_id'])
- local_log_filepath = "/logs/" + os.environ['conf_resource'] + "/" + local_log_filename
- logging.basicConfig(format='%(levelname)-8s [%(asctime)s] %(message)s',
- level=logging.DEBUG,
- handlers=[logging.StreamHandler(), logging.FileHandler(local_log_filepath)])
-
logging.info('Terminating EMR cluster')
try:
clusters_list = datalab.meta_lib.get_emr_list(args.tag_name)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org