Posted to commits@datalab.apache.org by lf...@apache.org on 2021/02/05 15:43:40 UTC
[incubator-datalab] 07/08: [DATALAB-2091]: partially replaced local with subprocess.run(
This is an automated email from the ASF dual-hosted git repository.
lfrolov pushed a commit to branch DATALAB-2091-fab2
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git
commit 722045aaee23354cc77c7fc49c10fce5c16a0ccf
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Fri Feb 5 17:26:00 2021 +0200
[DATALAB-2091]: partially replaced local with subprocess.run(
---
.../src/general/lib/azure/actions_lib.py | 73 +++++----
.../src/general/lib/gcp/actions_lib.py | 181 +++++++++++----------
.../src/general/scripts/aws/dataengine_start.py | 3 +-
.../src/general/scripts/aws/tensor_configure.py | 17 +-
.../general/scripts/azure/common_start_notebook.py | 4 +-
.../src/general/scripts/azure/edge_configure.py | 13 +-
.../src/general/scripts/azure/project_prepare.py | 25 +--
.../jupyter_install_dataengine-service_kernels.py | 2 +-
.../general/scripts/gcp/jupyterlab_configure.py | 21 +--
.../src/general/scripts/gcp/zeppelin_configure.py | 17 +-
.../examples/scenario_zeppelin/zeppelin_tests.py | 12 +-
11 files changed, 188 insertions(+), 180 deletions(-)
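A note on the pattern before the diffs: Fabric 1.x's local(cmd) ran cmd through a shell and aborted on a non-zero exit; subprocess.run(cmd, shell=True) is the closest standard-library drop-in, but its defaults differ in two ways that matter for the call sites below. A minimal sketch (the cp command is illustrative):

    import subprocess

    # Fabric 1.x (removed by this commit) ran through a shell and aborted on failure:
    #   local('cp -f /opt/spark/conf/core-site.xml /opt/cluster/spark/conf/')
    # Closest standard-library form, as used throughout this commit:
    subprocess.run('cp -f /opt/spark/conf/core-site.xml /opt/cluster/spark/conf/',
                   shell=True)
    # Unlike local(), this never raises on a non-zero exit unless check=True is given,
    # and with capture_output=True it returns a CompletedProcess, not the output string.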
diff --git a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
index 15f3937..4e837f6 100644
--- a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
@@ -32,6 +32,7 @@ import sys
import time
import traceback
import urllib2
+import subprocess
from azure.common.client_factory import get_client_from_auth_file
from azure.datalake.store import core, lib
from azure.mgmt.authorization import AuthorizationManagementClient
@@ -1178,28 +1179,28 @@ def configure_local_spark(jars_dir, templates_dir, memory_type='driver'):
def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_enabled, spark_configs=''):
- local("jar_list=`find {0} -name '*.jar' | tr '\\n' ',' | sed 's/,$//'` ; echo \"spark.jars $jar_list\" >> \
- /tmp/{1}/notebook_spark-defaults_local.conf".format(jars_dir, cluster_name))
+ subprocess.run("jar_list=`find {0} -name '*.jar' | tr '\\n' ',' | sed 's/,$//'` ; echo \"spark.jars $jar_list\" >> \
+ /tmp/{1}/notebook_spark-defaults_local.conf".format(jars_dir, cluster_name), shell=True)
if os.path.exists('{0}spark/conf/spark-defaults.conf'.format(cluster_dir)):
- additional_spark_properties = local('diff --changed-group-format="%>" --unchanged-group-format="" '
+ additional_spark_properties = subprocess.run('diff --changed-group-format="%>" --unchanged-group-format="" '
'/tmp/{0}/notebook_spark-defaults_local.conf '
'{1}spark/conf/spark-defaults.conf | grep -v "^#"'.format(
- cluster_name, cluster_dir), capture_output=True)
+ cluster_name, cluster_dir), capture_output=True, shell=True)
for property in additional_spark_properties.split('\n'):
- local('echo "{0}" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(property, cluster_name))
+ subprocess.run('echo "{0}" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(property, cluster_name), shell=True)
if os.path.exists('{0}'.format(cluster_dir)):
- local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf {1}spark/conf/spark-defaults.conf'.format(cluster_name,
- cluster_dir))
+ subprocess.run('cp -f /tmp/{0}/notebook_spark-defaults_local.conf {1}spark/conf/spark-defaults.conf'.format(cluster_name,
+ cluster_dir), shell=True)
if datalake_enabled == 'false':
- local('cp -f /opt/spark/conf/core-site.xml {}spark/conf/'.format(cluster_dir))
+ subprocess.run('cp -f /opt/spark/conf/core-site.xml {}spark/conf/'.format(cluster_dir), shell=True)
else:
- local('cp -f /opt/hadoop/etc/hadoop/core-site.xml {}hadoop/etc/hadoop/core-site.xml'.format(cluster_dir))
+ subprocess.run('cp -f /opt/hadoop/etc/hadoop/core-site.xml {}hadoop/etc/hadoop/core-site.xml'.format(cluster_dir), shell=True)
if spark_configs and os.path.exists('{0}'.format(cluster_dir)):
- datalab_header = local('cat /tmp/{0}/notebook_spark-defaults_local.conf | grep "^#"'.format(cluster_name),
- capture_output=True)
+ datalab_header = subprocess.run('cat /tmp/{0}/notebook_spark-defaults_local.conf | grep "^#"'.format(cluster_name),
+ capture_output=True, shell=True)
spark_configurations = ast.literal_eval(spark_configs)
new_spark_defaults = list()
- spark_defaults = local('cat {0}spark/conf/spark-defaults.conf'.format(cluster_dir), capture_output=True)
+ spark_defaults = subprocess.run('cat {0}spark/conf/spark-defaults.conf'.format(cluster_dir), capture_output=True, shell=True)
current_spark_properties = spark_defaults.split('\n')
for param in current_spark_properties:
if param.split(' ')[0] != '#':
@@ -1212,11 +1213,11 @@ def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_ena
new_spark_defaults.append(property + ' ' + config['Properties'][property])
new_spark_defaults.append(param)
new_spark_defaults = set(new_spark_defaults)
- local("echo '{0}' > {1}/spark/conf/spark-defaults.conf".format(datalab_header, cluster_dir))
+ subprocess.run("echo '{0}' > {1}/spark/conf/spark-defaults.conf".format(datalab_header, cluster_dir), shell=True)
for prop in new_spark_defaults:
prop = prop.rstrip()
- local('echo "{0}" >> {1}/spark/conf/spark-defaults.conf'.format(prop, cluster_dir))
- local('sed -i "/^\s*$/d" {0}/spark/conf/spark-defaults.conf'.format(cluster_dir))
+ subprocess.run('echo "{0}" >> {1}/spark/conf/spark-defaults.conf'.format(prop, cluster_dir), shell=True)
+ subprocess.run('sed -i "/^\s*$/d" {0}/spark/conf/spark-defaults.conf'.format(cluster_dir), shell=True)
def remount_azure_disk(creds=False, os_user='', hostname='', keyfile=''):
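Note that the hunk above still treats captured results as strings (additional_spark_properties.split('\n'), the datalab_header interpolation), while subprocess.run(..., capture_output=True) returns a CompletedProcess whose stdout is bytes. A hedged sketch of a wrapper that restores the string-returning behaviour the old local(..., capture_output=True) call sites expect (the name run_capture is hypothetical):

    import subprocess

    def run_capture(cmd):
        """Run cmd through the shell and return its stdout as a stripped string."""
        result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
        return result.stdout.strip()

    # e.g. additional_spark_properties = run_capture('diff ... | grep -v "^#"')
    # after which .split('\n') works as it did with Fabric's local()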
@@ -1317,33 +1318,33 @@ def ensure_local_spark(os_user, spark_link, spark_version, hadoop_version, local
def install_dataengine_spark(cluster_name, spark_link, spark_version, hadoop_version, cluster_dir, os_user, datalake_enabled):
try:
if datalake_enabled == 'false':
- local('wget ' + spark_link + ' -O /tmp/' + cluster_name + '/spark-' + spark_version + '-bin-hadoop' + hadoop_version + '.tgz')
- local('tar -zxvf /tmp/' + cluster_name + '/spark-' + spark_version + '-bin-hadoop' + hadoop_version + '.tgz -C /opt/')
- local('mv /opt/spark-' + spark_version + '-bin-hadoop' + hadoop_version + ' ' + cluster_dir + 'spark/')
- local('chown -R ' + os_user + ':' + os_user + ' ' + cluster_dir + 'spark/')
+ subprocess.run('wget ' + spark_link + ' -O /tmp/' + cluster_name + '/spark-' + spark_version + '-bin-hadoop' + hadoop_version + '.tgz', shell=True)
+ subprocess.run('tar -zxvf /tmp/' + cluster_name + '/spark-' + spark_version + '-bin-hadoop' + hadoop_version + '.tgz -C /opt/', shell=True)
+ subprocess.run('mv /opt/spark-' + spark_version + '-bin-hadoop' + hadoop_version + ' ' + cluster_dir + 'spark/', shell=True)
+ subprocess.run('chown -R ' + os_user + ':' + os_user + ' ' + cluster_dir + 'spark/', shell=True)
else:
# Downloading Spark without Hadoop
- local('wget https://archive.apache.org/dist/spark/spark-{0}/spark-{0}-bin-without-hadoop.tgz -O /tmp/{1}/spark-{0}-bin-without-hadoop.tgz'
- .format(spark_version, cluster_name))
- local('tar -zxvf /tmp/' + cluster_name + '/spark-{}-bin-without-hadoop.tgz -C /opt/'.format(spark_version))
- local('mv /opt/spark-{}-bin-without-hadoop {}spark/'.format(spark_version, cluster_dir))
- local('chown -R {0}:{0} {1}/spark/'.format(os_user, cluster_dir))
+ subprocess.run('wget https://archive.apache.org/dist/spark/spark-{0}/spark-{0}-bin-without-hadoop.tgz -O /tmp/{1}/spark-{0}-bin-without-hadoop.tgz'
+ .format(spark_version, cluster_name), shell=True)
+ subprocess.run('tar -zxvf /tmp/' + cluster_name + '/spark-{}-bin-without-hadoop.tgz -C /opt/'.format(spark_version), shell=True)
+ subprocess.run('mv /opt/spark-{}-bin-without-hadoop {}spark/'.format(spark_version, cluster_dir), shell=True)
+ subprocess.run('chown -R {0}:{0} {1}/spark/'.format(os_user, cluster_dir), shell=True)
# Downloading Hadoop
hadoop_version = '3.0.0'
- local('wget https://archive.apache.org/dist/hadoop/common/hadoop-{0}/hadoop-{0}.tar.gz -O /tmp/{1}/hadoop-{0}.tar.gz'
- .format(hadoop_version, cluster_name))
- local('tar -zxvf /tmp/' + cluster_name + '/hadoop-{0}.tar.gz -C /opt/'.format(hadoop_version))
- local('mv /opt/hadoop-{0} {1}hadoop/'.format(hadoop_version, cluster_dir))
- local('chown -R {0}:{0} {1}hadoop/'.format(os_user, cluster_dir))
+ subprocess.run('wget https://archive.apache.org/dist/hadoop/common/hadoop-{0}/hadoop-{0}.tar.gz -O /tmp/{1}/hadoop-{0}.tar.gz'
+ .format(hadoop_version, cluster_name), shell=True)
+ subprocess.run('tar -zxvf /tmp/' + cluster_name + '/hadoop-{0}.tar.gz -C /opt/'.format(hadoop_version), shell=True)
+ subprocess.run('mv /opt/hadoop-{0} {1}hadoop/'.format(hadoop_version, cluster_dir), shell=True)
+ subprocess.run('chown -R {0}:{0} {1}hadoop/'.format(os_user, cluster_dir), shell=True)
# Configuring Hadoop and Spark
java_path = datalab.common_lib.find_java_path_local()
- local('echo "export JAVA_HOME={}" >> {}hadoop/etc/hadoop/hadoop-env.sh'.format(java_path, cluster_dir))
- local("""echo 'export HADOOP_CLASSPATH="$HADOOP_HOME/share/hadoop/tools/lib/*"' >> {}hadoop/etc/hadoop/hadoop-env.sh""".format(cluster_dir))
- local('echo "export HADOOP_HOME={0}hadoop/" >> {0}spark/conf/spark-env.sh'.format(cluster_dir))
- local('echo "export SPARK_HOME={0}spark/" >> {0}spark/conf/spark-env.sh'.format(cluster_dir))
- spark_dist_classpath = local('{}hadoop/bin/hadoop classpath'.format(cluster_dir), capture_output=True)
- local('echo "export SPARK_DIST_CLASSPATH={}" >> {}spark/conf/spark-env.sh'.format(
- spark_dist_classpath, cluster_dir))
+ subprocess.run('echo "export JAVA_HOME={}" >> {}hadoop/etc/hadoop/hadoop-env.sh'.format(java_path, cluster_dir), shell=True)
+ subprocess.run("""echo 'export HADOOP_CLASSPATH="$HADOOP_HOME/share/hadoop/tools/lib/*"' >> {}hadoop/etc/hadoop/hadoop-env.sh""".format(cluster_dir), shell=True)
+ subprocess.run('echo "export HADOOP_HOME={0}hadoop/" >> {0}spark/conf/spark-env.sh'.format(cluster_dir), shell=True)
+ subprocess.run('echo "export SPARK_HOME={0}spark/" >> {0}spark/conf/spark-env.sh'.format(cluster_dir), shell=True)
+ spark_dist_classpath = subprocess.run('{}hadoop/bin/hadoop classpath'.format(cluster_dir), capture_output=True, shell=True)
+ subprocess.run('echo "export SPARK_DIST_CLASSPATH={}" >> {}spark/conf/spark-env.sh'.format(
+ spark_dist_classpath, cluster_dir), shell=True)
except:
sys.exit(1)
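The try/except wrapping this function only exits on Python-level errors: subprocess.run without check=True returns normally even when wget or tar fails, so a broken download would go unnoticed. A sketch of the fail-fast form, assuming that behaviour is wanted (the URL is illustrative):

    import subprocess
    import sys

    spark_link = ('https://archive.apache.org/dist/spark/spark-2.4.7/'
                  'spark-2.4.7-bin-hadoop2.7.tgz')  # illustrative
    try:
        subprocess.run('wget {} -O /tmp/spark.tgz'.format(spark_link),
                       shell=True, check=True)  # raises CalledProcessError on non-zero exit
    except subprocess.CalledProcessError as err:
        print('Download failed: {}'.format(err))
        sys.exit(1)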
diff --git a/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py b/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
index 0616f89..f65cad4 100644
--- a/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
@@ -33,6 +33,7 @@ import sys
import time
import traceback
import urllib3
+import subprocess
from Crypto.PublicKey import RSA
from datalab.fab import *
from fabric import *
@@ -1094,49 +1095,49 @@ class GCPActions:
print("Downloading jars...")
GCPActions().get_from_bucket(args.bucket, 'jars/{0}/jars.tar.gz'.format(args.dataproc_version), '/tmp/jars.tar.gz')
GCPActions().get_from_bucket(args.bucket, 'jars/{0}/jars-checksum.chk'.format(args.dataproc_version), '/tmp/jars-checksum.chk')
- if 'WARNING' in local('md5sum -c /tmp/jars-checksum.chk', capture_output=True):
- local('rm -f /tmp/jars.tar.gz')
+ if 'WARNING' in subprocess.run('md5sum -c /tmp/jars-checksum.chk', capture_output=True, shell=True):
+ subprocess.run('rm -f /tmp/jars.tar.gz', shell=True)
GCPActions().get_from_bucket(args.bucket, 'jars/{0}/jars.tar.gz'.format(args.cluster_name), '/tmp/jars.tar.gz')
- if 'WARNING' in local('md5sum -c /tmp/jars-checksum.chk', capture_output=True):
+ if 'WARNING' in subprocess.run('md5sum -c /tmp/jars-checksum.chk', capture_output=True, shell=True):
print("The checksum of jars.tar.gz is mismatched. It could be caused by gcp network issue.")
sys.exit(1)
- local('tar -zhxvf /tmp/jars.tar.gz -C {}'.format(dataproc_dir))
+ subprocess.run('tar -zhxvf /tmp/jars.tar.gz -C {}'.format(dataproc_dir), shell=True)
def yarn(self, args, yarn_dir):
print("Downloading yarn configuration...")
bucket = self.storage_client.get_bucket(args.bucket)
list_files = bucket.list_blobs(prefix='{0}/{1}/config/'.format(args.user_name, args.cluster_name))
- local('mkdir -p /tmp/{0}/{1}/config/'.format(args.user_name, args.cluster_name))
+ subprocess.run('mkdir -p /tmp/{0}/{1}/config/'.format(args.user_name, args.cluster_name), shell=True)
for item in list_files:
local_file = '/tmp/{0}/{1}/config/{2}'.format(args.user_name, args.cluster_name, item.name.split("/")[-1:][0])
GCPActions().get_from_bucket(args.bucket, item.name, local_file)
- local('sudo mv /tmp/{0}/{1}/config/* {2}'.format(args.user_name, args.cluster_name, yarn_dir))
- local('sudo rm -rf /tmp/{}'.format(args.user_name))
+ subprocess.run('sudo mv /tmp/{0}/{1}/config/* {2}'.format(args.user_name, args.cluster_name, yarn_dir), shell=True)
+ subprocess.run('sudo rm -rf /tmp/{}'.format(args.user_name), shell=True)
def install_dataproc_spark(self, args):
print("Installing spark...")
GCPActions().get_from_bucket(args.bucket, '{0}/{1}/spark.tar.gz'.format(args.user_name, args.cluster_name), '/tmp/spark.tar.gz')
GCPActions().get_from_bucket(args.bucket, '{0}/{1}/spark-checksum.chk'.format(args.user_name, args.cluster_name), '/tmp/spark-checksum.chk')
- if 'WARNING' in local('md5sum -c /tmp/spark-checksum.chk', capture_output=True):
- local('rm -f /tmp/spark.tar.gz')
+ if 'WARNING' in subprocess.run('md5sum -c /tmp/spark-checksum.chk', capture_output=True, shell=True):
+ subprocess.run('rm -f /tmp/spark.tar.gz', shell=True)
GCPActions().get_from_bucket(args.bucket, '{0}/{1}/spark.tar.gz'.format(args.user_name, args.cluster_name), '/tmp/spark.tar.gz')
- if 'WARNING' in local('md5sum -c /tmp/spark-checksum.chk', capture_output=True):
+ if 'WARNING' in subprocess.run('md5sum -c /tmp/spark-checksum.chk', capture_output=True, shell=True):
print("The checksum of spark.tar.gz is mismatched. It could be caused by gcp network issue.")
sys.exit(1)
- local('sudo tar -zhxvf /tmp/spark.tar.gz -C /opt/{0}/{1}/'.format(args.dataproc_version, args.cluster_name))
+ subprocess.run('sudo tar -zhxvf /tmp/spark.tar.gz -C /opt/{0}/{1}/'.format(args.dataproc_version, args.cluster_name), shell=True)
def spark_defaults(self, args):
spark_def_path = '/opt/{0}/{1}/spark/conf/spark-env.sh'.format(args.dataproc_version, args.cluster_name)
- local(""" sudo bash -c " sed -i '/#/d' {}" """.format(spark_def_path))
- local(""" sudo bash -c " sed -i '/^\s*$/d' {}" """.format(spark_def_path))
- local(""" sudo bash -c " sed -i 's|/usr/lib/hadoop|/opt/{0}/jars/usr/lib/hadoop|g' {1}" """.format(args.dataproc_version, spark_def_path))
- local(""" sudo bash -c " sed -i 's|/etc/hadoop/conf|/opt/{0}/{1}/conf|g' {2}" """.format(args.dataproc_version, args.cluster_name, spark_def_path))
- local(""" sudo bash -c " sed -i '/\$HADOOP_HOME\/\*/a SPARK_DIST_CLASSPATH=\\"\$SPARK_DIST_CLASSPATH:\$HADOOP_HOME\/client\/*\\"' {}" """.format(spark_def_path))
- local(""" sudo bash -c " sed -i '/\$HADOOP_YARN_HOME\/\*/a SPARK_DIST_CLASSPATH=\\"\$SPARK_DIST_CLASSPATH:\/opt\/jars\/\*\\"' {}" """.format(spark_def_path))
- local(""" sudo bash -c " sed -i 's|/hadoop/spark/work|/tmp/hadoop/spark/work|g' {}" """.format(spark_def_path))
- local(""" sudo bash -c " sed -i 's|/hadoop/spark/tmp|/tmp/hadoop/spark/tmp|g' {}" """.format(spark_def_path))
- local(""" sudo bash -c " sed -i 's/STANDALONE_SPARK_MASTER_HOST.*/STANDALONE_SPARK_MASTER_HOST={0}-m/g' {1}" """.format(args.cluster_name, spark_def_path))
- local(""" sudo bash -c " sed -i 's|/hadoop_gcs_connector_metadata_cache|/tmp/hadoop_gcs_connector_metadata_cache|g' /opt/{0}/{1}/conf/core-site.xml" """.format(args.dataproc_version, args.cluster_name))
+ subprocess.run(""" sudo bash -c " sed -i '/#/d' {}" """.format(spark_def_path), shell=True)
+ subprocess.run(""" sudo bash -c " sed -i '/^\s*$/d' {}" """.format(spark_def_path), shell=True)
+ subprocess.run(""" sudo bash -c " sed -i 's|/usr/lib/hadoop|/opt/{0}/jars/usr/lib/hadoop|g' {1}" """.format(args.dataproc_version, spark_def_path), shell=True)
+ subprocess.run(""" sudo bash -c " sed -i 's|/etc/hadoop/conf|/opt/{0}/{1}/conf|g' {2}" """.format(args.dataproc_version, args.cluster_name, spark_def_path), shell=True)
+ subprocess.run(""" sudo bash -c " sed -i '/\$HADOOP_HOME\/\*/a SPARK_DIST_CLASSPATH=\\"\$SPARK_DIST_CLASSPATH:\$HADOOP_HOME\/client\/*\\"' {}" """.format(spark_def_path), shell=True)
+ subprocess.run(""" sudo bash -c " sed -i '/\$HADOOP_YARN_HOME\/\*/a SPARK_DIST_CLASSPATH=\\"\$SPARK_DIST_CLASSPATH:\/opt\/jars\/\*\\"' {}" """.format(spark_def_path), shell=True)
+ subprocess.run(""" sudo bash -c " sed -i 's|/hadoop/spark/work|/tmp/hadoop/spark/work|g' {}" """.format(spark_def_path), shell=True)
+ subprocess.run(""" sudo bash -c " sed -i 's|/hadoop/spark/tmp|/tmp/hadoop/spark/tmp|g' {}" """.format(spark_def_path), shell=True)
+ subprocess.run(""" sudo bash -c " sed -i 's/STANDALONE_SPARK_MASTER_HOST.*/STANDALONE_SPARK_MASTER_HOST={0}-m/g' {1}" """.format(args.cluster_name, spark_def_path), shell=True)
+ subprocess.run(""" sudo bash -c " sed -i 's|/hadoop_gcs_connector_metadata_cache|/tmp/hadoop_gcs_connector_metadata_cache|g' /opt/{0}/{1}/conf/core-site.xml" """.format(args.dataproc_version, args.cluster_name), shell=True)
def remove_kernels(self, notebook_name, dataproc_name, dataproc_version, ssh_user, key_path, computational_name):
try:
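The checksum guards above ('WARNING' in subprocess.run(...)) would raise TypeError at runtime: a membership test needs a string, and CompletedProcess is not iterable. A hedged correction of the idiom, testing the decoded output instead:

    import subprocess

    check = subprocess.run('md5sum -c /tmp/jars-checksum.chk',
                           shell=True, capture_output=True, text=True)
    # md5sum -c reports mismatches on stdout and a WARNING summary on stderr
    if 'WARNING' in check.stdout + check.stderr:
        print('checksum mismatch')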
@@ -1210,40 +1211,40 @@ class GCPActions:
python_version = python_version[0:5]
livy_port = ''
livy_path = '/opt/{0}/{1}/livy/'.format(dataproc_version, cluster_name)
- local('echo \"Configuring dataproc path for Zeppelin\"')
- local('sed -i \"s/^export SPARK_HOME.*/export SPARK_HOME=\/opt\/{0}\/{1}\/spark/\" /opt/zeppelin/conf/zeppelin-env.sh'
- .format(dataproc_version, cluster_name))
- local('sed -i \"s/^export HADOOP_CONF_DIR.*/export HADOOP_CONF_DIR=\/opt\/{0}\/{1}\/conf/\" /opt/{0}/{1}/spark/conf/spark-env.sh'
- .format(dataproc_version, cluster_name))
- local('sed -i "/spark.executorEnv.PYTHONPATH/d" /opt/{0}/{1}/spark/conf/spark-defaults.conf'.format(dataproc_version, cluster_name))
- local('sed -i "/spark.yarn.dist.files/d" /opt/{0}/{1}/spark/conf/spark-defaults.conf'.format(dataproc_version, cluster_name))
- local('sudo chown {0}:{0} -R /opt/zeppelin/'.format(os_user))
- local('sudo systemctl restart zeppelin-notebook.service')
+ subprocess.run('echo \"Configuring dataproc path for Zeppelin\"', shell=True)
+ subprocess.run('sed -i \"s/^export SPARK_HOME.*/export SPARK_HOME=\/opt\/{0}\/{1}\/spark/\" /opt/zeppelin/conf/zeppelin-env.sh'
+ .format(dataproc_version, cluster_name), shell=True)
+ subprocess.run('sed -i \"s/^export HADOOP_CONF_DIR.*/export HADOOP_CONF_DIR=\/opt\/{0}\/{1}\/conf/\" /opt/{0}/{1}/spark/conf/spark-env.sh'
+ .format(dataproc_version, cluster_name), shell=True)
+ subprocess.run('sed -i "/spark.executorEnv.PYTHONPATH/d" /opt/{0}/{1}/spark/conf/spark-defaults.conf'.format(dataproc_version, cluster_name), shell=True)
+ subprocess.run('sed -i "/spark.yarn.dist.files/d" /opt/{0}/{1}/spark/conf/spark-defaults.conf'.format(dataproc_version, cluster_name), shell=True)
+ subprocess.run('sudo chown {0}:{0} -R /opt/zeppelin/'.format(os_user), shell=True)
+ subprocess.run('sudo systemctl restart zeppelin-notebook.service', shell=True)
while not zeppelin_restarted:
- local('sleep 5')
- result = local('sudo bash -c "nmap -p 8080 localhost | grep closed > /dev/null" ; echo $?', capture_output=True)
+ subprocess.run('sleep 5', shell=True)
+ result = subprocess.run('sudo bash -c "nmap -p 8080 localhost | grep closed > /dev/null" ; echo $?', capture_output=True, shell=True)
result = result[:1]
if result == '1':
zeppelin_restarted = True
- local('sleep 5')
- local('echo \"Configuring dataproc spark interpreter for Zeppelin\"')
+ subprocess.run('sleep 5', shell=True)
+ subprocess.run('echo \"Configuring dataproc spark interpreter for Zeppelin\"', shell=True)
if multiple_clusters == 'true':
while not port_number_found:
- port_free = local('sudo bash -c "nmap -p ' + str(default_port) +
- ' localhost | grep closed > /dev/null" ; echo $?', capture_output=True)
+ port_free = subprocess.run('sudo bash -c "nmap -p ' + str(default_port) +
+ ' localhost | grep closed > /dev/null" ; echo $?', capture_output=True, shell=True)
port_free = port_free[:1]
if port_free == '0':
livy_port = default_port
port_number_found = True
else:
default_port += 1
- local('sudo echo "livy.server.port = {0}" >> {1}conf/livy.conf'.format(str(livy_port), livy_path))
- local('sudo echo "livy.spark.master = yarn" >> {}conf/livy.conf'.format(livy_path))
+ subprocess.run('sudo echo "livy.server.port = {0}" >> {1}conf/livy.conf'.format(str(livy_port), livy_path), shell=True)
+ subprocess.run('sudo echo "livy.spark.master = yarn" >> {}conf/livy.conf'.format(livy_path), shell=True)
if os.path.exists('{}conf/spark-blacklist.conf'.format(livy_path)):
- local('sudo sed -i "s/^/#/g" {}conf/spark-blacklist.conf'.format(livy_path))
- local('sudo echo "export SPARK_HOME={0}" >> {1}conf/livy-env.sh'.format(spark_dir, livy_path))
- local('sudo echo "export HADOOP_CONF_DIR={0}" >> {1}conf/livy-env.sh'.format(yarn_dir, livy_path))
- local('sudo echo "export PYSPARK3_PYTHON=python{0}" >> {1}conf/livy-env.sh'.format(python_version[0:3], livy_path))
+ subprocess.run('sudo sed -i "s/^/#/g" {}conf/spark-blacklist.conf'.format(livy_path), shell=True)
+ subprocess.run('sudo echo "export SPARK_HOME={0}" >> {1}conf/livy-env.sh'.format(spark_dir, livy_path), shell=True)
+ subprocess.run('sudo echo "export HADOOP_CONF_DIR={0}" >> {1}conf/livy-env.sh'.format(yarn_dir, livy_path), shell=True)
+ subprocess.run('sudo echo "export PYSPARK3_PYTHON=python{0}" >> {1}conf/livy-env.sh'.format(python_version[0:3], livy_path), shell=True)
template_file = "/tmp/dataengine-service_interpreter.json"
fr = open(template_file, 'r+')
text = fr.read()
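The port probes in this hunk shell out to nmap, pipe through grep, append "; echo $?" and then slice the first character of the captured text. With subprocess the exit status is available directly on the CompletedProcess, which removes the echo-and-slice step; a sketch (port number illustrative):

    import subprocess

    probe = subprocess.run('sudo bash -c "nmap -p 8080 localhost | grep -q closed"',
                           shell=True)
    # grep exits 0 when "closed" is found, 1 when the port is open
    zeppelin_restarted = (probe.returncode == 1)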
@@ -1255,18 +1256,18 @@ class GCPActions:
fw.close()
for _ in range(5):
try:
- local("curl --noproxy localhost -H 'Content-Type: application/json' -X POST -d " +
- "@/tmp/dataengine-service_interpreter.json http://localhost:8080/api/interpreter/setting")
+ subprocess.run("curl --noproxy localhost -H 'Content-Type: application/json' -X POST -d " +
+ "@/tmp/dataengine-service_interpreter.json http://localhost:8080/api/interpreter/setting", shell=True)
break
except:
- local('sleep 5')
- local('sudo cp /opt/livy-server-cluster.service /etc/systemd/system/livy-server-{}.service'.format(str(livy_port)))
- local("sudo sed -i 's|OS_USER|{0}|' /etc/systemd/system/livy-server-{1}.service".format(os_user, str(livy_port)))
- local("sudo sed -i 's|LIVY_PATH|{0}|' /etc/systemd/system/livy-server-{1}.service".format(livy_path, str(livy_port)))
- local('sudo chmod 644 /etc/systemd/system/livy-server-{}.service'.format(str(livy_port)))
- local('sudo systemctl daemon-reload')
- local('sudo systemctl enable livy-server-{}'.format(str(livy_port)))
- local('sudo systemctl start livy-server-{}'.format(str(livy_port)))
+ subprocess.run('sleep 5', shell=True)
+ subprocess.run('sudo cp /opt/livy-server-cluster.service /etc/systemd/system/livy-server-{}.service'.format(str(livy_port)), shell=True)
+ subprocess.run("sudo sed -i 's|OS_USER|{0}|' /etc/systemd/system/livy-server-{1}.service".format(os_user, str(livy_port)), shell=True)
+ subprocess.run("sudo sed -i 's|LIVY_PATH|{0}|' /etc/systemd/system/livy-server-{1}.service".format(livy_path, str(livy_port)), shell=True)
+ subprocess.run('sudo chmod 644 /etc/systemd/system/livy-server-{}.service'.format(str(livy_port)), shell=True)
+ subprocess.run('sudo systemctl daemon-reload', shell=True)
+ subprocess.run('sudo systemctl enable livy-server-{}'.format(str(livy_port)), shell=True)
+ subprocess.run('sudo systemctl start livy-server-{}'.format(str(livy_port)), shell=True)
else:
template_file = "/tmp/dataengine-service_interpreter.json"
p_versions = ["2", "{}-dp".format(python_version[:3])]
@@ -1284,12 +1285,12 @@ class GCPActions:
fw.close()
for _ in range(5):
try:
- local("curl --noproxy localhost -H 'Content-Type: application/json' -X POST -d " +
- "@/tmp/dataproc_spark_py{}_interpreter.json http://localhost:8080/api/interpreter/setting".format(p_version))
+ subprocess.run("curl --noproxy localhost -H 'Content-Type: application/json' -X POST -d " +
+ "@/tmp/dataproc_spark_py{}_interpreter.json http://localhost:8080/api/interpreter/setting".format(p_version), shell=True)
break
except:
- local('sleep 5')
- local('touch /home/{0}/.ensure_dir/dataengine-service_{1}_interpreter_ensured'.format(os_user, cluster_name))
+ subprocess.run('sleep 5', shell=True)
+ subprocess.run('touch /home/{0}/.ensure_dir/dataengine-service_{1}_interpreter_ensured'.format(os_user, cluster_name), shell=True)
except:
sys.exit(1)
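The repeated subprocess.run('sleep 5', shell=True) calls fork a shell just to pause; the standard library does this in-process:

    import time

    time.sleep(5)  # replaces subprocess.run('sleep 5', shell=True)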
@@ -1300,26 +1301,26 @@ class GCPActions:
python_version = f.read()
python_version = python_version[0:5]
if not os.path.exists('/opt/python/python{}'.format(python_version)):
- local('wget https://www.python.org/ftp/python/{0}/Python-{0}.tgz -O /tmp/Python-{0}.tgz'.format(python_version))
- local('tar zxvf /tmp/Python-{}.tgz -C /tmp/'.format(python_version))
- local('cd /tmp/Python-{0}; ./configure --prefix=/opt/python/python{0} --with-zlib-dir=/usr/local/lib/ --with-ensurepip=install'.format(python_version))
- local('cd /tmp/Python-{}; sudo make altinstall'.format(python_version))
- local('cd /tmp/; sudo rm -rf Python-{}/'.format(python_version))
- local('sudo -i virtualenv /opt/python/python{}'.format(python_version))
+ subprocess.run('wget https://www.python.org/ftp/python/{0}/Python-{0}.tgz -O /tmp/Python-{0}.tgz'.format(python_version), shell=True)
+ subprocess.run('tar zxvf /tmp/Python-{}.tgz -C /tmp/'.format(python_version), shell=True)
+ subprocess.run('cd /tmp/Python-{0}; ./configure --prefix=/opt/python/python{0} --with-zlib-dir=/usr/local/lib/ --with-ensurepip=install'.format(python_version), shell=True)
+ subprocess.run('cd /tmp/Python-{}; sudo make altinstall'.format(python_version), shell=True)
+ subprocess.run('cd /tmp/; sudo rm -rf Python-{}/'.format(python_version), shell=True)
+ subprocess.run('sudo -i virtualenv /opt/python/python{}'.format(python_version), shell=True)
venv_command = 'source /opt/python/python{}/bin/activate'.format(python_version)
pip_command = '/opt/python/python{0}/bin/pip{1}'.format(python_version, python_version[:3])
- local('{0} && sudo -i {1} install -U pip==9.0.3'.format(venv_command, pip_command))
- local('{0} && sudo -i {1} install pyzmq==17.0.0'.format(venv_command, pip_command))
- local('{0} && sudo -i {1} install ipython ipykernel --no-cache-dir'.format(venv_command, pip_command))
- local('{0} && sudo -i {1} install boto boto3 NumPy=={2} SciPy Matplotlib pandas Sympy Pillow sklearn --no-cache-dir'
- .format(venv_command, pip_command, numpy_version))
+ subprocess.run('{0} && sudo -i {1} install -U pip==9.0.3'.format(venv_command, pip_command), shell=True)
+ subprocess.run('{0} && sudo -i {1} install pyzmq==17.0.0'.format(venv_command, pip_command), shell=True)
+ subprocess.run('{0} && sudo -i {1} install ipython ipykernel --no-cache-dir'.format(venv_command, pip_command), shell=True)
+ subprocess.run('{0} && sudo -i {1} install boto boto3 NumPy=={2} SciPy Matplotlib pandas Sympy Pillow sklearn --no-cache-dir'
+ .format(venv_command, pip_command, numpy_version), shell=True)
if application == 'deeplearning':
- local('{0} && sudo -i {1} install mxnet-cu80 opencv-python keras Theano --no-cache-dir'.format(venv_command, pip_command))
+ subprocess.run('{0} && sudo -i {1} install mxnet-cu80 opencv-python keras Theano --no-cache-dir'.format(venv_command, pip_command), shell=True)
python_without_dots = python_version.replace('.', '')
- local('{0} && sudo -i {1} install https://cntk.ai/PythonWheel/GPU/cntk-2.0rc3-cp{2}-cp{2}m-linux_x86_64.whl --no-cache-dir'
- .format(venv_command, pip_command, python_without_dots[:2]))
- local('sudo rm -rf /usr/bin/python{}-dp'.format(python_version[0:3]))
- local('sudo ln -fs /opt/python/python{0}/bin/python{1} /usr/bin/python{1}-dp'.format(python_version, python_version[0:3]))
+ subprocess.run('{0} && sudo -i {1} install https://cntk.ai/PythonWheel/GPU/cntk-2.0rc3-cp{2}-cp{2}m-linux_x86_64.whl --no-cache-dir'
+ .format(venv_command, pip_command, python_without_dots[:2]), shell=True)
+ subprocess.run('sudo rm -rf /usr/bin/python{}-dp'.format(python_version[0:3]), shell=True)
+ subprocess.run('sudo ln -fs /opt/python/python{0}/bin/python{1} /usr/bin/python{1}-dp'.format(python_version, python_version[0:3]), shell=True)
except Exception as err:
logging.info(
"Unable to install python: " + str(err) + "\n Traceback: " + traceback.print_exc(
@@ -1507,32 +1508,32 @@ def remove_dataengine_kernels(notebook_name, os_user, key_path, cluster_name):
def install_dataengine_spark(cluster_name, spark_link, spark_version, hadoop_version, cluster_dir, os_user, datalake_enabled):
- local('wget ' + spark_link + ' -O /tmp/' + cluster_name + '/spark-' + spark_version + '-bin-hadoop' + hadoop_version + '.tgz')
- local('tar -zxvf /tmp/' + cluster_name + '/spark-' + spark_version + '-bin-hadoop' + hadoop_version + '.tgz -C /opt/')
- local('mv /opt/spark-' + spark_version + '-bin-hadoop' + hadoop_version + ' ' + cluster_dir + 'spark/')
- local('chown -R ' + os_user + ':' + os_user + ' ' + cluster_dir + 'spark/')
+ subprocess.run('wget ' + spark_link + ' -O /tmp/' + cluster_name + '/spark-' + spark_version + '-bin-hadoop' + hadoop_version + '.tgz', shell=True)
+ subprocess.run('tar -zxvf /tmp/' + cluster_name + '/spark-' + spark_version + '-bin-hadoop' + hadoop_version + '.tgz -C /opt/', shell=True)
+ subprocess.run('mv /opt/spark-' + spark_version + '-bin-hadoop' + hadoop_version + ' ' + cluster_dir + 'spark/', shell=True)
+ subprocess.run('chown -R ' + os_user + ':' + os_user + ' ' + cluster_dir + 'spark/', shell=True)
def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_enabled, spark_configs=''):
- local("jar_list=`find {0} -name '*.jar' | tr '\\n' ',' | sed 's/,$//'` ; echo \"spark.jars $jar_list\" >> \
- /tmp/{1}/notebook_spark-defaults_local.conf".format(jars_dir, cluster_name))
+ subprocess.run("jar_list=`find {0} -name '*.jar' | tr '\\n' ',' | sed 's/,$//'` ; echo \"spark.jars $jar_list\" >> \
+ /tmp/{1}/notebook_spark-defaults_local.conf".format(jars_dir, cluster_name), shell=True)
if os.path.exists('{0}spark/conf/spark-defaults.conf'.format(cluster_dir)):
- additional_spark_properties = local('diff --changed-group-format="%>" --unchanged-group-format="" '
+ additional_spark_properties = subprocess.run('diff --changed-group-format="%>" --unchanged-group-format="" '
'/tmp/{0}/notebook_spark-defaults_local.conf '
'{1}spark/conf/spark-defaults.conf | grep -v "^#"'.format(
- cluster_name, cluster_dir), capture_output=True)
+ cluster_name, cluster_dir), capture_output=True, shell=True)
for property in additional_spark_properties.split('\n'):
- local('echo "{0}" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(property, cluster_name))
+ subprocess.run('echo "{0}" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(property, cluster_name), shell=True)
if os.path.exists('{0}'.format(cluster_dir)):
- local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf {1}spark/conf/spark-defaults.conf'.format(cluster_name,
- cluster_dir))
- local('cp -f /opt/spark/conf/core-site.xml {}spark/conf/'.format(cluster_dir))
+ subprocess.run('cp -f /tmp/{0}/notebook_spark-defaults_local.conf {1}spark/conf/spark-defaults.conf'.format(cluster_name,
+ cluster_dir), shell=True)
+ subprocess.run('cp -f /opt/spark/conf/core-site.xml {}spark/conf/'.format(cluster_dir), shell=True)
if spark_configs and os.path.exists('{0}'.format(cluster_dir)):
- datalab_header = local('cat /tmp/{0}/notebook_spark-defaults_local.conf | grep "^#"'.format(cluster_name),
- capture_output=True)
+ datalab_header = subprocess.run('cat /tmp/{0}/notebook_spark-defaults_local.conf | grep "^#"'.format(cluster_name),
+ capture_output=True, shell=True)
spark_configurations = ast.literal_eval(spark_configs)
new_spark_defaults = list()
- spark_defaults = local('cat {0}spark/conf/spark-defaults.conf'.format(cluster_dir), capture_output=True)
+ spark_defaults = subprocess.run('cat {0}spark/conf/spark-defaults.conf'.format(cluster_dir), capture_output=True, shell=True)
current_spark_properties = spark_defaults.split('\n')
for param in current_spark_properties:
if param.split(' ')[0] != '#':
@@ -1545,11 +1546,11 @@ def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_ena
new_spark_defaults.append(property + ' ' + config['Properties'][property])
new_spark_defaults.append(param)
new_spark_defaults = set(new_spark_defaults)
- local("echo '{0}' > {1}/spark/conf/spark-defaults.conf".format(datalab_header, cluster_dir))
+ subprocess.run("echo '{0}' > {1}/spark/conf/spark-defaults.conf".format(datalab_header, cluster_dir), shell=True)
for prop in new_spark_defaults:
prop = prop.rstrip()
- local('echo "{0}" >> {1}/spark/conf/spark-defaults.conf'.format(prop, cluster_dir))
- local('sed -i "/^\s*$/d" {0}/spark/conf/spark-defaults.conf'.format(cluster_dir))
+ subprocess.run('echo "{0}" >> {1}/spark/conf/spark-defaults.conf'.format(prop, cluster_dir), shell=True)
+ subprocess.run('sed -i "/^\s*$/d" {0}/spark/conf/spark-defaults.conf'.format(cluster_dir), shell=True)
def find_des_jars(all_jars, des_path):
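A general caveat for this migration: every call interpolates paths, usernames and config values straight into a shell string, so the quoting hazards local() had carry over to shell=True. Where a value may contain spaces or shell metacharacters, shlex.quote keeps it inert; a sketch with illustrative values:

    import shlex
    import subprocess

    prop = 'spark.driver.extraJavaOptions -Dfoo="a b"'  # value with spaces and quotes
    cluster_dir = '/opt/cluster/'                       # illustrative
    conf_path = '{}spark/conf/spark-defaults.conf'.format(cluster_dir)
    subprocess.run('echo {} >> {}'.format(shlex.quote(prop), shlex.quote(conf_path)),
                   shell=True)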
diff --git a/infrastructure-provisioning/src/general/scripts/aws/dataengine_start.py b/infrastructure-provisioning/src/general/scripts/aws/dataengine_start.py
index 1f71fac..223617c 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/dataengine_start.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/dataengine_start.py
@@ -29,6 +29,7 @@ import logging
import os
import sys
import traceback
+import subprocess
from fabric import *
@@ -96,7 +97,7 @@ if __name__ == "__main__":
.format(os.environ['conf_os_user'], data_engine['notebook_ip'], data_engine['keyfile'],
data_engine['computational_ip'])
try:
- local("~/scripts/{}.py {}".format('update_inactivity_on_start', params))
+ subprocess.run("~/scripts/{}.py {}".format('update_inactivity_on_start', params), shell=True)
except Exception as err:
traceback.print_exc()
datalab.fab.append_result("Failed to update last activity time.", str(err))
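One subtlety these wrapper-script calls rely on: '~/scripts/...' only expands because shell=True hands the string to a shell. If the calls are ever moved to the safer list form, the tilde has to be expanded explicitly; a sketch with illustrative parameters:

    import os
    import shlex
    import subprocess

    params = '--os_user datalab-user --keyfile /root/keys/key.pem'  # illustrative
    script = os.path.expanduser('~/scripts/update_inactivity_on_start.py')
    subprocess.run([script] + shlex.split(params), check=True)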
diff --git a/infrastructure-provisioning/src/general/scripts/aws/tensor_configure.py b/infrastructure-provisioning/src/general/scripts/aws/tensor_configure.py
index c1fced4..105743c 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/tensor_configure.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/tensor_configure.py
@@ -30,6 +30,7 @@ import logging
import os
import sys
import traceback
+import subprocess
from fabric import *
parser = argparse.ArgumentParser()
@@ -118,7 +119,7 @@ if __name__ == "__main__":
notebook_config['initial_user'], notebook_config['datalab_ssh_user'], notebook_config['sudo_group'])
try:
- local("~/scripts/{}.py {}".format('create_ssh_user', params))
+ subprocess.run("~/scripts/{}.py {}".format('create_ssh_user', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -136,7 +137,7 @@ if __name__ == "__main__":
.format(instance_hostname, notebook_config['instance_name'], keyfile_name, json.dumps(additional_config),
notebook_config['datalab_ssh_user'])
try:
- local("~/scripts/{}.py {}".format('common_configure_proxy', params))
+ subprocess.run("~/scripts/{}.py {}".format('common_configure_proxy', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -153,7 +154,7 @@ if __name__ == "__main__":
format(instance_hostname, keyfile_name, notebook_config['datalab_ssh_user'], os.environ['aws_region'],
edge_instance_private_ip)
try:
- local("~/scripts/{}.py {}".format('install_prerequisites', params))
+ subprocess.run("~/scripts/{}.py {}".format('install_prerequisites', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -173,7 +174,7 @@ if __name__ == "__main__":
os.environ['aws_region'], notebook_config['datalab_ssh_user'],
notebook_config['ip_address'], notebook_config['exploratory_name'], edge_ip)
try:
- local("~/scripts/{}.py {}".format('configure_tensor_node', params))
+ subprocess.run("~/scripts/{}.py {}".format('configure_tensor_node', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -190,7 +191,7 @@ if __name__ == "__main__":
params = "--hostname {} --keyfile {} --additional_config '{}' --user {}".format(
instance_hostname, keyfile_name, json.dumps(additional_config), notebook_config['datalab_ssh_user'])
try:
- local("~/scripts/{}.py {}".format('install_user_key', params))
+ subprocess.run("~/scripts/{}.py {}".format('install_user_key', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -205,7 +206,7 @@ if __name__ == "__main__":
params = '--os_user {} --notebook_ip {} --keyfile "{}"' \
.format(notebook_config['datalab_ssh_user'], instance_hostname, keyfile_name)
try:
- local("~/scripts/{}.py {}".format('manage_git_creds', params))
+ subprocess.run("~/scripts/{}.py {}".format('manage_git_creds', params), shell=True)
except:
datalab.fab.append_result("Failed setup git credentials")
raise Exception
@@ -222,7 +223,7 @@ if __name__ == "__main__":
.format(instance_hostname, keyfile_name, notebook_config['datalab_ssh_user'],
notebook_config['tag_name'], notebook_config['instance_name'])
try:
- local("~/scripts/{}.py {}".format('common_remove_remote_kernels', params))
+ subprocess.run("~/scripts/{}.py {}".format('common_remove_remote_kernels', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -242,7 +243,7 @@ if __name__ == "__main__":
.format(edge_instance_hostname, keyfile_name, notebook_config['datalab_ssh_user'], 'jupyter',
notebook_config['exploratory_name'], json.dumps(additional_info))
try:
- local("~/scripts/{}.py {}".format('common_configure_reverse_proxy', params))
+ subprocess.run("~/scripts/{}.py {}".format('common_configure_reverse_proxy', params), shell=True)
except:
datalab.fab.append_result("Failed edge reverse proxy template")
raise Exception
diff --git a/infrastructure-provisioning/src/general/scripts/azure/common_start_notebook.py b/infrastructure-provisioning/src/general/scripts/azure/common_start_notebook.py
index e8ab8b0..7f7de84 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/common_start_notebook.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/common_start_notebook.py
@@ -70,7 +70,7 @@ if __name__ == "__main__":
params = '--os_user {} --notebook_ip {} --keyfile "{}"' \
.format(os.environ['conf_os_user'], notebook_config['notebook_ip'], notebook_config['keyfile'])
try:
- local("~/scripts/{}.py {}".format('manage_git_creds', params))
+ subprocess.run("~/scripts/{}.py {}".format('manage_git_creds', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -107,7 +107,7 @@ if __name__ == "__main__":
params = '--os_user {} --notebook_ip {} --keyfile "{}"' \
.format(os.environ['conf_os_user'], notebook_config['notebook_ip'], notebook_config['keyfile'])
try:
- local("~/scripts/{}.py {}".format('update_inactivity_on_start', params))
+ subprocess.run("~/scripts/{}.py {}".format('update_inactivity_on_start', params), shell=True)
except:
traceback.print_exc()
raise Exception
diff --git a/infrastructure-provisioning/src/general/scripts/azure/edge_configure.py b/infrastructure-provisioning/src/general/scripts/azure/edge_configure.py
index dde21cd..1329488 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/edge_configure.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/edge_configure.py
@@ -30,6 +30,7 @@ import os
import sys
import traceback
import uuid
+import subprocess
from fabric import *
if __name__ == "__main__":
@@ -156,7 +157,7 @@ if __name__ == "__main__":
edge_conf['initial_user'], edge_conf['datalab_ssh_user'], edge_conf['sudo_group'])
try:
- local("~/scripts/{}.py {}".format('create_ssh_user', params))
+ subprocess.run("~/scripts/{}.py {}".format('create_ssh_user', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -172,7 +173,7 @@ if __name__ == "__main__":
edge_conf['instance_hostname'], edge_conf['keyfile_name'], edge_conf['datalab_ssh_user'],
os.environ['azure_region'])
try:
- local("~/scripts/{}.py {}".format('install_prerequisites', params))
+ subprocess.run("~/scripts/{}.py {}".format('install_prerequisites', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -197,7 +198,7 @@ if __name__ == "__main__":
edge_conf['instance_hostname'], edge_conf['keyfile_name'], json.dumps(additional_config),
edge_conf['datalab_ssh_user'])
try:
- local("~/scripts/{}.py {}".format('configure_http_proxy', params))
+ subprocess.run("~/scripts/{}.py {}".format('configure_http_proxy', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -216,7 +217,7 @@ if __name__ == "__main__":
edge_conf['instance_hostname'], edge_conf['keyfile_name'], json.dumps(additional_config),
edge_conf['datalab_ssh_user'])
try:
- local("~/scripts/{}.py {}".format('install_user_key', params))
+ subprocess.run("~/scripts/{}.py {}".format('install_user_key', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -236,7 +237,7 @@ if __name__ == "__main__":
edge_conf['keycloak_client_secret'], edge_conf['step_cert_sans'])
try:
- local("~/scripts/{}.py {}".format('configure_nginx_reverse_proxy', params))
+ subprocess.run("~/scripts/{}.py {}".format('configure_nginx_reverse_proxy', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -253,7 +254,7 @@ if __name__ == "__main__":
edge_conf['keycloak_client_secret'], edge_conf['instance_hostname'], edge_conf['project_name'],
edge_conf['endpoint_name'], edge_conf['edge_hostname'])
try:
- local("~/scripts/{}.py {}".format('configure_keycloak', keycloak_params))
+ subprocess.run("~/scripts/{}.py {}".format('configure_keycloak', keycloak_params), shell=True)
except:
traceback.print_exc()
raise Exception
diff --git a/infrastructure-provisioning/src/general/scripts/azure/project_prepare.py b/infrastructure-provisioning/src/general/scripts/azure/project_prepare.py
index 44fb0b9..b7e67ae 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/project_prepare.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/project_prepare.py
@@ -29,6 +29,7 @@ import logging
import os
import sys
import traceback
+import subprocess
from Crypto.PublicKey import RSA
from fabric import *
@@ -130,8 +131,8 @@ if __name__ == "__main__":
try:
project_conf['user_key'] = os.environ['key']
try:
- local('echo "{0}" >> {1}{2}.pub'.format(project_conf['user_key'], os.environ['conf_key_dir'],
- project_conf['project_name']))
+ subprocess.run('echo "{0}" >> {1}{2}.pub'.format(project_conf['user_key'], os.environ['conf_key_dir'],
+ project_conf['project_name']), shell=True)
except:
print("ADMINSs PUBLIC KEY DOES NOT INSTALLED")
except KeyError:
@@ -152,7 +153,7 @@ if __name__ == "__main__":
format(project_conf['resource_group_name'], project_conf['vpc_name'], project_conf['region'],
project_conf['vpc_cidr'], project_conf['private_subnet_name'], project_conf['private_subnet_prefix'])
try:
- local("~/scripts/{}.py {}".format('common_create_subnet', params))
+ subprocess.run("~/scripts/{}.py {}".format('common_create_subnet', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -206,7 +207,7 @@ if __name__ == "__main__":
format(project_conf['resource_group_name'], os.environ['azure_edge_security_group_name'],
project_conf['region'], json.dumps({"product": "datalab"}), json.dumps(edge_list_rules))
try:
- local("~/scripts/{}.py {}".format('common_create_security_group', params))
+ subprocess.run("~/scripts/{}.py {}".format('common_create_security_group', params), shell=True)
except Exception as err:
AzureActions.remove_subnet(project_conf['resource_group_name'], project_conf['vpc_name'],
project_conf['private_subnet_name'])
@@ -483,7 +484,7 @@ if __name__ == "__main__":
format(project_conf['resource_group_name'], project_conf['edge_security_group_name'],
project_conf['region'], json.dumps(project_conf['instance_tags']), json.dumps(edge_list_rules))
try:
- local("~/scripts/{}.py {}".format('common_create_security_group', params))
+ subprocess.run("~/scripts/{}.py {}".format('common_create_security_group', params), shell=True)
except Exception as err:
AzureActions.remove_subnet(project_conf['resource_group_name'], project_conf['vpc_name'],
project_conf['private_subnet_name'])
@@ -589,7 +590,7 @@ if __name__ == "__main__":
format(project_conf['resource_group_name'], project_conf['notebook_security_group_name'],
project_conf['region'], json.dumps(project_conf['instance_tags']), json.dumps(notebook_list_rules))
try:
- local("~/scripts/{}.py {}".format('common_create_security_group', params))
+ subprocess.run("~/scripts/{}.py {}".format('common_create_security_group', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -697,7 +698,7 @@ if __name__ == "__main__":
project_conf['resource_group_name'], project_conf['master_security_group_name'], project_conf['region'],
json.dumps(project_conf['instance_tags']), json.dumps(cluster_list_rules))
try:
- local("~/scripts/{}.py {}".format('common_create_security_group', params))
+ subprocess.run("~/scripts/{}.py {}".format('common_create_security_group', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -724,7 +725,7 @@ if __name__ == "__main__":
project_conf['resource_group_name'], project_conf['slave_security_group_name'], project_conf['region'],
json.dumps(project_conf['instance_tags']), json.dumps(cluster_list_rules))
try:
- local("~/scripts/{}.py {}".format('common_create_security_group', params))
+ subprocess.run("~/scripts/{}.py {}".format('common_create_security_group', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -752,7 +753,7 @@ if __name__ == "__main__":
params = "--container_name {} --account_tags '{}' --resource_group_name {} --region {}". \
format(project_conf['shared_container_name'], json.dumps(project_conf['shared_storage_account_tags']),
project_conf['resource_group_name'], project_conf['region'])
- local("~/scripts/{}.py {}".format('common_create_storage_account', params))
+ subprocess.run("~/scripts/{}.py {}".format('common_create_storage_account', params), shell=True)
except Exception as err:
datalab.fab.append_result("Failed to create storage account.", str(err))
AzureActions.remove_subnet(project_conf['resource_group_name'], project_conf['vpc_name'],
@@ -779,7 +780,7 @@ if __name__ == "__main__":
format(project_conf['edge_container_name'], json.dumps(project_conf['storage_account_tags']),
project_conf['resource_group_name'], project_conf['region'])
try:
- local("~/scripts/{}.py {}".format('common_create_storage_account', params))
+ subprocess.run("~/scripts/{}.py {}".format('common_create_storage_account', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -814,7 +815,7 @@ if __name__ == "__main__":
project_conf['azure_ad_user_name'],
project_conf['service_base_name'])
try:
- local("~/scripts/{}.py {}".format('common_create_datalake_directory', params))
+ subprocess.run("~/scripts/{}.py {}".format('common_create_datalake_directory', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -870,7 +871,7 @@ if __name__ == "__main__":
project_conf['instance_storage_account_type'],
project_conf['image_name'], json.dumps(project_conf['instance_tags']))
try:
- local("~/scripts/{}.py {}".format('common_create_instance', params))
+ subprocess.run("~/scripts/{}.py {}".format('common_create_instance', params), shell=True)
except:
traceback.print_exc()
raise Exception
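The public-key write above ('echo "{0}" >> {1}{2}.pub') pushes arbitrary key material through a shell, where an embedded quote would break the command. Plain file I/O avoids the shell entirely; a sketch with illustrative names:

    user_key = 'ssh-rsa AAAAB3Nza user@host'  # illustrative key material
    key_path = '/root/keys/myproject.pub'     # conf_key_dir + project_name in the script
    with open(key_path, 'a') as f:
        f.write(user_key + '\n')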
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/jupyter_install_dataengine-service_kernels.py b/infrastructure-provisioning/src/general/scripts/gcp/jupyter_install_dataengine-service_kernels.py
index d8097f2..bee6da4 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/jupyter_install_dataengine-service_kernels.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/jupyter_install_dataengine-service_kernels.py
@@ -62,7 +62,7 @@ def configure_notebook(args):
conn.sudo('chmod 755 /usr/local/bin/create_configs.py')
conn.sudo('mkdir -p /usr/lib/python3.8/datalab/')
conn.run('mkdir -p /tmp/datalab_libs/')
- local('scp -i {} /usr/lib/python3.8/datalab/*.py {}:/tmp/datalab_libs/'.format(args.keyfile, env.host_string))
+ conn.local('scp -i {} /usr/lib/python3.8/datalab/*.py {}:/tmp/datalab_libs/'.format(args.keyfile, env.host_string))
conn.run('chmod a+x /tmp/datalab_libs/*')
conn.sudo('mv /tmp/datalab_libs/* /usr/lib/python3.8/datalab/')
if exists('/usr/lib64'):
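This file takes a different route from the rest of the commit: the module-level local() becomes conn.local(), i.e. Fabric 2's Connection.local, which runs the scp on the provisioning host while conn.run/conn.sudo execute on the remote machine. A minimal sketch of that split, with hypothetical host and key names:

    from fabric import Connection

    conn = Connection(host='10.0.0.5', user='datalab-user',
                      connect_kwargs={'key_filename': '/root/keys/key.pem'})
    conn.local('scp -i /root/keys/key.pem /usr/lib/python3.8/datalab/*.py '
               'datalab-user@10.0.0.5:/tmp/datalab_libs/')  # runs on the control machine
    conn.run('chmod a+x /tmp/datalab_libs/*')               # runs on the remote host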
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/jupyterlab_configure.py b/infrastructure-provisioning/src/general/scripts/gcp/jupyterlab_configure.py
index 26dddf1..f82d719 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/jupyterlab_configure.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/jupyterlab_configure.py
@@ -29,6 +29,7 @@ import logging
import os
import sys
import traceback
+import subprocess
from fabric import *
if __name__ == "__main__":
@@ -110,7 +111,7 @@ if __name__ == "__main__":
notebook_config['datalab_ssh_user'], notebook_config['sudo_group'])
try:
- local("~/scripts/{}.py {}".format('create_ssh_user', params))
+ subprocess.run("~/scripts/{}.py {}".format('create_ssh_user', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -128,7 +129,7 @@ if __name__ == "__main__":
.format(instance_hostname, notebook_config['instance_name'], notebook_config['ssh_key_path'],
json.dumps(additional_config), notebook_config['datalab_ssh_user'])
try:
- local("~/scripts/{}.py {}".format('common_configure_proxy', params))
+ subprocess.run("~/scripts/{}.py {}".format('common_configure_proxy', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -145,7 +146,7 @@ if __name__ == "__main__":
format(instance_hostname, notebook_config['ssh_key_path'], notebook_config['datalab_ssh_user'],
os.environ['gcp_region'], edge_instance_private_ip)
try:
- local("~/scripts/{}.py {}".format('install_prerequisites', params))
+ subprocess.run("~/scripts/{}.py {}".format('install_prerequisites', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -169,7 +170,7 @@ if __name__ == "__main__":
os.environ['notebook_scala_version'], os.environ['notebook_r_mirror'],
notebook_config['exploratory_name'], )
try:
- local("~/scripts/{}.py {}".format('configure_jupyterlab_node', params))
+ subprocess.run("~/scripts/{}.py {}".format('configure_jupyterlab_node', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -187,7 +188,7 @@ if __name__ == "__main__":
instance_hostname, notebook_config['ssh_key_path'], json.dumps(additional_config),
notebook_config['datalab_ssh_user'])
try:
- local("~/scripts/{}.py {}".format('install_user_key', params))
+ subprocess.run("~/scripts/{}.py {}".format('install_user_key', params), shell=True)
except:
datalab.fab.append_result("Failed installing users key")
raise Exception
@@ -202,8 +203,8 @@ if __name__ == "__main__":
params = '--os_user {} --notebook_ip {} --keyfile "{}"' \
.format(notebook_config['datalab_ssh_user'], instance_hostname, notebook_config['ssh_key_path'])
try:
- local("~/scripts/{}.py {}".format('common_download_git_certfile', params))
- local("~/scripts/{}.py {}".format('manage_git_creds', params))
+ subprocess.run("~/scripts/{}.py {}".format('common_download_git_certfile', params), shell=True)
+ subprocess.run("~/scripts/{}.py {}".format('manage_git_creds', params), shell=True)
except:
datalab.fab.append_result("Failed setup git credentials")
raise Exception
@@ -255,7 +256,7 @@ if __name__ == "__main__":
notebook_config['exploratory_name'],
json.dumps(additional_info))
try:
- local("~/scripts/{}.py {}".format('common_configure_reverse_proxy', params))
+ subprocess.run("~/scripts/{}.py {}".format('common_configure_reverse_proxy', params), shell=True)
except:
datalab.fab.append_result("Failed edge reverse proxy template")
raise Exception
@@ -274,7 +275,7 @@ if __name__ == "__main__":
notebook_config['ssh_key_path'],
notebook_config['datalab_ssh_user'])
try:
- local("~/scripts/configure_proxy_for_docker.py {}".format(params))
+ subprocess.run("~/scripts/configure_proxy_for_docker.py {}".format(params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -294,7 +295,7 @@ if __name__ == "__main__":
notebook_config['ssh_key_path'],
notebook_config['datalab_ssh_user'])
try:
- local("~/scripts/jupyterlab_container_start.py {}".format(params))
+ subprocess.run("~/scripts/jupyterlab_container_start.py {}".format(params), shell=True)
except:
traceback.print_exc()
raise Exception
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_configure.py b/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_configure.py
index 928c7ce..52c798f 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_configure.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_configure.py
@@ -29,6 +29,7 @@ import logging
import os
import sys
import traceback
+import subprocess
from fabric import *
if __name__ == "__main__":
@@ -110,7 +111,7 @@ if __name__ == "__main__":
instance_hostname, notebook_config['ssh_key_path'], notebook_config['initial_user'],
notebook_config['datalab_ssh_user'], notebook_config['sudo_group'])
try:
- local("~/scripts/{}.py {}".format('create_ssh_user', params))
+ subprocess.run("~/scripts/{}.py {}".format('create_ssh_user', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -128,7 +129,7 @@ if __name__ == "__main__":
.format(instance_hostname, notebook_config['instance_name'], notebook_config['ssh_key_path'],
json.dumps(additional_config), notebook_config['datalab_ssh_user'])
try:
- local("~/scripts/{}.py {}".format('common_configure_proxy', params))
+ subprocess.run("~/scripts/{}.py {}".format('common_configure_proxy', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -145,7 +146,7 @@ if __name__ == "__main__":
format(instance_hostname, notebook_config['ssh_key_path'], notebook_config['datalab_ssh_user'],
os.environ['gcp_region'], edge_instance_private_ip)
try:
- local("~/scripts/{}.py {}".format('install_prerequisites', params))
+ subprocess.run("~/scripts/{}.py {}".format('install_prerequisites', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -180,7 +181,7 @@ if __name__ == "__main__":
os.environ['notebook_r_mirror'], 'null',
notebook_config['exploratory_name'], edge_instance_private_ip)
try:
- local("~/scripts/{}.py {}".format('configure_zeppelin_node', params))
+ subprocess.run("~/scripts/{}.py {}".format('configure_zeppelin_node', params), shell=True)
except:
traceback.print_exc()
raise Exception
@@ -198,7 +199,7 @@ if __name__ == "__main__":
instance_hostname, notebook_config['ssh_key_path'], json.dumps(additional_config),
notebook_config['datalab_ssh_user'])
try:
- local("~/scripts/{}.py {}".format('install_user_key', params))
+ subprocess.run("~/scripts/{}.py {}".format('install_user_key', params), shell=True)
except:
datalab.fab.append_result("Failed installing users key")
raise Exception
@@ -213,8 +214,8 @@ if __name__ == "__main__":
params = '--os_user {} --notebook_ip {} --keyfile "{}"' \
.format(notebook_config['datalab_ssh_user'], instance_hostname, notebook_config['ssh_key_path'])
try:
- local("~/scripts/{}.py {}".format('common_download_git_certfile', params))
- local("~/scripts/{}.py {}".format('manage_git_creds', params))
+ subprocess.run("~/scripts/{}.py {}".format('common_download_git_certfile', params), shell=True)
+ subprocess.run("~/scripts/{}.py {}".format('manage_git_creds', params), shell=True)
except:
datalab.fab.append_result("Failed setup git credentials")
raise Exception
@@ -266,7 +267,7 @@ if __name__ == "__main__":
notebook_config['exploratory_name'],
json.dumps(additional_info))
try:
- local("~/scripts/{}.py {}".format('common_configure_reverse_proxy', params))
+ subprocess.run("~/scripts/{}.py {}".format('common_configure_reverse_proxy', params), shell=True)
except:
datalab.fab.append_result("Failed edge reverse proxy template")
raise Exception
diff --git a/integration-tests/examples/scenario_zeppelin/zeppelin_tests.py b/integration-tests/examples/scenario_zeppelin/zeppelin_tests.py
index ff43d7d..0579e55 100644
--- a/integration-tests/examples/scenario_zeppelin/zeppelin_tests.py
+++ b/integration-tests/examples/scenario_zeppelin/zeppelin_tests.py
@@ -25,7 +25,7 @@ import os, sys, json
from fabric import *
import argparse
import requests
-
+import subprocess
parser = argparse.ArgumentParser()
parser.add_argument('--storage', type=str, default='')
@@ -38,7 +38,7 @@ args = parser.parse_args()
def prepare_templates():
- local('mv /tmp/zeppelin /home/{0}/test_templates'.format(args.os_user))
+ subprocess.run('mv /tmp/zeppelin /home/{0}/test_templates'.format(args.os_user), shell=True)
def get_storage():
storages = {"aws": args.storage,
@@ -52,7 +52,7 @@ def get_storage():
def get_note_status(note_id, notebook_ip):
running = False
- local('sleep 5')
+ subprocess.run('sleep 5', shell=True)
response = requests.get('http://{0}:8080/api/notebook/job/{1}'.format(notebook_ip, note_id))
status = json.loads(response.content)
for i in status.get('body'):
@@ -63,7 +63,7 @@ def get_note_status(note_id, notebook_ip):
print('Error in notebook')
sys.exit(1)
if running:
- local('sleep 5')
+ subprocess.run('sleep 5', shell=True)
get_note_status(note_id, notebook_ip)
else:
return "OK"
@@ -113,7 +113,7 @@ def restart_interpreter(notebook_ip, interpreter):
response = requests.put('http://{0}:8080/api/interpreter/setting/restart/{1}'.format(notebook_ip, id))
status = json.loads(response.content)
if status.get('status') == 'OK':
- local('sleep 5')
+ subprocess.run('sleep 5', shell=True)
return "OK"
else:
print('Failed to restart interpreter')
@@ -168,7 +168,7 @@ def run_spark():
if __name__ == "__main__":
try:
- notebook_ip = local('hostname -I', capture_output=True)
+ notebook_ip = subprocess.run('hostname -I', capture_output=True, shell=True)
prepare_templates()
run_pyspark()
run_sparkr()
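As in the libraries above, notebook_ip here ends up holding a CompletedProcess rather than an address, and hostname -I can print several space-separated IPs. A hedged fix:

    import subprocess

    result = subprocess.run('hostname -I', shell=True, capture_output=True, text=True)
    notebook_ip = result.stdout.split()[0]  # first address; -I may list several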