Posted to commits@dlab.apache.org by my...@apache.org on 2020/01/21 11:43:55 UTC
[incubator-dlab] 01/01: [DLAB-1409] - [AWS] actual R version in Jupyter UI for DES/Jupyter fixed
This is an automated email from the ASF dual-hosted git repository.
mykolabodnar pushed a commit to branch DLAB-1409
in repository https://gitbox.apache.org/repos/asf/incubator-dlab.git
commit e87a8a0ae862890af20a2a1f0c4ecab78afa4050
Author: Mykola_Bodnar1 <bo...@gmail.com>
AuthorDate: Tue Jan 21 13:43:35 2020 +0200
[DLAB-1409] - [AWS] actual R version in Jupyter UI for DES/Jupyter fixed
---
.../scripts/aws/dataengine-service_jars_parser.py | 14 +++++++++++++-
.../jupyter_dataengine-service_create_configs.py | 14 ++++++--------
.../jupyter_install_dataengine-service_kernels.py | 21 +++++++++++++++------
3 files changed, 34 insertions(+), 15 deletions(-)
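
In short, R version detection moves from the notebook node to the cluster side: dataengine-service_jars_parser.py (run on the EMR cluster) now probes the installed R, writes the result to /tmp/r_version, and uploads it to S3 next to the existing scala_version file; jupyter_install_dataengine-service_kernels.py then downloads that file and forwards the value to the kernel-config script as a new --r_version argument, replacing the old --r_enabled flag. A minimal standalone sketch of the probe, assuming GNU R's usual banner format:

    # Hypothetical standalone version of the probe added below; the commit
    # embeds the same awk pipeline inline via subprocess.check_output.
    import subprocess

    def get_r_version():
        # "R --version" starts with e.g. "R version 3.4.4 (2018-03-15) -- ...";
        # awk prints the third field of the matching line, the bare version.
        return subprocess.check_output(
            "R --version | awk '/version / {print $3}'",
            shell=True).decode('UTF-8').rstrip()

    print(get_r_version())  # e.g. "3.4.4"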
diff --git a/infrastructure-provisioning/src/general/scripts/aws/dataengine-service_jars_parser.py b/infrastructure-provisioning/src/general/scripts/aws/dataengine-service_jars_parser.py
index 5b46445..0db0830 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/dataengine-service_jars_parser.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/dataengine-service_jars_parser.py
@@ -54,10 +54,14 @@ if __name__ == "__main__":
     else:
         endpoint = "https://s3-{}.amazonaws.com".format(args.region)
     os.system('touch /tmp/scala_version')
-    scala_ver = subprocess.check_output("spark-submit --version 2>&1 | grep -o -P 'Scala version \K.{0,7}'",
+    scala_ver = subprocess.check_output("spark-submit --version 2>&1 | awk '/Scala version / {gsub(/,/, \"\"); print $4}'",
                                         shell=True).decode('UTF-8')
     with open('/tmp/scala_version', 'w') as outfile:
         outfile.write(scala_ver)
+    os.system('touch /tmp/r_version')
+    r_ver = subprocess.check_output("R --version | awk '/version / {print $3}'", shell=True).decode('UTF-8')
+    with open('/tmp/r_version', 'w') as outfile:
+        outfile.write(r_ver)
     os.system('touch /tmp/python_version')
     python_ver = subprocess.check_output("python3.5 -V 2>/dev/null | awk '{print $2}'", shell=True)
     if python_ver != '':
@@ -139,4 +143,12 @@ if __name__ == "__main__":
               format(args.bucket,
                      args.user_name,
                      args.cluster_name,
+                     endpoint, args.region))
+    os.system('aws s3 cp /tmp/r_version '
+              's3://{}/{}/{}/ '
+              '--endpoint-url {} '
+              '--region {} --sse AES256'.
+              format(args.bucket,
+                     args.user_name,
+                     args.cluster_name,
                      endpoint, args.region))
\ No newline at end of file
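
The Scala probe changes in the same spirit: the old grep -o -P '...\K...' form required GNU grep built with PCRE support and capped the match at seven characters, while the awk replacement works with any POSIX awk and strips the trailing comma itself. A sketch of what it parses, assuming the standard spark-submit banner line:

    # spark-submit --version prints, among its banner (format assumed):
    #   Using Scala version 2.11.12, OpenJDK 64-Bit Server VM, 1.8.0_222
    # Field 4 is "2.11.12,"; gsub(/,/, "") drops the comma before printing.
    import subprocess

    scala_ver = subprocess.check_output(
        "spark-submit --version 2>&1 | awk '/Scala version / {gsub(/,/, \"\"); print $4}'",
        shell=True).decode('UTF-8').rstrip()
    print(scala_ver)  # e.g. "2.11.12"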
diff --git a/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py b/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py
index b1dfa90..ba21b9a 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py
@@ -42,6 +42,8 @@ parser.add_argument('--cluster_name', type=str, default='')
 parser.add_argument('--dry_run', type=str, default='false')
 parser.add_argument('--emr_version', type=str, default='')
 parser.add_argument('--spark_version', type=str, default='')
+parser.add_argument('--scala_version', type=str, default='')
+parser.add_argument('--r_version', type=str, default='')
 parser.add_argument('--hadoop_version', type=str, default='')
 parser.add_argument('--region', type=str, default='')
 parser.add_argument('--excluded_lines', type=str, default='')
@@ -50,8 +52,6 @@ parser.add_argument('--os_user', type=str, default='')
 parser.add_argument('--pip_mirror', type=str, default='')
 parser.add_argument('--numpy_version', type=str, default='')
 parser.add_argument('--application', type=str, default='')
-parser.add_argument('--r_enabled', type=str, default='')
-parser.add_argument('--scala_version', type=str, default='')
 args = parser.parse_args()
 
 emr_dir = '/opt/' + args.emr_version + '/jars/'
@@ -65,14 +65,13 @@ def r_kernel(args):
     local('mkdir -p {}/r_{}/'.format(kernels_dir, args.cluster_name))
     kernel_path = "{}/r_{}/kernel.json".format(kernels_dir, args.cluster_name)
     template_file = "/tmp/r_dataengine-service_template.json"
-    r_version = local("R --version | awk '/version / {print $3}'", capture = True)
     with open(template_file, 'r') as f:
         text = f.read()
     text = text.replace('CLUSTER_NAME', args.cluster_name)
     text = text.replace('SPARK_PATH', spark_path)
     text = text.replace('SPARK_VERSION', 'Spark-' + args.spark_version)
-    text = text.replace('R_KERNEL_VERSION', 'R-{}'.format(str(r_version)))
+    text = text.replace('R_KERNEL_VERSION', 'R-{}'.format(args.r_version))
     text = text.replace('DATAENGINE-SERVICE_VERSION', args.emr_version)
     if 'emr-4.' in args.emr_version:
         text = text.replace('YARN_CLI_TYPE', 'yarn-client')
@@ -86,7 +85,7 @@ def r_kernel(args):
 
 def toree_kernel(args):
     spark_path = '/opt/' + args.emr_version + '/' + args.cluster_name + '/spark/'
-    scala_version = local('spark-submit --version 2>&1 | grep -o -P "Scala version \K.{0,7}"', capture=True)
+    scala_version = local("spark-submit --version 2>&1 | awk '/Scala version / {gsub(/,/, \"\"); print $4}'", capture=True)
     if args.emr_version == 'emr-4.3.0' or args.emr_version == 'emr-4.6.0' or args.emr_version == 'emr-4.8.0':
         local('mkdir -p ' + kernels_dir + 'toree_' + args.cluster_name + '/')
         kernel_path = kernels_dir + "toree_" + args.cluster_name + "/kernel.json"
@@ -164,8 +163,6 @@ def add_breeze_library_emr(args):
           """\/jars\/usr\/other\/*/' """ + spark_defaults_path + """" """)
 
-
-
 
 if __name__ == "__main__":
     if args.dry_run == 'true':
         parser.print_help()
@@ -178,7 +175,8 @@ if __name__ == "__main__":
         pyspark_kernel(kernels_dir, args.emr_version, args.cluster_name, args.spark_version, args.bucket,
                        args.project_name, args.region, args.os_user, args.application, args.pip_mirror, args.numpy_version)
         toree_kernel(args)
-        if args.r_enabled == 'true':
+        if args.r_version != 'false':
+            print('R version: {}'.format(args.r_version))
             r_kernel(args)
         spark_defaults(args)
         configuring_notebook(args.emr_version)
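
Note the contract change in the main block: --r_version now doubles as the enable switch. The installer passes the literal string 'false' when R is disabled on the cluster, so one string argument replaces the old --r_enabled boolean plus the local version lookup. Reduced to its essentials (a hypothetical, self-contained sketch):

    # Hypothetical reduction of the new flag contract: one string carries
    # both "is R enabled" (sentinel 'false') and "which R version".
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--r_version', type=str, default='')
    args = parser.parse_args(['--r_version', '3.4.4'])

    if args.r_version != 'false':
        print('R version: {}'.format(args.r_version))
        # r_kernel(args) would run here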
diff --git a/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py b/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py
index 146eaff..fb29f0a 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py
@@ -83,9 +83,18 @@ if __name__ == "__main__":
     s3_client.download_file(args.bucket, args.project_name + '/' + args.cluster_name + '/scala_version',
                             '/tmp/scala_version')
     with file('/tmp/scala_version') as f:
-        scala_version = str(f.read()).replace(',', '')
-    sudo("/usr/bin/python /usr/local/bin/jupyter_dataengine-service_create_configs.py --bucket " + args.bucket +
-         " --cluster_name " + args.cluster_name + " --emr_version " + args.emr_version + " --spark_version " +
-         spark_version + " --hadoop_version " + hadoop_version + " --region " + args.region + " --excluded_lines '"
-         + args.emr_excluded_spark_properties + "' --project_name " + args.project_name + " --os_user " + args.os_user +
-         " --pip_mirror " + args.pip_mirror + " --numpy_version " + numpy_version + " --application " + args.application + " --r_enabled " + r_enabled + " --scala_version " + scala_version)
+        scala_version = str(f.read()).rstrip()
+        print(scala_version)
+    if r_enabled == 'true':
+        s3_client.download_file(args.bucket, args.project_name + '/' + args.cluster_name + '/r_version', '/tmp/r_version')
+        with file('/tmp/r_version') as g:
+            r_version = str(g.read()).rstrip()
+            print(r_version)
+    else:
+        r_version = 'false'
+    sudo("/usr/bin/python /usr/local/bin/jupyter_dataengine-service_create_configs.py --bucket " + args.bucket
+         + " --cluster_name " + args.cluster_name + " --emr_version " + args.emr_version + " --spark_version "
+         + spark_version + " --scala_version " + scala_version + " --r_version " + r_version + " --hadoop_version "
+         + hadoop_version + " --region " + args.region + " --excluded_lines '" + args.emr_excluded_spark_properties
+         + "' --project_name " + args.project_name + " --os_user " + args.os_user + " --pip_mirror "
+         + args.pip_mirror + " --numpy_version " + numpy_version + " --application " + args.application)
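
On the installer side, the new branch is easiest to read as a small helper (a hedged sketch with assumed names; the script itself inlines this and, being Fabric/Python 2, reads the file with the py2-only file() builtin):

    # Hypothetical standalone version of the r_version fetch; boto3 is
    # assumed, matching the s3_client already used by the script.
    import boto3

    def fetch_r_version(bucket, project_name, cluster_name, r_enabled):
        if r_enabled != 'true':
            return 'false'               # sentinel: R disabled on the cluster
        s3_client = boto3.client('s3')
        s3_client.download_file(bucket,
                                project_name + '/' + cluster_name + '/r_version',
                                '/tmp/r_version')
        with open('/tmp/r_version') as f:  # file() in the py2 original
            return str(f.read()).rstrip()  # drop the trailing newline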
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@dlab.apache.org
For additional commands, e-mail: commits-help@dlab.apache.org