Posted to commits@dlab.apache.org by my...@apache.org on 2020/01/21 11:43:55 UTC

[incubator-dlab] 01/01: [DLAB-1409] - [AWS] fixed the actual R version shown in the Jupyter UI for DES/Jupyter

This is an automated email from the ASF dual-hosted git repository.

mykolabodnar pushed a commit to branch DLAB-1409
in repository https://gitbox.apache.org/repos/asf/incubator-dlab.git

commit e87a8a0ae862890af20a2a1f0c4ecab78afa4050
Author: Mykola_Bodnar1 <bo...@gmail.com>
AuthorDate: Tue Jan 21 13:43:35 2020 +0200

    [DLAB-1409] - [AWS] fixed the actual R version shown in the Jupyter UI for DES/Jupyter
---
 .../scripts/aws/dataengine-service_jars_parser.py   | 14 +++++++++++++-
 .../jupyter_dataengine-service_create_configs.py    | 14 ++++++--------
 .../jupyter_install_dataengine-service_kernels.py   | 21 +++++++++++++++------
 3 files changed, 34 insertions(+), 15 deletions(-)
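
The change has three parts: the EMR-side jars parser now writes the cluster's R version to /tmp/r_version and uploads it to S3; the kernel installer on the notebook instance downloads that file and forwards it as --r_version; and the config generator substitutes the passed-in value into the R kernel spec, so the Jupyter UI reflects the R on the cluster rather than the one on the notebook. For context, a minimal sketch of the two shell probes involved (subprocess is the standard library; the awk field positions assume the usual "Using Scala version X.Y.Z," and "R version X.Y.Z" banner lines):

    import subprocess

    # Scala version, e.g. "2.11.12": fourth whitespace-separated field of the
    # "Using Scala version 2.11.12, ..." banner; gsub strips the trailing comma
    scala_ver = subprocess.check_output(
        "spark-submit --version 2>&1 | awk '/Scala version / {gsub(/,/, \"\"); print $4}'",
        shell=True).decode('UTF-8').rstrip()

    # R version, e.g. "3.4.1": third field of the "R version 3.4.1 (...)" line
    r_ver = subprocess.check_output(
        "R --version | awk '/version / {print $3}'",
        shell=True).decode('UTF-8').rstrip()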

diff --git a/infrastructure-provisioning/src/general/scripts/aws/dataengine-service_jars_parser.py b/infrastructure-provisioning/src/general/scripts/aws/dataengine-service_jars_parser.py
index 5b46445..0db0830 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/dataengine-service_jars_parser.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/dataengine-service_jars_parser.py
@@ -54,10 +54,14 @@ if __name__ == "__main__":
     else:
         endpoint = "https://s3-{}.amazonaws.com".format(args.region)
     os.system('touch /tmp/scala_version')
-    scala_ver = subprocess.check_output("spark-submit --version 2>&1 | grep -o -P 'Scala version \K.{0,7}'",
+    scala_ver = subprocess.check_output("spark-submit --version 2>&1 | awk '/Scala version / {gsub(/,/, \"\"); print $4}'",
                                         shell=True).decode('UTF-8')
     with open('/tmp/scala_version', 'w') as outfile:
         outfile.write(scala_ver)
+    os.system('touch /tmp/r_version')
+    r_ver = subprocess.check_output("R --version | awk '/version / {print $3}'", shell=True).decode('UTF-8')
+    with open('/tmp/r_version', 'w') as outfile:
+        outfile.write(r_ver)
     os.system('touch /tmp/python_version')
     python_ver = subprocess.check_output("python3.5 -V 2>/dev/null | awk '{print $2}'", shell=True)
     if python_ver != '':
@@ -139,4 +143,12 @@ if __name__ == "__main__":
               format(args.bucket,
                      args.user_name,
                      args.cluster_name,
+                     endpoint, args.region))
+    os.system('aws s3 cp /tmp/r_version '
+              's3://{}/{}/{}/ '
+              '--endpoint-url {} '
+              '--region {} --sse AES256'.
+              format(args.bucket,
+                     args.user_name,
+                     args.cluster_name,
                      endpoint, args.region))
\ No newline at end of file
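
The new upload mirrors the existing scala_version and python_version uploads by shelling out to the AWS CLI. For reference only, an equivalent upload with boto3 would look roughly like the sketch below (the script itself uses `aws s3 cp`; the bucket, user, cluster, and endpoint values are illustrative stand-ins for the script's args):

    import boto3

    bucket, user_name, cluster_name = 'dlab-bucket', 'user1', 'cluster1'  # illustrative
    endpoint = 'https://s3-us-west-2.amazonaws.com'                       # illustrative

    s3 = boto3.client('s3', endpoint_url=endpoint, region_name='us-west-2')
    s3.upload_file('/tmp/r_version', bucket,
                   '{}/{}/r_version'.format(user_name, cluster_name),
                   ExtraArgs={'ServerSideEncryption': 'AES256'})
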
diff --git a/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py b/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py
index b1dfa90..ba21b9a 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py
@@ -42,6 +42,8 @@ parser.add_argument('--cluster_name', type=str, default='')
 parser.add_argument('--dry_run', type=str, default='false')
 parser.add_argument('--emr_version', type=str, default='')
 parser.add_argument('--spark_version', type=str, default='')
+parser.add_argument('--scala_version', type=str, default='')
+parser.add_argument('--r_version', type=str, default='')
 parser.add_argument('--hadoop_version', type=str, default='')
 parser.add_argument('--region', type=str, default='')
 parser.add_argument('--excluded_lines', type=str, default='')
@@ -50,8 +52,6 @@ parser.add_argument('--os_user', type=str, default='')
 parser.add_argument('--pip_mirror', type=str, default='')
 parser.add_argument('--numpy_version', type=str, default='')
 parser.add_argument('--application', type=str, default='')
-parser.add_argument('--r_enabled', type=str, default='')
-parser.add_argument('--scala_version', type=str, default='')
 args = parser.parse_args()
 
 emr_dir = '/opt/' + args.emr_version + '/jars/'
@@ -65,14 +65,13 @@ def r_kernel(args):
     local('mkdir -p {}/r_{}/'.format(kernels_dir, args.cluster_name))
     kernel_path = "{}/r_{}/kernel.json".format(kernels_dir, args.cluster_name)
     template_file = "/tmp/r_dataengine-service_template.json"
-    r_version = local("R --version | awk '/version / {print $3}'", capture = True)
 
     with open(template_file, 'r') as f:
         text = f.read()
     text = text.replace('CLUSTER_NAME', args.cluster_name)
     text = text.replace('SPARK_PATH', spark_path)
     text = text.replace('SPARK_VERSION', 'Spark-' + args.spark_version)
-    text = text.replace('R_KERNEL_VERSION', 'R-{}'.format(str(r_version)))
+    text = text.replace('R_KERNEL_VERSION', 'R-{}'.format(args.r_version))
     text = text.replace('DATAENGINE-SERVICE_VERSION', args.emr_version)
     if 'emr-4.' in args.emr_version:
         text = text.replace('YARN_CLI_TYPE', 'yarn-client')
@@ -86,7 +85,7 @@ def r_kernel(args):
 
 def toree_kernel(args):
     spark_path = '/opt/' + args.emr_version + '/' + args.cluster_name + '/spark/'
-    scala_version = local('spark-submit --version 2>&1 | grep -o -P "Scala version \K.{0,7}"', capture=True)
+    scala_version = local("spark-submit --version 2>&1 | awk '/Scala version / {gsub(/,/, \"\"); print $4}'", capture=True)
     if args.emr_version == 'emr-4.3.0' or args.emr_version == 'emr-4.6.0' or args.emr_version == 'emr-4.8.0':
         local('mkdir -p ' + kernels_dir + 'toree_' + args.cluster_name + '/')
         kernel_path = kernels_dir + "toree_" + args.cluster_name + "/kernel.json"
@@ -164,8 +163,6 @@ def add_breeze_library_emr(args):
           """\/jars\/usr\/other\/*/' """ + spark_defaults_path + """" """)
 
 
-
-
 if __name__ == "__main__":
     if args.dry_run == 'true':
         parser.print_help()
@@ -178,7 +175,8 @@ if __name__ == "__main__":
         pyspark_kernel(kernels_dir, args.emr_version, args.cluster_name, args.spark_version, args.bucket,
                        args.project_name, args.region, args.os_user, args.application, args.pip_mirror, args.numpy_version)
         toree_kernel(args)
-        if args.r_enabled == 'true':
+        if args.r_version != 'false':
+            print('R version: {}'.format(args.r_version))
             r_kernel(args)
         spark_defaults(args)
         configuring_notebook(args.emr_version)
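
With --r_version passed in, r_kernel() no longer runs R locally; it just drops the value into the kernel.json template. The substitution is a plain string replace, roughly as sketched below (the placeholder names match the template above; the paths and version value are illustrative):

    # Sketch of the templating done in r_kernel(); path and values are illustrative
    template_file = '/tmp/r_dataengine-service_template.json'
    kernel_path = '/srv/kernels/r_cluster1/kernel.json'

    with open(template_file) as f:
        text = f.read()
    text = text.replace('CLUSTER_NAME', 'cluster1')
    text = text.replace('R_KERNEL_VERSION', 'R-{}'.format('3.4.1'))  # value now comes from --r_version
    with open(kernel_path, 'w') as f:
        f.write(text)
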
diff --git a/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py b/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py
index 146eaff..fb29f0a 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py
@@ -83,9 +83,18 @@ if __name__ == "__main__":
     s3_client.download_file(args.bucket, args.project_name + '/' + args.cluster_name + '/scala_version',
                             '/tmp/scala_version')
     with file('/tmp/scala_version') as f:
-        scala_version = str(f.read()).replace(',', '')
-    sudo("/usr/bin/python /usr/local/bin/jupyter_dataengine-service_create_configs.py --bucket " + args.bucket +
-         " --cluster_name " + args.cluster_name + " --emr_version " + args.emr_version + " --spark_version " +
-         spark_version + " --hadoop_version " + hadoop_version + " --region " + args.region + " --excluded_lines '"
-         + args.emr_excluded_spark_properties + "' --project_name " + args.project_name + " --os_user " + args.os_user +
-         " --pip_mirror " + args.pip_mirror + " --numpy_version " + numpy_version + " --application " + args.application + " --r_enabled " + r_enabled + " --scala_version " + scala_version)
+        scala_version = str(f.read()).rstrip()
+        print(scala_version)
+    if r_enabled == 'true':
+        s3_client.download_file(args.bucket, args.project_name + '/' + args.cluster_name + '/r_version', '/tmp/r_version')
+        with file('/tmp/r_version') as g:
+            r_version = str(g.read()).rstrip()
+            print(r_version)
+    else:
+        r_version = 'false'
+    sudo("/usr/bin/python /usr/local/bin/jupyter_dataengine-service_create_configs.py --bucket " + args.bucket
+         + " --cluster_name " + args.cluster_name + " --emr_version " + args.emr_version + " --spark_version "
+         + spark_version + " --scala_version " + scala_version + " --r_version " + r_version + " --hadoop_version "
+         + hadoop_version + " --region " + args.region + " --excluded_lines '" + args.emr_excluded_spark_properties
+         + "' --project_name " + args.project_name + " --os_user " + args.os_user + " --pip_mirror "
+         + args.pip_mirror + " --numpy_version " + numpy_version + " --application " + args.application)
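
On the notebook side the flow is: download the version file from the cluster's S3 prefix, read it back, and forward it to create_configs; when R is disabled, the string 'false' is passed as a sentinel so the R kernel step is skipped. A condensed sketch, assuming boto3's download_file as used in the script (bucket, project, and cluster names illustrative):

    import boto3

    bucket, project_name, cluster_name = 'dlab-bucket', 'project1', 'cluster1'  # illustrative
    r_enabled = 'true'

    s3_client = boto3.client('s3')
    if r_enabled == 'true':
        s3_client.download_file(bucket,
                                '{}/{}/r_version'.format(project_name, cluster_name),
                                '/tmp/r_version')
        with open('/tmp/r_version') as g:
            r_version = g.read().rstrip()
    else:
        r_version = 'false'  # sentinel: create_configs skips r_kernel() for this value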

