Posted to commits@dlab.apache.org by my...@apache.org on 2020/07/13 14:28:18 UTC

[incubator-dlab] branch DLAB-515 updated: [DLAB-515] - [AWS] EMR kernels connection via sparkmagic/livy fixed

This is an automated email from the ASF dual-hosted git repository.

mykolabodnar pushed a commit to branch DLAB-515
in repository https://gitbox.apache.org/repos/asf/incubator-dlab.git


The following commit(s) were added to refs/heads/DLAB-515 by this push:
     new 8b86370  [DLAB-515] - [AWS] EMR kernels connection via sparkmagic/livy fixed
8b86370 is described below

commit 8b86370d49076134bf26662a10fb94eaf1b0deea
Author: bodnarmykola <bo...@gmail.com>
AuthorDate: Mon Jul 13 17:27:38 2020 +0300

    [DLAB-515] - [AWS] EMR kernels connection via sparkmagic/livy fixed
---
 .../jupyter_dataengine-service_create_configs.py   | 42 ++++++++++++++--------
 .../jupyter_install_dataengine-service_kernels.py  | 19 ++++++----
 2 files changed, 40 insertions(+), 21 deletions(-)

diff --git a/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py b/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py
index 46cda9c..9a95b71 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py
@@ -53,6 +53,7 @@ parser.add_argument('--pip_mirror', type=str, default='')
 parser.add_argument('--numpy_version', type=str, default='')
 parser.add_argument('--application', type=str, default='')
 parser.add_argument('--master_ip', type=str, default='')
+parser.add_argument('--python_version', type=str, default='')
 args = parser.parse_args()
 
 emr_dir = '/opt/' + args.emr_version + '/jars/'
@@ -170,10 +171,23 @@ def install_sparkamagic_kernels(args):
         local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/sparkkernel --user'.format(sparkmagic_dir))
         local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/pysparkkernel --user'.format(sparkmagic_dir))
         local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/sparkrkernel --user'.format(sparkmagic_dir))
+        pyspark_kernel_name = 'PySpark (Python-{0} / Spark-{1} ) [{2}]'.format(args.python_version, args.spark_version,
+                                                                         args.cluster_name)
+        local('sed -i \'s|PySpark|{0}|g\' /home/{1}/.local/share/jupyter/kernels/pysparkkernel/kernel.json'.format(
+            pyspark_kernel_name, args.os_user))
+        spark_kernel_name = 'PySpark (Scala-{0} / Spark-{1} ) [{2}]'.format(args.scala_version, args.spark_version,
+                                                                         args.cluster_name)
+        local('sed -i \'s|Spark|{0}|g\' /home/{1}/.local/share/jupyter/kernels/sparkkernel/kernel.json'.format(
+            spark_kernel_name, args.os_user))
+        sparkr_kernel_name = 'SparkR (R-{0} / Spark-{1} ) [{2}]'.format(args.r_version, args.spark_version,
+                                                                            args.cluster_name)
+        local('sed -i \'s|SparkR|{0}|g\' /home/{1}/.local/share/jupyter/kernels/sparkrkernel/kernel.json'.format(
+            sparkr_kernel_name, args.os_user))
         local('mkdir -p /home/' + args.os_user + '/.sparkmagic')
         local('cp -f /tmp/sparkmagic_config_template.json /home/' + args.os_user + '/.sparkmagic/config.json')
         local('sed -i \'s|LIVY_HOST|{0}|g\' /home/{1}/.sparkmagic/config.json'.format(
                 args.master_ip, args.os_user))
+        local('sudo chown -R {0}:{0} /home/{0}/.sparkmagic/'.format(args.os_user))
     except:
         sys.exit(1)
 
@@ -184,17 +198,17 @@ if __name__ == "__main__":
         parser.print_help()
     else:
         install_sparkamagic_kernels(args)
-        result = prepare(emr_dir, yarn_dir)
-        if result == False :
-            jars(args, emr_dir)
-        yarn(args, yarn_dir)
-        install_emr_spark(args)
-        pyspark_kernel(kernels_dir, args.emr_version, args.cluster_name, args.spark_version, args.bucket,
-                       args.project_name, args.region, args.os_user, args.application, args.pip_mirror, args.numpy_version)
-        toree_kernel(args)
-        if args.r_version != 'false':
-            print('R version: {}'.format(args.r_version))
-            r_kernel(args)
-        spark_defaults(args)
-        configuring_notebook(args.emr_version)
-        add_breeze_library_emr(args)
+        #result = prepare(emr_dir, yarn_dir)
+        #if result == False :
+        #    jars(args, emr_dir)
+        #yarn(args, yarn_dir)
+        #install_emr_spark(args)
+        #pyspark_kernel(kernels_dir, args.emr_version, args.cluster_name, args.spark_version, args.bucket,
+        #               args.project_name, args.region, args.os_user, args.application, args.pip_mirror, args.numpy_version)
+        #toree_kernel(args)
+        #if args.r_version != 'false':
+        #    print('R version: {}'.format(args.r_version))
+        #    r_kernel(args)
+        #spark_defaults(args)
+        #configuring_notebook(args.emr_version)
+        #add_breeze_library_emr(args)
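
For context on the hunk above: install_sparkamagic_kernels registers the three sparkmagic kernelspecs, rewrites their display names with sed so each kernel shows the language version, Spark version and cluster name, copies the Livy config template into ~/.sparkmagic/config.json with the EMR master IP substituted, and fixes ownership of that directory; the old per-kernel setup steps are commented out. Below is a minimal illustration of what the sed-based renaming achieves, done with the json module instead of shelling out. The kernelspec path layout and the sample values are assumptions for illustration, not taken from the commit.

    #!/usr/bin/env python
    # Illustration only: rewrite the display_name of an installed Jupyter
    # kernelspec, which is what the sed calls in the hunk above accomplish.
    import json

    def rename_kernel(os_user, kernel, display_name):
        # Path written by "jupyter-kernelspec install --user"; assumed layout.
        path = '/home/{0}/.local/share/jupyter/kernels/{1}/kernel.json'.format(
            os_user, kernel)
        with open(path) as f:
            spec = json.load(f)
        spec['display_name'] = display_name
        with open(path, 'w') as f:
            json.dump(spec, f, indent=2)

    if __name__ == '__main__':
        # Hypothetical example values; the real script builds these from its CLI args.
        rename_kernel('dlab-user', 'pysparkkernel',
                      'PySpark (Python-3.7 / Spark-2.4.4) [emr-cluster]')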
diff --git a/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py b/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py
index 0b82ce1..37d102a 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py
@@ -51,14 +51,14 @@ def configure_notebook(args):
     files_dir = '/root/files/'
     scripts_dir = '/root/scripts/'
     put(templates_dir + 'sparkmagic_config_template.json', '/tmp/sparkmagic_config_template.json')
-    put(templates_dir + 'pyspark_dataengine-service_template.json', '/tmp/pyspark_dataengine-service_template.json')
-    put(templates_dir + 'r_dataengine-service_template.json', '/tmp/r_dataengine-service_template.json')
-    put(templates_dir + 'toree_dataengine-service_template.json','/tmp/toree_dataengine-service_template.json')
+    #put(templates_dir + 'pyspark_dataengine-service_template.json', '/tmp/pyspark_dataengine-service_template.json')
+    #put(templates_dir + 'r_dataengine-service_template.json', '/tmp/r_dataengine-service_template.json')
+    #put(templates_dir + 'toree_dataengine-service_template.json','/tmp/toree_dataengine-service_template.json')
     put(scripts_dir + '{}_dataengine-service_create_configs.py'.format(args.application),
         '/tmp/jupyter_dataengine-service_create_configs.py')
-    put(files_dir + 'toree_kernel.tar.gz', '/tmp/toree_kernel.tar.gz')
-    put(templates_dir + 'toree_dataengine-service_templatev2.json', '/tmp/toree_dataengine-service_templatev2.json')
-    put(templates_dir + 'run_template.sh', '/tmp/run_template.sh')
+    #put(files_dir + 'toree_kernel.tar.gz', '/tmp/toree_kernel.tar.gz')
+    #put(templates_dir + 'toree_dataengine-service_templatev2.json', '/tmp/toree_dataengine-service_templatev2.json')
+    #put(templates_dir + 'run_template.sh', '/tmp/run_template.sh')
     sudo('\cp /tmp/jupyter_dataengine-service_create_configs.py /usr/local/bin/jupyter_dataengine-service_create_configs.py')
     sudo('chmod 755 /usr/local/bin/jupyter_dataengine-service_create_configs.py')
     sudo('mkdir -p /usr/lib/python2.7/dlab/')
@@ -83,9 +83,14 @@ if __name__ == "__main__":
     s3_client = boto3.client('s3', config=Config(signature_version='s3v4'), region_name=args.region)
     s3_client.download_file(args.bucket, args.project_name + '/' + args.cluster_name + '/scala_version',
                             '/tmp/scala_version')
+    s3_client.download_file(args.bucket, args.project_name + '/' + args.cluster_name + '/python_version',
+                            '/tmp/python_version')
     with file('/tmp/scala_version') as f:
         scala_version = str(f.read()).rstrip()
         print(scala_version)
+    with file('/tmp/python_version') as f:
+        python_version = str(f.read()).rstrip()
+        print(python_version)
     if r_enabled == 'true':
         s3_client.download_file(args.bucket, args.project_name + '/' + args.cluster_name + '/r_version', '/tmp/r_version')
         with file('/tmp/r_version') as g:
@@ -102,4 +107,4 @@ if __name__ == "__main__":
          + hadoop_version + " --region " + args.region + " --excluded_lines '" + args.emr_excluded_spark_properties
          + "' --project_name " + args.project_name + " --os_user " + args.os_user + " --pip_mirror "
          + args.pip_mirror + " --numpy_version " + numpy_version + " --application "
-         + args.application + " --master_ip " + master_ip)
+         + args.application + " --master_ip " + master_ip + " --python_version " + python_version)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@dlab.apache.org
For additional commands, e-mail: commits-help@dlab.apache.org