You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@dlab.apache.org by my...@apache.org on 2020/07/13 08:41:03 UTC

[incubator-dlab] branch DLAB-515 updated: [DLAB-515] - [AWS] EMR kernels connection via sparkmagic/livy implemented

This is an automated email from the ASF dual-hosted git repository.

mykolabodnar pushed a commit to branch DLAB-515
in repository https://gitbox.apache.org/repos/asf/incubator-dlab.git


The following commit(s) were added to refs/heads/DLAB-515 by this push:
     new 3521c5f  [DLAB-515] - [AWS] EMR kernels connection via sparkmagic/livy implemented
3521c5f is described below

commit 3521c5f9ae61eaeb98413e04a785efb0f920a354
Author: bodnarmykola <bo...@gmail.com>
AuthorDate: Mon Jul 13 11:40:17 2020 +0300

    [DLAB-515] - [AWS] EMR kernels connection via sparkmagic/livy implemented
---
 .../src/general/files/aws/deeplearning_Dockerfile    |  1 +
 .../src/general/files/aws/jupyter_Dockerfile         |  1 +
 .../src/general/lib/os/debian/notebook_lib.py        |  5 +++--
 .../src/general/lib/os/fab.py                        |  4 ++--
 .../aws/jupyter_dataengine-service_create_configs.py | 17 +++++++++++++++++
 .../jupyter_install_dataengine-service_kernels.py    |  7 ++++++-
 .../templates/os/sparkmagic_config_template.json     | 20 ++++++++++++++++++++
 7 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/infrastructure-provisioning/src/general/files/aws/deeplearning_Dockerfile b/infrastructure-provisioning/src/general/files/aws/deeplearning_Dockerfile
index fb6551f..587a1b6 100644
--- a/infrastructure-provisioning/src/general/files/aws/deeplearning_Dockerfile
+++ b/infrastructure-provisioning/src/general/files/aws/deeplearning_Dockerfile
@@ -42,6 +42,7 @@ COPY general/templates/os/inactive.service /root/templates/
 COPY general/templates/os/inactive.timer /root/templates/
 COPY general/files/os/toree-assembly-0.3.0.jar /root/files/
 COPY general/files/os/toree_kernel.tar.gz /root/files/
+COPY general/templates/os/sparkmagic_config_template.json /root/templates/
 COPY general/templates/os/pyspark_dataengine-service_template.json /root/templates/
 COPY general/templates/os/r_dataengine-service_template.json /root/templates/
 COPY general/templates/os/toree_dataengine-service_* /root/templates/
diff --git a/infrastructure-provisioning/src/general/files/aws/jupyter_Dockerfile b/infrastructure-provisioning/src/general/files/aws/jupyter_Dockerfile
index 4c83fac..a2d6198 100644
--- a/infrastructure-provisioning/src/general/files/aws/jupyter_Dockerfile
+++ b/infrastructure-provisioning/src/general/files/aws/jupyter_Dockerfile
@@ -35,6 +35,7 @@ COPY general/templates/os/pyspark_local_template.json /root/templates/
 COPY general/templates/os/py3spark_local_template.json /root/templates/
 COPY general/templates/os/pyspark_dataengine-service_template.json /root/templates/
 COPY general/templates/os/r_dataengine-service_template.json /root/templates/
+COPY general/templates/os/sparkmagic_config_template.json /root/templates/
 COPY general/templates/os/r_template.json /root/templates/
 COPY general/templates/os/run_template.sh /root/templates/
 COPY general/templates/os/toree_dataengine-service_* /root/templates/
diff --git a/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py b/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
index 4e1efe1..7310d0d 100644
--- a/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
+++ b/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
@@ -268,12 +268,13 @@ def ensure_python3_libraries(os_user):
         try:
             #manage_pkg('-y install', 'remote', 'python3-setuptools')
             manage_pkg('-y install', 'remote', 'python3-pip')
+            manage_pkg('-y install', 'remote', 'libkrb5-dev')
             sudo('pip3 install setuptools=={}'.format(os.environ['notebook_setuptools_version']))
             try:
-                sudo('pip3 install tornado=={0} ipython==7.9.0 ipykernel=={1} --no-cache-dir' \
+                sudo('pip3 install tornado=={0} ipython==7.9.0 ipykernel=={1} sparkmagic --no-cache-dir' \
                      .format(os.environ['notebook_tornado_version'], os.environ['notebook_ipykernel_version']))
             except:
-                sudo('pip3 install tornado=={0} ipython==5.0.0 ipykernel=={1} --no-cache-dir' \
+                sudo('pip3 install tornado=={0} ipython==5.0.0 ipykernel=={1} sparkmagic --no-cache-dir' \
                      .format(os.environ['notebook_tornado_version'], os.environ['notebook_ipykernel_version']))
             sudo('pip3 install -U pip=={} --no-cache-dir'.format(os.environ['conf_pip_version']))
             sudo('pip3 install boto3 --no-cache-dir')
diff --git a/infrastructure-provisioning/src/general/lib/os/fab.py b/infrastructure-provisioning/src/general/lib/os/fab.py
index d664a5b..2acb5f8 100644
--- a/infrastructure-provisioning/src/general/lib/os/fab.py
+++ b/infrastructure-provisioning/src/general/lib/os/fab.py
@@ -180,9 +180,9 @@ def configure_jupyter(os_user, jupyter_conf_file, templates_dir, jupyter_version
             sudo("sed -i 's|OS_USR|{}|' /tmp/jupyter-notebook.service".format(os_user))
             http_proxy = run('echo $http_proxy')
             https_proxy = run('echo $https_proxy')
-            sudo('sed -i \'/\[Service\]/ a\Environment=\"HTTP_PROXY={}\"\'  /tmp/jupyter-notebook.service'.format(
+            #sudo('sed -i \'/\[Service\]/ a\Environment=\"HTTP_PROXY={}\"\'  /tmp/jupyter-notebook.service'.format(
                 http_proxy))
-            sudo('sed -i \'/\[Service\]/ a\Environment=\"HTTPS_PROXY={}\"\'  /tmp/jupyter-notebook.service'.format(
+            #sudo('sed -i \'/\[Service\]/ a\Environment=\"HTTPS_PROXY={}\"\'  /tmp/jupyter-notebook.service'.format(
                 https_proxy))
             java_home = run("update-alternatives --query java | grep -o \'/.*/java-8.*/jre\'").splitlines()[0]
             sudo('sed -i \'/\[Service\]/ a\Environment=\"JAVA_HOME={}\"\'  /tmp/jupyter-notebook.service'.format(
diff --git a/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py b/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py
index ed3daee..46cda9c 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py
@@ -52,6 +52,7 @@ parser.add_argument('--os_user', type=str, default='')
 parser.add_argument('--pip_mirror', type=str, default='')
 parser.add_argument('--numpy_version', type=str, default='')
 parser.add_argument('--application', type=str, default='')
+parser.add_argument('--master_ip', type=str, default='')
 args = parser.parse_args()
 
 emr_dir = '/opt/' + args.emr_version + '/jars/'
@@ -162,11 +163,27 @@ def add_breeze_library_emr(args):
     local(""" sudo bash -c "sed -i '/spark.driver.extraClassPath/s/$/:\/opt\/""" + args.emr_version +
           """\/jars\/usr\/other\/*/' """ + spark_defaults_path + """" """)
 
+def install_sparkamagic_kernels(args):
+    try:
+        local('sudo jupyter nbextension enable --py --sys-prefix widgetsnbextension')
+        sparkmagic_dir = local("sudo pip3 show sparkmagic | grep 'Location: ' | awk '{print $2}'", capture=True)
+        local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/sparkkernel --user'.format(sparkmagic_dir))
+        local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/pysparkkernel --user'.format(sparkmagic_dir))
+        local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/sparkrkernel --user'.format(sparkmagic_dir))
+        local('mkdir -p /home/' + args.os_user + '/.sparkmagic')
+        local('cp -f /tmp/sparkmagic_config_template.json /home/' + args.os_user + '/.sparkmagic/config.json')
+        local('sed -i \'s|LIVY_HOST|{0}|g\' /home/{1}/.sparkmagic/config.json'.format(
+                args.master_ip, args.os_user))
+    except:
+        sys.exit(1)
+
+
 
 if __name__ == "__main__":
     if args.dry_run == 'true':
         parser.print_help()
     else:
+        install_sparkamagic_kernels(args)
         result = prepare(emr_dir, yarn_dir)
         if result == False :
             jars(args, emr_dir)
diff --git a/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py b/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py
index fb29f0a..0b82ce1 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py
@@ -50,6 +50,7 @@ def configure_notebook(args):
     templates_dir = '/root/templates/'
     files_dir = '/root/files/'
     scripts_dir = '/root/scripts/'
+    put(templates_dir + 'sparkmagic_config_template.json', '/tmp/sparkmagic_config_template.json')
     put(templates_dir + 'pyspark_dataengine-service_template.json', '/tmp/pyspark_dataengine-service_template.json')
     put(templates_dir + 'r_dataengine-service_template.json', '/tmp/r_dataengine-service_template.json')
     put(templates_dir + 'toree_dataengine-service_template.json','/tmp/toree_dataengine-service_template.json')
@@ -92,9 +93,13 @@ if __name__ == "__main__":
             print(r_version)
     else:
         r_version = 'false'
+    cluster_id = get_emr_id_by_name(args.cluster_name)
+    master_instances = get_emr_instances_list(cluster_id, 'MASTER')
+    master_ip = master_instances[0].get('PrivateIpAddress')
     sudo("/usr/bin/python /usr/local/bin/jupyter_dataengine-service_create_configs.py --bucket " + args.bucket
          + " --cluster_name " + args.cluster_name + " --emr_version " + args.emr_version + " --spark_version "
          + spark_version + " --scala_version " + scala_version + " --r_version " + r_version + " --hadoop_version "
          + hadoop_version + " --region " + args.region + " --excluded_lines '" + args.emr_excluded_spark_properties
          + "' --project_name " + args.project_name + " --os_user " + args.os_user + " --pip_mirror "
-         + args.pip_mirror + " --numpy_version " + numpy_version + " --application " + args.application)
+         + args.pip_mirror + " --numpy_version " + numpy_version + " --application "
+         + args.application + " --master_ip " + master_ip)
diff --git a/infrastructure-provisioning/src/general/templates/os/sparkmagic_config_template.json b/infrastructure-provisioning/src/general/templates/os/sparkmagic_config_template.json
new file mode 100644
index 0000000..e6fa8ef
--- /dev/null
+++ b/infrastructure-provisioning/src/general/templates/os/sparkmagic_config_template.json
@@ -0,0 +1,20 @@
+{
+  "kernel_python_credentials" : {
+    "username": "",
+    "password": "",
+    "url": "http://LIVY_HOST:8998",
+    "auth": "None"
+  },
+  "kernel_scala_credentials" : {
+    "username": "",
+    "password": "",
+    "url": "http://LIVY_HOST:8998",
+    "auth": "None"
+  },
+  "kernel_r_credentials": {
+    "username": "",
+    "password": "",
+    "url": "http://LIVY_HOST:8998",
+    "auth": "None"
+  }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@dlab.apache.org
For additional commands, e-mail: commits-help@dlab.apache.org