You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@dlab.apache.org by my...@apache.org on 2020/07/13 08:41:03 UTC
[incubator-dlab] branch DLAB-515 updated: [DLAB-515] - [AWS] EMR kernels connection via sparkmagic/livy implemented
This is an automated email from the ASF dual-hosted git repository.
mykolabodnar pushed a commit to branch DLAB-515
in repository https://gitbox.apache.org/repos/asf/incubator-dlab.git
The following commit(s) were added to refs/heads/DLAB-515 by this push:
new 3521c5f [DLAB-515] - [AWS] EMR kernels connection via sparkmagic/livy implemented
3521c5f is described below
commit 3521c5f9ae61eaeb98413e04a785efb0f920a354
Author: bodnarmykola <bo...@gmail.com>
AuthorDate: Mon Jul 13 11:40:17 2020 +0300
[DLAB-515] - [AWS] EMR kernels connection via sparkmagic/livy implemented
---
.../src/general/files/aws/deeplearning_Dockerfile | 1 +
.../src/general/files/aws/jupyter_Dockerfile | 1 +
.../src/general/lib/os/debian/notebook_lib.py | 5 +++--
.../src/general/lib/os/fab.py | 4 ++--
.../aws/jupyter_dataengine-service_create_configs.py | 17 +++++++++++++++++
.../jupyter_install_dataengine-service_kernels.py | 7 ++++++-
.../templates/os/sparkmagic_config_template.json | 20 ++++++++++++++++++++
7 files changed, 50 insertions(+), 5 deletions(-)
diff --git a/infrastructure-provisioning/src/general/files/aws/deeplearning_Dockerfile b/infrastructure-provisioning/src/general/files/aws/deeplearning_Dockerfile
index fb6551f..587a1b6 100644
--- a/infrastructure-provisioning/src/general/files/aws/deeplearning_Dockerfile
+++ b/infrastructure-provisioning/src/general/files/aws/deeplearning_Dockerfile
@@ -42,6 +42,7 @@ COPY general/templates/os/inactive.service /root/templates/
COPY general/templates/os/inactive.timer /root/templates/
COPY general/files/os/toree-assembly-0.3.0.jar /root/files/
COPY general/files/os/toree_kernel.tar.gz /root/files/
+COPY general/templates/os/sparkmagic_config_template.json /root/templates/
COPY general/templates/os/pyspark_dataengine-service_template.json /root/templates/
COPY general/templates/os/r_dataengine-service_template.json /root/templates/
COPY general/templates/os/toree_dataengine-service_* /root/templates/
diff --git a/infrastructure-provisioning/src/general/files/aws/jupyter_Dockerfile b/infrastructure-provisioning/src/general/files/aws/jupyter_Dockerfile
index 4c83fac..a2d6198 100644
--- a/infrastructure-provisioning/src/general/files/aws/jupyter_Dockerfile
+++ b/infrastructure-provisioning/src/general/files/aws/jupyter_Dockerfile
@@ -35,6 +35,7 @@ COPY general/templates/os/pyspark_local_template.json /root/templates/
COPY general/templates/os/py3spark_local_template.json /root/templates/
COPY general/templates/os/pyspark_dataengine-service_template.json /root/templates/
COPY general/templates/os/r_dataengine-service_template.json /root/templates/
+COPY general/templates/os/sparkmagic_config_template.json /root/templates/
COPY general/templates/os/r_template.json /root/templates/
COPY general/templates/os/run_template.sh /root/templates/
COPY general/templates/os/toree_dataengine-service_* /root/templates/
diff --git a/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py b/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
index 4e1efe1..7310d0d 100644
--- a/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
+++ b/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
@@ -268,12 +268,13 @@ def ensure_python3_libraries(os_user):
try:
#manage_pkg('-y install', 'remote', 'python3-setuptools')
manage_pkg('-y install', 'remote', 'python3-pip')
+ manage_pkg('-y install', 'remote', 'libkrb5-dev')
sudo('pip3 install setuptools=={}'.format(os.environ['notebook_setuptools_version']))
try:
- sudo('pip3 install tornado=={0} ipython==7.9.0 ipykernel=={1} --no-cache-dir' \
+ sudo('pip3 install tornado=={0} ipython==7.9.0 ipykernel=={1} sparkmagic --no-cache-dir' \
.format(os.environ['notebook_tornado_version'], os.environ['notebook_ipykernel_version']))
except:
- sudo('pip3 install tornado=={0} ipython==5.0.0 ipykernel=={1} --no-cache-dir' \
+ sudo('pip3 install tornado=={0} ipython==5.0.0 ipykernel=={1} sparkmagic --no-cache-dir' \
.format(os.environ['notebook_tornado_version'], os.environ['notebook_ipykernel_version']))
sudo('pip3 install -U pip=={} --no-cache-dir'.format(os.environ['conf_pip_version']))
sudo('pip3 install boto3 --no-cache-dir')
diff --git a/infrastructure-provisioning/src/general/lib/os/fab.py b/infrastructure-provisioning/src/general/lib/os/fab.py
index d664a5b..2acb5f8 100644
--- a/infrastructure-provisioning/src/general/lib/os/fab.py
+++ b/infrastructure-provisioning/src/general/lib/os/fab.py
@@ -180,9 +180,9 @@ def configure_jupyter(os_user, jupyter_conf_file, templates_dir, jupyter_version
sudo("sed -i 's|OS_USR|{}|' /tmp/jupyter-notebook.service".format(os_user))
http_proxy = run('echo $http_proxy')
https_proxy = run('echo $https_proxy')
- sudo('sed -i \'/\[Service\]/ a\Environment=\"HTTP_PROXY={}\"\' /tmp/jupyter-notebook.service'.format(
+ #sudo('sed -i \'/\[Service\]/ a\Environment=\"HTTP_PROXY={}\"\' /tmp/jupyter-notebook.service'.format(
http_proxy))
- sudo('sed -i \'/\[Service\]/ a\Environment=\"HTTPS_PROXY={}\"\' /tmp/jupyter-notebook.service'.format(
+ #sudo('sed -i \'/\[Service\]/ a\Environment=\"HTTPS_PROXY={}\"\' /tmp/jupyter-notebook.service'.format(
https_proxy))
java_home = run("update-alternatives --query java | grep -o \'/.*/java-8.*/jre\'").splitlines()[0]
sudo('sed -i \'/\[Service\]/ a\Environment=\"JAVA_HOME={}\"\' /tmp/jupyter-notebook.service'.format(
diff --git a/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py b/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py
index ed3daee..46cda9c 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py
@@ -52,6 +52,7 @@ parser.add_argument('--os_user', type=str, default='')
parser.add_argument('--pip_mirror', type=str, default='')
parser.add_argument('--numpy_version', type=str, default='')
parser.add_argument('--application', type=str, default='')
+parser.add_argument('--master_ip', type=str, default='')
args = parser.parse_args()
emr_dir = '/opt/' + args.emr_version + '/jars/'
@@ -162,11 +163,27 @@ def add_breeze_library_emr(args):
local(""" sudo bash -c "sed -i '/spark.driver.extraClassPath/s/$/:\/opt\/""" + args.emr_version +
"""\/jars\/usr\/other\/*/' """ + spark_defaults_path + """" """)
+def install_sparkamagic_kernels(args):
+ try:
+ local('sudo jupyter nbextension enable --py --sys-prefix widgetsnbextension')
+ sparkmagic_dir = local("sudo pip3 show sparkmagic | grep 'Location: ' | awk '{print $2}'", capture=True)
+ local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/sparkkernel --user'.format(sparkmagic_dir))
+ local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/pysparkkernel --user'.format(sparkmagic_dir))
+ local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/sparkrkernel --user'.format(sparkmagic_dir))
+ local('mkdir -p /home/' + args.os_user + '/.sparkmagic')
+ local('cp -f /tmp/sparkmagic_config_template.json /home/' + args.os_user + '/.sparkmagic/config.json')
+ local('sed -i \'s|LIVY_HOST|{0}|g\' /home/{1}/.sparkmagic/config.json'.format(
+ args.master_ip, args.os_user))
+ except:
+ sys.exit(1)
+
+
if __name__ == "__main__":
if args.dry_run == 'true':
parser.print_help()
else:
+ install_sparkamagic_kernels(args)
result = prepare(emr_dir, yarn_dir)
if result == False :
jars(args, emr_dir)
diff --git a/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py b/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py
index fb29f0a..0b82ce1 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py
@@ -50,6 +50,7 @@ def configure_notebook(args):
templates_dir = '/root/templates/'
files_dir = '/root/files/'
scripts_dir = '/root/scripts/'
+ put(templates_dir + 'sparkmagic_config_template.json', '/tmp/sparkmagic_config_template.json')
put(templates_dir + 'pyspark_dataengine-service_template.json', '/tmp/pyspark_dataengine-service_template.json')
put(templates_dir + 'r_dataengine-service_template.json', '/tmp/r_dataengine-service_template.json')
put(templates_dir + 'toree_dataengine-service_template.json','/tmp/toree_dataengine-service_template.json')
@@ -92,9 +93,13 @@ if __name__ == "__main__":
print(r_version)
else:
r_version = 'false'
+ cluster_id = get_emr_id_by_name(args.cluster_name)
+ master_instances = get_emr_instances_list(cluster_id, 'MASTER')
+ master_ip = master_instances[0].get('PrivateIpAddress')
sudo("/usr/bin/python /usr/local/bin/jupyter_dataengine-service_create_configs.py --bucket " + args.bucket
+ " --cluster_name " + args.cluster_name + " --emr_version " + args.emr_version + " --spark_version "
+ spark_version + " --scala_version " + scala_version + " --r_version " + r_version + " --hadoop_version "
+ hadoop_version + " --region " + args.region + " --excluded_lines '" + args.emr_excluded_spark_properties
+ "' --project_name " + args.project_name + " --os_user " + args.os_user + " --pip_mirror "
- + args.pip_mirror + " --numpy_version " + numpy_version + " --application " + args.application)
+ + args.pip_mirror + " --numpy_version " + numpy_version + " --application "
+ + args.application + " --master_ip " + master_ip)
diff --git a/infrastructure-provisioning/src/general/templates/os/sparkmagic_config_template.json b/infrastructure-provisioning/src/general/templates/os/sparkmagic_config_template.json
new file mode 100644
index 0000000..e6fa8ef
--- /dev/null
+++ b/infrastructure-provisioning/src/general/templates/os/sparkmagic_config_template.json
@@ -0,0 +1,20 @@
+{
+ "kernel_python_credentials" : {
+ "username": "",
+ "password": "",
+ "url": "http://LIVY_HOST:8998",
+ "auth": "None"
+ },
+ "kernel_scala_credentials" : {
+ "username": "",
+ "password": "",
+ "url": "http://LIVY_HOST:8998",
+ "auth": "None"
+ },
+ "kernel_r_credentials": {
+ "username": "",
+ "password": "",
+ "url": "http://LIVY_HOST:8998",
+ "auth": "None"
+ }
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@dlab.apache.org
For additional commands, e-mail: commits-help@dlab.apache.org