Posted to commits@datalab.apache.org by lf...@apache.org on 2021/11/18 16:27:44 UTC

[incubator-datalab] branch develop updated: Fixed Deeplearning notebook and cluster creation failures

This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git


The following commit(s) were added to refs/heads/develop by this push:
     new e2d78dc  Fixed Deeplearning notebook and cluster creation failures
     new a571142  Merge pull request #1345 from MarianHladun/DL-2592
e2d78dc is described below

commit e2d78dcd442dadd92fbc86156f975da3466f3c98
Author: Marian_Hladun <ma...@gmail.com>
AuthorDate: Thu Nov 18 11:47:45 2021 +0200

    Fixed Deeplearning notebook and cluster creation failures
---
 .../src/dataengine/scripts/configure_dataengine.py |  5 ++++-
 .../scripts/configure_deep_learning_node.py        | 11 +++++++----
 .../src/general/lib/gcp/actions_lib.py             |  9 +++++++--
 .../src/general/lib/os/debian/notebook_lib.py      | 22 ++++++++++++++++++----
 .../src/general/lib/os/fab.py                      |  3 +++
 .../src/general/scripts/aws/dataengine_prepare.py  |  5 ++++-
 .../src/general/scripts/gcp/dataengine_prepare.py  |  5 ++++-
 .../os/deeplearning_install_dataengine_kernels.py  |  4 +++-
 8 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/infrastructure-provisioning/src/dataengine/scripts/configure_dataengine.py b/infrastructure-provisioning/src/dataengine/scripts/configure_dataengine.py
index d567e27..aeed90d 100644
--- a/infrastructure-provisioning/src/dataengine/scripts/configure_dataengine.py
+++ b/infrastructure-provisioning/src/dataengine/scripts/configure_dataengine.py
@@ -146,7 +146,10 @@ if __name__ == "__main__":
         ensure_python3_specific_version(python3_version, args.os_user)
 
     # INSTALL PYTHON IN VIRTUALENV
-    if os.environ['conf_deeplearning_cloud_ami'] != 'true':
+    if os.environ['conf_deeplearning_cloud_ami'] == 'true' and os.environ['conf_cloud_provider'] == 'azure' and \
+            os.environ['application'] == 'deeplearning':
+        print('Python Virtualenv already configured')
+    else:
         print("Configure Python Virtualenv")
         ensure_python_venv(python_venv_version)
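
Read together, the new branch above skips virtualenv creation only when the node is built from the Azure Deep Learning cloud AMI, which already ships a configured virtualenv. A hypothetical predicate (not part of the codebase) capturing the same condition:

    # Hypothetical helper; the environment variables are the ones DataLab's
    # provisioning scripts already export.
    import os

    def venv_preconfigured() -> bool:
        # Only the Deep Learning cloud AMI on Azure ships a ready-made Python
        # virtualenv, so only that combination skips ensure_python_venv().
        return (os.environ.get('conf_deeplearning_cloud_ami') == 'true'
                and os.environ.get('conf_cloud_provider') == 'azure'
                and os.environ.get('application') == 'deeplearning')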
 
diff --git a/infrastructure-provisioning/src/deeplearning/scripts/configure_deep_learning_node.py b/infrastructure-provisioning/src/deeplearning/scripts/configure_deep_learning_node.py
index 98848f8..5a0e535 100644
--- a/infrastructure-provisioning/src/deeplearning/scripts/configure_deep_learning_node.py
+++ b/infrastructure-provisioning/src/deeplearning/scripts/configure_deep_learning_node.py
@@ -143,11 +143,13 @@ if __name__ == "__main__":
         print("Install Python 3 modules")
         ensure_python3_libraries(args.os_user)
 
-        if os.environ['conf_deeplearning_cloud_ami'] == 'true' and os.environ['conf_cloud_provider'] != 'gcp':
-            # INSTALL AND CONFIGURE JUPYTER NOTEBOOK
+        # INSTALL AND CONFIGURE JUPYTER NOTEBOOK
+        if os.environ['conf_cloud_provider'] != 'gcp':
             print("Configure Jupyter")
             configure_jupyter(args.os_user, jupyter_conf_file, templates_dir, args.jupyter_version,
                               args.exploratory_name)
+        else:
+            configure_jupyterlab_at_gcp_image(args.os_user, args.exploratory_name)
 
         print("Configure Python Virtualenv")
         ensure_python_venv_deeplearn(python_venv_version)
@@ -163,8 +165,9 @@ if __name__ == "__main__":
         install_keras(args.os_user, keras_version)
         print("Installing Caffe2")
         install_caffe2(args.os_user, caffe2_version, cmake_version)
-        print("Install CNTK Python library")
-        install_cntk(args.os_user, cntk_version)
+        if os.environ['conf_cloud_provider'] != 'gcp':
+            print("Install CNTK Python library")
+            install_cntk(args.os_user, cntk_version)
         print("Installing MXNET")
         install_mxnet(args.os_user, mxnet_version)
 
diff --git a/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py b/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
index fa1d891..a699155 100644
--- a/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
@@ -1413,8 +1413,13 @@ def configure_local_spark(jars_dir, templates_dir, memory_type='driver'):
                                   .format(memory_type, spark_memory))
         if not exists(datalab.fab.conn,'/opt/spark/conf/spark-env.sh'):
             datalab.fab.conn.sudo('mv /opt/spark/conf/spark-env.sh.template /opt/spark/conf/spark-env.sh')
-        java_home = datalab.fab.conn.run("update-alternatives --query java | grep -o --color=never \'/.*/java-8.*/jre\'").stdout.splitlines()[0]
-        datalab.fab.conn.sudo('''bash -l -c 'echo "export JAVA_HOME={}" >> /opt/spark/conf/spark-env.sh' '''.format(java_home))
+        if os.environ['conf_deeplearning_cloud_ami'] == 'true' and os.environ['conf_cloud_provider'] == 'gcp':
+            java_home = '/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64/jre'
+        else:
+            java_home = datalab.fab.conn.run(
+                "update-alternatives --query java | grep -o --color=never \'/.*/java-8.*/jre\'").stdout.splitlines()[0]
+        datalab.fab.conn.sudo(
+            '''bash -l -c 'echo "export JAVA_HOME={}" >> /opt/spark/conf/spark-env.sh' '''.format(java_home))
         if 'spark_configurations' in os.environ:
             datalab_header = datalab.fab.conn.sudo('cat /tmp/notebook_spark-defaults_local.conf | grep "^#"').stdout
             spark_configurations = ast.literal_eval(os.environ['spark_configurations'])
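
In the hunk above, JAVA_HOME on the GCP Deep Learning image is now hardcoded to the AdoptOpenJDK 8 path that this commit installs, while other images still derive it from update-alternatives. A minimal standalone sketch of that resolution, using plain subprocess and re in place of DataLab's fabric connection:

    # Standalone sketch (not DataLab's actual helper) of the JAVA_HOME
    # resolution performed above.
    import os
    import re
    import subprocess

    def detect_java8_home() -> str:
        if (os.environ.get('conf_deeplearning_cloud_ami') == 'true'
                and os.environ.get('conf_cloud_provider') == 'gcp'):
            # Fixed path of the adoptopenjdk-8-hotspot package that the
            # ensure_jre_jdk() change further down installs on GCP images.
            return '/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64/jre'
        # Everywhere else, pull the java-8 JRE path out of update-alternatives.
        out = subprocess.run(['update-alternatives', '--query', 'java'],
                             capture_output=True, text=True, check=True).stdout
        match = re.search(r'/.*/java-8.*/jre', out)
        if match is None:
            raise RuntimeError('no Java 8 JRE registered with update-alternatives')
        return match.group(0)
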
diff --git a/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py b/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
index 5f007f7..a6d2aa1 100644
--- a/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
+++ b/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
@@ -225,13 +225,21 @@ def ensure_jre_jdk(os_user):
         try:
             manage_pkg('-y install', 'remote', 'default-jre')
             manage_pkg('-y install', 'remote', 'default-jdk')
-            manage_pkg('-y install', 'remote', 'openjdk-8-jdk')
-            manage_pkg('-y install', 'remote', 'openjdk-8-jre')
+            if os.environ['conf_deeplearning_cloud_ami'] == 'true' and os.environ['conf_cloud_provider'] == 'gcp':
+                datalab.fab.conn.sudo(
+                    'wget -qO - https://adoptopenjdk.jfrog.io/adoptopenjdk/api/gpg/key/public | sudo apt-key add -')
+                datalab.fab.conn.sudo('add-apt-repository --yes https://adoptopenjdk.jfrog.io/adoptopenjdk/deb/')
+                datalab.fab.conn.sudo('apt-get update')
+                datalab.fab.conn.sudo('apt-get install adoptopenjdk-8-hotspot -y')
+            else:
+                manage_pkg('-y install', 'remote', 'openjdk-8-jdk')
+                manage_pkg('-y install', 'remote', 'openjdk-8-jre')
             datalab.fab.conn.sudo('touch /home/' + os_user + '/.ensure_dir/jre_jdk_ensured')
         except:
             sys.exit(1)
 
 
+
 def ensure_additional_python_libs(os_user):
     if not exists(datalab.fab.conn,'/home/' + os_user + '/.ensure_dir/additional_python_libs_ensured'):
         try:
@@ -263,7 +271,12 @@ def ensure_python3_libraries(os_user):
             manage_pkg('-y install', 'remote', 'python3-pip')
             manage_pkg('-y install', 'remote', 'libkrb5-dev')
             datalab.fab.conn.sudo('pip3 install -U keyrings.alt backoff')
-            datalab.fab.conn.sudo('pip3 install setuptools=={}'.format(os.environ['notebook_setuptools_version']))
+            if os.environ['conf_cloud_provider'] == 'aws' and os.environ['conf_deeplearning_cloud_ami'] == 'true': # check whether this got added
+                datalab.fab.conn.sudo('pip3 install --upgrade --user pyqt5==5.12')
+                datalab.fab.conn.sudo('pip3 install --upgrade --user pyqtwebengine==5.12')
+                datalab.fab.conn.sudo('pip3 install setuptools')
+            else:
+                datalab.fab.conn.sudo('pip3 install setuptools=={}'.format(os.environ['notebook_setuptools_version']))
             try:
                 datalab.fab.conn.sudo('pip3 install tornado=={0} ipython==7.21.0 ipykernel=={1} sparkmagic --no-cache-dir' \
                      .format(os.environ['notebook_tornado_version'], os.environ['notebook_ipykernel_version']))
@@ -407,11 +420,12 @@ def install_livy_dependencies_emr(os_user):
 
 def install_nodejs(os_user):
     if not exists(datalab.fab.conn,'/home/{}/.ensure_dir/nodejs_ensured'.format(os_user)):
+        if os.environ['conf_cloud_provider'] == 'gcp' and os.environ['application'] == 'deeplearning':
+            datalab.fab.conn.sudo('add-apt-repository --remove ppa:deadsnakes/ppa -y')
         datalab.fab.conn.sudo('curl -sL https://deb.nodesource.com/setup_15.x | sudo -E bash -')
         manage_pkg('-y install', 'remote', 'nodejs')
         datalab.fab.conn.sudo('touch /home/{}/.ensure_dir/nodejs_ensured'.format(os_user))
 
-
 def install_os_pkg(requisites):
     status = list()
     error_parser = "Could not|No matching|Error:|E:|failed|Requires:"
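
The AdoptOpenJDK block added to ensure_jre_jdk above amounts to four shell steps: import the repository signing key, register the apt repository, refresh the package index, and install adoptopenjdk-8-hotspot. A rough standalone equivalent, as plain subprocess calls run as root rather than over datalab.fab.conn.sudo():

    # Illustration only; the commit itself drives the same commands through
    # datalab.fab.conn.sudo() on the remote notebook node.
    import subprocess

    def install_adoptopenjdk8_hotspot():
        steps = [
            # Import the AdoptOpenJDK repository signing key.
            'wget -qO - https://adoptopenjdk.jfrog.io/adoptopenjdk/api/gpg/key/public | apt-key add -',
            # Register the apt repository and refresh the package index.
            'add-apt-repository --yes https://adoptopenjdk.jfrog.io/adoptopenjdk/deb/',
            'apt-get update',
            # Install the HotSpot build of OpenJDK 8.
            'apt-get install -y adoptopenjdk-8-hotspot',
        ]
        for cmd in steps:
            subprocess.run(cmd, shell=True, check=True)
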
diff --git a/infrastructure-provisioning/src/general/lib/os/fab.py b/infrastructure-provisioning/src/general/lib/os/fab.py
index 722fb98..996fc7c 100644
--- a/infrastructure-provisioning/src/general/lib/os/fab.py
+++ b/infrastructure-provisioning/src/general/lib/os/fab.py
@@ -984,6 +984,9 @@ def configure_jupyter(os_user, jupyter_conf_file, templates_dir, jupyter_version
                 conn.sudo('rm -rf {}'.format(jupyter_conf_file))
             elif os.environ['application'] != 'tensor':
                 conn.sudo('pip3 install environment_kernels')
+            if os.environ['conf_cloud_provider'] == 'aws':
+                conn.sudo('chown -R {0} /home/{0}/.local'.format(os_user))
+                conn.sudo('chgrp -R {0} /home/{0}/.local'.format(os_user))
             conn.run('jupyter notebook --generate-config --config {}'.format(jupyter_conf_file))
             conn.run('mkdir -p ~/.jupyter/custom/')
             conn.run('echo "#notebook-container { width: auto; }" > ~/.jupyter/custom/custom.css')
diff --git a/infrastructure-provisioning/src/general/scripts/aws/dataengine_prepare.py b/infrastructure-provisioning/src/general/scripts/aws/dataengine_prepare.py
index 1ef501d..4593fc5 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/dataengine_prepare.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/dataengine_prepare.py
@@ -96,7 +96,10 @@ if __name__ == "__main__":
                                                                              data_engine['cluster_name'])}
         data_engine['cluster_nodes_billing_tag'] = {"Key": os.environ['conf_billing_tag_key'],
                                                     "Value": os.environ['conf_billing_tag_value']}
-        data_engine['primary_disk_size'] = '30'
+        if os.environ['conf_deeplearning_cloud_ami'] == 'true' and os.environ['application'] == 'deeplearning':
+            data_engine['primary_disk_size'] = '150'
+        else:
+            data_engine['primary_disk_size'] = '30'
         data_engine['instance_class'] = 'dataengine'
 
         if os.environ['conf_shared_image_enabled'] == 'false':
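
The next file applies the same sizing rule on GCP: clusters built from a Deep Learning cloud AMI get a 150 GB primary disk instead of the 30 GB default. A hypothetical helper expressing the rule once (the prepare scripts inline this check):

    # Hypothetical helper; sizes are in GB, kept as strings the way the
    # prepare scripts store them.
    import os

    def primary_disk_size() -> str:
        if (os.environ.get('conf_deeplearning_cloud_ami') == 'true'
                and os.environ.get('application') == 'deeplearning'):
            return '150'
        return '30'
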
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/dataengine_prepare.py b/infrastructure-provisioning/src/general/scripts/gcp/dataengine_prepare.py
index 2db42e6..84ee186 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/dataengine_prepare.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/dataengine_prepare.py
@@ -106,7 +106,10 @@ if __name__ == "__main__":
         data_engine['instance_count'] = int(os.environ['dataengine_instance_count'])
         data_engine['notebook_name'] = os.environ['notebook_instance_name']
 
-        data_engine['primary_disk_size'] = '30'
+        if os.environ['conf_deeplearning_cloud_ami'] == 'true' and os.environ['application'] == 'deeplearning':
+            data_engine['primary_disk_size'] = '150'
+        else:
+            data_engine['primary_disk_size'] = '30'
         data_engine['secondary_disk_size'] = os.environ['notebook_disk_size']
 
         data_engine['shared_image_enabled'] = os.environ['conf_shared_image_enabled']
diff --git a/infrastructure-provisioning/src/general/scripts/os/deeplearning_install_dataengine_kernels.py b/infrastructure-provisioning/src/general/scripts/os/deeplearning_install_dataengine_kernels.py
index 35b3025..9bef2b8 100644
--- a/infrastructure-provisioning/src/general/scripts/os/deeplearning_install_dataengine_kernels.py
+++ b/infrastructure-provisioning/src/general/scripts/os/deeplearning_install_dataengine_kernels.py
@@ -84,11 +84,13 @@ def install_sparkamagic_kernels(args):
         datalab.fab.conn.sudo('sed -i \'s|PySpark|{0}|g\' /home/{1}/.local/share/jupyter/kernels/pysparkkernel/kernel.json'.format(
             pyspark_kernel_name, args.os_user))
         datalab.fab.conn.sudo('bash -l -c "spark-submit --version" ').stdout.rstrip()
-        scala_version = datalab.fab.conn.sudo('''bash -l -c '"spark-submit --version 2>&1 | grep -o -P "Scala version \K.{0,7}' ''').stdout.rstrip("\n\r")
+        scala_version = datalab.fab.conn.sudo('''bash -l -c "spark-submit --version 2>&1 | grep -o -P 'Scala version \K.{0,7}'" ''').stdout.rstrip("\n\r")
         spark_kernel_name = 'Spark (Scala-{0} / Spark-{1} ) [{2}]'.format(scala_version, args.spark_version,
                                                                          args.cluster_name)
         datalab.fab.conn.sudo('sed -i \'s|Spark|{0}|g\' /home/{1}/.local/share/jupyter/kernels/sparkkernel/kernel.json'.format(
             spark_kernel_name, args.os_user))
+        if os.environ['conf_cloud_provider'] in ('gcp', 'aws') and os.environ['application'] == 'deeplearning':
+            datalab.fab.conn.sudo('apt install r-base -y')
         r_version = datalab.fab.conn.sudo(''' bash -l -c 'R --version | grep -o -P "R version \K.{0,5}"' ''').stdout.rstrip("\n\r")
         sparkr_kernel_name = 'SparkR (R-{0} / Spark-{1} ) [{2}]'.format(r_version, args.spark_version,
                                                                             args.cluster_name)
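
The replaced scala_version line above fixes the shell quoting around the grep; the intent is simply to pull the Scala version string out of the spark-submit banner. A standalone sketch of the same extraction, assuming spark-submit is on PATH:

    # Standalone sketch of the version parsing the corrected line performs remotely.
    import re
    import subprocess

    def spark_scala_version() -> str:
        # spark-submit prints its banner, including "Using Scala version 2.x.y",
        # on stderr, so stderr is captured as well.
        proc = subprocess.run('spark-submit --version', shell=True,
                              capture_output=True, text=True)
        match = re.search(r'Scala version ([\d.]+)', proc.stdout + proc.stderr)
        if match is None:
            raise RuntimeError('could not find a Scala version in spark-submit output')
        return match.group(1)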

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org