You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datalab.apache.org by my...@apache.org on 2021/04/06 10:44:37 UTC

[incubator-datalab] branch DATALAB-2248 created (now 2cb1df8)

This is an automated email from the ASF dual-hosted git repository.

mykolabodnar pushed a change to branch DATALAB-2248
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git.


      at 2cb1df8  [DataLab-2248] - [GCP] GPU for Jupyter added

This branch includes the following new commits:

     new 2cb1df8  [DataLab-2248] - [GCP] GPU for Jupyter added

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org


[incubator-datalab] 01/01: [DataLab-2248] - [GCP] GPU for Jupyter added

Posted by my...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

mykolabodnar pushed a commit to branch DATALAB-2248
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git

commit 2cb1df85f44c30b9c2ceb1afcb4d346a77e86de8
Author: bodnarmykola <bo...@gmail.com>
AuthorDate: Tue Apr 6 13:44:00 2021 +0300

    [DataLab-2248] - [GCP] GPU for Jupyter added
---
 .../src/general/lib/os/debian/notebook_lib.py      | 16 ++++++++
 .../src/general/lib/os/fab.py                      |  6 ++-
 .../src/general/scripts/gcp/common_install_gpu.py  | 47 ++++++++++++++++++++++
 .../general/scripts/gcp/common_prepare_notebook.py |  2 +-
 .../src/general/scripts/gcp/jupyter_configure.py   | 16 ++++++++
 5 files changed, 84 insertions(+), 3 deletions(-)

diff --git a/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py b/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
index 81976cf..62d1730 100644
--- a/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
+++ b/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
@@ -268,6 +268,22 @@ def ensure_python3_libraries(os_user):
         except:
             sys.exit(1)
 
+def install_nvidia_drivers(os_user):
+    """Install CUDA from NVIDIA's ubuntu2004 apt repo unless the nvidia_ensured marker exists; exit(1) on failure."""
+    if not exists(datalab.fab.conn,'/home/{}/.ensure_dir/nvidia_ensured'.format(os_user)):
+        try:
+            datalab.fab.conn.sudo('wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin')
+            datalab.fab.conn.sudo('mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600')
+            datalab.fab.conn.sudo('apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub')
+            datalab.fab.conn.sudo('add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /"')
+            manage_pkg('update', 'remote', '')
+            manage_pkg('-y install', 'remote', 'cuda')
+            # free the disk space taken by downloaded package archives
+            manage_pkg('clean', 'remote', 'all')
+            datalab.fab.conn.sudo('touch /home/{}/.ensure_dir/nvidia_ensured'.format(os_user))
+        except Exception as err:
+            print('Failed to install_nvidia_drivers: ', str(err))
+            sys.exit(1)
 
 def install_tensor(os_user, cuda_version, cuda_file_name,
                    cudnn_version, cudnn_file_name, tensorflow_version,
diff --git a/infrastructure-provisioning/src/general/lib/os/fab.py b/infrastructure-provisioning/src/general/lib/os/fab.py
index 62efd97..86f77ae 100644
--- a/infrastructure-provisioning/src/general/lib/os/fab.py
+++ b/infrastructure-provisioning/src/general/lib/os/fab.py
@@ -226,7 +226,10 @@ def configure_jupyter(os_user, jupyter_conf_file, templates_dir, jupyter_version
             conn.sudo('mkdir -p /mnt/var')
             conn.sudo('chown {0}:{0} /mnt/var'.format(os_user))
             if os.environ['application'] == 'jupyter':
-                conn.sudo('jupyter-kernelspec remove -f python3 || echo "Such kernel doesnt exists"')
+                try:
+                    conn.sudo('jupyter-kernelspec remove -f python3 || echo "Such kernel doesnt exists"')
+                except Exception as err:
+                    print('Error:', str(err))
             conn.sudo("systemctl daemon-reload")
             conn.sudo("systemctl enable jupyter-notebook")
             conn.sudo("systemctl start jupyter-notebook")
@@ -248,7 +251,6 @@ def remove_unexisting_kernel():
         conn.sudo('jupyter-kernelspec remove -f python3')
     except Exception as err:
         print('Error:', str(err))
-        sys.exit(1)
 
 def configure_docker(os_user):
     try:
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/common_install_gpu.py b/infrastructure-provisioning/src/general/scripts/gcp/common_install_gpu.py
new file mode 100644
index 0000000..733236d
--- /dev/null
+++ b/infrastructure-provisioning/src/general/scripts/gcp/common_install_gpu.py
@@ -0,0 +1,47 @@
+#!/usr/bin/python3
+
+# *****************************************************************************
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# ******************************************************************************
+
+import argparse
+import os
+import sys
+from datalab.fab import *
+from datalab.notebook_lib import *
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--hostname', type=str, default='')
+parser.add_argument('--keyfile', type=str, default='')
+parser.add_argument('--os_user', type=str, default='')
+args = parser.parse_args()
+
+##############
+# Run script #
+##############
+if __name__ == "__main__":
+    print("Configure connections")
+    conn = datalab.fab.init_datalab_connection(args.hostname, args.os_user, args.keyfile)
+    try:
+        print('Installing GPU drivers')
+        install_nvidia_drivers(args.os_user)
+    finally:
+        # install_nvidia_drivers() calls sys.exit(1) on failure; still close the connection.
+        conn.close()
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/common_prepare_notebook.py b/infrastructure-provisioning/src/general/scripts/gcp/common_prepare_notebook.py
index eb6ded2..2cb3f64 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/common_prepare_notebook.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/common_prepare_notebook.py
@@ -144,7 +144,7 @@ if __name__ == "__main__":
 
         notebook_config['gpu_accelerator_type'] = 'None'
 
-        if os.environ['application'] in ('tensor', 'tensor-rstudio', 'deeplearning'):
+        if os.environ['application'] in ('tensor', 'tensor-rstudio', 'deeplearning') or os.environ['gpu_enabled'] == 'True':
             notebook_config['gpu_accelerator_type'] = os.environ['gcp_gpu_accelerator_type']
 
         notebook_config['network_tag'] = '{0}-{1}-{2}-ps'.format(notebook_config['service_base_name'],
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/jupyter_configure.py b/infrastructure-provisioning/src/general/scripts/gcp/jupyter_configure.py
index 544e95a..14a48f6 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/jupyter_configure.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/jupyter_configure.py
@@ -237,6 +237,22 @@ if __name__ == "__main__":
             GCPActions.remove_image(notebook_config['expected_secondary_image_name'])
             sys.exit(1)
 
+    if os.environ['gpu_enabled'] == 'True':
+        try:
+            print('[INSTALLING GPU DRIVERS]')
+            params = "--hostname {} --keyfile {} --os_user {}".format(
+                instance_hostname, notebook_config['ssh_key_path'], notebook_config['datalab_ssh_user'])
+            try:
+                subprocess.run("~/scripts/{}.py {}".format('common_install_gpu', params), shell=True, check=True)
+            except:
+                datalab.fab.append_result("Failed installing GPU drivers")
+                raise Exception
+        # On any failure: record the error, remove the notebook instance, exit non-zero.
+        except Exception as err:
+            datalab.fab.append_result("Failed to install GPU drivers.", str(err))
+            GCPActions.remove_instance(notebook_config['instance_name'], notebook_config['zone'])
+            sys.exit(1)
+
     try:
         print('[SETUP EDGE REVERSE PROXY TEMPLATE]')
         logging.info('[SETUP EDGE REVERSE PROXY TEMPLATE]')

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org