You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datalab.apache.org by my...@apache.org on 2021/04/06 10:44:38 UTC

[incubator-datalab] 01/01: [DataLab-2248] - [GCP] GPU for Jupyter added

This is an automated email from the ASF dual-hosted git repository.

mykolabodnar pushed a commit to branch DATALAB-2248
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git

commit 2cb1df85f44c30b9c2ceb1afcb4d346a77e86de8
Author: bodnarmykola <bo...@gmail.com>
AuthorDate: Tue Apr 6 13:44:00 2021 +0300

    [DataLab-2248] - [GCP] GPU for Jupyter added
---
 .../src/general/lib/os/debian/notebook_lib.py      | 16 ++++++++
 .../src/general/lib/os/fab.py                      |  6 ++-
 .../src/general/scripts/gcp/common_install_gpu.py  | 47 ++++++++++++++++++++++
 .../general/scripts/gcp/common_prepare_notebook.py |  2 +-
 .../src/general/scripts/gcp/jupyter_configure.py   | 16 ++++++++
 5 files changed, 84 insertions(+), 3 deletions(-)

diff --git a/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py b/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
index 81976cf..62d1730 100644
--- a/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
+++ b/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
@@ -268,6 +268,22 @@ def ensure_python3_libraries(os_user):
         except:
             sys.exit(1)
 
+def install_nvidia_drivers(os_user):
+    """Add NVIDIA's ubuntu2004 CUDA apt repo and install 'cuda' once per host; print the error and exit(1) on failure."""
+    if not exists(datalab.fab.conn,'/home/{}/.ensure_dir/nvidia_ensured'.format(os_user)):  # marker file => already done
+        try:
+            datalab.fab.conn.sudo('wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin')
+            datalab.fab.conn.sudo('mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600')
+            datalab.fab.conn.sudo('apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub')
+            datalab.fab.conn.sudo('add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /"')
+            manage_pkg('update', 'remote', '')
+            manage_pkg('-y install', 'remote', 'cuda')
+            # clean space on disk
+            manage_pkg('clean', 'remote', 'all')
+            datalab.fab.conn.sudo('touch /home/{}/.ensure_dir/nvidia_ensured'.format(os_user))  # written only after a successful install
+        except Exception as err:
+            print('Failed to install_nvidia_drivers: ', str(err))
+            sys.exit(1)
 
 def install_tensor(os_user, cuda_version, cuda_file_name,
                    cudnn_version, cudnn_file_name, tensorflow_version,
diff --git a/infrastructure-provisioning/src/general/lib/os/fab.py b/infrastructure-provisioning/src/general/lib/os/fab.py
index 62efd97..86f77ae 100644
--- a/infrastructure-provisioning/src/general/lib/os/fab.py
+++ b/infrastructure-provisioning/src/general/lib/os/fab.py
@@ -226,7 +226,10 @@ def configure_jupyter(os_user, jupyter_conf_file, templates_dir, jupyter_version
             conn.sudo('mkdir -p /mnt/var')
             conn.sudo('chown {0}:{0} /mnt/var'.format(os_user))
             if os.environ['application'] == 'jupyter':
-                conn.sudo('jupyter-kernelspec remove -f python3 || echo "Such kernel doesnt exists"')
+                try:
+                    conn.sudo('jupyter-kernelspec remove -f python3 || echo "Such kernel doesnt exists"')
+                except Exception as err:
+                    print('Error:', str(err))
             conn.sudo("systemctl daemon-reload")
             conn.sudo("systemctl enable jupyter-notebook")
             conn.sudo("systemctl start jupyter-notebook")
@@ -248,7 +251,6 @@ def remove_unexisting_kernel():
         conn.sudo('jupyter-kernelspec remove -f python3')
     except Exception as err:
         print('Error:', str(err))
-        sys.exit(1)
 
 def configure_docker(os_user):
     try:
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/common_install_gpu.py b/infrastructure-provisioning/src/general/scripts/gcp/common_install_gpu.py
new file mode 100644
index 0000000..733236d
--- /dev/null
+++ b/infrastructure-provisioning/src/general/scripts/gcp/common_install_gpu.py
@@ -0,0 +1,47 @@
+#!/usr/bin/python3
+
+# *****************************************************************************
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# ******************************************************************************
+
+import argparse
+import os
+import sys
+from datalab.fab import *
+from datalab.notebook_lib import *
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--hostname', type=str, default='')
+parser.add_argument('--keyfile', type=str, default='')
+parser.add_argument('--os_user', type=str, default='')
+args = parser.parse_args()
+
+##############
+# Run script #
+##############
+if __name__ == "__main__":
+    print("Configure connections")
+    conn = datalab.fab.init_datalab_connection(args.hostname, args.os_user, args.keyfile)
+    try:
+        print('Installing GPU drivers')
+        install_nvidia_drivers(args.os_user)
+    finally:
+        # Runs even if the install step terminates the script via sys.exit(), so the connection is always closed.
+        conn.close()
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/common_prepare_notebook.py b/infrastructure-provisioning/src/general/scripts/gcp/common_prepare_notebook.py
index eb6ded2..2cb3f64 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/common_prepare_notebook.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/common_prepare_notebook.py
@@ -144,7 +144,7 @@ if __name__ == "__main__":
 
         notebook_config['gpu_accelerator_type'] = 'None'
 
-        if os.environ['application'] in ('tensor', 'tensor-rstudio', 'deeplearning'):
+        if os.environ['application'] in ('tensor', 'tensor-rstudio', 'deeplearning') or os.environ['gpu_enabled'] == 'True':
             notebook_config['gpu_accelerator_type'] = os.environ['gcp_gpu_accelerator_type']
 
         notebook_config['network_tag'] = '{0}-{1}-{2}-ps'.format(notebook_config['service_base_name'],
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/jupyter_configure.py b/infrastructure-provisioning/src/general/scripts/gcp/jupyter_configure.py
index 544e95a..14a48f6 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/jupyter_configure.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/jupyter_configure.py
@@ -237,6 +237,22 @@ if __name__ == "__main__":
             GCPActions.remove_image(notebook_config['expected_secondary_image_name'])
             sys.exit(1)
 
+    # Optional GPU step: when gpu_enabled is 'True', run common_install_gpu.py; on failure remove the instance and exit(1).
+    if os.environ.get('gpu_enabled') == 'True':  # .get: an unset flag skips the step instead of raising KeyError
+        try:
+            print('[INSTALLING GPU DRIVERS]')
+            params = "--hostname {} --keyfile {} --os_user {}".format(
+                instance_hostname, notebook_config['ssh_key_path'], notebook_config['datalab_ssh_user'])
+            try:
+                subprocess.run("~/scripts/{}.py {}".format('common_install_gpu', params), shell=True, check=True)
+            except:
+                datalab.fab.append_result("Failed installing GPU drivers")
+                raise Exception
+        except Exception as err:
+            datalab.fab.append_result("Failed to install GPU drivers.", str(err))
+            GCPActions.remove_instance(notebook_config['instance_name'], notebook_config['zone'])
+            sys.exit(1)
+
     try:
         print('[SETUP EDGE REVERSE PROXY TEMPLATE]')
         logging.info('[SETUP EDGE REVERSE PROXY TEMPLATE]')

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org