You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datalab.apache.org by my...@apache.org on 2021/06/22 10:32:12 UTC
[incubator-datalab] 01/01: [DATALAB-2372] - [GCP] Deeplearning
deploy from cloud image implemented
This is an automated email from the ASF dual-hosted git repository.
mykolabodnar pushed a commit to branch DATALAB-2372
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git
commit e6d921eb3b81df1b56394cb043841617af75cd20
Author: bodnarmykola <bo...@gmail.com>
AuthorDate: Tue Jun 22 13:31:23 2021 +0300
[DATALAB-2372] - [GCP] Deeplearning deploy from cloud image implemented
---
.../src/base/scripts/install_user_key.py | 8 ++++--
.../scripts/configure_deep_learning_node.py | 32 ++++++++++++++++++++--
.../files/gcp/deeplearning_description.json | 2 +-
.../src/general/lib/gcp/meta_lib.py | 17 ++++++++++++
.../general/scripts/gcp/common_prepare_notebook.py | 20 +++++++++++---
5 files changed, 69 insertions(+), 10 deletions(-)
diff --git a/infrastructure-provisioning/src/base/scripts/install_user_key.py b/infrastructure-provisioning/src/base/scripts/install_user_key.py
index 3d417ab..d7a5faf 100644
--- a/infrastructure-provisioning/src/base/scripts/install_user_key.py
+++ b/infrastructure-provisioning/src/base/scripts/install_user_key.py
@@ -66,9 +66,11 @@ if __name__ == "__main__":
except:
print('Fail connection')
sys.exit(2)
-
- print("Ensuring safest ssh ciphers")
- ensure_ciphers()
+ try:
+ print("Ensuring safest ssh ciphers")
+ ensure_ciphers()
+ except:
+ print('Failed to install safest ssh ciphers')
print("Installing users key...")
try:
diff --git a/infrastructure-provisioning/src/deeplearning/scripts/configure_deep_learning_node.py b/infrastructure-provisioning/src/deeplearning/scripts/configure_deep_learning_node.py
index 00600e5..54f8601 100644
--- a/infrastructure-provisioning/src/deeplearning/scripts/configure_deep_learning_node.py
+++ b/infrastructure-provisioning/src/deeplearning/scripts/configure_deep_learning_node.py
@@ -90,6 +90,22 @@ def install_itorch(os_user):
conn.sudo('chown -R {0}:{0} /home/{0}/.local/share/jupyter/'.format(os_user))
conn.sudo('touch /home/{}/.ensure_dir/itorch_ensured'.format(os_user))
+def configure_jupyterlab_at_gcp_image(os_user, exploratory_name):
+ if not exists(conn, '/home/{}/.ensure_dir/jupyterlab_ensured'.format(os_user)):
+ jupyter_conf_file = '/home/jupyter/.jupyter/jupyter_notebook_config.py'
+ conn.sudo('''bash -l -c 'sed -i "s|c.NotebookApp|#c.NotebookApp|g" {}' '''.format(jupyter_conf_file))
+ conn.sudo('''bash -l -c "echo 'c.NotebookApp.ip = \\"0.0.0.0\\" ' >> {}" '''.format(jupyter_conf_file))
+ conn.sudo('''bash -l -c "echo 'c.NotebookApp.port = 8888' >> {}" '''.format(jupyter_conf_file))
+ conn.sudo('''bash -l -c "echo 'c.NotebookApp.base_url = \\"/{0}/\\"' >> {1}" '''.format(exploratory_name,
+ jupyter_conf_file))
+ conn.sudo('''bash -l -c "echo 'c.NotebookApp.open_browser = False' >> {}" '''.format(jupyter_conf_file))
+ conn.sudo('''bash -l -c "echo 'c.NotebookApp.allow_remote_access = True' >> {}" '''.format(jupyter_conf_file))
+ conn.sudo('''bash -l -c "echo 'c.NotebookApp.cookie_secret = b\\"{0}\\"' >> {1}" '''.format(id_generator(),
+ jupyter_conf_file))
+ conn.sudo('''bash -l -c "echo \\"c.NotebookApp.token = u''\\" >> {}" '''.format(jupyter_conf_file))
+ conn.sudo('systemctl restart jupyter')
+ conn.sudo('touch /home/{}/.ensure_dir/jupyterlab_ensured'.format(os_user))
+
if __name__ == "__main__":
print("Configure connections")
@@ -105,7 +121,16 @@ if __name__ == "__main__":
except:
sys.exit(1)
print("Mount additional volume")
- prepare_disk(args.os_user)
+ if os.environ['conf_cloud_provider'] == 'gcp' and os.environ['conf_deeplearning_cloud_ami'] == 'true':
+ print('Additional disk premounted by google image')
+ print('Installing nvidia drivers')
+ try:
+ conn.sudo('/opt/deeplearning/install-driver.sh')
+ except:
+ traceback.print_exc()
+ sys.exit(1)
+ else:
+ prepare_disk(args.os_user)
if os.environ['conf_deeplearning_cloud_ami'] == 'false':
# INSTALL LANGUAGES
@@ -157,10 +182,13 @@ if __name__ == "__main__":
ensure_additional_python_libs(args.os_user)
print("Install Matplotlib")
ensure_matplot(args.os_user)
- elif os.environ['conf_deeplearning_cloud_ami'] == 'true':
+ elif os.environ['conf_deeplearning_cloud_ami'] == 'true' and os.environ['conf_cloud_provider'] != 'gcp':
# CONFIGURE JUPYTER NOTEBOOK
print("Configure Jupyter")
configure_jupyter(args.os_user, jupyter_conf_file, templates_dir, args.jupyter_version, args.exploratory_name)
+ else:
+ configure_jupyterlab_at_gcp_image(args.os_user, args.exploratory_name)
+
# INSTALL UNGIT
print("Install nodejs")
diff --git a/infrastructure-provisioning/src/general/files/gcp/deeplearning_description.json b/infrastructure-provisioning/src/general/files/gcp/deeplearning_description.json
index 79ed687..f3dd752 100644
--- a/infrastructure-provisioning/src/general/files/gcp/deeplearning_description.json
+++ b/infrastructure-provisioning/src/general/files/gcp/deeplearning_description.json
@@ -20,7 +20,7 @@
"exploratory_environment_images" :
[
{"Image family": "common-cu110", "Description": "Google Deep Learning Image: Base, m67 CUDA11.0, A debian-10 Linux based image with CUDA 11.0 preinstalled."},
- {"Image family": "ccommon-cu100", "Description": "Google Deep Learning Image: Base, m67 CUDA10.0. A debian-10 Linux based image with CUDA 10.0 preinstalled."},
+ {"Image family": "common-cu100", "Description": "Google Deep Learning Image: Base, m67 CUDA10.0. A debian-10 Linux based image with CUDA 10.0 preinstalled."},
{"Image family": "common-cu92", "Description": "Google Deep Learning Image: Base, m67 CUDA 9.2, A Debian based image with CUDA 9.2 pre-installed."},
{"Image family": "pytorch-latest-gpu", "Description": "Google Deep Learning Image: PyTorch 1.8, m67 CUDA 110, A debian-10 Linux based image with PyTorch 1.8 pre-installed."},
{"Image family": "rapids-latest-gpu-experimental", "Description": "Google RAPIDS 0.5.1 with XGBoost, m64, RAPIDS 0.5.1 with XGBoost with CUDA 10.0."},
diff --git a/infrastructure-provisioning/src/general/lib/gcp/meta_lib.py b/infrastructure-provisioning/src/general/lib/gcp/meta_lib.py
index 583ec5f..be5d17b 100644
--- a/infrastructure-provisioning/src/general/lib/gcp/meta_lib.py
+++ b/infrastructure-provisioning/src/general/lib/gcp/meta_lib.py
@@ -394,6 +394,23 @@ class GCPMeta:
"error_message": str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout)}))
traceback.print_exc(file=sys.stdout)
+ def get_deeplearning_image_by_family(self, family_name):
+ try:
+ request = self.service.images().getFromFamily(project='deeplearning-platform-release', family=family_name)
+ try:
+ return request.execute()
+ except errors.HttpError as err:
+ if err.resp.status == 404:
+ return ''
+ else:
+ raise err
+ except Exception as err:
+ logging.info("Error with getting image by family: " + str(err) + "\n Traceback: " + traceback.print_exc(
+ file=sys.stdout))
+ append_result(str({"error": "Error with getting image by family",
+ "error_message": str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout)}))
+ traceback.print_exc(file=sys.stdout)
+
def get_disk(self, disk_name):
try:
request = self.service.disks().get(project=self.project, zone=os.environ['gcp_zone'], disk=disk_name)
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/common_prepare_notebook.py b/infrastructure-provisioning/src/general/scripts/gcp/common_prepare_notebook.py
index 2cb3f64..1c3c038 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/common_prepare_notebook.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/common_prepare_notebook.py
@@ -119,19 +119,31 @@ if __name__ == "__main__":
os.environ['application'], os.environ['notebook_image_name'].replace('_', '-').lower()) if (x != 'None' and x != '')
else notebook_config['expected_primary_image_name'])(str(os.environ.get('notebook_image_name')))
print('Searching pre-configured images')
- notebook_config['primary_image_name'] = GCPMeta.get_image_by_name(
- notebook_config['notebook_primary_image_name'])
+
+ if os.environ['conf_deeplearning_cloud_ami'] == 'true' and os.environ['application'] == 'deeplearning':
+ notebook_config['primary_image_name'] = GCPMeta.get_deeplearning_image_by_family(os.environ['notebook_image_name'])
+ if notebook_config['primary_image_name']:
+ deeplearning_ami = 'true'
+ else:
+ notebook_config['primary_image_name'] = GCPMeta.get_image_by_name(notebook_config['notebook_primary_image_name'])
+ deeplearning_ami = 'false'
if notebook_config['primary_image_name'] == '':
notebook_config['primary_image_name'] = os.environ['gcp_{}_image_name'.format(os.environ['conf_os_family'])]
else:
print('Pre-configured primary image found. Using: {}'.format(
notebook_config['primary_image_name'].get('name')))
- notebook_config['primary_image_name'] = 'global/images/{}'.format(
+ if deeplearning_ami == 'true':
+ notebook_config['primary_image_name'] = 'projects/deeplearning-platform-release/global/images/{}'.format(
+ notebook_config['primary_image_name'].get('name'))
+ else:
+ notebook_config['primary_image_name'] = 'global/images/{}'.format(
notebook_config['primary_image_name'].get('name'))
notebook_config['notebook_secondary_image_name'] = (lambda x: '{0}-{1}-{2}-{3}-secondary-image-{4}'.format(
notebook_config['service_base_name'], notebook_config['project_name'], notebook_config['endpoint_name'],
- os.environ['application'], os.environ['notebook_image_name'].replace('_', '-').lower()) if (x != 'None' and x != '')
+ os.environ['application'], os.environ['notebook_image_name'].replace('_', '-').lower()[:63]) if (x != 'None' and x != '')
else notebook_config['expected_secondary_image_name'])(str(os.environ.get('notebook_image_name')))
+ if notebook_config['notebook_secondary_image_name'][:63].endswith('-'):
+ notebook_config['notebook_secondary_image_name'] = notebook_config['notebook_secondary_image_name'][:63][:-1]
notebook_config['secondary_image_name'] = GCPMeta.get_image_by_name(
notebook_config['notebook_secondary_image_name'])
if notebook_config['secondary_image_name'] == '':
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org