You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datalab.apache.org by my...@apache.org on 2021/06/08 13:45:40 UTC

[incubator-datalab] 01/02: [DATALAB-2398] - [Jupyter with TensorFlow][GCP] python specific version via venv usage implemented

This is an automated email from the ASF dual-hosted git repository.

mykolabodnar pushed a commit to branch DATALAB-2398
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git

commit 5b71bea8100d421a3bf1c69473bffa90d535688e
Author: bodnarmykola <bo...@gmail.com>
AuthorDate: Tue Jun 8 16:44:35 2021 +0300

    [DATALAB-2398] - [Jupyter with TensorFlow][GCP] python specific version via venv usage implemented
---
 .../src/general/lib/os/debian/notebook_lib.py      | 62 +++++++++++-----------
 .../src/general/lib/os/fab.py                      | 12 +++++
 .../src/general/scripts/gcp/tensor_configure.py    | 16 ++++++
 .../src/general/templates/os/tensorboard.service   |  2 +-
 .../src/tensor/scripts/configure_tensor_node.py    | 12 +++--
 5 files changed, 70 insertions(+), 34 deletions(-)

diff --git a/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py b/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
index d6d7d63..ec725c2 100644
--- a/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
+++ b/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
@@ -293,37 +293,37 @@ def install_tensor(os_user, cuda_version, cuda_file_name,
     if not exists(datalab.fab.conn,'/home/{}/.ensure_dir/tensor_ensured'.format(os_user)):
         try:
             # install nvidia drivers
-            datalab.fab.conn.sudo('''bash -c 'echo "blacklist nouveau" >> /etc/modprobe.d/blacklist-nouveau.conf' ''')
-            datalab.fab.conn.sudo('''bash -c 'echo "options nouveau modeset=0" >> /etc/modprobe.d/blacklist-nouveau.conf' ''')
-            datalab.fab.conn.sudo('update-initramfs -u')
-            datalab.fab.conn.sudo('reboot', warn=True)
-            time.sleep(60)
-            manage_pkg('-y install', 'remote', 'dkms libglvnd-dev')
-            kernel_version = datalab.fab.conn.run('uname -r | tr -d "[..0-9-]"').stdout.replace('\n','')
-            if kernel_version == 'azure':
-                manage_pkg('-y install', 'remote', 'linux-modules-`uname -r`')
-            else:
+            #datalab.fab.conn.sudo('''bash -c 'echo "blacklist nouveau" >> /etc/modprobe.d/blacklist-nouveau.conf' ''')
+            #datalab.fab.conn.sudo('''bash -c 'echo "options nouveau modeset=0" >> /etc/modprobe.d/blacklist-nouveau.conf' ''')
+            #datalab.fab.conn.sudo('update-initramfs -u')
+            #datalab.fab.conn.sudo('reboot', warn=True)
+            #time.sleep(60)
+            ##manage_pkg('-y install', 'remote', 'dkms libglvnd-dev')
+            #kernel_version = datalab.fab.conn.run('uname -r | tr -d "[..0-9-]"').stdout.replace('\n','')
+            #if kernel_version == 'azure':
+            #    manage_pkg('-y install', 'remote', 'linux-modules-`uname -r`')
+            #else:
                 # legacy support for old kernels
-                datalab.fab.conn.sudo(''' bash -c 'if [[ $(apt-cache search linux-image-`uname -r`) ]]; then apt-get -y '''
-                '''install linux-image-`uname -r`; else apt-get -y install linux-modules-`uname -r`; fi;' ''')
-            datalab.fab.conn.sudo('wget https://us.download.nvidia.com/tesla/{0}/NVIDIA-Linux-x86_64-{0}.run -O '
-                 '/home/{1}/NVIDIA-Linux-x86_64-{0}.run'.format(nvidia_version, os_user))
-            datalab.fab.conn.sudo('/bin/bash /home/{0}/NVIDIA-Linux-x86_64-{1}.run -s --dkms'.format(os_user, nvidia_version))
-            datalab.fab.conn.sudo('rm -f /home/{0}/NVIDIA-Linux-x86_64-{1}.run'.format(os_user, nvidia_version))
+            #    datalab.fab.conn.sudo(''' bash -c 'if [[ $(apt-cache search linux-image-`uname -r`) ]]; then apt-get -y '''
+            #    '''install linux-image-`uname -r`; else apt-get -y install linux-modules-`uname -r`; fi;' ''')
+            #datalab.fab.conn.sudo('wget https://us.download.nvidia.com/tesla/{0}/NVIDIA-Linux-x86_64-{0}.run -O '
+            #     '/home/{1}/NVIDIA-Linux-x86_64-{0}.run'.format(nvidia_version, os_user))
+            #datalab.fab.conn.sudo('/bin/bash /home/{0}/NVIDIA-Linux-x86_64-{1}.run -s --dkms'.format(os_user, nvidia_version))
+            #datalab.fab.conn.sudo('rm -f /home/{0}/NVIDIA-Linux-x86_64-{1}.run'.format(os_user, nvidia_version))
             # install cuda
-            datalab.fab.conn.sudo('python3 -m pip install --upgrade pip=={0} wheel numpy=={1} --no-cache-dir'.format(
-                os.environ['conf_pip_version'], os.environ['notebook_numpy_version']))
-            datalab.fab.conn.sudo('wget -P /opt https://developer.download.nvidia.com/compute/cuda/{0}/Prod/local_installers/{1}'.format(
-                cuda_version, cuda_file_name))
-            datalab.fab.conn.sudo('apt -y install gcc-8 g++-8')
-            datalab.fab.conn.sudo('update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 8')
-            datalab.fab.conn.sudo('update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-8 8')
-            datalab.fab.conn.sudo('sh /opt/{} --silent --toolkit'.format(cuda_file_name))
+            #datalab.fab.conn.sudo('python3 -m pip install --upgrade pip=={0} wheel numpy=={1} --no-cache-dir'.format(
+            #    os.environ['conf_pip_version'], os.environ['notebook_numpy_version']))
+            #datalab.fab.conn.sudo('wget -P /opt https://developer.download.nvidia.com/compute/cuda/{0}/Prod/local_installers/{1}'.format(
+            #    cuda_version, cuda_file_name))
+            #datalab.fab.conn.sudo('apt -y install gcc-8 g++-8')
+            ##datalab.fab.conn.sudo('update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 8')
+            #datalab.fab.conn.sudo('update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-8 8')
+            #datalab.fab.conn.sudo('sh /opt/{} --silent --toolkit'.format(cuda_file_name))
             #datalab.fab.conn.sudo('update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 9')
             #datalab.fab.conn.sudo('update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 9')
-            datalab.fab.conn.sudo('mv /usr/local/cuda-{} /opt/'.format(cuda_version))
-            datalab.fab.conn.sudo('ln -s /opt/cuda-{0} /usr/local/cuda-{0}'.format(cuda_version))
-            datalab.fab.conn.sudo('rm -f /opt/{}'.format(cuda_file_name))
+            #datalab.fab.conn.sudo('mv /usr/local/cuda-{} /opt/'.format(cuda_version))
+            #datalab.fab.conn.sudo('ln -s /opt/cuda-{0} /usr/local/cuda-{0}'.format(cuda_version))
+            #datalab.fab.conn.sudo('rm -f /opt/{}'.format(cuda_file_name))
             # install cuDNN
             datalab.fab.conn.run('wget https://developer.download.nvidia.com/compute/redist/cudnn/v{0}/{1} -O /tmp/{1}'.format(
                 cudnn_version, cudnn_file_name))
@@ -336,11 +336,13 @@ def install_tensor(os_user, cuda_version, cuda_file_name,
             datalab.fab.conn.run('''bash -l -c 'echo "export LD_LIBRARY_PATH=\"$LD_LIBRARY_PATH:/opt/cudnn/lib64:/usr/local/cuda/lib64\"" >> ~/.bashrc' ''')
             # install TensorFlow and run TensorBoard
             # datalab.fab.conn.sudo('python2.7 -m pip install --upgrade https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-{}-cp27-none-linux_x86_64.whl --no-cache-dir'.format(tensorflow_version))
-            datalab.fab.conn.sudo('python3 -m pip install --upgrade tensorflow-gpu=={} --no-cache-dir'.format(tensorflow_version))
+            datalab.fab.install_venv_pip_pkg('tensorflow-gpu',tensorflow_version)
             datalab.fab.conn.sudo('mkdir /var/log/tensorboard')
             datalab.fab.conn.sudo('chown {0}:{0} -R /var/log/tensorboard'.format(os_user))
             datalab.fab.conn.put('{}tensorboard.service'.format(templates_dir), '/tmp/tensorboard.service')
             datalab.fab.conn.sudo("sed -i 's|OS_USR|{}|' /tmp/tensorboard.service".format(os_user))
+            venv_activation = 'source /opt/python/python{0}/bin/activate &&'.format(os.environ['notebook_python_venv_version'], os.environ['notebook_python_venv_version'][:3])
+            datalab.fab.conn.sudo("sed -i 's|VENV_ACTIVATION|{}|' /tmp/tensorboard.service".format(venv_activation))
             http_proxy = datalab.fab.conn.run('''bash -l -c 'echo $http_proxy' ''').stdout.replace('\n','')
             https_proxy = datalab.fab.conn.run('''bash -l -c 'echo $https_proxy' ''').stdout.replace('\n','')
             datalab.fab.conn.sudo('sed -i \'/\[Service\]/ a\Environment=\"HTTP_PROXY={}\"\'  /tmp/tensorboard.service'.format(
@@ -520,13 +522,13 @@ def install_cntk(os_user, cntk_version):
 
 def install_keras(os_user, keras_version):
     if not exists(datalab.fab.conn,'/home/{}/.ensure_dir/keras_ensured'.format(os_user)):
-        datalab.fab.conn.sudo('pip3 install keras=={} --no-cache-dir'.format(keras_version))
+        datalab.fab.install_venv_pip_pkg('keras',keras_version)
         datalab.fab.conn.sudo('touch /home/{}/.ensure_dir/keras_ensured'.format(os_user))
 
 
 def install_theano(os_user, theano_version):
     if not exists(datalab.fab.conn,'/home/{}/.ensure_dir/theano_ensured'.format(os_user)):
-        datalab.fab.conn.sudo('python3 -m pip install Theano=={} --no-cache-dir'.format(theano_version))
+        datalab.fab.install_venv_pip_pkg('Theano',theano_version)
         datalab.fab.conn.sudo('touch /home/{}/.ensure_dir/theano_ensured'.format(os_user))
 
 
diff --git a/infrastructure-provisioning/src/general/lib/os/fab.py b/infrastructure-provisioning/src/general/lib/os/fab.py
index 9447242..5f1d6b8 100644
--- a/infrastructure-provisioning/src/general/lib/os/fab.py
+++ b/infrastructure-provisioning/src/general/lib/os/fab.py
@@ -57,6 +57,18 @@ def ensure_python_venv(python_venv_version):
         print('Error:', str(err))
         sys.exit(1)
 
+def install_venv_pip_pkg(pkg_name, pkg_version = ''):
+    try:
+        venv_install_command = 'source /opt/python/python{0}/bin/activate && /opt/python/python{0}/bin/pip{1}'.format(
+            os.environ['notebook_python_venv_version'], os.environ['notebook_python_venv_version'][:3])
+        if pkg_version:
+            pip_pkg = '{}=={}'.format(pkg_name,pkg_version)
+        else:
+            pip_pkg = pkg_name
+        conn.sudo('''bash -l -c '{0} install {1} --no-cache-dir' '''.format(venv_install_command, pip_pkg))
+    except Exception as err:
+        print('Error:', str(err))
+        sys.exit(1)
 
 def ensure_pip(requisites):
     try:
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/tensor_configure.py b/infrastructure-provisioning/src/general/scripts/gcp/tensor_configure.py
index f26bb69..9708b3b 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/tensor_configure.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/tensor_configure.py
@@ -155,6 +155,22 @@ if __name__ == "__main__":
         GCPActions.remove_instance(notebook_config['instance_name'], notebook_config['zone'])
         sys.exit(1)
 
+    #Installing GPU drivers
+    try:
+        print('[INSTALLING GPU DRIVERS]')
+        params = "--hostname {} --keyfile {} --os_user {}".format(
+            instance_hostname, notebook_config['ssh_key_path'], notebook_config['datalab_ssh_user'])
+        try:
+            subprocess.run("~/scripts/{}.py {}".format('common_install_gpu', params), shell=True, check=True)
+        except:
+            datalab.fab.append_result("Failed installing users key")
+            raise Exception
+
+    except Exception as err:
+        datalab.fab.append_result("Failed to install GPU drivers.", str(err))
+        GCPActions.remove_instance(notebook_config['instance_name'], notebook_config['zone'])
+        sys.exit(1)
+
     # installing and configuring TensorFlow and all dependencies
     try:
         logging.info('[CONFIGURE TENSORFLOW NOTEBOOK INSTANCE]')
diff --git a/infrastructure-provisioning/src/general/templates/os/tensorboard.service b/infrastructure-provisioning/src/general/templates/os/tensorboard.service
index f7dee8b..bf7c949 100644
--- a/infrastructure-provisioning/src/general/templates/os/tensorboard.service
+++ b/infrastructure-provisioning/src/general/templates/os/tensorboard.service
@@ -25,7 +25,7 @@ Description=Tensorflow Tensorboard
 [Service]
 Type=simple
 PIDFile=/var/run/tensorboard.pid
-ExecStart=/bin/bash -c "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/cudnn/lib64:/usr/local/cuda/lib64; tensorboard --logdir=/var/log/tensorboard --host 0.0.0.0 --port 6006"
+ExecStart=/bin/bash -c "VENV_ACTIVATION export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/cudnn/lib64:/usr/local/cuda/lib64; tensorboard --logdir=/var/log/tensorboard --host 0.0.0.0 --port 6006"
 ExecStop=/bin/bash -c "for i in $(ps aux | grep 'tensorboard' | grep -v grep | awk '{print $2}'); do kill -9 $i; done"
 User=OS_USR
 Group=OS_USR
diff --git a/infrastructure-provisioning/src/tensor/scripts/configure_tensor_node.py b/infrastructure-provisioning/src/tensor/scripts/configure_tensor_node.py
index 6953719..c9b5e3f 100644
--- a/infrastructure-provisioning/src/tensor/scripts/configure_tensor_node.py
+++ b/infrastructure-provisioning/src/tensor/scripts/configure_tensor_node.py
@@ -49,6 +49,8 @@ jupyter_version = os.environ['notebook_jupyter_version']
 nvidia_version = os.environ['notebook_nvidia_version']
 theano_version = os.environ['notebook_theano_version']
 keras_version = os.environ['notebook_keras_version']
+python_venv_version = os.environ['notebook_python_venv_version']
+python_venv_path = '/opt/python/python{0}/bin/python{1}'.format(python_venv_version, python_venv_version[:3])
 if args.region == 'cn-north-1':
     spark_link = "http://mirrors.hust.edu.cn/apache/spark/spark-" + spark_version + "/spark-" + spark_version + \
                  "-bin-hadoop" + hadoop_version + ".tgz"
@@ -93,6 +95,10 @@ if __name__ == "__main__":
     print("Install Python 3 modules")
     ensure_python3_libraries(args.os_user)
 
+    # INSTALL PYTHON IN VIRTUALENV
+    print("Configure Python Virtualenv")
+    ensure_python_venv(python_venv_version)
+
     # INSTALL TENSORFLOW AND OTHER DEEP LEARNING LIBRARIES
     print("Install TensorFlow")
     install_tensor(args.os_user, cuda_version, cuda_file_name,
@@ -116,10 +122,10 @@ if __name__ == "__main__":
     configure_local_spark(jars_dir, templates_dir)
 
     # INSTALL JUPYTER KERNELS
-    print("Install pyspark local kernel for Jupyter")
-    ensure_pyspark_local_kernel(args.os_user, pyspark_local_path_dir, templates_dir, spark_version)
+    #print("Install pyspark local kernel for Jupyter")
+    #ensure_pyspark_local_kernel(args.os_user, pyspark_local_path_dir, templates_dir, spark_version)
     print("Install py3spark local kernel for Jupyter")
-    ensure_py3spark_local_kernel(args.os_user, py3spark_local_path_dir, templates_dir, spark_version)
+    ensure_py3spark_local_kernel(args.os_user, py3spark_local_path_dir, templates_dir, spark_version, python_venv_path, python_venv_version)
 
     # INSTALL UNGIT
     print("Install nodejs")

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org