You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datalab.apache.org by my...@apache.org on 2021/06/08 13:45:40 UTC
[incubator-datalab] 01/02: [DATALAB-2398] - [Jupyter with
TensorFlow][GCP] python specific version via venv usage implemented
This is an automated email from the ASF dual-hosted git repository.
mykolabodnar pushed a commit to branch DATALAB-2398
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git
commit 5b71bea8100d421a3bf1c69473bffa90d535688e
Author: bodnarmykola <bo...@gmail.com>
AuthorDate: Tue Jun 8 16:44:35 2021 +0300
[DATALAB-2398] - [Jupyter with TensorFlow][GCP] python specific version via venv usage implemented
---
.../src/general/lib/os/debian/notebook_lib.py | 62 +++++++++++-----------
.../src/general/lib/os/fab.py | 12 +++++
.../src/general/scripts/gcp/tensor_configure.py | 16 ++++++
.../src/general/templates/os/tensorboard.service | 2 +-
.../src/tensor/scripts/configure_tensor_node.py | 12 +++--
5 files changed, 70 insertions(+), 34 deletions(-)
diff --git a/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py b/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
index d6d7d63..ec725c2 100644
--- a/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
+++ b/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py
@@ -293,37 +293,37 @@ def install_tensor(os_user, cuda_version, cuda_file_name,
if not exists(datalab.fab.conn,'/home/{}/.ensure_dir/tensor_ensured'.format(os_user)):
try:
# install nvidia drivers
- datalab.fab.conn.sudo('''bash -c 'echo "blacklist nouveau" >> /etc/modprobe.d/blacklist-nouveau.conf' ''')
- datalab.fab.conn.sudo('''bash -c 'echo "options nouveau modeset=0" >> /etc/modprobe.d/blacklist-nouveau.conf' ''')
- datalab.fab.conn.sudo('update-initramfs -u')
- datalab.fab.conn.sudo('reboot', warn=True)
- time.sleep(60)
- manage_pkg('-y install', 'remote', 'dkms libglvnd-dev')
- kernel_version = datalab.fab.conn.run('uname -r | tr -d "[..0-9-]"').stdout.replace('\n','')
- if kernel_version == 'azure':
- manage_pkg('-y install', 'remote', 'linux-modules-`uname -r`')
- else:
+ #datalab.fab.conn.sudo('''bash -c 'echo "blacklist nouveau" >> /etc/modprobe.d/blacklist-nouveau.conf' ''')
+ #datalab.fab.conn.sudo('''bash -c 'echo "options nouveau modeset=0" >> /etc/modprobe.d/blacklist-nouveau.conf' ''')
+ #datalab.fab.conn.sudo('update-initramfs -u')
+ #datalab.fab.conn.sudo('reboot', warn=True)
+ #time.sleep(60)
+ ##manage_pkg('-y install', 'remote', 'dkms libglvnd-dev')
+ #kernel_version = datalab.fab.conn.run('uname -r | tr -d "[..0-9-]"').stdout.replace('\n','')
+ #if kernel_version == 'azure':
+ # manage_pkg('-y install', 'remote', 'linux-modules-`uname -r`')
+ #else:
# legacy support for old kernels
- datalab.fab.conn.sudo(''' bash -c 'if [[ $(apt-cache search linux-image-`uname -r`) ]]; then apt-get -y '''
- '''install linux-image-`uname -r`; else apt-get -y install linux-modules-`uname -r`; fi;' ''')
- datalab.fab.conn.sudo('wget https://us.download.nvidia.com/tesla/{0}/NVIDIA-Linux-x86_64-{0}.run -O '
- '/home/{1}/NVIDIA-Linux-x86_64-{0}.run'.format(nvidia_version, os_user))
- datalab.fab.conn.sudo('/bin/bash /home/{0}/NVIDIA-Linux-x86_64-{1}.run -s --dkms'.format(os_user, nvidia_version))
- datalab.fab.conn.sudo('rm -f /home/{0}/NVIDIA-Linux-x86_64-{1}.run'.format(os_user, nvidia_version))
+ # datalab.fab.conn.sudo(''' bash -c 'if [[ $(apt-cache search linux-image-`uname -r`) ]]; then apt-get -y '''
+ # '''install linux-image-`uname -r`; else apt-get -y install linux-modules-`uname -r`; fi;' ''')
+ #datalab.fab.conn.sudo('wget https://us.download.nvidia.com/tesla/{0}/NVIDIA-Linux-x86_64-{0}.run -O '
+ # '/home/{1}/NVIDIA-Linux-x86_64-{0}.run'.format(nvidia_version, os_user))
+ #datalab.fab.conn.sudo('/bin/bash /home/{0}/NVIDIA-Linux-x86_64-{1}.run -s --dkms'.format(os_user, nvidia_version))
+ #datalab.fab.conn.sudo('rm -f /home/{0}/NVIDIA-Linux-x86_64-{1}.run'.format(os_user, nvidia_version))
# install cuda
- datalab.fab.conn.sudo('python3 -m pip install --upgrade pip=={0} wheel numpy=={1} --no-cache-dir'.format(
- os.environ['conf_pip_version'], os.environ['notebook_numpy_version']))
- datalab.fab.conn.sudo('wget -P /opt https://developer.download.nvidia.com/compute/cuda/{0}/Prod/local_installers/{1}'.format(
- cuda_version, cuda_file_name))
- datalab.fab.conn.sudo('apt -y install gcc-8 g++-8')
- datalab.fab.conn.sudo('update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 8')
- datalab.fab.conn.sudo('update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-8 8')
- datalab.fab.conn.sudo('sh /opt/{} --silent --toolkit'.format(cuda_file_name))
+ #datalab.fab.conn.sudo('python3 -m pip install --upgrade pip=={0} wheel numpy=={1} --no-cache-dir'.format(
+ # os.environ['conf_pip_version'], os.environ['notebook_numpy_version']))
+ #datalab.fab.conn.sudo('wget -P /opt https://developer.download.nvidia.com/compute/cuda/{0}/Prod/local_installers/{1}'.format(
+ # cuda_version, cuda_file_name))
+ #datalab.fab.conn.sudo('apt -y install gcc-8 g++-8')
+ ##datalab.fab.conn.sudo('update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 8')
+ #datalab.fab.conn.sudo('update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-8 8')
+ #datalab.fab.conn.sudo('sh /opt/{} --silent --toolkit'.format(cuda_file_name))
#datalab.fab.conn.sudo('update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 9')
#datalab.fab.conn.sudo('update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 9')
- datalab.fab.conn.sudo('mv /usr/local/cuda-{} /opt/'.format(cuda_version))
- datalab.fab.conn.sudo('ln -s /opt/cuda-{0} /usr/local/cuda-{0}'.format(cuda_version))
- datalab.fab.conn.sudo('rm -f /opt/{}'.format(cuda_file_name))
+ #datalab.fab.conn.sudo('mv /usr/local/cuda-{} /opt/'.format(cuda_version))
+ #datalab.fab.conn.sudo('ln -s /opt/cuda-{0} /usr/local/cuda-{0}'.format(cuda_version))
+ #datalab.fab.conn.sudo('rm -f /opt/{}'.format(cuda_file_name))
# install cuDNN
datalab.fab.conn.run('wget https://developer.download.nvidia.com/compute/redist/cudnn/v{0}/{1} -O /tmp/{1}'.format(
cudnn_version, cudnn_file_name))
@@ -336,11 +336,13 @@ def install_tensor(os_user, cuda_version, cuda_file_name,
datalab.fab.conn.run('''bash -l -c 'echo "export LD_LIBRARY_PATH=\"$LD_LIBRARY_PATH:/opt/cudnn/lib64:/usr/local/cuda/lib64\"" >> ~/.bashrc' ''')
# install TensorFlow and run TensorBoard
# datalab.fab.conn.sudo('python2.7 -m pip install --upgrade https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-{}-cp27-none-linux_x86_64.whl --no-cache-dir'.format(tensorflow_version))
- datalab.fab.conn.sudo('python3 -m pip install --upgrade tensorflow-gpu=={} --no-cache-dir'.format(tensorflow_version))
+ datalab.fab.install_venv_pip_pkg('tensorflow-gpu',tensorflow_version)
datalab.fab.conn.sudo('mkdir /var/log/tensorboard')
datalab.fab.conn.sudo('chown {0}:{0} -R /var/log/tensorboard'.format(os_user))
datalab.fab.conn.put('{}tensorboard.service'.format(templates_dir), '/tmp/tensorboard.service')
datalab.fab.conn.sudo("sed -i 's|OS_USR|{}|' /tmp/tensorboard.service".format(os_user))
+ venv_activation = 'source /opt/python/python{0}/bin/activate &&'.format(os.environ['notebook_python_venv_version'], os.environ['notebook_python_venv_version'][:3])
+ datalab.fab.conn.sudo("sed -i 's|VENV_ACTIVATION|{}|' /tmp/tensorboard.service".format(venv_activation))
http_proxy = datalab.fab.conn.run('''bash -l -c 'echo $http_proxy' ''').stdout.replace('\n','')
https_proxy = datalab.fab.conn.run('''bash -l -c 'echo $https_proxy' ''').stdout.replace('\n','')
datalab.fab.conn.sudo('sed -i \'/\[Service\]/ a\Environment=\"HTTP_PROXY={}\"\' /tmp/tensorboard.service'.format(
@@ -520,13 +522,13 @@ def install_cntk(os_user, cntk_version):
def install_keras(os_user, keras_version):
if not exists(datalab.fab.conn,'/home/{}/.ensure_dir/keras_ensured'.format(os_user)):
- datalab.fab.conn.sudo('pip3 install keras=={} --no-cache-dir'.format(keras_version))
+ datalab.fab.install_venv_pip_pkg('keras',keras_version)
datalab.fab.conn.sudo('touch /home/{}/.ensure_dir/keras_ensured'.format(os_user))
def install_theano(os_user, theano_version):
if not exists(datalab.fab.conn,'/home/{}/.ensure_dir/theano_ensured'.format(os_user)):
- datalab.fab.conn.sudo('python3 -m pip install Theano=={} --no-cache-dir'.format(theano_version))
+ datalab.fab.install_venv_pip_pkg('Theano',theano_version)
datalab.fab.conn.sudo('touch /home/{}/.ensure_dir/theano_ensured'.format(os_user))
diff --git a/infrastructure-provisioning/src/general/lib/os/fab.py b/infrastructure-provisioning/src/general/lib/os/fab.py
index 9447242..5f1d6b8 100644
--- a/infrastructure-provisioning/src/general/lib/os/fab.py
+++ b/infrastructure-provisioning/src/general/lib/os/fab.py
@@ -57,6 +57,18 @@ def ensure_python_venv(python_venv_version):
print('Error:', str(err))
sys.exit(1)
+def install_venv_pip_pkg(pkg_name, pkg_version = ''):
+ try:
+ venv_install_command = 'source /opt/python/python{0}/bin/activate && /opt/python/python{0}/bin/pip{1}'.format(
+ os.environ['notebook_python_venv_version'], os.environ['notebook_python_venv_version'][:3])
+ if pkg_version:
+ pip_pkg = '{}=={}'.format(pkg_name,pkg_version)
+ else:
+ pip_pkg = pkg_name
+ conn.sudo('''bash -l -c '{0} install {1} --no-cache-dir' '''.format(venv_install_command, pip_pkg))
+ except Exception as err:
+ print('Error:', str(err))
+ sys.exit(1)
def ensure_pip(requisites):
try:
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/tensor_configure.py b/infrastructure-provisioning/src/general/scripts/gcp/tensor_configure.py
index f26bb69..9708b3b 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/tensor_configure.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/tensor_configure.py
@@ -155,6 +155,22 @@ if __name__ == "__main__":
GCPActions.remove_instance(notebook_config['instance_name'], notebook_config['zone'])
sys.exit(1)
+ #Installing GPU drivers
+ try:
+ print('[INSTALLING GPU DRIVERS]')
+ params = "--hostname {} --keyfile {} --os_user {}".format(
+ instance_hostname, notebook_config['ssh_key_path'], notebook_config['datalab_ssh_user'])
+ try:
+ subprocess.run("~/scripts/{}.py {}".format('common_install_gpu', params), shell=True, check=True)
+ except:
+ datalab.fab.append_result("Failed installing users key")
+ raise Exception
+
+ except Exception as err:
+ datalab.fab.append_result("Failed to install GPU drivers.", str(err))
+ GCPActions.remove_instance(notebook_config['instance_name'], notebook_config['zone'])
+ sys.exit(1)
+
# installing and configuring TensorFlow and all dependencies
try:
logging.info('[CONFIGURE TENSORFLOW NOTEBOOK INSTANCE]')
diff --git a/infrastructure-provisioning/src/general/templates/os/tensorboard.service b/infrastructure-provisioning/src/general/templates/os/tensorboard.service
index f7dee8b..bf7c949 100644
--- a/infrastructure-provisioning/src/general/templates/os/tensorboard.service
+++ b/infrastructure-provisioning/src/general/templates/os/tensorboard.service
@@ -25,7 +25,7 @@ Description=Tensorflow Tensorboard
[Service]
Type=simple
PIDFile=/var/run/tensorboard.pid
-ExecStart=/bin/bash -c "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/cudnn/lib64:/usr/local/cuda/lib64; tensorboard --logdir=/var/log/tensorboard --host 0.0.0.0 --port 6006"
+ExecStart=/bin/bash -c "VENV_ACTIVATION export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/cudnn/lib64:/usr/local/cuda/lib64; tensorboard --logdir=/var/log/tensorboard --host 0.0.0.0 --port 6006"
ExecStop=/bin/bash -c "for i in $(ps aux | grep 'tensorboard' | grep -v grep | awk '{print $2}'); do kill -9 $i; done"
User=OS_USR
Group=OS_USR
diff --git a/infrastructure-provisioning/src/tensor/scripts/configure_tensor_node.py b/infrastructure-provisioning/src/tensor/scripts/configure_tensor_node.py
index 6953719..c9b5e3f 100644
--- a/infrastructure-provisioning/src/tensor/scripts/configure_tensor_node.py
+++ b/infrastructure-provisioning/src/tensor/scripts/configure_tensor_node.py
@@ -49,6 +49,8 @@ jupyter_version = os.environ['notebook_jupyter_version']
nvidia_version = os.environ['notebook_nvidia_version']
theano_version = os.environ['notebook_theano_version']
keras_version = os.environ['notebook_keras_version']
+python_venv_version = os.environ['notebook_python_venv_version']
+python_venv_path = '/opt/python/python{0}/bin/python{1}'.format(python_venv_version, python_venv_version[:3])
if args.region == 'cn-north-1':
spark_link = "http://mirrors.hust.edu.cn/apache/spark/spark-" + spark_version + "/spark-" + spark_version + \
"-bin-hadoop" + hadoop_version + ".tgz"
@@ -93,6 +95,10 @@ if __name__ == "__main__":
print("Install Python 3 modules")
ensure_python3_libraries(args.os_user)
+ # INSTALL PYTHON IN VIRTUALENV
+ print("Configure Python Virtualenv")
+ ensure_python_venv(python_venv_version)
+
# INSTALL TENSORFLOW AND OTHER DEEP LEARNING LIBRARIES
print("Install TensorFlow")
install_tensor(args.os_user, cuda_version, cuda_file_name,
@@ -116,10 +122,10 @@ if __name__ == "__main__":
configure_local_spark(jars_dir, templates_dir)
# INSTALL JUPYTER KERNELS
- print("Install pyspark local kernel for Jupyter")
- ensure_pyspark_local_kernel(args.os_user, pyspark_local_path_dir, templates_dir, spark_version)
+ #print("Install pyspark local kernel for Jupyter")
+ #ensure_pyspark_local_kernel(args.os_user, pyspark_local_path_dir, templates_dir, spark_version)
print("Install py3spark local kernel for Jupyter")
- ensure_py3spark_local_kernel(args.os_user, py3spark_local_path_dir, templates_dir, spark_version)
+ ensure_py3spark_local_kernel(args.os_user, py3spark_local_path_dir, templates_dir, spark_version, python_venv_path, python_venv_version)
# INSTALL UNGIT
print("Install nodejs")
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org