Posted to commits@datalab.apache.org by lf...@apache.org on 2021/06/14 07:20:10 UTC

[incubator-datalab] 01/01: [DATALAB-2414]: made dataengine use virtualenv

This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a commit to branch DATALAB-2414
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git

commit 867c2330fd3f08e1d015d94b5bef808d2eca7624
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Fri Jun 11 17:49:43 2021 +0300

    [DATALAB-2414]: made dataengine use virtualenv
---
 .../src/dataengine/scripts/configure_dataengine.py     |  7 +++++++
 infrastructure-provisioning/src/general/lib/os/fab.py  | 18 ++++++++++++++----
 .../src/general/scripts/os/common_clean_instance.py    |  2 +-
 .../scripts/os/jupyter_dataengine_create_configs.py    |  4 ++--
 4 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/infrastructure-provisioning/src/dataengine/scripts/configure_dataengine.py b/infrastructure-provisioning/src/dataengine/scripts/configure_dataengine.py
index 19423cf..597a3a9 100644
--- a/infrastructure-provisioning/src/dataengine/scripts/configure_dataengine.py
+++ b/infrastructure-provisioning/src/dataengine/scripts/configure_dataengine.py
@@ -53,6 +53,8 @@ cmake_version = os.environ['notebook_cmake_version']
 cntk_version = os.environ['notebook_cntk_version']
 mxnet_version = os.environ['notebook_mxnet_version']
 python3_version = "3.4"
+python_venv_version = os.environ['notebook_python_venv_version']
+python_venv_path = '/opt/python/python{0}/bin/python{1}'.format(python_venv_version, python_venv_version[:3])
 scala_link = "https://www.scala-lang.org/files/archive/"
 if args.region == 'cn-north-1':
     spark_link = "http://mirrors.hust.edu.cn/apache/spark/spark-" + spark_version + "/spark-" + spark_version + \
@@ -144,6 +146,10 @@ if __name__ == "__main__":
         print("Install python3 specific version")
         ensure_python3_specific_version(python3_version, args.os_user)
 
+    # INSTALL PYTHON IN VIRTUALENV
+    print("Configure Python Virtualenv")
+    ensure_python_venv(python_venv_version)
+
     # INSTALL SPARK AND CLOUD STORAGE JARS FOR SPARK
     print("Install Spark")
     ensure_local_spark(args.os_user, spark_link, spark_version, hadoop_version, local_spark_path)
@@ -207,6 +213,7 @@ if __name__ == "__main__":
         conn.sudo('mv /tmp/incubator-livy /opt/livy')
         conn.sudo('mkdir /var/log/livy')
         conn.put('/root/templates/livy-env.sh', '/tmp/livy-env.sh')
+        conn.sudo("sed -i 's|python3|{}|' /tmp/livy-env.sh".format(python_venv_path))
         conn.sudo('mv /tmp/livy-env.sh /opt/livy/conf/livy-env.sh')
         conn.sudo('chown -R -L {0}:{0} /opt/livy/'.format(args.os_user))
         conn.sudo('chown -R {0}:{0} /var/log/livy'.format(args.os_user))
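For reference, this is how the new python_venv_path resolves; the version below is a placeholder, since the real value is read from os.environ['notebook_python_venv_version']:

    # Illustrative only: "3.8.10" is a hypothetical venv version.
    python_venv_version = "3.8.10"
    python_venv_path = '/opt/python/python{0}/bin/python{1}'.format(
        python_venv_version, python_venv_version[:3])
    print(python_venv_path)   # /opt/python/python3.8.10/bin/python3.8

The [:3] slice extracts the major.minor prefix that names the interpreter binary under the build prefix, and that path is what the sed call above substitutes for python3 in livy-env.sh.
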
diff --git a/infrastructure-provisioning/src/general/lib/os/fab.py b/infrastructure-provisioning/src/general/lib/os/fab.py
index a4d055e..484bcf2 100644
--- a/infrastructure-provisioning/src/general/lib/os/fab.py
+++ b/infrastructure-provisioning/src/general/lib/os/fab.py
@@ -41,13 +41,23 @@ from patchwork import files
 def ensure_python_venv(python_venv_version):
     try:
         if not exists(conn, '/opt/python/python{}'.format(python_venv_version)):
-            conn.sudo('wget https://www.python.org/ftp/python/{0}/Python-{0}.tgz -O /tmp/Python-{0}.tgz'.format(python_venv_version))
+            conn.sudo('wget https://www.python.org/ftp/python/{0}/Python-{0}.tgz -O /tmp/Python-{0}.tgz'.format(
+                python_venv_version))
             conn.sudo('tar zxvf /tmp/Python-{}.tgz -C /tmp/'.format(python_venv_version))
-            conn.sudo('''bash -l -c 'cd /tmp/Python-{0} && ./configure --prefix=/opt/python/python{0} --with-zlib-dir=/usr/local/lib/ --with-ensurepip=install --enable-shared' '''.format(python_venv_version))
+            if os.environ['application'] in ('rstudio', 'tensor-rstudio'):
+                conn.sudo('''bash -l -c 'cd /tmp/Python-{0} && ./configure --prefix=/opt/python/python{0} '''
+                          '''--with-zlib-dir=/usr/local/lib/ --with-ensurepip=install --enable-shared' '''.format(
+                    python_venv_version))
+                conn.sudo(
+                    '''bash -l -c 'echo "export LD_LIBRARY_PATH=/opt/python/python{}/lib" >> /etc/profile' '''.format(
+                        python_venv_version))
+            else:
+                conn.sudo(
+                    '''bash -l -c 'cd /tmp/Python-{0} && ./configure --prefix=/opt/python/python{0} '''
+                    '''--with-zlib-dir=/usr/local/lib/ --with-ensurepip=install' '''.format(
+                        python_venv_version))
             conn.sudo('''bash -l -c 'cd /tmp/Python-{0} && make altinstall' '''.format(python_venv_version))
             conn.sudo('''bash -l -c 'cd /tmp && rm -rf Python-{}' '''.format(python_venv_version))
-            conn.sudo(
-                '''bash -l -c 'echo "export LD_LIBRARY_PATH=/opt/python/python{}/lib" >> /etc/profile' '''.format(python_venv_version))
             conn.sudo('''bash -l -c 'virtualenv /opt/python/python{0}' '''.format(python_venv_version))
             venv_command = 'source /opt/python/python{}/bin/activate'.format(python_venv_version)
             pip_command = '/opt/python/python{0}/bin/pip{1}'.format(python_venv_version, python_venv_version[:3])
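The rstudio and tensor-rstudio branch differs from the default one only in the --enable-shared configure flag and the LD_LIBRARY_PATH export; both branches then run make altinstall and create a virtualenv at the same prefix. A minimal check of the resulting environment could look like the sketch below (the host name and version are placeholders, not part of the commit):

    # Verification sketch only: assumes a reachable dataengine host and Fabric 2.
    from fabric import Connection

    python_venv_version = "3.8.10"        # placeholder; real value comes from the environment
    conn = Connection('dataengine-host')  # placeholder host
    venv_command = 'source /opt/python/python{}/bin/activate'.format(python_venv_version)
    # Activate the freshly created virtualenv and confirm which interpreter it resolves to.
    conn.sudo("bash -l -c '{} && python --version'".format(venv_command))
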
diff --git a/infrastructure-provisioning/src/general/scripts/os/common_clean_instance.py b/infrastructure-provisioning/src/general/scripts/os/common_clean_instance.py
index acc0a09..6b24356 100644
--- a/infrastructure-provisioning/src/general/scripts/os/common_clean_instance.py
+++ b/infrastructure-provisioning/src/general/scripts/os/common_clean_instance.py
@@ -154,5 +154,5 @@ if __name__ == "__main__":
                 clean_tensor_rstudio()
     else:
         print('Found default ami, do not make clean')
-    conn.close()
+    #conn.close()
     sys.exit(0)
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/general/scripts/os/jupyter_dataengine_create_configs.py b/infrastructure-provisioning/src/general/scripts/os/jupyter_dataengine_create_configs.py
index 880c414..94f7475 100644
--- a/infrastructure-provisioning/src/general/scripts/os/jupyter_dataengine_create_configs.py
+++ b/infrastructure-provisioning/src/general/scripts/os/jupyter_dataengine_create_configs.py
@@ -155,8 +155,8 @@ def install_sparkamagic_kernels(args):
         subprocess.run('sudo jupyter-kernelspec install {}/sparkmagic/kernels/sparkkernel --user'.format(sparkmagic_dir), shell=True, check=True)
         subprocess.run('sudo jupyter-kernelspec install {}/sparkmagic/kernels/pysparkkernel --user'.format(sparkmagic_dir), shell=True, check=True)
         subprocess.run('sudo jupyter-kernelspec install {}/sparkmagic/kernels/sparkrkernel --user'.format(sparkmagic_dir), shell=True, check=True)
-        pyspark_kernel_name = 'PySpark (Python-3.8 / Spark-{0} ) [{1}]'.format(args.spark_version,
-                                                                         args.cluster_name)
+        pyspark_kernel_name = 'PySpark (Python-{2} / Spark-{0} ) [{1}]'.format(args.spark_version,
+                                                                         args.cluster_name, os.environ['notebook_python_venv_version'][:3])
         subprocess.run('sed -i \'s|PySpark|{0}|g\' /home/{1}/.local/share/jupyter/kernels/pysparkkernel/kernel.json'.format(
             pyspark_kernel_name, args.os_user), shell=True, check=True)
         scala_version = subprocess.run('spark-submit --version 2>&1 | grep -o -P "Scala version \K.{0,7}"', capture_output=True, shell=True, check=True).stdout.decode('UTF-8').rstrip("\n\r")

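The PySpark kernel display name now takes its major.minor prefix from notebook_python_venv_version instead of the hard-coded Python-3.8. A worked example with placeholder values:

    # All values below are placeholders for illustration.
    spark_version = '3.0.1'
    cluster_name = 'des-example'
    python_prefix = '3.8.10'[:3]   # hypothetical notebook_python_venv_version
    pyspark_kernel_name = 'PySpark (Python-{2} / Spark-{0} ) [{1}]'.format(
        spark_version, cluster_name, python_prefix)
    print(pyspark_kernel_name)   # PySpark (Python-3.8 / Spark-3.0.1 ) [des-example]
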