Posted to commits@datalab.apache.org by lf...@apache.org on 2021/06/14 07:20:09 UTC

[incubator-datalab] branch DATALAB-2414 created (now 867c233)

This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a change to branch DATALAB-2414
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git.


      at 867c233  [DATALAB-2414]: made dataengine use virtualenv

This branch includes the following new commits:

     new 867c233  [DATALAB-2414]: made dataengine use virtualenv

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


[incubator-datalab] 01/01: [DATALAB-2414]: made dataengine use virtualenv


lfrolov pushed a commit to branch DATALAB-2414
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git

commit 867c2330fd3f08e1d015d94b5bef808d2eca7624
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Fri Jun 11 17:49:43 2021 +0300

    [DATALAB-2414]: made dataengine use virtualenv
---
 .../src/dataengine/scripts/configure_dataengine.py     |  7 +++++++
 infrastructure-provisioning/src/general/lib/os/fab.py  | 18 ++++++++++++++----
 .../src/general/scripts/os/common_clean_instance.py    |  2 +-
 .../scripts/os/jupyter_dataengine_create_configs.py    |  4 ++--
 4 files changed, 24 insertions(+), 7 deletions(-)
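
Note for readers of the diff below: the new python_venv_path in
configure_dataengine.py is derived from the notebook_python_venv_version
environment setting. A minimal sketch of the derivation, using an
illustrative version string (the real value comes from the environment
and is not part of this commit):

    python_venv_version = "3.8.10"  # illustrative; the script reads os.environ
    python_venv_path = '/opt/python/python{0}/bin/python{1}'.format(
        python_venv_version, python_venv_version[:3])
    # -> '/opt/python/python3.8.10/bin/python3.8'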

diff --git a/infrastructure-provisioning/src/dataengine/scripts/configure_dataengine.py b/infrastructure-provisioning/src/dataengine/scripts/configure_dataengine.py
index 19423cf..597a3a9 100644
--- a/infrastructure-provisioning/src/dataengine/scripts/configure_dataengine.py
+++ b/infrastructure-provisioning/src/dataengine/scripts/configure_dataengine.py
@@ -53,6 +53,8 @@ cmake_version = os.environ['notebook_cmake_version']
 cntk_version = os.environ['notebook_cntk_version']
 mxnet_version = os.environ['notebook_mxnet_version']
 python3_version = "3.4"
+python_venv_version = os.environ['notebook_python_venv_version']
+python_venv_path = '/opt/python/python{0}/bin/python{1}'.format(python_venv_version, python_venv_version[:3])
 scala_link = "https://www.scala-lang.org/files/archive/"
 if args.region == 'cn-north-1':
     spark_link = "http://mirrors.hust.edu.cn/apache/spark/spark-" + spark_version + "/spark-" + spark_version + \
@@ -144,6 +146,10 @@ if __name__ == "__main__":
         print("Install python3 specific version")
         ensure_python3_specific_version(python3_version, args.os_user)
 
+    # INSTALL PYTHON IN VIRTUALENV
+    print("Configure Python Virtualenv")
+    ensure_python_venv(python_venv_version)
+
     # INSTALL SPARK AND CLOUD STORAGE JARS FOR SPARK
     print("Install Spark")
     ensure_local_spark(args.os_user, spark_link, spark_version, hadoop_version, local_spark_path)
@@ -207,6 +213,7 @@ if __name__ == "__main__":
         conn.sudo('mv /tmp/incubator-livy /opt/livy')
         conn.sudo('mkdir /var/log/livy')
         conn.put('/root/templates/livy-env.sh', '/tmp/livy-env.sh')
+        conn.sudo("sed -i 's|python3|{}|' /tmp/livy-env.sh".format(python_venv_path))
         conn.sudo('mv /tmp/livy-env.sh /opt/livy/conf/livy-env.sh')
         conn.sudo('chown -R -L {0}:{0} /opt/livy/'.format(args.os_user))
         conn.sudo('chown -R {0}:{0} /var/log/livy'.format(args.os_user))
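
Note on the livy-env.sh change above: the added sed call rewrites the
literal "python3" in the template to the full virtualenv interpreter
path, so Livy runs against the virtualenv Python. The same substitution
in Python terms, with a hypothetical template line (the actual
livy-env.sh template is not part of this diff):

    python_venv_path = '/opt/python/python3.8.10/bin/python3.8'  # illustrative
    line = 'export PYSPARK_PYTHON=python3'  # hypothetical template content
    print(line.replace('python3', python_venv_path))
    # -> export PYSPARK_PYTHON=/opt/python/python3.8.10/bin/python3.8
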
diff --git a/infrastructure-provisioning/src/general/lib/os/fab.py b/infrastructure-provisioning/src/general/lib/os/fab.py
index a4d055e..484bcf2 100644
--- a/infrastructure-provisioning/src/general/lib/os/fab.py
+++ b/infrastructure-provisioning/src/general/lib/os/fab.py
@@ -41,13 +41,23 @@ from patchwork import files
 def ensure_python_venv(python_venv_version):
     try:
         if not exists(conn, '/opt/python/python{}'.format(python_venv_version)):
-            conn.sudo('wget https://www.python.org/ftp/python/{0}/Python-{0}.tgz -O /tmp/Python-{0}.tgz'.format(python_venv_version))
+            conn.sudo('wget https://www.python.org/ftp/python/{0}/Python-{0}.tgz -O /tmp/Python-{0}.tgz'.format(
+                python_venv_version))
             conn.sudo('tar zxvf /tmp/Python-{}.tgz -C /tmp/'.format(python_venv_version))
-            conn.sudo('''bash -l -c 'cd /tmp/Python-{0} && ./configure --prefix=/opt/python/python{0} --with-zlib-dir=/usr/local/lib/ --with-ensurepip=install --enable-shared' '''.format(python_venv_version))
+            if os.environ['application'] in ('rstudio', 'tensor-rstudio'):
+                conn.sudo('''bash -l -c 'cd /tmp/Python-{0} && ./configure --prefix=/opt/python/python{0} '''
+                          '''--with-zlib-dir=/usr/local/lib/ --with-ensurepip=install --enable-shared' '''.format(
+                    python_venv_version))
+                conn.sudo(
+                    '''bash -l -c 'echo "export LD_LIBRARY_PATH=/opt/python/python{}/lib" >> /etc/profile' '''.format(
+                        python_venv_version))
+            else:
+                conn.sudo(
+                    '''bash -l -c 'cd /tmp/Python-{0} && ./configure --prefix=/opt/python/python{0} '''
+                    '''--with-zlib-dir=/usr/local/lib/ --with-ensurepip=install' '''.format(
+                        python_venv_version))
             conn.sudo('''bash -l -c 'cd /tmp/Python-{0} && make altinstall' '''.format(python_venv_version))
             conn.sudo('''bash -l -c 'cd /tmp && rm -rf Python-{}' '''.format(python_venv_version))
-            conn.sudo(
-                '''bash -l -c 'echo "export LD_LIBRARY_PATH=/opt/python/python{}/lib" >> /etc/profile' '''.format(python_venv_version))
             conn.sudo('''bash -l -c 'virtualenv /opt/python/python{0}' '''.format(python_venv_version))
             venv_command = 'source /opt/python/python{}/bin/activate'.format(python_venv_version)
             pip_command = '/opt/python/python{0}/bin/pip{1}'.format(python_venv_version, python_venv_version[:3])
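
Note on ensure_python_venv above: for rstudio/tensor-rstudio images the
interpreter is built with --enable-shared, presumably so libpython is
available as a shared library, and LD_LIBRARY_PATH is exported in
/etc/profile so the loader can find it; other images get a plain build
and no profile edit. After the build, packages go through the
venv_command/pip_command pair shown in the closing context lines. A
hedged usage sketch (the final conn.sudo call is illustrative and not
part of this diff):

    python_venv_version = '3.8.10'  # illustrative; passed in by the caller
    venv_command = 'source /opt/python/python{}/bin/activate'.format(python_venv_version)
    pip_command = '/opt/python/python{0}/bin/pip{1}'.format(python_venv_version,
                                                            python_venv_version[:3])
    # illustrative install inside the venv, reusing fab.py's fabric connection:
    conn.sudo('''bash -l -c '{0} && {1} install numpy' '''.format(venv_command, pip_command))
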
diff --git a/infrastructure-provisioning/src/general/scripts/os/common_clean_instance.py b/infrastructure-provisioning/src/general/scripts/os/common_clean_instance.py
index acc0a09..6b24356 100644
--- a/infrastructure-provisioning/src/general/scripts/os/common_clean_instance.py
+++ b/infrastructure-provisioning/src/general/scripts/os/common_clean_instance.py
@@ -154,5 +154,5 @@ if __name__ == "__main__":
                 clean_tensor_rstudio()
     else:
         print('Found default ami, do not make clean')
-    conn.close()
+    #conn.close()
     sys.exit(0)
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/general/scripts/os/jupyter_dataengine_create_configs.py b/infrastructure-provisioning/src/general/scripts/os/jupyter_dataengine_create_configs.py
index 880c414..94f7475 100644
--- a/infrastructure-provisioning/src/general/scripts/os/jupyter_dataengine_create_configs.py
+++ b/infrastructure-provisioning/src/general/scripts/os/jupyter_dataengine_create_configs.py
@@ -155,8 +155,8 @@ def install_sparkamagic_kernels(args):
         subprocess.run('sudo jupyter-kernelspec install {}/sparkmagic/kernels/sparkkernel --user'.format(sparkmagic_dir), shell=True, check=True)
         subprocess.run('sudo jupyter-kernelspec install {}/sparkmagic/kernels/pysparkkernel --user'.format(sparkmagic_dir), shell=True, check=True)
         subprocess.run('sudo jupyter-kernelspec install {}/sparkmagic/kernels/sparkrkernel --user'.format(sparkmagic_dir), shell=True, check=True)
-        pyspark_kernel_name = 'PySpark (Python-3.8 / Spark-{0} ) [{1}]'.format(args.spark_version,
-                                                                         args.cluster_name)
+        pyspark_kernel_name = 'PySpark (Python-{2} / Spark-{0} ) [{1}]'.format(args.spark_version,
+                                                                         args.cluster_name, os.environ['notebook_python_venv_version'][:3])
         subprocess.run('sed -i \'s|PySpark|{0}|g\' /home/{1}/.local/share/jupyter/kernels/pysparkkernel/kernel.json'.format(
             pyspark_kernel_name, args.os_user), shell=True, check=True)
         scala_version = subprocess.run('spark-submit --version 2>&1 | grep -o -P "Scala version \K.{0,7}"', capture_output=True, shell=True, check=True).stdout.decode('UTF-8').rstrip("\n\r")
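
Note on the kernel-name change above: the Python version shown in the
PySpark kernel's display name now comes from notebook_python_venv_version
instead of the hard-coded "3.8". A worked sketch with illustrative values
(the Spark version, cluster name and venv version are placeholders, not
from this diff):

    pyspark_kernel_name = 'PySpark (Python-{2} / Spark-{0} ) [{1}]'.format(
        '3.0.1', 'my-cluster', '3.8.10'[:3])
    # -> 'PySpark (Python-3.8 / Spark-3.0.1 ) [my-cluster]'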

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org