Posted to commits@datalab.apache.org by lf...@apache.org on 2021/06/24 08:00:08 UTC

[incubator-datalab] branch DATALAB-2445 created (now dc417e8)

This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a change to branch DATALAB-2445
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git


      at dc417e8  [DATALAB-2429]: fixed zeppelin dataproc creation

This branch includes the following new commits:

     new dd3b031  [DATALAB-2429]: fixed rstudio dataproc
     new dc417e8  [DATALAB-2429]: fixed zeppelin dataproc creation

The 2 revisions listed above as "new" are entirely new to this
repository and are described in the commit emails that follow.  The
revisions listed as "add" were already present in the repository and
have only been added to this reference.


---------------------------------------------------------------------


[incubator-datalab] 01/02: [DATALAB-2429]: fixed rstudio dataproc

Posted by lf...@apache.org.

lfrolov pushed a commit to branch DATALAB-2445
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git

commit dd3b0312921cd8784dd9f751da45ae3f52d17645
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Thu Jun 24 09:53:27 2021 +0300

    [DATALAB-2429]: fixed rstudio dataproc
---
 .../gcp/rstudio_install_dataengine-service_kernels.py      | 14 ++++++++------
 .../general/templates/gcp/dataengine-service_cluster.json  |  4 ++--
 .../templates/gcp/dataengine-service_cluster_with_gpu.json |  4 ++--
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/infrastructure-provisioning/src/general/scripts/gcp/rstudio_install_dataengine-service_kernels.py b/infrastructure-provisioning/src/general/scripts/gcp/rstudio_install_dataengine-service_kernels.py
index d5119bf..be225cd 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/rstudio_install_dataengine-service_kernels.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/rstudio_install_dataengine-service_kernels.py
@@ -27,6 +27,7 @@ from datalab.actions_lib import *
 from datalab.meta_lib import *
 from fabric import *
 import subprocess
+import time
 
 parser = argparse.ArgumentParser()
 parser.add_argument('--bucket', type=str, default='')
@@ -53,10 +54,11 @@ def configure_notebook(args):
     conn.sudo('\cp /tmp/create_configs.py /usr/local/bin/create_configs.py')
     conn.sudo('chmod 755 /usr/local/bin/create_configs.py')
     conn.sudo('mkdir -p /usr/lib/python3.8/datalab/')
-    conn.run('mkdir -p /tmp/datalab_libs/')
-    subprocess.run('scp -i {} /usr/lib/python3.8/datalab/*.py {}:/tmp/datalab_libs/'.format(args.keyfile, args.notebook_ip), shell=True, check=True)
-    conn.run('chmod a+x /tmp/datalab_libs/*')
-    conn.sudo('mv /tmp/datalab_libs/* /usr/lib/python3.8/datalab/')
+    conn.run('mkdir -p /home/{}/datalab_libs/'.format(args.os_user))
+    conn.local('scp -i {0} /usr/lib/python3.8/datalab/*.py {1}@{2}:/home/{1}/datalab_libs/'.format(args.keyfile, args.os_user, args.notebook_ip))
+    conn.run('chmod a+x /home/{}/datalab_libs/*'.format(args.os_user))
+    conn.sudo('mv /home/{}/datalab_libs/* /usr/lib/python3.8/datalab/'.format(args.os_user))
+    conn.sudo('rm -rf /home/{}/datalab_libs/'.format(args.os_user))
     if exists(conn, '/usr/lib64'):
         conn.sudo('mkdir -p /usr/lib64/python3.8')
         conn.sudo('ln -fs /usr/lib/python3.8/datalab /usr/lib64/python3.8/datalab')
@@ -68,8 +70,8 @@ if __name__ == "__main__":
     configure_notebook(args)
     spark_version = datalab.actions_lib.GCPActions().get_cluster_app_version(args.bucket, args.project_name, args.cluster_name, 'spark')
     hadoop_version = datalab.actions_lib.GCPActions().get_cluster_app_version(args.bucket, args.project_name, args.cluster_name, 'hadoop')
-    conn.sudo('echo "[global]" > /etc/pip.conf; echo "proxy = $(cat /etc/profile | grep proxy | head -n1 | cut -f2 -d=)" >> /etc/pip.conf')
+    conn.sudo('''bash -l -c 'echo "[global]" > /etc/pip.conf; echo "proxy = $(cat /etc/profile | grep proxy | head -n1 | cut -f2 -d=)" >> /etc/pip.conf' ''')
     conn.sudo('''bash -l -c 'echo "use_proxy=yes" > ~/.wgetrc; proxy=$(cat /etc/profile | grep proxy | head -n1 | cut -f2 -d=); echo "http_proxy=$proxy" >> ~/.wgetrc; echo "https_proxy=$proxy" >> ~/.wgetrc' ''')
-    conn.sudo('unset http_proxy https_proxy; export gcp_project_id="{0}"; export conf_resource="{1}"; /usr/bin/python3 /usr/local/bin/create_configs.py --bucket {2} --cluster_name {3} --dataproc_version {4} --spark_version {5} --hadoop_version {6} --region {7} --user_name {8} --os_user {9} --pip_mirror {10} --application {11}'
+    conn.sudo('''bash -l -c 'unset http_proxy https_proxy; export gcp_project_id="{0}"; export conf_resource="{1}"; /usr/bin/python3 /usr/local/bin/create_configs.py --bucket {2} --cluster_name {3} --dataproc_version {4} --spark_version {5} --hadoop_version {6} --region {7} --user_name {8} --os_user {9} --pip_mirror {10} --application {11}' '''
          .format(os.environ['gcp_project_id'], os.environ['conf_resource'], args.bucket, args.cluster_name, args.dataproc_version, spark_version, hadoop_version,
                  args.region, args.project_name, args.os_user, args.pip_mirror, args.application))
diff --git a/infrastructure-provisioning/src/general/templates/gcp/dataengine-service_cluster.json b/infrastructure-provisioning/src/general/templates/gcp/dataengine-service_cluster.json
index 9f8367d..98f2300 100644
--- a/infrastructure-provisioning/src/general/templates/gcp/dataengine-service_cluster.json
+++ b/infrastructure-provisioning/src/general/templates/gcp/dataengine-service_cluster.json
@@ -20,7 +20,7 @@
             "numInstances": "NUM_MASTERS",
             "machineTypeUri": "MASTER_INSTANCE",
             "diskConfig": {
-                "bootDiskSizeGb": 30,
+                "bootDiskSizeGb": 35,
                 "numLocalSsds": 0
             }
         },
@@ -28,7 +28,7 @@
             "numInstances": "NUM_SLAVES",
             "machineTypeUri": "SLAVE_INSTANCE",
             "diskConfig": {
-                "bootDiskSizeGb": 30,
+                "bootDiskSizeGb": 35,
                 "numLocalSsds": 0
             }
         },
diff --git a/infrastructure-provisioning/src/general/templates/gcp/dataengine-service_cluster_with_gpu.json b/infrastructure-provisioning/src/general/templates/gcp/dataengine-service_cluster_with_gpu.json
index 4a58bdd..2cf9ef7 100644
--- a/infrastructure-provisioning/src/general/templates/gcp/dataengine-service_cluster_with_gpu.json
+++ b/infrastructure-provisioning/src/general/templates/gcp/dataengine-service_cluster_with_gpu.json
@@ -20,7 +20,7 @@
             "numInstances": "NUM_MASTERS",
             "machineTypeUri": "MASTER_INSTANCE",
             "diskConfig": {
-                "bootDiskSizeGb": 30,
+                "bootDiskSizeGb": 35,
                 "numLocalSsds": 0
             },
             "accelerators": [
@@ -34,7 +34,7 @@
             "numInstances": "NUM_SLAVES",
             "machineTypeUri": "SLAVE_INSTANCE",
             "diskConfig": {
-                "bootDiskSizeGb": 30,
+                "bootDiskSizeGb": 35,
                 "numLocalSsds": 0
             },
             "accelerators": [

---------------------------------------------------------------------


[incubator-datalab] 02/02: [DATALAB-2429]: fixed zeppelin dataproc creation

Posted by lf...@apache.org.

lfrolov pushed a commit to branch DATALAB-2445
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git

commit dc417e8c9521ae66016726b79c090b01d81322b6
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Thu Jun 24 10:59:41 2021 +0300

    [DATALAB-2429]: fixed zeppelin dataproc creation
---
 .../gcp/zeppelin_install_dataengine-service_kernels.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_install_dataengine-service_kernels.py b/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_install_dataengine-service_kernels.py
index 6170589..8269b93 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_install_dataengine-service_kernels.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_install_dataengine-service_kernels.py
@@ -57,10 +57,11 @@ def configure_notebook(args):
     conn.sudo('\cp /tmp/create_configs.py /usr/local/bin/create_configs.py')
     conn.sudo('chmod 755 /usr/local/bin/create_configs.py')
     conn.sudo('mkdir -p /usr/lib/python3.8/datalab/')
-    conn.run('mkdir -p /tmp/datalab_libs/')
-    conn.local('scp -i {} /usr/lib/python3.8/datalab/*.py {}:/tmp/datalab_libs/'.format(args.keyfile, args.notebook_ip))
-    conn.run('chmod a+x /tmp/datalab_libs/*')
-    conn.sudo('mv /tmp/datalab_libs/* /usr/lib/python3.8/datalab/')
+    conn.run('mkdir -p /home/{}/datalab_libs/'.format(args.os_user))
+    conn.local('scp -i {0} /usr/lib/python3.8/datalab/*.py {1}@{2}:/home/{1}/datalab_libs/'.format(args.keyfile, args.os_user, args.notebook_ip))
+    conn.run('chmod a+x /home/{}/datalab_libs/*'.format(args.os_user))
+    conn.sudo('mv /home/{}/datalab_libs/* /usr/lib/python3.8/datalab/'.format(args.os_user))
+    conn.sudo('rm -rf /home/{}/datalab_libs/'.format(args.os_user))
     if exists(conn, '/usr/lib64'):
         conn.sudo('mkdir -p /usr/lib64/python3.8')
         conn.sudo('ln -fs /usr/lib/python3.8/datalab /usr/lib64/python3.8/datalab')
@@ -73,8 +74,9 @@ if __name__ == "__main__":
     r_enabled = os.environ['notebook_r_enabled']
     spark_version = datalab.actions_lib.GCPActions().get_cluster_app_version(args.bucket, args.project_name, args.cluster_name, 'spark')
     hadoop_version = datalab.actions_lib.GCPActions().get_cluster_app_version(args.bucket, args.project_name, args.cluster_name, 'hadoop')
-    conn.sudo('echo "[global]" > /etc/pip.conf; echo "proxy = $(cat /etc/profile | grep proxy | head -n1 | cut -f2 -d=)" >> /etc/pip.conf')
+    conn.sudo('''bash -l -c 'echo "[global]" > /etc/pip.conf; echo "proxy = $(cat /etc/profile | grep proxy | head -n1 | cut -f2 -d=)" >> /etc/pip.conf' ''')
     conn.sudo('''bash -l -c 'echo "use_proxy=yes" > ~/.wgetrc; proxy=$(cat /etc/profile | grep proxy | head -n1 | cut -f2 -d=); echo "http_proxy=$proxy" >> ~/.wgetrc; echo "https_proxy=$proxy" >> ~/.wgetrc' ''')
-    conn.sudo('unset http_proxy https_proxy; export gcp_project_id="{0}"; export conf_resource="{1}"; /usr/bin/python3 /usr/local/bin/create_configs.py --bucket {2} --cluster_name {3} --dataproc_version {4} --spark_version {5} --hadoop_version {6} --region {7} --user_name {8} --os_user {9} --pip_mirror {10} --application {11} --livy_version {12} --multiple_clusters {13} --r_enabled {14}'
-         .format(os.environ['gcp_project_id'], os.environ['conf_resource'], args.bucket, args.cluster_name, args.dataproc_version, spark_version, hadoop_version,
-                 args.region, args.project_name, args.os_user, args.pip_mirror, args.application, os.environ['notebook_livy_version'], os.environ['notebook_multiple_clusters'], r_enabled))
\ No newline at end of file
+    conn.sudo('''bash -l -c 'unset http_proxy https_proxy; export gcp_project_id="{0}"; export conf_resource="{1}"; /usr/bin/python3 /usr/local/bin/create_configs.py --bucket {2} --cluster_name {3} --dataproc_version {4} --spark_version {5} --hadoop_version {6} --region {7} --user_name {8} --os_user {9} --pip_mirror {10} --application {11} --livy_version {12} --multiple_clusters {13} --r_enabled {14}' '''
+        .format(os.environ['gcp_project_id'], os.environ['conf_resource'], args.bucket, args.cluster_name, args.dataproc_version,
+                spark_version, hadoop_version, args.region, args.project_name, args.os_user, args.pip_mirror, args.application,
+                os.environ['notebook_livy_version'], os.environ['notebook_multiple_clusters'], r_enabled))
\ No newline at end of file
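
Both commits also wrap the proxy-setup command strings in bash -l -c. A
minimal sketch of the quoting involved, reusing the hypothetical conn
object from the sketch after the first commit, with the pip.conf
pipeline as the payload:

    # The payload uses only double quotes, so it can be wrapped
    # verbatim in the single quotes that bash -c expects.
    cmd = ('proxy=$(grep proxy /etc/profile | head -n1 | cut -f2 -d=); '
           'echo "[global]" > /etc/pip.conf; '
           'echo "proxy = $proxy" >> /etc/pip.conf')
    # Triple quotes keep the inner single quotes intact, so the remote
    # side sees: sudo bash -l -c '<cmd>'
    conn.sudo("""bash -l -c '{}' """.format(cmd))

Presumably this is what the commit fixes: without the wrapper, Fabric
prefixes only the command itself with sudo, so the redirects and the
$(...) substitution are evaluated by the unprivileged calling shell and
writing /etc/pip.conf can fail with a permission error. Wrapping in
bash -c moves them inside the root process, and -l sources the login
environment first.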

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org